-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathconfig.py
78 lines (61 loc) · 1.75 KB
/
config.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
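"""
Main configuration file for Ver.

Guideline from the original authors: property names in this config file should
be written without using "_" so that they are easy to "get".
NOTE(review): most names currently defined in this file do contain "_";
confirm whether this guideline still applies.
"""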
from pathlib import Path
# --- Overlap ---
# Threshold used for join overlap.
join_overlap_th: float = 0.4

# --- Schema similarity ---
# Maximum distance considered for schema similarity.
max_distance_schema_similarity: int = 10

# --- Serde ---
# Base path plus the pickle file names used by the serde code.
serdepath: str = "./data"
signcollectionfile: str = "sigcolfile.pickle"
graphfile: str = "graphfile.pickle"
graphcachedfile: str = "graphcachedfile.pickle"
datasetcolsfile: str = "datasetcols.pickle"
simrankfile: str = "simrankfile.pickle"
jgraphfile: str = "jgraphfile.pickle"

# --- DB connection ---
db_host: str = "localhost"
# NOTE: the port is deliberately kept as a string, exactly as before.
db_port: str = "9200"
# ---------------------------------------------------------------
# minhash
# ---------------------------------------------------------------
# `k` parameter for minhash (presumably the signature size -- TODO confirm).
k: int = 512

# ---------------------------------------------------------------
# DoD
# ---------------------------------------------------------------
separator: str = ","
join_chunksize: int = 1000
# Expressed as a fraction of total memory: 0.6 means 60%.
memory_limit_join_processing: float = 0.6
# ===============================================================
# Configs introduced after the refactoring
# ===============================================================
# Directory that contains this config file.
ver_base_path: Path = Path(__file__).parent

# ---------------------------------------------------------------
# Input data
# ---------------------------------------------------------------
input_data_type: str = "json"
text_csv_delimiter: str = ","

# Schema file names; the last three are relative paths under
# "dindex_builder/".
unified_profile_schema_name: str = "profile_schema.yml"
profile_schema_name: str = "dindex_builder/profile_index_schema_duckdb.txt"
graph_schema_name: str = "dindex_builder/graph_index_schema_kuzu.txt"
fts_schema_name: str = "dindex_builder/fts_index_schema_duckdb.txt"
# ---------------------------------------------------------------
# DIndex
# ---------------------------------------------------------------
# Backend selected for each index kind.
profile_index: str = "duckdb"
content_index: str = "simpleminhash"
fts_index: str = "duckdb"
graph_index: str = "kuzu"

# Profile index
profile_table_name: str = "profiles"
profile_duckdb_database_name: str = "profiles"

# Full-text-search (FTS) index
fts_data_table_name: str = "fts_data"
fts_index_column: str = "data"
# Using the same name as profile_duckdb_database_name places everything in
# one instance.
fts_duckdb_database_name: str = "profiles"

# Content index
minhash_lsh_threshold: float = 0.7
minhash_lsh_num_perm: int = 512

# Graph index
graph_kuzu_database_name: str = "graph_index"