-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdefault_config.yml
53 lines (49 loc) · 2.77 KB
/
default_config.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
### train
# Training settings: device selection, optimisation hyperparameters,
# model/encoder choice, loss, evaluation metrics and vote weighting.
# NOTE(review): booleans in this file are spelled True/False; the canonical
# YAML spelling is lowercase true/false -- confirm the loader before changing.
device: -2 # -1: CPU; 0: single GPU; -2: dynamically pick a single GPU; a list such as [0,1,2,3]: multiple GPUs
num_epochs: 25
batch_size: 8192
learning_rate: 0.0001
load_pretrained_model: False
reset_best_eval_acc: True
model_type: MLR
# presumably the two encoder keys below only matter when this flag is True -- TODO confirm
use_language_model_encoder: False
language_model_encoder_name: roberta-base
# presumably must match the hidden size of the encoder named above -- TODO confirm
encoder_hidden_dim: 768
l2_normalization: 0.00001
loss_function: binary_crossentropy
eval_metrics: ["acc"] # metric names noted by the author: 'binary_crossentropy', "auc", "acc"
# weight
# Per-vote weighting options.
# presumably the weight of a downvote relative to an upvote -- TODO confirm
downvote_weight: 3
user_normalization: equal_total # null: no normalization; equal_total: each user's weight = 1/votes; equal_upvote_downvote: equal upvote weight and downvote weight for each user
minority_vote_normalization: False
# data processing
# Input paths, sampling, dataset splitting and feature selection for the
# Reddit submission/vote data.
posts_data_path: data/reddit/submission_info.txt
votes_data_path: data/reddit/44_million_votes.txt
# presumably caches the processed dataset at prepared_data_path and reloads it on later runs -- TODO confirm
save_and_load_prepared_data: True
# presumably the fraction of the data kept when sampling (1 = keep everything) -- TODO confirm
sample_ratio: 1
sample_method: ['most_votes'] # other values noted by the author: "SUBMISSION_ID", "USERNAME", "equal_up_down_votes", "add_weak_downvote", "selected_subreddits"
# presumably fractions of the data held out for the test and validation sets -- TODO confirm
testset_proportion: 0.2
validset_proportion: 0.2
selected_subreddits: [] # example entry: "r/politics"
categorical_features: ['USERNAME', 'AUTHOR'] # 'SUBMISSION_ID' is noted as a further option. "USERNAME" must be listed first; if "USERNAME" is not in categorical_features, the majority vote is used as the target vote for training
string_features: ['SUBREDDIT', "CREATED_TIME", 'NSFW', "SUBMISSION_URL", "SUBMISSION_TEXT"]
use_voted_users_feature: True # adds the "UPVOTED_USERS" and "DOWNVOTED_USERS" features
sample_part_voted_users: True
add_target_user_ratio: 0.35
prepared_data_path: data/reddit/prepared_data.pt
train_at_least_n_votes: 0 # for each post, train on "train_at_least_n_votes" votes and test on the other votes
train_test_different_submissions: False
submission_text_dict_path: data/reddit/submission_text_dict.pt
### curation modeling
# Settings for the curation stage: which subreddit(s) to use, thresholds,
# how users are grouped, and where figures are written.
# selected_subreddit: a single subreddit, e.g. "r/politics", or several joined
# with "_", e.g. "r/politics_r/Conservative_r/Liberal_r/Republican_r/democrats_r/VoteBlue".
selected_subreddit: null # null: select later
upvote_ratio_thres: 0.5
upvote_confidence_thres: 0.5
submission_source: "test_data" # "test_data" or "custom"
# user_grouping_method options: "votes", "neural", "all_user_as_group",
# "random_user_as_group", "single_user_as_group", "manual",
# "interest_r/Conservative_r/Liberal_r/Republican_r/democrats_r/VoteBlue",
# "predict_all_submissions", "none", or a combination (separate them with a space).
# "votes" uses the voting matrix and PCA for clustering;
# "none" means no curation (broadcast).
user_grouping_method: "votes" # see the option list above
# presumably only consulted when user_grouping_method includes "manual" -- TODO confirm
manual_user_groups: null
analyze_post: True # whether to run sentiment analysis, classification and entity analysis
max_test_submissions_per_subreddit: 500
active_user_votes_thres: 5
# presumably lower/upper bounds on the number of users in a group -- TODO confirm
group_user_num_lower_thres: 1
group_user_num_upper_thres: 40
preferred_submissions_venn_figure_dir: output/figures/preferred_subs