forked from cahya-wirawan/luganda-asr
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path: finetuning_common_voice.json
35 lines (35 loc) · 1.12 KB
/
finetuning_common_voice.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
{
"model_name_or_path": "facebook/wav2vec2-large-xlsr-53",
"dataset_name": "dataset/common_voice",
"dataset_config_name": "lg",
"dataset_data_dir": "/content/data/cv-corpus-7.0-2021-07-21",
"dataset_min_filesize": 10000,
"dataset_min_textlength": 10,
"output_dir": "output/wav2vec2-common_voice-lg",
"overwrite_output_dir": true,
"num_train_epochs": 200,
"per_device_eval_batch_size": 8,
"per_device_train_batch_size": 8,
"evaluation_strategy": "steps",
"learning_rate": 1e-4,
"warmup_steps": 300,
"fp16": true,
"freeze_feature_extractor": true,
"save_steps": 2000,
"eval_steps": 2000,
"save_total_limit": 1,
"logging_steps": 2000,
"group_by_length": true,
"feat_proj_dropout": 0.04,
"layerdrop": 0.041,
"attention_dropout": 0.094,
"activation_dropout": 0.055,
"hidden_dropout": 0.047,
"mask_time_prob": 0.4,
"do_train": true,
"do_eval": true,
"gradient_accumulation_steps": 2,
"dataloader_num_workers": 8,
"train_split_name": "train+validation+other+invalidated",
"chars_to_ignore": [",", "?", ".", "!", "-", ";", ":", "\"\"", "%", "'", "\"", "�", "'", "\u2018", "\u2019"]
}