# opus2m-2020-08-02.yml
# Release metadata for the eng-tut opus2m model package dated 2020-08-02.
# Nested entries are indented under their parent key so that repeated names
# (source/target, and the benchmark names shared by test-data, BLEU-scores
# and chr-F-scores) do not collide as duplicate top-level keys.
release: eng-tut/opus2m-2020-08-02.zip
release-date: 2020-08-02
dataset-name: opus2m
modeltype: transformer
pre-processing: normalization + SentencePiece (spm32k,spm32k)

# Subword vocabulary identifier for each side of the model.
subwords:
  source: spm32k
  target: spm32k

# Subword model file for each side of the model.
subword-models:
  source: source.spm
  target: target.spm

source-languages:
  - eng

target-languages:
  - aze
  - bak
  - chv
  - crh
  - kaz
  - kir
  - kjh
  - kum
  - mon
  - nog
  - ota
  - sah
  - tat
  - tuk
  - tur
  - tyv
  - uig
  - uzb
  - xal

# Target-language label tokens. Quoted because a plain scalar must not start
# with ">" (it would be read as a folded block scalar indicator).
# NOTE(review): presumably these tokens are prepended to the source text to
# select the output language/script - confirm against the model's usage notes.
use-target-labels:
  - ">>aze_Latn<<"
  - ">>bak<<"
  - ">>chv<<"
  - ">>crh<<"
  - ">>crh_Latn<<"
  - ">>kaz_Cyrl<<"
  - ">>kaz_Latn<<"
  - ">>kir_Cyrl<<"
  - ">>kjh<<"
  - ">>kum<<"
  - ">>mon<<"
  - ">>nog<<"
  - ">>ota_Arab<<"
  - ">>ota_Latn<<"
  - ">>sah<<"
  - ">>tat<<"
  - ">>tat_Arab<<"
  - ">>tat_Latn<<"
  - ">>tuk<<"
  - ">>tuk_Latn<<"
  - ">>tur<<"
  - ">>tyv<<"
  - ">>uig_Arab<<"
  - ">>uig_Cyrl<<"
  - ">>uzb_Cyrl<<"
  - ">>uzb_Latn<<"
  - ">>xal<<"

# Size of each benchmark, written as two slash-separated counts.
# NOTE(review): presumably sentences/tokens - confirm with the producing tool.
test-data:
  Tatoeba-test.eng-aze: 2659/10046
  Tatoeba-test.eng-bak: 39/140
  Tatoeba-test.eng-chv: 333/1358
  Tatoeba-test.eng-crh: 22/81
  Tatoeba-test.eng-kaz: 397/1668
  Tatoeba-test.eng-kir: 118/428
  Tatoeba-test.eng-kjh: 17/48
  Tatoeba-test.eng-kum: 8/25
  Tatoeba-test.eng-mon: 337/1971
  Tatoeba-test.eng-multi: 10000/46477
  Tatoeba-test.eng-nog: 83/250
  Tatoeba-test.eng-ota: 678/3328
  Tatoeba-test.eng-sah: 39/131
  Tatoeba-test.eng-tat: 1451/6996
  Tatoeba-test.eng-tuk: 2500/12809
  Tatoeba-test.eng-tur: 10000/49076
  Tatoeba-test.eng-tyv: 5/19
  Tatoeba-test.eng-uig: 3024/13084
  Tatoeba-test.eng-uzb: 457/1514
  Tatoeba-test.eng-xal: 281/1366
  newsdev2016-entr.eng-tur: 1001/14044
  newstest2016-entr.eng-tur: 3000/44195
  newstest2017-entr.eng-tur: 3007/45049
  newstest2018-entr.eng-tur: 3000/45944

# BLEU score per benchmark; keys mirror those under test-data.
BLEU-scores:
  Tatoeba-test.eng-aze: 27.2
  Tatoeba-test.eng-bak: 5.8
  Tatoeba-test.eng-chv: 4.6
  Tatoeba-test.eng-crh: 6.5
  Tatoeba-test.eng-kaz: 11.8
  Tatoeba-test.eng-kir: 24.6
  Tatoeba-test.eng-kjh: 2.2
  Tatoeba-test.eng-kum: 8.0
  Tatoeba-test.eng-mon: 10.3
  Tatoeba-test.eng-multi: 19.5
  Tatoeba-test.eng-nog: 1.5
  Tatoeba-test.eng-ota: 0.2
  Tatoeba-test.eng-sah: 0.7
  Tatoeba-test.eng-tat: 10.8
  Tatoeba-test.eng-tuk: 5.6
  Tatoeba-test.eng-tur: 34.2
  Tatoeba-test.eng-tyv: 8.1
  Tatoeba-test.eng-uig: 0.1
  Tatoeba-test.eng-uzb: 4.2
  Tatoeba-test.eng-xal: 0.1
  newsdev2016-entr.eng-tur: 10.4
  newstest2016-entr.eng-tur: 9.1
  newstest2017-entr.eng-tur: 9.5
  newstest2018-entr.eng-tur: 9.5

# chr-F score per benchmark; keys mirror those under test-data.
chr-F-scores:
  Tatoeba-test.eng-aze: 0.580
  Tatoeba-test.eng-bak: 0.298
  Tatoeba-test.eng-chv: 0.301
  Tatoeba-test.eng-crh: 0.342
  Tatoeba-test.eng-kaz: 0.360
  Tatoeba-test.eng-kir: 0.499
  Tatoeba-test.eng-kjh: 0.052
  Tatoeba-test.eng-kum: 0.229
  Tatoeba-test.eng-mon: 0.362
  Tatoeba-test.eng-multi: 0.451
  Tatoeba-test.eng-nog: 0.117
  Tatoeba-test.eng-ota: 0.035
  Tatoeba-test.eng-sah: 0.080
  Tatoeba-test.eng-tat: 0.320
  Tatoeba-test.eng-tuk: 0.323
  Tatoeba-test.eng-tur: 0.623
  Tatoeba-test.eng-tyv: 0.192
  Tatoeba-test.eng-uig: 0.158
  Tatoeba-test.eng-uzb: 0.298
  Tatoeba-test.eng-xal: 0.061
  newsdev2016-entr.eng-tur: 0.438
  newstest2016-entr.eng-tur: 0.414
  newstest2017-entr.eng-tur: 0.414
  newstest2018-entr.eng-tur: 0.415