# opus-2020-07-27.yml (136 lines, 3.27 KB)
# Page navigation text and the line-number gutter (1-136) picked up when this
# file was captured from a web code viewer were removed here; they were not
# part of the YAML document.
# Release metadata for the eng-tut "opus" model package.
#
# Nested mappings and sequences are indented by two spaces. The nesting is
# significant: `subwords` and `subword-models` both contain `source`/`target`,
# and `test-data`, `BLEU-scores` and `chr-F-scores` share the same test-set
# names as keys. Without indentation these collapse into duplicate top-level
# keys and most loaders silently keep only the last occurrence.

release: eng-tut/opus-2020-07-27.zip
# NOTE: unquoted, so YAML 1.1 loaders read this as a date, not a string.
release-date: 2020-07-27
dataset-name: opus
modeltype: transformer
pre-processing: normalization + SentencePiece (spm32k,spm32k)

# Subword vocabulary identifier for each side of the model.
subwords:
  source: spm32k
  target: spm32k

# Subword model file names for each side of the model.
subword-models:
  source: source.spm
  target: target.spm

source-languages:
  - eng

target-languages:
  - aze
  - bak
  - chv
  - crh
  - kaz
  - kir
  - kjh
  - kum
  - mon
  - nog
  - ota
  - sah
  - tat
  - tuk
  - tur
  - tyv
  - uig
  - uzb
  - xal

# Target-language label tokens. They must stay quoted: a plain scalar that
# starts with `>` would be parsed as a folded block scalar indicator.
# Presumably one of these is prepended to the input to select the output
# language/script -- confirm against the consuming tool.
use-target-labels:
  - ">>aze_Latn<<"
  - ">>bak<<"
  - ">>chv<<"
  - ">>crh<<"
  - ">>crh_Latn<<"
  - ">>kaz_Cyrl<<"
  - ">>kaz_Latn<<"
  - ">>kir_Cyrl<<"
  - ">>kjh<<"
  - ">>kum<<"
  - ">>mon<<"
  - ">>nog<<"
  - ">>ota_Arab<<"
  - ">>ota_Latn<<"
  - ">>sah<<"
  - ">>tat<<"
  - ">>tat_Arab<<"
  - ">>tat_Latn<<"
  - ">>tuk<<"
  - ">>tuk_Latn<<"
  - ">>tur<<"
  - ">>tyv<<"
  - ">>uig_Arab<<"
  - ">>uig_Cyrl<<"
  - ">>uzb_Cyrl<<"
  - ">>uzb_Latn<<"
  - ">>xal<<"

# Size of each test set, keyed by "<test set>.<source>-<target>".
# Values are "N/M" strings; presumably <sentences>/<words> -- TODO confirm.
test-data:
  Tatoeba-test.eng-aze: 2659/10046
  Tatoeba-test.eng-bak: 39/140
  Tatoeba-test.eng-chv: 333/1358
  Tatoeba-test.eng-crh: 22/81
  Tatoeba-test.eng-kaz: 397/1668
  Tatoeba-test.eng-kir: 118/428
  Tatoeba-test.eng-kjh: 17/48
  Tatoeba-test.eng-kum: 8/25
  Tatoeba-test.eng-mon: 337/1971
  Tatoeba-test.eng-multi: 10000/46477
  Tatoeba-test.eng-nog: 83/250
  Tatoeba-test.eng-ota: 678/3328
  Tatoeba-test.eng-sah: 39/131
  Tatoeba-test.eng-tat: 1451/6996
  Tatoeba-test.eng-tuk: 2500/12809
  Tatoeba-test.eng-tur: 10000/49076
  Tatoeba-test.eng-tyv: 5/19
  Tatoeba-test.eng-uig: 3024/13084
  Tatoeba-test.eng-uzb: 457/1514
  Tatoeba-test.eng-xal: 281/1366
  newsdev2016-entr.eng-tur: 1001/14044
  newstest2016-entr.eng-tur: 3000/44195
  newstest2017-entr.eng-tur: 3007/45049
  newstest2018-entr.eng-tur: 3000/45944

# BLEU score per test set (same keys as `test-data`).
BLEU-scores:
  Tatoeba-test.eng-aze: 27.5
  Tatoeba-test.eng-bak: 5.5
  Tatoeba-test.eng-chv: 3.3
  Tatoeba-test.eng-crh: 11.9
  Tatoeba-test.eng-kaz: 12.0
  Tatoeba-test.eng-kir: 24.6
  Tatoeba-test.eng-kjh: 2.2
  Tatoeba-test.eng-kum: 8.4
  Tatoeba-test.eng-mon: 9.8
  Tatoeba-test.eng-multi: 18.6
  Tatoeba-test.eng-nog: 1.6
  Tatoeba-test.eng-ota: 0.3
  Tatoeba-test.eng-sah: 0.8
  Tatoeba-test.eng-tat: 10.1
  Tatoeba-test.eng-tuk: 5.7
  Tatoeba-test.eng-tur: 33.2
  Tatoeba-test.eng-tyv: 6.6
  Tatoeba-test.eng-uig: 0.1
  Tatoeba-test.eng-uzb: 3.9
  Tatoeba-test.eng-xal: 0.1
  newsdev2016-entr.eng-tur: 9.6
  newstest2016-entr.eng-tur: 8.4
  newstest2017-entr.eng-tur: 8.6
  newstest2018-entr.eng-tur: 8.6

# chr-F score per test set (same keys as `test-data`).
chr-F-scores:
  Tatoeba-test.eng-aze: 0.575
  Tatoeba-test.eng-bak: 0.306
  Tatoeba-test.eng-chv: 0.284
  Tatoeba-test.eng-crh: 0.358
  Tatoeba-test.eng-kaz: 0.366
  Tatoeba-test.eng-kir: 0.493
  Tatoeba-test.eng-kjh: 0.049
  Tatoeba-test.eng-kum: 0.176
  Tatoeba-test.eng-mon: 0.359
  Tatoeba-test.eng-multi: 0.441
  Tatoeba-test.eng-nog: 0.079
  Tatoeba-test.eng-ota: 0.035
  Tatoeba-test.eng-sah: 0.061
  Tatoeba-test.eng-tat: 0.308
  Tatoeba-test.eng-tuk: 0.310
  Tatoeba-test.eng-tur: 0.616
  Tatoeba-test.eng-tyv: 0.184
  Tatoeba-test.eng-uig: 0.151
  Tatoeba-test.eng-uzb: 0.286
  Tatoeba-test.eng-xal: 0.057
  newsdev2016-entr.eng-tur: 0.427
  newstest2016-entr.eng-tur: 0.402
  newstest2017-entr.eng-tur: 0.402
  newstest2018-entr.eng-tur: 0.404