# opus-2020-07-27.yml
# Top-level scalar fields describing this release.
# Archive path for the release; the file name embeds the same date as release-date.
release: eng-trk/opus-2020-07-27.zip
# NOTE(review): unquoted ISO date. YAML 1.1 loaders (e.g. PyYAML) return a
# date object here rather than a string; confirm what the consumer expects
# before quoting it.
release-date: 2020-07-27
dataset-name: opus
modeltype: transformer
# Names spm32k twice, matching the source/target entries under `subwords`.
pre-processing: normalization + SentencePiece (spm32k,spm32k)
# Subword scheme name for each side of the model.
# The children are nested under `subwords`; left at column 0 they would load
# as top-level `source`/`target` keys and `subwords` itself would be null.
subwords:
  source: spm32k
  target: spm32k
# Subword model file name for each side of the model.
# NOTE(review): the names look relative to the release archive; confirm.
# The children are nested under `subword-models`; left at column 0 they would
# load as top-level `source`/`target` keys and `subword-models` would be null.
subword-models:
  source: source.spm
  target: target.spm
# Language codes accepted on the source side (a single entry).
source-languages:
  - eng
# Language codes available on the target side (16 entries, sorted
# alphabetically).
target-languages:
  - aze
  - bak
  - chv
  - crh
  - kaz
  - kir
  - kjh
  - kum
  - ota
  - sah
  - tat
  - tuk
  - tur
  - tyv
  - uig
  - uzb
# Target labels in `>>code<<` form. Some languages also list script-specific
# variants (`_Latn`, `_Cyrl`, `_Arab` suffixes), so there are more labels than
# target languages.
# NOTE(review): presumably one of these tokens is prepended to the input to
# select the output language; confirm against the consumer.
# Values are quoted because a plain scalar may not begin with `>`.
use-target-labels:
  - '>>aze_Latn<<'
  - '>>bak<<'
  - '>>chv<<'
  - '>>crh<<'
  - '>>crh_Latn<<'
  - '>>kaz_Cyrl<<'
  - '>>kaz_Latn<<'
  - '>>kir_Cyrl<<'
  - '>>kjh<<'
  - '>>kum<<'
  - '>>ota_Arab<<'
  - '>>ota_Latn<<'
  - '>>sah<<'
  - '>>tat<<'
  - '>>tat_Arab<<'
  - '>>tat_Latn<<'
  - '>>tuk<<'
  - '>>tuk_Latn<<'
  - '>>tur<<'
  - '>>tyv<<'
  - '>>uig_Arab<<'
  - '>>uig_Cyrl<<'
  - '>>uzb_Cyrl<<'
  - '>>uzb_Latn<<'
# Size of each evaluation set, keyed by test-set name.
# NOTE(review): values look like "<sentences>/<tokens>"; confirm. Because they
# contain "/", they load as strings, not numbers.
# Entries are nested under `test-data`; left at column 0 the test-set names
# would become top-level keys and collide with the same names in the score
# sections.
test-data:
  Tatoeba-test.eng-aze: 2659/10046
  Tatoeba-test.eng-bak: 39/140
  Tatoeba-test.eng-chv: 333/1358
  Tatoeba-test.eng-crh: 22/81
  Tatoeba-test.eng-kaz: 397/1668
  Tatoeba-test.eng-kir: 118/428
  Tatoeba-test.eng-kjh: 17/48
  Tatoeba-test.eng-kum: 8/25
  Tatoeba-test.eng-multi: 10000/46183
  Tatoeba-test.eng-ota: 678/3328
  Tatoeba-test.eng-sah: 39/131
  Tatoeba-test.eng-tat: 1451/6996
  Tatoeba-test.eng-tuk: 2500/12809
  Tatoeba-test.eng-tur: 10000/49076
  Tatoeba-test.eng-tyv: 5/19
  Tatoeba-test.eng-uig: 3024/13084
  Tatoeba-test.eng-uzb: 457/1514
  newsdev2016-entr.eng-tur: 1001/14044
  newstest2016-entr.eng-tur: 3000/44195
  newstest2017-entr.eng-tur: 3007/45049
  newstest2018-entr.eng-tur: 3000/45944
# BLEU score per evaluation set, using the same test-set names as `test-data`.
# Entries are nested under `BLEU-scores`; left at column 0 the test-set names
# would become top-level keys duplicated across sections, and most loaders
# would silently keep only the last value for each.
BLEU-scores:
  Tatoeba-test.eng-aze: 26.0
  Tatoeba-test.eng-bak: 9.2
  Tatoeba-test.eng-chv: 3.9
  Tatoeba-test.eng-crh: 7.6
  Tatoeba-test.eng-kaz: 10.4
  Tatoeba-test.eng-kir: 26.9
  Tatoeba-test.eng-kjh: 2.0
  Tatoeba-test.eng-kum: 2.7
  Tatoeba-test.eng-multi: 18.8
  Tatoeba-test.eng-ota: 0.4
  Tatoeba-test.eng-sah: 0.7
  Tatoeba-test.eng-tat: 9.6
  Tatoeba-test.eng-tuk: 5.5
  Tatoeba-test.eng-tur: 33.4
  Tatoeba-test.eng-tyv: 3.6
  Tatoeba-test.eng-uig: 0.1
  Tatoeba-test.eng-uzb: 3.3
  newsdev2016-entr.eng-tur: 9.5
  newstest2016-entr.eng-tur: 8.0
  newstest2017-entr.eng-tur: 7.8
  newstest2018-entr.eng-tur: 8.2
# chr-F score per evaluation set, using the same test-set names as `test-data`.
# All values listed here fall between 0 and 1.
# Entries are nested under `chr-F-scores`; left at column 0 the test-set names
# would become top-level keys duplicated across sections, and most loaders
# would silently keep only the last value for each.
chr-F-scores:
  Tatoeba-test.eng-aze: 0.568
  Tatoeba-test.eng-bak: 0.320
  Tatoeba-test.eng-chv: 0.266
  Tatoeba-test.eng-crh: 0.347
  Tatoeba-test.eng-kaz: 0.352
  Tatoeba-test.eng-kir: 0.508
  Tatoeba-test.eng-kjh: 0.052
  Tatoeba-test.eng-kum: 0.073
  Tatoeba-test.eng-multi: 0.447
  Tatoeba-test.eng-ota: 0.064
  Tatoeba-test.eng-sah: 0.028
  Tatoeba-test.eng-tat: 0.309
  Tatoeba-test.eng-tuk: 0.309
  Tatoeba-test.eng-tur: 0.617
  Tatoeba-test.eng-tyv: 0.125
  Tatoeba-test.eng-uig: 0.152
  Tatoeba-test.eng-uzb: 0.268
  newsdev2016-entr.eng-tur: 0.423
  newstest2016-entr.eng-tur: 0.397
  newstest2017-entr.eng-tur: 0.394
  newstest2018-entr.eng-tur: 0.396