-
Notifications
You must be signed in to change notification settings - Fork 92
/
opus2m-2020-08-01.yml
155 lines (155 loc) · 4.19 KB
/
opus2m-2020-08-01.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
# Release metadata for one translation-model package.
# Path of the release archive (a .zip under the eng-sla directory).
release: eng-sla/opus2m-2020-08-01.zip
# NOTE(review): unquoted ISO date — YAML 1.1 loaders (e.g. PyYAML) parse this
# as a date object rather than a string; confirm what the consumer expects.
release-date: 2020-08-01
dataset-name: opus2m
modeltype: transformer
# Free-text description of the pre-processing; the two "spm32k" entries
# presumably correspond to the source and target sides — TODO confirm.
pre-processing: normalization + SentencePiece (spm32k,spm32k)
# Subword segmentation scheme for each side of the model.
# The source/target entries must be nested under their parent key: at column 0
# they become duplicate top-level keys and the parent parses as null.
subwords:
  source: spm32k
  target: spm32k
# Subword model file names for each side.
subword-models:
  source: source.spm
  target: target.spm
# Language codes listed as model input.
source-languages:
  - eng
# Language codes listed as model output.
target-languages:
  - bel
  - bos
  - bul
  - ces
  - csb
  - dsb
  - hrv
  - hsb
  - mkd
  - orv
  - pol
  - rue
  - rus
  - slv
  - srp
  - ukr
# Target-language labels, some with a script suffix (_Latn / _Cyrl).
# Presumably one label is prepended to the input to select the output
# language — TODO confirm against the model documentation.
# Quoting is required: a plain scalar may not start with ">".
use-target-labels:
  - '>>bel<<'
  - '>>bel_Latn<<'
  - '>>bos_Latn<<'
  - '>>bul<<'
  - '>>bul_Latn<<'
  - '>>ces<<'
  - '>>csb_Latn<<'
  - '>>dsb<<'
  - '>>hrv<<'
  - '>>hsb<<'
  - '>>mkd<<'
  - '>>orv_Cyrl<<'
  - '>>pol<<'
  - '>>rue<<'
  - '>>rus<<'
  - '>>slv<<'
  - '>>srp_Cyrl<<'
  - '>>srp_Latn<<'
  - '>>ukr<<'
# Size of each evaluation set, keyed by "<test set>.<source>-<target>".
# Values have the form <n>/<m> — presumably sentences/words; TODO confirm.
# Entries are nested under test-data so they do not collide with the
# identically named keys in the score sections.
test-data:
  Tatoeba-test.eng-bel: 2500/12908
  Tatoeba-test.eng-bul: 10000/56740
  Tatoeba-test.eng-ces: 10000/52129
  Tatoeba-test.eng-csb: 27/196
  Tatoeba-test.eng-dsb: 34/140
  Tatoeba-test.eng-hbs: 10000/52571
  Tatoeba-test.eng-hsb: 40/156
  Tatoeba-test.eng-mkd: 10000/51245
  Tatoeba-test.eng-multi: 10000/52312
  Tatoeba-test.eng-orv: 322/1364
  Tatoeba-test.eng-pol: 10000/52166
  Tatoeba-test.eng-rue: 120/369
  Tatoeba-test.eng-rus: 10000/53367
  Tatoeba-test.eng-slv: 2007/9410
  Tatoeba-test.eng-ukr: 10000/47921
  news-test2008.eng-ces: 2051/36403
  newssyscomb2009.eng-ces: 502/8592
  newstest2009.eng-ces: 2525/47071
  newstest2010.eng-ces: 2489/45422
  newstest2011.eng-ces: 3003/55301
  newstest2012.eng-ces: 3003/54732
  newstest2012.eng-rus: 3003/53938
  newstest2013.eng-ces: 3000/48472
  newstest2013.eng-rus: 3000/48639
  newstest2015-encs.eng-ces: 2656/39201
  newstest2015-enru.eng-rus: 2818/46352
  newstest2016-encs.eng-ces: 2999/48915
  newstest2016-enru.eng-rus: 2998/51923
  newstest2017-encs.eng-ces: 3005/46951
  newstest2017-enru.eng-rus: 3001/50385
  newstest2018-encs.eng-ces: 2983/47229
  newstest2018-enru.eng-rus: 3000/51988
  newstest2019-encs.eng-ces: 1997/37506
  newstest2019-enru.eng-rus: 1997/40379
# BLEU score per evaluation set, keyed by "<test set>.<source>-<target>".
# Entries are nested under BLEU-scores so they do not collide with the
# identically named keys in the other per-test-set sections.
BLEU-scores:
  Tatoeba-test.eng-bel: 22.9
  Tatoeba-test.eng-bul: 46.7
  Tatoeba-test.eng-ces: 42.7
  Tatoeba-test.eng-csb: 1.4
  Tatoeba-test.eng-dsb: 1.4
  Tatoeba-test.eng-hbs: 40.3
  Tatoeba-test.eng-hsb: 14.3
  Tatoeba-test.eng-mkd: 44.1
  Tatoeba-test.eng-multi: 41.0
  Tatoeba-test.eng-orv: 0.3
  Tatoeba-test.eng-pol: 42.0
  Tatoeba-test.eng-rue: 0.3
  Tatoeba-test.eng-rus: 40.5
  Tatoeba-test.eng-slv: 18.8
  Tatoeba-test.eng-ukr: 38.8
  news-test2008.eng-ces: 17.7
  newssyscomb2009.eng-ces: 20.1
  newstest2009.eng-ces: 19.1
  newstest2010.eng-ces: 19.3
  newstest2011.eng-ces: 20.4
  newstest2012.eng-ces: 18.3
  newstest2012.eng-rus: 27.4
  newstest2013.eng-ces: 21.5
  newstest2013.eng-rus: 20.9
  newstest2015-encs.eng-ces: 21.1
  newstest2015-enru.eng-rus: 24.5
  newstest2016-encs.eng-ces: 23.6
  newstest2016-enru.eng-rus: 23.0
  newstest2017-encs.eng-ces: 19.2
  newstest2017-enru.eng-rus: 25.0
  newstest2018-encs.eng-ces: 19.3
  newstest2018-enru.eng-rus: 22.3
  newstest2019-encs.eng-ces: 20.4
  newstest2019-enru.eng-rus: 24.0
# chr-F score per evaluation set, keyed by "<test set>.<source>-<target>".
# Entries are nested under chr-F-scores so they do not collide with the
# identically named keys in the other per-test-set sections.
chr-F-scores:
  Tatoeba-test.eng-bel: 0.489
  Tatoeba-test.eng-bul: 0.652
  Tatoeba-test.eng-ces: 0.624
  Tatoeba-test.eng-csb: 0.210
  Tatoeba-test.eng-dsb: 0.165
  Tatoeba-test.eng-hbs: 0.616
  Tatoeba-test.eng-hsb: 0.344
  Tatoeba-test.eng-mkd: 0.635
  Tatoeba-test.eng-multi: 0.610
  Tatoeba-test.eng-orv: 0.014
  Tatoeba-test.eng-pol: 0.637
  Tatoeba-test.eng-rue: 0.012
  Tatoeba-test.eng-rus: 0.612
  Tatoeba-test.eng-slv: 0.357
  Tatoeba-test.eng-ukr: 0.600
  news-test2008.eng-ces: 0.461
  newssyscomb2009.eng-ces: 0.484
  newstest2009.eng-ces: 0.479
  newstest2010.eng-ces: 0.483
  newstest2011.eng-ces: 0.486
  newstest2012.eng-ces: 0.461
  newstest2012.eng-rus: 0.551
  newstest2013.eng-ces: 0.489
  newstest2013.eng-rus: 0.490
  newstest2015-encs.eng-ces: 0.496
  newstest2015-enru.eng-rus: 0.536
  newstest2016-encs.eng-ces: 0.515
  newstest2016-enru.eng-rus: 0.519
  newstest2017-encs.eng-ces: 0.474
  newstest2017-enru.eng-rus: 0.541
  newstest2018-encs.eng-ces: 0.479
  newstest2018-enru.eng-rus: 0.526
  newstest2019-encs.eng-ces: 0.486
  newstest2019-enru.eng-rus: 0.506