-
Notifications
You must be signed in to change notification settings - Fork 90
/
opus-2020-07-06.yml
116 lines (116 loc) · 2.66 KB
/
opus-2020-07-06.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# Release metadata for this model package (flat top-level scalars).
# Path of the release archive; the base location it is relative to is not
# recorded in this file.
release: eng-inc/opus-2020-07-06.zip
# NOTE(review): unquoted ISO date. YAML 1.1 loaders such as PyYAML resolve this
# to a date object rather than a string; confirm consumers expect that.
release-date: 2020-07-06
dataset-name: opus
modeltype: transformer
# The (spm32k,spm32k) pair matches the values listed under `subwords`
# (presumably in source,target order; confirm).
pre-processing: normalization + SentencePiece (spm32k,spm32k)
# Subword vocabulary identifier for each side of the model.
# Both sides use spm32k, matching the pair named in `pre-processing`.
# The entries are nested under `subwords` so that `source`/`target` do not
# collide with the identically named keys of `subword-models`.
subwords:
  source: spm32k
  target: spm32k
# File name of the subword model for each side
# (presumably resolved relative to the release archive; confirm).
# The entries are nested under `subword-models` so that `source`/`target` do
# not collide with the identically named keys of `subwords`.
subword-models:
  source: source.spm
  target: target.spm
# Languages accepted on the input side; this release lists exactly one.
source-languages:
  - eng
# Output languages of the model: 19 three-letter codes, in the same order as
# the tokens in `use-target-labels`.
# NOTE(review): the evaluation sections use eng-kok, eng-lah and eng-nep, which
# are not listed here, while gom, pnb and npi have no test-set entry.
# Presumably macrolanguage vs. individual-language codes; confirm the mapping.
target-languages:
  - asm
  - awa
  - ben
  - bho
  - gom
  - guj
  - hif
  - hin
  - mai
  - mar
  - npi
  - ori
  - pan
  - pnb
  - rom
  - san
  - sin
  - snd
  - urd
# Target-language tokens, one per entry of `target-languages` and in the same
# order. Four of them carry a script suffix (hif_Latn, pan_Guru, san_Deva,
# snd_Arab). Presumably a token is prepended to the input to select the output
# language; confirm against the model's usage notes.
# The quotes are required: a plain YAML scalar cannot start with `>`.
use-target-labels:
  - ">>asm<<"
  - ">>awa<<"
  - ">>ben<<"
  - ">>bho<<"
  - ">>gom<<"
  - ">>guj<<"
  - ">>hif_Latn<<"
  - ">>hin<<"
  - ">>mai<<"
  - ">>mar<<"
  - ">>npi<<"
  - ">>ori<<"
  - ">>pan_Guru<<"
  - ">>pnb<<"
  - ">>rom<<"
  - ">>san_Deva<<"
  - ">>sin<<"
  - ">>snd_Arab<<"
  - ">>urd<<"
# Size of each evaluation set, keyed by test-set name. Each value is two counts
# separated by "/" (presumably sentences/words; confirm).
# The same 20 keys appear under `BLEU-scores` and `chr-F-scores`, so the entries
# must stay nested under `test-data` to avoid duplicate top-level keys.
test-data:
  Tatoeba-test.eng-asm: 117/530
  Tatoeba-test.eng-awa: 279/1087
  Tatoeba-test.eng-ben: 2500/10942
  Tatoeba-test.eng-bho: 42/224
  Tatoeba-test.eng-guj: 154/680
  Tatoeba-test.eng-hif: 36/177
  Tatoeba-test.eng-hin: 5000/31361
  Tatoeba-test.eng-kok: 1/5
  Tatoeba-test.eng-lah: 32/180
  Tatoeba-test.eng-mai: 8/16
  Tatoeba-test.eng-mar: 10000/47665
  Tatoeba-test.eng-multi: 10000/53001
  Tatoeba-test.eng-nep: 115/398
  Tatoeba-test.eng-ori: 33/190
  Tatoeba-test.eng-pan: 87/570
  Tatoeba-test.eng-rom: 671/4283
  Tatoeba-test.eng-san: 144/366
  Tatoeba-test.eng-sin: 45/189
  Tatoeba-test.eng-snd: 4/17
  Tatoeba-test.eng-urd: 1663/12106
# BLEU score per evaluation set, keyed by the same test-set names used in
# `test-data` and `chr-F-scores`.
# The entries must stay nested under `BLEU-scores`: at top level they would
# duplicate those keys, and most loaders silently keep only the last value.
BLEU-scores:
  Tatoeba-test.eng-asm: 3.6
  Tatoeba-test.eng-awa: 0.4
  Tatoeba-test.eng-ben: 15.9
  Tatoeba-test.eng-bho: 0.6
  Tatoeba-test.eng-guj: 20.9
  Tatoeba-test.eng-hif: 0.6
  Tatoeba-test.eng-hin: 17.2
  Tatoeba-test.eng-kok: 3.3
  Tatoeba-test.eng-lah: 0.3
  Tatoeba-test.eng-mai: 8.9
  Tatoeba-test.eng-mar: 20.1
  Tatoeba-test.eng-multi: 16.8
  Tatoeba-test.eng-nep: 0.6
  Tatoeba-test.eng-ori: 2.2
  Tatoeba-test.eng-pan: 9.6
  Tatoeba-test.eng-rom: 0.4
  Tatoeba-test.eng-san: 1.5
  Tatoeba-test.eng-sin: 9.1
  Tatoeba-test.eng-snd: 1.9
  Tatoeba-test.eng-urd: 12.7
# chr-F score per evaluation set, keyed by the same test-set names used in
# `test-data` and `BLEU-scores`. All recorded values lie between 0 and 1.
# The entries must stay nested under `chr-F-scores`: at top level they would
# duplicate those keys and, under last-wins parsing, replace the other sections.
chr-F-scores:
  Tatoeba-test.eng-asm: 0.277
  Tatoeba-test.eng-awa: 0.144
  Tatoeba-test.eng-ben: 0.466
  Tatoeba-test.eng-bho: 0.152
  Tatoeba-test.eng-guj: 0.380
  Tatoeba-test.eng-hif: 0.032
  Tatoeba-test.eng-hin: 0.461
  Tatoeba-test.eng-kok: 0.022
  Tatoeba-test.eng-lah: 0.007
  Tatoeba-test.eng-mai: 0.392
  Tatoeba-test.eng-mar: 0.463
  Tatoeba-test.eng-multi: 0.439
  Tatoeba-test.eng-nep: 0.058
  Tatoeba-test.eng-ori: 0.187
  Tatoeba-test.eng-pan: 0.351
  Tatoeba-test.eng-rom: 0.188
  Tatoeba-test.eng-san: 0.111
  Tatoeba-test.eng-sin: 0.370
  Tatoeba-test.eng-snd: 0.235
  Tatoeba-test.eng-urd: 0.412