# opus-2021-02-23.yml
# Release metadata for the ukr-hbs translation model package named in
# `release`. Nested mappings are indented two spaces so that the per-section
# keys (source/target, Tatoeba-test.*) live under their parent instead of
# colliding as duplicate top-level keys.
release: ukr-hbs/opus-2021-02-23.zip
# Left unquoted as in the original; YAML 1.1 loaders read this as a date,
# not a string. NOTE(review): quote it if the consumer expects text.
release-date: 2021-02-23
dataset-name: opus
modeltype: transformer
pre-processing: normalization + SentencePiece (spm32k,spm32k)

# Subword scheme per side.
subwords:
  source: spm32k
  target: spm32k

# Subword model file names per side.
subword-models:
  source: source.spm
  target: target.spm

# Target-language label tokens; quoted because a plain scalar may not start
# with '>'.
use-target-labels:
  - ">>bos_Cyrl<<"
  - ">>bos_Latn<<"
  - ">>hbs_Cyrl<<"
  - ">>hrv<<"
  - ">>srp_Cyrl<<"
  - ">>srp_Latn<<"

source-languages:
  - ukr

target-languages:
  - bos
  - hbs
  - hrv
  - srp

# One entry per language pair: corpus name followed by a number in
# parentheses (presumably the training-example count; confirm).
training-data:
  ukr-bos_Cyrl: Tatoeba-train (2)
  ukr-bos_Latn: Tatoeba-train (204582)
  ukr-hbs_Cyrl: Tatoeba-train (11)
  ukr-hrv: Tatoeba-train (1398103)
  ukr-srp_Cyrl: Tatoeba-train (152997)
  ukr-srp_Latn: Tatoeba-train (381969)

# One entry per language pair: corpus name, then a count.
# NOTE(review): total-size-shuffled and devset-selected are placed here as
# summary entries of this section because they directly follow its pairs;
# confirm against the consumer. The four per-pair counts add up to 999,
# while total-size-shuffled says 998.
validation-data:
  bos_Latn-ukr: Tatoeba-dev, 95
  hrv-ukr: Tatoeba-dev, 628
  srp_Cyrl-ukr: Tatoeba-dev, 74
  srp_Latn-ukr: Tatoeba-dev, 202
  total-size-shuffled: 998
  devset-selected: top 998 lines of Tatoeba-dev.src.shuffled!

# Two counts per test set, written "a/b" (units not stated in this file).
# The ukr-hbs row equals the sum of the three rows below it.
test-data:
  Tatoeba-test.ukr-hbs: 941/5128
  Tatoeba-test.ukr-hrv: 389/2302
  Tatoeba-test.ukr-srp_Cyrl: 204/1110
  Tatoeba-test.ukr-srp_Latn: 348/1716

# BLEU per test set; keys match those in test-data.
BLEU-scores:
  Tatoeba-test.ukr-hbs: 44.5
  Tatoeba-test.ukr-hrv: 43.8
  Tatoeba-test.ukr-srp_Cyrl: 47.3
  Tatoeba-test.ukr-srp_Latn: 42.4

# chr-F per test set; keys match those in test-data.
chr-F-scores:
  Tatoeba-test.ukr-hbs: 0.643
  Tatoeba-test.ukr-hrv: 0.635
  Tatoeba-test.ukr-srp_Cyrl: 0.641
  Tatoeba-test.ukr-srp_Latn: 0.651