# opusTCv20210807+bt_transformer-big_2022-02-25.yml
---
# Release metadata for the hbs-eng transformer-big model package named in
# `release`. Nested block structure is restored here: with every line at
# column 0 the repeated `source:` / `target:` keys are duplicate top-level
# keys and their parent keys parse as null.

# NOTE(review): an unquoted ISO date is loaded as a date object (not a
# string) by YAML 1.1 parsers; the value is left untouched so existing
# consumers see the same type as before.
release: hbs-eng/opusTCv20210807+bt_transformer-big_2022-02-25.zip
release-date: 2022-02-25
dataset-name: opusTCv20210807+bt
modeltype: transformer-big

# The same vocabulary file is listed for both sides.
vocabulary:
  source: opusTCv20210807+bt.spm32k-spm32k.vocab.yml
  target: opusTCv20210807+bt.spm32k-spm32k.vocab.yml

pre-processing: normalization + SentencePiece (spm32k,spm32k)

subwords:
  source: spm32k
  target: spm32k

subword-models:
  source: source.spm
  target: target.spm

# Language IDs carrying a script suffix (e.g. _Cyrl, _Latn).
source-languages:
  - bos_Cyrl
  - bos_Latn
  - cnr
  - cnr_Latn
  - hbs
  - hbs_Cyrl
  - hrv
  - srp_Cyrl
  - srp_Latn

target-languages:
  - eng

# The same languages without script suffixes.
raw-source-languages:
  - bos
  - cnr
  - hbs
  - hrv
  - srp

raw-target-languages:
  - eng

# One entry per language pair: data set names, each followed by a number in
# parentheses. Long values use folded scalars (`>-`), which join the lines
# with single spaces, so each value is the same one-line string as before.
training-data:
  bos_Cyrl-eng: Tatoeba-train-v2021-08-07.bos_Cyrl-eng.strict (300)
  bos_Latn-eng: >-
    Tatoeba-train-v2021-08-07.bos_Latn-eng.strict (12936307)
    wikibooks.aa.eng-bos_Latn (991831)
    wikinews.aa.eng-bos_Latn (457123)
    wikipedia.aa.eng-bos_Latn (982309)
    wikipedia.ab.eng-bos_Latn (982484)
    wikipedia.ac.eng-bos_Latn (982532)
    wikipedia.ad.eng-bos_Latn (982323)
    wikiquote.aa.eng-bos_Latn (996992)
  cnr-eng: Tatoeba-train-v2021-08-07.cnr-eng.strict (13)
  cnr_Latn-eng: Tatoeba-train-v2021-08-07.cnr_Latn-eng.strict (55322)
  hbs-eng: Tatoeba-train-v2021-08-07.eng-hbs.strict (14905)
  hbs_Cyrl-eng: Tatoeba-train-v2021-08-07.eng-hbs_Cyrl.strict (533)
  hrv-eng: >-
    Tatoeba-train-v2021-08-07.eng-hrv.strict (71621531)
    wikibooks.aa (991933)
    wikinews.aa (457344)
    wikipedia.aa (983358)
    wikipedia.ab (983564)
    wikipedia.ac (983550)
    wikipedia.ad (983346)
    wikiquote.aa (997151)
  srp_Cyrl-eng: >-
    Tatoeba-train-v2021-08-07.eng-srp_Cyrl.strict (16716175)
    wikibooks.aa (991933)
    wikinews.aa (457344)
    wikipedia.aa (983358)
    wikipedia.ab (983564)
    wikipedia.ac (983550)
    wikipedia.ad (983346)
    wikiquote.aa (997151)
  srp_Latn-eng: >-
    Tatoeba-train-v2021-08-07.eng-srp_Latn.strict (48913754)
    wikibooks.aa (991933)
    wikinews.aa (457344)
    wikipedia.aa (983358)
    wikipedia.ab (983564)
    wikipedia.ac (983550)
    wikipedia.ad (983346)
    wikiquote.aa (997151)

# NOTE(review): three of the pair keys below are written eng-first
# (eng-hrv, ...) while training-data and test-data use the xxx-eng order;
# keys are kept exactly as found.
# NOTE(review): total-size-shuffled and devset-selected are assumed to
# belong under validation-data; the flattened input does not show their
# nesting level - confirm against the consumer.
validation-data:
  bos_Latn-eng: Tatoeba-dev-v2021-08-07, 200
  eng-hrv: Tatoeba-dev-v2021-08-07, 941
  eng-srp_Cyrl: Tatoeba-dev-v2021-08-07, 8753
  eng-srp_Latn: Tatoeba-dev-v2021-08-07, 4203
  total-size-shuffled: 3126
  devset-selected: top 3126 lines of Tatoeba-dev-v2021-08-07.src.shuffled

# Values are "N/M" strings, one per test set.
# NOTE(review): presumably sentence count / word count - verify.
test-data:
  Tatoeba-test-v2021-08-07.bos_Latn-eng: 301/1824
  Tatoeba-test-v2021-08-07.hrv-eng: 1480/10620
  Tatoeba-test-v2021-08-07.multi-eng: 10000/68833
  Tatoeba-test-v2021-08-07.srp_Cyrl-eng: 1580/10180
  Tatoeba-test-v2021-08-07.srp_Latn-eng: 6656/46303

# Scores are keyed by the same test-set names as test-data.
BLEU-scores:
  Tatoeba-test-v2021-08-07.bos_Latn-eng: 66.4
  Tatoeba-test-v2021-08-07.hrv-eng: 58.7
  Tatoeba-test-v2021-08-07.multi-eng: 56.3
  Tatoeba-test-v2021-08-07.srp_Cyrl-eng: 44.5
  Tatoeba-test-v2021-08-07.srp_Latn-eng: 58.3

chr-F-scores:
  Tatoeba-test-v2021-08-07.bos_Latn-eng: 0.80006
  Tatoeba-test-v2021-08-07.hrv-eng: 0.73512
  Tatoeba-test-v2021-08-07.multi-eng: 0.71724
  Tatoeba-test-v2021-08-07.srp_Cyrl-eng: 0.68137
  Tatoeba-test-v2021-08-07.srp_Latn-eng: 0.71699