# opusTCv20210807_transformer-align_2022-03-14.yml
# Model description file (62 lines in the original repository view).
---
# Description of one released translation model: which archive it ships in,
# how the data was pre-processed, which languages it covers, and the
# evaluation scores recorded for it.

# Release archive and basic identification.
release: zle-bat/opusTCv20210807_transformer-align_2022-03-14.zip
release-date: 2022-03-14
dataset-name: opusTCv20210807
modeltype: transformer-align

# Vocabulary files; source and target point at the same file.
vocabulary:
  source: opusTCv20210807.spm32k-spm32k.vocab.yml
  target: opusTCv20210807.spm32k-spm32k.vocab.yml

# Text pre-processing and subword segmentation settings.
pre-processing: normalization + SentencePiece (spm32k,spm32k)
subwords:
  source: spm32k
  target: spm32k
subword-models:
  source: source.spm
  target: target.spm

# Languages covered by the model.
source-languages:
  - rus
  - ukr
target-languages:
  - lav
  - lit
  - ltg
raw-source-languages:
  - rus
  - ukr
raw-target-languages:
  - lav
  - lit
  - ltg

# NOTE(review): empty in this copy, so it parses as null. With three target
# languages a list of target-language labels may be expected here; confirm
# against the released file before relying on it.
use-target-labels:

# Training sets per language pair.
# The parenthesised figure is presumably the training-set size; TODO confirm.
training-data:
  rus-lav: Tatoeba-train-v2021-08-07.lav-rus.strict (8547116)
  rus-lit: Tatoeba-train-v2021-08-07.lit-rus.strict (17915943)
  rus-ltg: Tatoeba-train-v2021-08-07.ltg-rus.strict (5826)
  ukr-lav: Tatoeba-train-v2021-08-07.lav-ukr.strict (3957815)
  ukr-lit: Tatoeba-train-v2021-08-07.lit-ukr.strict (5371979)
  ukr-ltg: Tatoeba-train-v2021-08-07.ltg-ukr.strict (6424)

# Development sets per language pair, plus the shuffled selection used.
# NOTE(review): total-size-shuffled and devset-selected are nested here on the
# assumption that they describe the validation data; the flattened input does
# not show their original level, so verify against the consumer.
validation-data:
  lav-rus: Tatoeba-dev-v2021-08-07, 665
  lit-rus: Tatoeba-dev-v2021-08-07, 5222
  ltg-rus: Tatoeba-dev-v2021-08-07, 1
  lav-ukr: Tatoeba-dev-v2021-08-07, 999
  lit-ukr: Tatoeba-dev-v2021-08-07, 1000
  total-size-shuffled: 3665
  devset-selected: top 3665 lines of Tatoeba-dev-v2021-08-07.src.shuffled

# Test sets. The N/M value is presumably a pair of size counts for the set
# (e.g. sentences/words); TODO confirm the units.
test-data:
  Tatoeba-test-v2021-08-07.multi-multi: 3876/22206
  Tatoeba-test-v2021-08-07.rus-lav: 274/1516
  Tatoeba-test-v2021-08-07.rus-lit: 3598/20605
  Tatoeba-test-v2021-08-07.ukr-lav: 1/6
  Tatoeba-test-v2021-08-07.ukr-lit: 3/20

# Scores per test set. The ukr-lav and ukr-lit test sets are tiny
# (1/6 and 3/20 in test-data), so their scores rest on very little data.
BLEU-scores:
  Tatoeba-test-v2021-08-07.multi-multi: 47.8
  Tatoeba-test-v2021-08-07.rus-lav: 55.7
  Tatoeba-test-v2021-08-07.rus-lit: 46.8
  Tatoeba-test-v2021-08-07.ukr-lav: 2.0
  Tatoeba-test-v2021-08-07.ukr-lit: 68.0
chr-F-scores:
  Tatoeba-test-v2021-08-07.multi-multi: 0.70999
  Tatoeba-test-v2021-08-07.rus-lav: 0.74105
  Tatoeba-test-v2021-08-07.rus-lit: 0.70382
  Tatoeba-test-v2021-08-07.ukr-lav: 0.13943
  Tatoeba-test-v2021-08-07.ukr-lit: 0.84108