-
Notifications
You must be signed in to change notification settings - Fork 92
/
Copy pathopusTCv20210807_transformer-big_2022-07-22.yml
44 lines (44 loc) · 1.32 KB
/
opusTCv20210807_transformer-big_2022-07-22.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# Release metadata for the zlw-fin transformer-big model trained on
# opusTCv20210807. Nesting is expressed with two-space block indentation so
# that the repeated child keys (source/target, per-test-set names) live under
# their parent mappings instead of colliding at the top level.
release: zlw-fin/opusTCv20210807_transformer-big_2022-07-22.zip
release-date: 2022-07-22
dataset-name: opusTCv20210807
modeltype: transformer-big

# Vocabulary files; source and target name the same file.
vocabulary:
  source: opusTCv20210807.spm32k-spm32k.vocab.yml
  target: opusTCv20210807.spm32k-spm32k.vocab.yml

pre-processing: normalization + SentencePiece (spm32k,spm32k)

subwords:
  source: spm32k
  target: spm32k

subword-models:
  source: source.spm
  target: target.spm

source-languages:
  - ces
  - pol

target-languages:
  - fin

raw-source-languages:
  - ces
  - pol

raw-target-languages:
  - fin

# NOTE(review): the parenthesised number is presumably the size of the
# training set - confirm against the training logs.
training-data:
  ces-fin: Tatoeba-train-v2021-08-07.ces-fin.strict (43470575)
  pol-fin: Tatoeba-train-v2021-08-07.fin-pol.strict (42191613)

# NOTE(review): total-size-shuffled and devset-selected are assumed to belong
# to this mapping (1001 + 926 = 1927 candidate lines, top 1923 selected);
# confirm against the tool that reads this file. The Polish pair is keyed
# fin-pol here but pol-fin under training-data; keys are kept as found.
validation-data:
  ces-fin: Tatoeba-dev-v2021-08-07, 1001
  fin-pol: Tatoeba-dev-v2021-08-07, 926
  total-size-shuffled: 1923
  devset-selected: top 1923 lines of Tatoeba-dev-v2021-08-07.src.shuffled

# NOTE(review): values look like "<sentences>/<words>" per test set - confirm.
test-data:
  Tatoeba-test-v2021-08-07.ces-fin: 88/408
  Tatoeba-test-v2021-08-07.multi-fin: 697/3701
  Tatoeba-test-v2021-08-07.pol-fin: 609/3293

# Scores below are keyed by the same test-set names used in test-data.
BLEU-scores:
  Tatoeba-test-v2021-08-07.ces-fin: 62.2
  Tatoeba-test-v2021-08-07.multi-fin: 46.3
  Tatoeba-test-v2021-08-07.pol-fin: 44.4

chr-F-scores:
  Tatoeba-test-v2021-08-07.ces-fin: 0.72184
  Tatoeba-test-v2021-08-07.multi-fin: 0.66927
  Tatoeba-test-v2021-08-07.pol-fin: 0.66230