-
Notifications
You must be signed in to change notification settings - Fork 90
/
opus+bt-2021-04-14.yml
30 lines (30 loc) · 1.26 KB
/
opus+bt-2021-04-14.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# Release metadata for the eng-ukr "opus+bt" translation model.
release: eng-ukr/opus+bt-2021-04-14.zip
release-date: 2021-04-14
dataset-name: opus+bt
modeltype: transformer-align

# Vocabulary files; source and target point at the same file.
vocabulary:
  source: opus+bt.spm32k-spm32k.vocab.yml
  target: opus+bt.spm32k-spm32k.vocab.yml

pre-processing: normalization + SentencePiece (spm32k,spm32k)

# Subword segmentation scheme per side.
subwords:
  source: spm32k
  target: spm32k

# Subword model files per side.
subword-models:
  source: source.spm
  target: target.spm

source-languages:
  - eng
target-languages:
  - ukr

# Each entry is a corpus name followed by a count in parentheses
# (presumably sentence pairs -- TODO confirm). The folded scalar (>-) joins
# the lines below with single spaces, giving one space-separated string.
training-data:
  eng-ukr: >-
    Tatoeba-train (2195284)
    wiki.aa.ukr-eng (960713)
    wiki.ab.ukr-eng (960450)
    wiki.ac.ukr-eng (960970)
    wiki.ad.ukr-eng (961148)
    wiki.ae.ukr-eng (961007)
    wiki.af.ukr-eng (960701)
    wiki.ag.ukr-eng (960804)
    wiki.ah.ukr-eng (960881)
    wiki.ai.ukr-eng (960638)
    wiki.aj.ukr-eng (960902)
    wiki.ak.ukr-eng (960695)
    wiki.al.ukr-eng (960737)
    wiki.am.ukr-eng (960473)
    wiki.an.ukr-eng (960731)
    wiki.ao.ukr-eng (960602)
    wiki.ap.ukr-eng (202348)
    wikibooks.aa.ukr-eng (62199)
    wikinews.aa.ukr-eng (24777)
    wikiquote.aa.ukr-eng (59079)
    wikisource.aa.ukr-eng (633093)

validation-data:
  eng-ukr: Tatoeba-dev, 157764
  total-size-shuffled: 157764
  devset-selected: top 5000 lines of Tatoeba-dev.src.shuffled!

# Test set sizes, written as "N/M" (presumably sentences/words -- TODO confirm).
test-data:
  Tatoeba-test.eng-ukr: 10000/60756

# Evaluation results, keyed by the test set name used under test-data.
BLEU-scores:
  Tatoeba-test.eng-ukr: 38.9
chr-F-scores:
  Tatoeba-test.eng-ukr: 0.597