Skip to content

Commit

Permalink
latest models added
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgtied committed Jul 23, 2022
1 parent 48b32bb commit e4b140d
Show file tree
Hide file tree
Showing 67 changed files with 2,478 additions and 9 deletions.
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -512,6 +512,9 @@ upload-models:
swift list ${MODEL_CONTAINER} > index.txt
swift upload ${MODEL_CONTAINER} index.txt
rm -f index.txt
for m in ${RELEASED_MODELS}; do \
rm -f ${MODEL_RELEASEDIR}/$$m/*.zip; \
done

.PHONY: upload-model-index
upload-model-index:
Expand Down
1 change: 1 addition & 0 deletions models/bat-deu/opusTCv20210807_transformer-big.yml
1 change: 1 addition & 0 deletions models/bat-gmq/opusTCv20210807_transformer-big.yml
66 changes: 66 additions & 0 deletions models/bat-gmq/opusTCv20210807_transformer-big_2022-07-23.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
release: bat-gmq/opusTCv20210807_transformer-big_2022-07-23.zip
release-date: 2022-07-23
dataset-name: opusTCv20210807
modeltype: transformer-big
vocabulary:
source: opusTCv20210807.spm32k-spm32k.vocab.yml
target: opusTCv20210807.spm32k-spm32k.vocab.yml
pre-processing: normalization + SentencePiece (spm32k,spm32k)
subwords:
source: spm32k
target: spm32k
subword-models:
source: source.spm
target: target.spm
source-languages:
- lav
- lit
- ltg
target-languages:
- dan
- isl
- swe
raw-source-languages:
- lav
- lit
- ltg
raw-target-languages:
- dan
- isl
- swe
use-target-labels:
training-data:
lav-dan: Tatoeba-train-v2021-08-07.dan-lav.strict (16721223)
lav-isl: Tatoeba-train-v2021-08-07.isl-lav.strict (1845675)
lav-swe: Tatoeba-train-v2021-08-07.lav-swe.strict (17135312)
lit-swe: Tatoeba-train-v2021-08-07.lit-swe.strict (19167587)
ltg-dan: Tatoeba-train-v2021-08-07.dan-ltg.strict (5352)
ltg-isl: Tatoeba-train-v2021-08-07.isl-ltg.strict (4740)
ltg-swe: Tatoeba-train-v2021-08-07.ltg-swe.strict (259)
validation-data:
dan-lav: Tatoeba-dev-v2021-08-07, 999
isl-lav: Tatoeba-dev-v2021-08-07, 997
lav-swe: Tatoeba-dev-v2021-08-07, 1000
lit-swe: Tatoeba-dev-v2021-08-07, 1000
dan-ltg: Tatoeba-dev-v2021-08-07, 1
isl-ltg: Tatoeba-dev-v2021-08-07, 3
total-size-shuffled: 3972
devset-selected: top 3972 lines of Tatoeba-dev-v2021-08-07.src.shuffled
test-data:
Tatoeba-test-v2021-08-07.lav-dan: 1/22
Tatoeba-test-v2021-08-07.lav-isl: 1/22
Tatoeba-test-v2021-08-07.lav-swe: 1/22
Tatoeba-test-v2021-08-07.lit-swe: 1/6
Tatoeba-test-v2021-08-07.multi-multi: 4/72
BLEU-scores:
Tatoeba-test-v2021-08-07.lav-dan: 11.6
Tatoeba-test-v2021-08-07.lav-isl: 35.8
Tatoeba-test-v2021-08-07.lav-swe: 19.7
Tatoeba-test-v2021-08-07.lit-swe: 100.0
Tatoeba-test-v2021-08-07.multi-multi: 20.1
chr-F-scores:
Tatoeba-test-v2021-08-07.lav-dan: 0.29314
Tatoeba-test-v2021-08-07.lav-isl: 0.46242
Tatoeba-test-v2021-08-07.lav-swe: 0.51192
Tatoeba-test-v2021-08-07.lit-swe: 1.00000
Tatoeba-test-v2021-08-07.multi-multi: 0.46168
1 change: 1 addition & 0 deletions models/bat-itc/opusTCv20210807_transformer-big.yml
105 changes: 105 additions & 0 deletions models/bat-itc/opusTCv20210807_transformer-big_2022-07-23.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
release: bat-itc/opusTCv20210807_transformer-big_2022-07-23.zip
release-date: 2022-07-23
dataset-name: opusTCv20210807
modeltype: transformer-big
vocabulary:
source: opusTCv20210807.spm32k-spm32k.vocab.yml
target: opusTCv20210807.spm32k-spm32k.vocab.yml
pre-processing: normalization + SentencePiece (spm32k,spm32k)
subwords:
source: spm32k
target: spm32k
subword-models:
source: source.spm
target: target.spm
source-languages:
- lav
- lit
- ltg
- prg_Latn
- sgs
target-languages:
- cat
- fra
- glg
- ita
- lat_Latn
- pob
- por
- spa
raw-source-languages:
- lav
- lit
- ltg
- prg
- sgs
raw-target-languages:
- cat
- fra
- glg
- ita
- lat
- pob
- por
- spa
use-target-labels:
validation-data:
fra-lav: Tatoeba-dev-v2021-08-07, 998
ita-lav: Tatoeba-dev-v2021-08-07, 1000
lav-spa: Tatoeba-dev-v2021-08-07, 998
cat-lit: Tatoeba-dev-v2021-08-07, 1000
fra-lit: Tatoeba-dev-v2021-08-07, 934
glg-lit: Tatoeba-dev-v2021-08-07, 1000
ita-lit: Tatoeba-dev-v2021-08-07, 942
lit-pob: Tatoeba-dev-v2021-08-07, 90
lit-por: Tatoeba-dev-v2021-08-07, 910
lit-spa: Tatoeba-dev-v2021-08-07, 943
fra-ltg: Tatoeba-dev-v2021-08-07, 2
ltg-spa: Tatoeba-dev-v2021-08-07, 2
total-size-shuffled: 8791
devset-selected: top 5000 lines of Tatoeba-dev-v2021-08-07.src.shuffled
test-data:
Tatoeba-test-v2021-08-07.lav-fra: 36/191
Tatoeba-test-v2021-08-07.lav-ita: 21/108
Tatoeba-test-v2021-08-07.lav-spa: 58/344
Tatoeba-test-v2021-08-07.lit-cat: 1/6
Tatoeba-test-v2021-08-07.lit-fra: 142/1036
Tatoeba-test-v2021-08-07.lit-glg: 1/6
Tatoeba-test-v2021-08-07.lit-ita: 224/1505
Tatoeba-test-v2021-08-07.lit-lat: 4/17
Tatoeba-test-v2021-08-07.lit-por: 73/514
Tatoeba-test-v2021-08-07.lit-spa: 454/2751
Tatoeba-test-v2021-08-07.multi-multi: 1012/6466
Tatoeba-test-v2021-08-07.prg-fra: 13/104
Tatoeba-test-v2021-08-07.prg-spa: 24/175
Tatoeba-test-v2021-08-07.sgs-spa: 2/5
BLEU-scores:
Tatoeba-test-v2021-08-07.lav-fra: 30.2
Tatoeba-test-v2021-08-07.lav-ita: 26.9
Tatoeba-test-v2021-08-07.lav-spa: 20.8
Tatoeba-test-v2021-08-07.lit-cat: 5.5
Tatoeba-test-v2021-08-07.lit-fra: 19.9
Tatoeba-test-v2021-08-07.lit-glg: 9.7
Tatoeba-test-v2021-08-07.lit-ita: 18.0
Tatoeba-test-v2021-08-07.lit-lat: 2.9
Tatoeba-test-v2021-08-07.lit-por: 22.5
Tatoeba-test-v2021-08-07.lit-spa: 25.5
Tatoeba-test-v2021-08-07.multi-multi: 22.8
Tatoeba-test-v2021-08-07.prg-fra: 0.8
Tatoeba-test-v2021-08-07.prg-spa: 0.4
Tatoeba-test-v2021-08-07.sgs-spa: 0.0
chr-F-scores:
Tatoeba-test-v2021-08-07.lav-fra: 0.46469
Tatoeba-test-v2021-08-07.lav-ita: 0.41275
Tatoeba-test-v2021-08-07.lav-spa: 0.45169
Tatoeba-test-v2021-08-07.lit-cat: 0.21680
Tatoeba-test-v2021-08-07.lit-fra: 0.41453
Tatoeba-test-v2021-08-07.lit-glg: 0.13250
Tatoeba-test-v2021-08-07.lit-ita: 0.40197
Tatoeba-test-v2021-08-07.lit-lat: 0.12399
Tatoeba-test-v2021-08-07.lit-por: 0.41762
Tatoeba-test-v2021-08-07.lit-spa: 0.45980
Tatoeba-test-v2021-08-07.multi-multi: 0.43709
Tatoeba-test-v2021-08-07.prg-fra: 0.12353
Tatoeba-test-v2021-08-07.prg-spa: 0.13449
Tatoeba-test-v2021-08-07.sgs-spa: 0.11209
1 change: 1 addition & 0 deletions models/cel-itc/opusTCv20210807_transformer-big.yml
1 change: 1 addition & 0 deletions models/deu-fra/opusTCv20210807_transformer-big.yml
1 change: 1 addition & 0 deletions models/deu-sqi/opusTCv20210807_transformer-big.yml
34 changes: 34 additions & 0 deletions models/deu-sqi/opusTCv20210807_transformer-big_2022-07-23.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
release: deu-sqi/opusTCv20210807_transformer-big_2022-07-23.zip
release-date: 2022-07-23
dataset-name: opusTCv20210807
modeltype: transformer-big
vocabulary:
source: opusTCv20210807.spm32k-spm32k.vocab.yml
target: opusTCv20210807.spm32k-spm32k.vocab.yml
pre-processing: normalization + SentencePiece (spm32k,spm32k)
subwords:
source: spm32k
target: spm32k
subword-models:
source: source.spm
target: target.spm
source-languages:
- deu
target-languages:
- sqi
raw-source-languages:
- deu
raw-target-languages:
- sqi
training-data:
deu-sqi: Tatoeba-train-v2021-08-07.deu-sqi.strict (11647962)
validation-data:
deu-sqi: Tatoeba-dev-v2021-08-07, 1000
total-size-shuffled: 1000
devset-selected: top 1000 lines of Tatoeba-dev-v2021-08-07.src.shuffled
test-data:
Tatoeba-test-v2021-08-07.deu-sqi: 1/6
BLEU-scores:
Tatoeba-test-v2021-08-07.deu-sqi: 76.0
chr-F-scores:
Tatoeba-test-v2021-08-07.deu-sqi: 0.87955
1 change: 1 addition & 0 deletions models/eus-gmq/opusTCv20210807_transformer-big.yml
42 changes: 42 additions & 0 deletions models/eus-gmq/opusTCv20210807_transformer-big_2022-07-23.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
release: eus-gmq/opusTCv20210807_transformer-big_2022-07-23.zip
release-date: 2022-07-23
dataset-name: opusTCv20210807
modeltype: transformer-big
vocabulary:
source: opusTCv20210807.spm32k-spm32k.vocab.yml
target: opusTCv20210807.spm32k-spm32k.vocab.yml
pre-processing: normalization + SentencePiece (spm32k,spm32k)
subwords:
source: spm32k
target: spm32k
subword-models:
source: source.spm
target: target.spm
source-languages:
- eus
target-languages:
- nno
- nob
raw-source-languages:
- eus
raw-target-languages:
- nno
- nob
use-target-labels:
training-data:
eus-nno: Tatoeba-train-v2021-08-07.eus-nno.strict (533000)
eus-nob: Tatoeba-train-v2021-08-07.eus-nob.strict (1484824)
validation-data:
eus-nno: Tatoeba-dev-v2021-08-07, 261
eus-nob: Tatoeba-dev-v2021-08-07, 739
total-size-shuffled: 995
devset-selected: top 995 lines of Tatoeba-dev-v2021-08-07.src.shuffled
test-data:
Tatoeba-test-v2021-08-07.eus-nor: 6/52
Tatoeba-test-v2021-08-07.eus-multi: 6/52
BLEU-scores:
Tatoeba-test-v2021-08-07.eus-nor: 28.1
Tatoeba-test-v2021-08-07.eus-multi: 28.0
chr-F-scores:
Tatoeba-test-v2021-08-07.eus-nor: 0.43894
Tatoeba-test-v2021-08-07.eus-multi: 0.43460
1 change: 1 addition & 0 deletions models/eus-itc/opusTCv20210807_transformer-big.yml
75 changes: 75 additions & 0 deletions models/eus-itc/opusTCv20210807_transformer-big_2022-07-23.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
release: eus-itc/opusTCv20210807_transformer-big_2022-07-23.zip
release-date: 2022-07-23
dataset-name: opusTCv20210807
modeltype: transformer-big
vocabulary:
source: opusTCv20210807.spm32k-spm32k.vocab.yml
target: opusTCv20210807.spm32k-spm32k.vocab.yml
pre-processing: normalization + SentencePiece (spm32k,spm32k)
subwords:
source: spm32k
target: spm32k
subword-models:
source: source.spm
target: target.spm
source-languages:
- eus
target-languages:
- fra
- ita
- lat_Latn
- mwl
- pob
- por
- spa
raw-source-languages:
- eus
raw-target-languages:
- fra
- ita
- lat
- mwl
- pob
- por
- spa
use-target-labels:
training-data:
eus-fra: Tatoeba-train-v2021-08-07.eus-fra.strict (1535891)
eus-ita: Tatoeba-train-v2021-08-07.eus-ita.strict (3136335)
eus-mwl: Tatoeba-train-v2021-08-07.eus-mwl.strict (2763)
eus-pob: Tatoeba-train-v2021-08-07.eus-pob.strict (750313)
eus-por: Tatoeba-train-v2021-08-07.eus-por.strict (3047546)
eus-spa: Tatoeba-train-v2021-08-07.eus-spa.strict (13354717)
validation-data:
eus-fra: Tatoeba-dev-v2021-08-07, 876
eus-ita: Tatoeba-dev-v2021-08-07, 1000
eus-mwl: Tatoeba-dev-v2021-08-07, 1000
eus-pob: Tatoeba-dev-v2021-08-07, 186
eus-por: Tatoeba-dev-v2021-08-07, 814
eus-spa: Tatoeba-dev-v2021-08-07, 992
total-size-shuffled: 4867
devset-selected: top 4867 lines of Tatoeba-dev-v2021-08-07.src.shuffled
test-data:
Tatoeba-test-v2021-08-07.eus-fra: 144/1013
Tatoeba-test-v2021-08-07.eus-ita: 62/412
Tatoeba-test-v2021-08-07.eus-lat: 1/16
Tatoeba-test-v2021-08-07.eus-mwl: 1/4
Tatoeba-test-v2021-08-07.eus-por: 1/6
Tatoeba-test-v2021-08-07.eus-spa: 1850/12465
Tatoeba-test-v2021-08-07.eus-multi: 2058/13904
BLEU-scores:
Tatoeba-test-v2021-08-07.eus-fra: 35.1
Tatoeba-test-v2021-08-07.eus-ita: 48.0
Tatoeba-test-v2021-08-07.eus-lat: 2.1
Tatoeba-test-v2021-08-07.eus-mwl: 9.7
Tatoeba-test-v2021-08-07.eus-por: 100.0
Tatoeba-test-v2021-08-07.eus-spa: 49.5
Tatoeba-test-v2021-08-07.eus-multi: 49.3
chr-F-scores:
Tatoeba-test-v2021-08-07.eus-fra: 0.55642
Tatoeba-test-v2021-08-07.eus-ita: 0.65291
Tatoeba-test-v2021-08-07.eus-lat: 0.13899
Tatoeba-test-v2021-08-07.eus-mwl: 0.23196
Tatoeba-test-v2021-08-07.eus-por: 1.00000
Tatoeba-test-v2021-08-07.eus-spa: 0.67352
Tatoeba-test-v2021-08-07.eus-multi: 0.67092
1 change: 1 addition & 0 deletions models/fas-gmq/opusTCv20210807_transformer-big.yml
Loading

0 comments on commit e4b140d

Please sign in to comment.