From 8d1686b7f63b42f8e3547ab39f5a977df072be72 Mon Sep 17 00:00:00 2001
From: Ona De Gibert Bonet
Date: Tue, 1 Oct 2024 10:16:28 +0300
Subject: [PATCH] added configuration for korean-english

---
Review note: the added lines below were edited after `git format-patch`
(comment fixes, lowercase booleans, final newline), so the blob id on the
`index` line predates those edits.

 configs/hplt/config.hplt.kor-eng.yml | 39 ++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)
 create mode 100644 configs/hplt/config.hplt.kor-eng.yml

diff --git a/configs/hplt/config.hplt.kor-eng.yml b/configs/hplt/config.hplt.kor-eng.yml
new file mode 100644
index 000000000..4ff87ecbd
--- /dev/null
+++ b/configs/hplt/config.hplt.kor-eng.yml
@@ -0,0 +1,39 @@
+####
+# Production config for the Korean-English (kor-eng) language pair
+# Change language pair, experiment name, datasets and other settings if needed
+# Training low-resource languages might require more tuning of pipeline/training/configs
+###
+
+experiment:
+  dirname: hplt
+  name: kor-eng
+  langpairs:
+    - ko-en
+
+  # URL to the OPUS-MT model to use as the teacher
+  opusmt-teacher: "https://object.pouta.csc.fi/Tatoeba-MT-models/kor-eng/opusTCv20210807-sepvoc_transformer-big_2022-07-28.zip"
+
+  # Specify whether the teacher and the student are one2many
+  one2many-teacher: false
+  one2many-student: false
+
+  # URL to the OPUS-MT model to use as the backward model (empty: none set)
+  opusmt-backward: ""
+
+  teacher-ensemble: 1
+
+  split-length: 20000000
+  spm-sample-size: 2000000
+
+  best-model: perplexity
+
+  opusfilter:
+    config: default
+
+datasets:
+  train:
+    - tc_Tatoeba-Challenge-v2023-09-26
+  devtest:
+    - flores_dev
+  test:
+    - flores_devtest