From 050f7b1ad13b5c165138bbd41e62d836417ef9ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stig-Arne=20Gr=C3=B6nroos?= Date: Mon, 11 Mar 2024 16:34:47 +0200 Subject: [PATCH] Demo configs. hydra-L-train-config.yml is a modified version of the training config, with sentencepiece transform added. server.json is required by the backend. expected_files lists the expected directory structure, with model and vocab files that are too large to include in this repo. --- tools/demo/configs/expected_files | 26 +++ tools/demo/configs/hydra-L-train-config.yml | 187 ++++++++++++++++++++ tools/demo/configs/server.json | 165 +++++++++++++++++ 3 files changed, 378 insertions(+) create mode 100644 tools/demo/configs/expected_files create mode 100644 tools/demo/configs/hydra-L-train-config.yml create mode 100644 tools/demo/configs/server.json diff --git a/tools/demo/configs/expected_files b/tools/demo/configs/expected_files new file mode 100644 index 00000000..ff79d710 --- /dev/null +++ b/tools/demo/configs/expected_files @@ -0,0 +1,26 @@ +./vocabs +./vocabs/mammoth-hydra.64k.spm.vocab +./vocabs/mammoth-hydra.64k.spm.model +./server.json +./hydra-L-train-config.yml +./models +./models/hydra-L-ckpt_step_460000_generator_fr.pt +./models/hydra-L-ckpt_step_460000_generator_en.pt +./models/hydra-L-ckpt_step_460000_decoder_1_mt.pt +./models/hydra-L-ckpt_step_460000_decoder_1_ts.pt +./models/hydra-L-ckpt_step_460000_attention_bridge.pt +./models/hydra-L-ckpt_step_460000_src_embeddings_all.pt +./models/hydra-L-ckpt_step_460000_decoder_2_fr2.pt +./models/hydra-L-ckpt_step_460000_decoder_2_ru2.pt +./models/hydra-L-ckpt_step_460000_decoder_2_en2.pt +./models/hydra-L-ckpt_step_460000_tgt_embeddings_fr.pt +./models/hydra-L-ckpt_step_460000_tgt_embeddings_en.pt +./models/hydra-L-ckpt_step_460000_tgt_embeddings_ru.pt +./models/hydra-L-ckpt_step_460000_decoder_0_ru1.pt +./models/hydra-L-ckpt_step_460000_decoder_0_fr1.pt +./models/hydra-L-ckpt_step_460000_frame.pt 
+./models/hydra-L-ckpt_step_460000_decoder_1_pg.pt +./models/hydra-L-ckpt_step_460000_decoder_0_en1.pt +./models/hydra-L-ckpt_step_460000_encoder_0_all.pt +./models/hydra-L-ckpt_step_460000_generator_ru.pt +./models/hydra-L-ckpt_step_460000_decoder_1_dm.pt diff --git a/tools/demo/configs/hydra-L-train-config.yml b/tools/demo/configs/hydra-L-train-config.yml new file mode 100644 index 00000000..65010cc4 --- /dev/null +++ b/tools/demo/configs/hydra-L-train-config.yml @@ -0,0 +1,187 @@ +src_subword_model: vocabs/mammoth-hydra.64k.spm.model +tgt_subword_model: vocabs/mammoth-hydra.64k.spm.model +src_vocab: + 'all': vocabs/mammoth-hydra.64k.spm.vocab +tgt_vocab: + 'en': vocabs/mammoth-hydra.64k.spm.vocab + 'fr': vocabs/mammoth-hydra.64k.spm.vocab + 'ru': vocabs/mammoth-hydra.64k.spm.vocab + +overwrite: False +tasks: + # GPU 0:0 + defmod_en: + src_tgt: all-en + enc_sharing_group: [all] + dec_sharing_group: [en1, dm, en2] + node_gpu: 0:0 + path_src: /scratch/project_2005099/data/mammoth-hydra/codwoe/en.src.sp + path_tgt: /scratch/project_2005099/data/mammoth-hydra/codwoe/en.tgt.sp + transforms: [sentencepiece, filtertoolong] + pargen_en: + src_tgt: all-en + enc_sharing_group: [all] + dec_sharing_group: [en1, pg, en2] + node_gpu: 0:0 + path_src: /scratch/project_2005099/data/mammoth-hydra/tapaco/en.src.sp + path_tgt: /scratch/project_2005099/data/mammoth-hydra/tapaco/en.tgt.sp + transforms: [sentencepiece, filtertoolong] + texsim_en: + src_tgt: all-en + enc_sharing_group: [all] + dec_sharing_group: [en1, ts, en2] + node_gpu: 0:0 + path_src: /scratch/project_2005099/data/mammoth-hydra/wikilarge/en.src.sp + path_tgt: /scratch/project_2005099/data/mammoth-hydra/wikilarge/en.tgt.sp + transforms: [sentencepiece, filtertoolong] + translate_fr-en: + src_tgt: all-en + enc_sharing_group: [all] + dec_sharing_group: [en1, mt, en2] + node_gpu: 0:0 + path_src: /scratch/project_2005099/data/mammoth-hydra/unpc/UNv1.0.6way.fr.sp + path_tgt: 
/scratch/project_2005099/data/mammoth-hydra/unpc/UNv1.0.6way.en.sp + transforms: [sentencepiece, filtertoolong] + translate_ru-en: + src_tgt: all-en + enc_sharing_group: [all] + dec_sharing_group: [en1, mt, en2] + node_gpu: 0:0 + path_src: /scratch/project_2005099/data/mammoth-hydra/unpc/UNv1.0.6way.ru.sp + path_tgt: /scratch/project_2005099/data/mammoth-hydra/unpc/UNv1.0.6way.en.sp + transforms: [sentencepiece, filtertoolong] + + # GPU 0:1 + defmod_fr: + src_tgt: all-fr + enc_sharing_group: [all] + dec_sharing_group: [fr1, dm, fr2] + node_gpu: 0:1 + path_src: /scratch/project_2005099/data/mammoth-hydra/codwoe/fr.src.sp + path_tgt: /scratch/project_2005099/data/mammoth-hydra/codwoe/fr.tgt.sp + transforms: [sentencepiece, filtertoolong] + pargen_fr: + src_tgt: all-fr + enc_sharing_group: [all] + dec_sharing_group: [fr1, pg, fr2] + node_gpu: 0:1 + path_src: /scratch/project_2005099/data/mammoth-hydra/tapaco/fr.src.sp + path_tgt: /scratch/project_2005099/data/mammoth-hydra/tapaco/fr.tgt.sp + transforms: [sentencepiece, filtertoolong] + texsim_fr: + src_tgt: all-fr + enc_sharing_group: [all] + dec_sharing_group: [fr1, ts, fr2] + node_gpu: 0:1 + path_src: /scratch/project_2005099/data/mammoth-hydra/wikilarge/fr.src.sp + path_tgt: /scratch/project_2005099/data/mammoth-hydra/wikilarge/fr.tgt.sp + transforms: [sentencepiece, filtertoolong] + translate_en-fr: + src_tgt: all-fr + enc_sharing_group: [all] + dec_sharing_group: [fr1, mt, fr2] + node_gpu: 0:1 + path_src: /scratch/project_2005099/data/mammoth-hydra/unpc/UNv1.0.6way.en.sp + path_tgt: /scratch/project_2005099/data/mammoth-hydra/unpc/UNv1.0.6way.fr.sp + transforms: [sentencepiece, filtertoolong] + translate_ru-fr: + src_tgt: all-fr + enc_sharing_group: [all] + dec_sharing_group: [fr1, mt, fr2] + node_gpu: 0:1 + path_src: /scratch/project_2005099/data/mammoth-hydra/unpc/UNv1.0.6way.ru.sp + path_tgt: /scratch/project_2005099/data/mammoth-hydra/unpc/UNv1.0.6way.fr.sp + transforms: [sentencepiece, filtertoolong] + + # 
GPU 0:2 + defmod_ru: + src_tgt: all-ru + enc_sharing_group: [all] + dec_sharing_group: [ru1, dm, ru2] + node_gpu: 0:2 + path_src: /scratch/project_2005099/data/mammoth-hydra/codwoe/ru.src.sp + path_tgt: /scratch/project_2005099/data/mammoth-hydra/codwoe/ru.tgt.sp + transforms: [sentencepiece, filtertoolong] + pargen_ru: + src_tgt: all-ru + enc_sharing_group: [all] + dec_sharing_group: [ru1, pg, ru2] + node_gpu: 0:2 + path_src: /scratch/project_2005099/data/mammoth-hydra/tapaco/ru.src.sp + path_tgt: /scratch/project_2005099/data/mammoth-hydra/tapaco/ru.tgt.sp + transforms: [sentencepiece, filtertoolong] + texsim_ru: + src_tgt: all-ru + enc_sharing_group: [all] + dec_sharing_group: [ru1, ts, ru2] + node_gpu: 0:2 + path_src: /scratch/project_2005099/data/mammoth-hydra/ruadapt/ru.src.sp + path_tgt: /scratch/project_2005099/data/mammoth-hydra/ruadapt/ru.tgt.sp + transforms: [sentencepiece, filtertoolong] + translate_fr-ru: + src_tgt: all-ru + enc_sharing_group: [all] + dec_sharing_group: [ru1, mt, ru2] + node_gpu: 0:2 + path_src: /scratch/project_2005099/data/mammoth-hydra/unpc/UNv1.0.6way.fr.sp + path_tgt: /scratch/project_2005099/data/mammoth-hydra/unpc/UNv1.0.6way.ru.sp + transforms: [sentencepiece, filtertoolong] + translate_en-ru: + src_tgt: all-ru + enc_sharing_group: [all] + dec_sharing_group: [ru1, mt, ru2] + node_gpu: 0:2 + path_src: /scratch/project_2005099/data/mammoth-hydra/unpc/UNv1.0.6way.en.sp + path_tgt: /scratch/project_2005099/data/mammoth-hydra/unpc/UNv1.0.6way.ru.sp + transforms: [sentencepiece, filtertoolong] + + +### Transform related opts: +#### Filter +src_seq_length: 200 +tgt_seq_length: 200 +#### Bart +src_subword_type: sentencepiece +tgt_subword_type: sentencepiece +mask_ratio: 0.2 +replace_length: 1 + +# silently ignore empty lines in the data +skip_empty_level: silent + +batch_size: 4096 +batch_type: tokens +normalization: tokens +valid_batch_size: 4096 +max_generator_batches: 2 +src_vocab_size: 100000 +tgt_vocab_size: 100000 +encoder_type: 
transformer +decoder_type: transformer +model_dim: 512 +transformer_ff: 2048 +heads: 8 +enc_layers: [12] +dec_layers: [2, 2, 2] +dropout: 0.1 +label_smoothing: 0.1 +param_init: 0.0 +param_init_glorot: true +position_encoding: true +valid_steps: 10000 +warmup_steps: 10000 +report_every: 100 +save_checkpoint_steps: 10000 +keep_checkpoint: -1 +accum_count: 1 +optim: adafactor +decay_method: none +learning_rate: 3.0 +max_grad_norm: 0.0 +seed: 3435 +model_type: text +save_all_gpus: false + +world_size: 3 +gpu_ranks: [0, 1, 2] +node_rank: 0 diff --git a/tools/demo/configs/server.json b/tools/demo/configs/server.json new file mode 100644 index 00000000..9b19972b --- /dev/null +++ b/tools/demo/configs/server.json @@ -0,0 +1,165 @@ +{ + "models_root": "models/", + "models": [ + { + "id": 0, + "opts": { + "config": "hydra-L-train-config.yml", + "model": "models/hydra-L-ckpt_step_460000", + "transforms": "sentencepiece", + "task_id": "defmod_en" + }, + "models": ["hydra-L-ckpt_step_460000"] + }, + { + "id": 1, + "opts": { + "config": "hydra-L-train-config.yml", + "model": "models/hydra-L-ckpt_step_460000", + "transforms": "sentencepiece", + "task_id": "defmod_fr" + }, + "models": ["hydra-L-ckpt_step_460000"] + }, + { + "id": 2, + "opts": { + "config": "hydra-L-train-config.yml", + "model": "models/hydra-L-ckpt_step_460000", + "transforms": "sentencepiece", + "task_id": "defmod_ru" + }, + "models": ["hydra-L-ckpt_step_460000"] + }, + { + "id": 3, + "opts": { + "config": "hydra-L-train-config.yml", + "model": "models/hydra-L-ckpt_step_460000", + "transforms": "sentencepiece", + "task_id": "pargen_en" + }, + "models": ["hydra-L-ckpt_step_460000"] + }, + { + "id": 4, + "opts": { + "config": "hydra-L-train-config.yml", + "model": "models/hydra-L-ckpt_step_460000", + "transforms": "sentencepiece", + "task_id": "pargen_fr" + }, + "models": ["hydra-L-ckpt_step_460000"] + }, + { + "id": 5, + "opts": { + "config": "hydra-L-train-config.yml", + "model": 
"models/hydra-L-ckpt_step_460000", + "transforms": "sentencepiece", + "task_id": "pargen_ru" + }, + "models": ["hydra-L-ckpt_step_460000"] + }, + { + "id": 6, + "opts": { + "config": "hydra-L-train-config.yml", + "model": "models/hydra-L-ckpt_step_460000", + "transforms": "sentencepiece", + "task_id": "texsim_en" + }, + "models": ["hydra-L-ckpt_step_460000"] + }, + { + "id": 7, + "opts": { + "config": "hydra-L-train-config.yml", + "model": "models/hydra-L-ckpt_step_460000", + "transforms": "sentencepiece", + "task_id": "texsim_fr" + }, + "models": ["hydra-L-ckpt_step_460000"] + }, + { + "id": 8, + "opts": { + "config": "hydra-L-train-config.yml", + "model": "models/hydra-L-ckpt_step_460000", + "transforms": "sentencepiece", + "task_id": "texsim_ru" + }, + "models": ["hydra-L-ckpt_step_460000"] + }, + { + "id": 9, + "opts": { + "config": "hydra-L-train-config.yml", + "model": "models/hydra-L-ckpt_step_460000", + "transforms": "sentencepiece", + "task_id": "texsim_fr" + }, + "models": ["hydra-L-ckpt_step_460000"] + }, + { + "id": 10, + "opts": { + "config": "hydra-L-train-config.yml", + "model": "models/hydra-L-ckpt_step_460000", + "transforms": "sentencepiece", + "task_id": "translate_fr-en" + }, + "models": ["hydra-L-ckpt_step_460000"] + }, + { + "id": 11, + "opts": { + "config": "hydra-L-train-config.yml", + "model": "models/hydra-L-ckpt_step_460000", + "transforms": "sentencepiece", + "task_id": "translate_ru-en" + }, + "models": ["hydra-L-ckpt_step_460000"] + }, + { + "id": 12, + "opts": { + "config": "hydra-L-train-config.yml", + "model": "models/hydra-L-ckpt_step_460000", + "transforms": "sentencepiece", + "task_id": "translate_en-fr" + }, + "models": ["hydra-L-ckpt_step_460000"] + }, + { + "id": 13, + "opts": { + "config": "hydra-L-train-config.yml", + "model": "models/hydra-L-ckpt_step_460000", + "transforms": "sentencepiece", + "task_id": "translate_ru-fr" + }, + "models": ["hydra-L-ckpt_step_460000"] + }, + { + "id": 14, + "opts": { + "config": 
"hydra-L-train-config.yml", + "model": "models/hydra-L-ckpt_step_460000", + "transforms": "sentencepiece", + "task_id": "translate_fr-ru" + }, + "models": ["hydra-L-ckpt_step_460000"] + }, + { + "id": 15, + "opts": { + "config": "hydra-L-train-config.yml", + "model": "models/hydra-L-ckpt_step_460000", + "transforms": "sentencepiece", + "task_id": "translate_en-ru" + }, + "models": ["hydra-L-ckpt_step_460000"] + } + ] +}