From 16488501bb35f55641b56fca66c7f8f7b34a5af8 Mon Sep 17 00:00:00 2001
From: JS Task-specific encoders and decoders
+In this example, we create a model with encoders and decoders **unshared** for the specified languages. This is defined by `enc_sharing_group` and `dec_sharing_group`.
+Note that the configs expect you to have access to 2 GPUs.
+
```yaml
+# TRAINING CONFIG
+world_size: 2
+gpu_ranks: [0, 1]
+
+batch_type: tokens
+batch_size: 4096
+
+# INPUT/OUTPUT VOCABULARY CONFIG
+
+src_vocab:
+ bg: vocab/opusTC.mul.vocab.onmt
+ cs: vocab/opusTC.mul.vocab.onmt
+ en: vocab/opusTC.mul.vocab.onmt
+tgt_vocab:
+ cs: vocab/opusTC.mul.vocab.onmt
+ en: vocab/opusTC.mul.vocab.onmt
+
+# MODEL CONFIG
+
+model_dim: 512
+
tasks:
train_bg-en:
src_tgt: bg-en
enc_sharing_group: [bg]
dec_sharing_group: [en]
node_gpu: "0:0"
- path_src: /path/to/train.bg-en.bg
- path_tgt: /path/to/train.bg-en.en
+ path_src: europarl_data/encoded/train.bg-en.bg.sp
+ path_tgt: europarl_data/encoded/train.bg-en.en.sp
train_cs-en:
src_tgt: cs-en
enc_sharing_group: [cs]
dec_sharing_group: [en]
node_gpu: "0:1"
- path_src: /path/to/train.cs-en.cs
- path_tgt: /path/to/train.cs-en.en
+ path_src: europarl_data/encoded/train.cs-en.cs.sp
+ path_tgt: europarl_data/encoded/train.cs-en.en.sp
train_en-cs:
src_tgt: en-cs
enc_sharing_group: [en]
dec_sharing_group: [cs]
node_gpu: "0:1"
- path_src: /path/to/train.cs-en.en
- path_tgt: /path/to/train.cs-en.cs
+ path_src: europarl_data/encoded/train.cs-en.en.sp
+ path_tgt: europarl_data/encoded/train.cs-en.cs.sp
enc_layers: [6]
dec_layers: [6]
@@ -58,29 +88,52 @@ dec_layers: [6]
Arbitrarily shared layers in encoders and task-specific decoders
+The training and vocab config is the same as in the previous example.
+
```yaml
+# TRAINING CONFIG
+world_size: 2
+gpu_ranks: [0, 1]
+
+batch_type: tokens
+batch_size: 4096
+
+# INPUT/OUTPUT VOCABULARY CONFIG
+
+src_vocab:
+ bg: vocab/opusTC.mul.vocab.onmt
+ cs: vocab/opusTC.mul.vocab.onmt
+ en: vocab/opusTC.mul.vocab.onmt
+tgt_vocab:
+ cs: vocab/opusTC.mul.vocab.onmt
+ en: vocab/opusTC.mul.vocab.onmt
+
+# MODEL CONFIG
+
+model_dim: 512
+
tasks:
train_bg-en:
src_tgt: bg-en
enc_sharing_group: [bg, all]
dec_sharing_group: [en]
node_gpu: "0:0"
- path_src: /path/to/train.bg-en.bg
- path_tgt: /path/to/train.bg-en.en
+ path_src: europarl_data/encoded/train.bg-en.bg.sp
+ path_tgt: europarl_data/encoded/train.bg-en.en.sp
train_cs-en:
src_tgt: cs-en
enc_sharing_group: [cs, all]
dec_sharing_group: [en]
node_gpu: "0:1"
- path_src: /path/to/train.cs-en.cs
- path_tgt: /path/to/train.cs-en.en
+ path_src: europarl_data/encoded/train.cs-en.cs.sp
+ path_tgt: europarl_data/encoded/train.cs-en.en.sp
train_en-cs:
src_tgt: en-cs
enc_sharing_group: [en, all]
dec_sharing_group: [cs]
node_gpu: "0:1"
- path_src: /path/to/train.cs-en.en
- path_tgt: /path/to/train.cs-en.cs
+ path_src: europarl_data/encoded/train.cs-en.en.sp
+ path_tgt: europarl_data/encoded/train.cs-en.cs.sp
enc_layers: [4, 4]
dec_layers: [4]
@@ -90,61 +143,71 @@ dec_layers: [4]
Non-modular multilingual system
+In this example, we share the input/output vocabulary over all languages. Hence, we define a vocabulary for an `all` language, that we use in the definition of the model.
+
```yaml
+# TRAINING CONFIG
+world_size: 2
+gpu_ranks: [0, 1]
+
+batch_type: tokens
+batch_size: 4096
+
+# INPUT/OUTPUT VOCABULARY CONFIG
+
+src_vocab:
+ all: vocab/opusTC.mul.vocab.onmt
+tgt_vocab:
+ all: vocab/opusTC.mul.vocab.onmt
+
+# MODEL CONFIG
+
+model_dim: 512
+
tasks:
train_bg-en:
src_tgt: all-all
- enc_sharing_group: [all]
- dec_sharing_group: [all]
+ enc_sharing_group: [shared_enc]
+ dec_sharing_group: [shared_dec]
node_gpu: "0:0"
- path_src: /path/to/train.bg-en.bg
- path_tgt: /path/to/train.bg-en.en
+ path_src: europarl_data/encoded/train.bg-en.bg.sp
+ path_tgt: europarl_data/encoded/train.bg-en.en.sp
train_cs-en:
src_tgt: all-all
- enc_sharing_group: [all]
- dec_sharing_group: [all]
+ enc_sharing_group: [shared_enc]
+ dec_sharing_group: [shared_dec]
node_gpu: "0:1"
- path_src: /path/to/train.cs-en.cs
- path_tgt: /path/to/train.cs-en.en
+ path_src: europarl_data/encoded/train.cs-en.cs.sp
+ path_tgt: europarl_data/encoded/train.cs-en.en.sp
train_en-cs:
src_tgt: all-all
- enc_sharing_group: [all]
- dec_sharing_group: [all]
+ enc_sharing_group: [shared_enc]
+ dec_sharing_group: [shared_dec]
node_gpu: "0:1"
- path_src: /path/to/train.cs-en.en
- path_tgt: /path/to/train.cs-en.cs
+ path_src: europarl_data/encoded/train.cs-en.en.sp
+ path_tgt: europarl_data/encoded/train.cs-en.cs.sp
enc_layers: [6]
dec_layers: [6]
```
2. Training Command:config.yaml file specifies the desired sharing scheme.
The training can be run on a single GPU in which case the wrapper wouldn’t be necessary. In this case, you can train with the following command.
+python -u $MAMMOTH/train.py -config $CONFIG
+
After training, use the following command to test the model:
python3 -u $MAMMOTH/translate.py \
--config $CONFIG \
diff --git a/examples/train_mammoth_101.html b/examples/train_mammoth_101.html
index 8d154029..7c417d9c 100644
--- a/examples/train_mammoth_101.html
+++ b/examples/train_mammoth_101.html
@@ -38,7 +38,7 @@
-
+
@@ -122,7 +122,7 @@
-
git clone https://github.com/Helsinki-NLP/mammoth.git
-cd mammoth
-pip3 install -e .
-pip3 install sentencepiece==0.1.97 sacrebleu==2.3.1
+pip install mammoth-nlp
Check out the installation guide to install in specific clusters.
diff --git a/modular_model.html b/modular_model.html
index 503c336d..cb795a79 100644
--- a/modular_model.html
+++ b/modular_model.html
@@ -114,7 +114,7 @@
UNPC consists of manually translated UN documents from the last 25 years (1990 to 2014) for the six official UN languages, Arabic, Chinese, English, French, Russian, and Spanish. -We preprocess the data. You can download the processed data by:
-wget https://mammoth-share.a3s.fi/unpc.tar
-
Or you can use the scripts provided by the tarball to process the data yourself.
-For references, please cite this reference: Ziemski, M., Junczys-Dowmunt, M., and Pouliquen, B., (2016), The United Nations Parallel Corpus, Language Resources and Evaluation (LREC’16), Portorož, Slovenia, May 2016.
-Before running these scripts, make sure that you have installed Mamooth, which includes the dependencies required below.
+In the Quickstart tutorial, we assume that you will download and preprocess the Europarl data by following the steps below.
Europarl parallel corpus is a multilingual resource extracted from European Parliament proceedings and contains texts in 21 European languages. Download the Release v7 - a further expanded and improved version of the Europarl corpus on 15 May 2012 - from the original website or download the processed data by us:
wget https://mammoth101.a3s.fi/europarl.tar.gz
+mkdir europarl_data
+tar -xvzf europarl.tar.gz -C europarl_data
+
Note that the extracted dataset will require around 30GB of disk space. Alternatively, you can download only the data for the three example languages (666M).
+wget https://mammoth101.a3s.fi/europarl-3langs.tar.gz
+mkdir europarl_data
+tar -xvzf europarl-3langs.tar.gz -C europarl_data
We use a SentencePiece model trained on OPUS Tatoeba Challenge data with 64k vocabulary size. Download the SentencePiece model and the vocabulary:
+We use a SentencePiece tokenizer trained on OPUS Tatoeba Challenge data with 64k vocabulary size. Download the SentencePiece model and the vocabulary:
# Download the SentencePiece model
wget https://mammoth101.a3s.fi/opusTC.mul.64k.spm
# Download the vocabulary
wget https://mammoth101.a3s.fi/opusTC.mul.vocab.onmt
+
+mkdir vocab
+mv opusTC.mul.64k.spm vocab/.
+mv opusTC.mul.vocab.onmt vocab/.
If you would like to create and use a custom sentencepiece tokenizer, take a look at the OPUS tutorial below.
Then, read parallel text data, processes it, and generate output files for training and validation sets. +
Then, read parallel text data, processes it, and generates output files for training and validation sets. Here’s a high-level summary of the main processing steps. For each language in ‘langs,’
read parallel data files.
shuffle the data randomly.
tokenizes the text using SentencePiece and writes the tokenized data to separate output files for training and validation sets.
We use a positional argument ‘lang’ that can accept one or more values, for specifying the languages (e.g., bg
and cs
as used in Europarl) to process.
You’re free to skip this step if you directly download the processed data.
-import argparse
-import random
+import random
+import pathlib
import tqdm
import sentencepiece as sp
-parser = argparse.ArgumentParser()
-parser.add_argument('lang', nargs='+')
-langs = parser.parse_args().lang
+langs = ["bg", "cs"]
sp_path = 'vocab/opusTC.mul.64k.spm'
spm = sp.SentencePieceProcessor(model_file=sp_path)
+input_dir = 'europarl_data/europarl'
+output_dir = 'europarl_data/encoded'
+
for lang in tqdm.tqdm(langs):
- en_side_in = f'{lang}-en/europarl-v7.{lang}-en.en'
- xx_side_in = f'{lang}-en/europarl-v7.{lang}-en.{lang}'
+ en_side_in = f'{input_dir}/{lang}-en/europarl-v7.{lang}-en.en'
+ xx_side_in = f'{input_dir}/{lang}-en/europarl-v7.{lang}-en.{lang}'
with open(xx_side_in) as xx_stream, open(en_side_in) as en_stream:
data = zip(map(str.strip, xx_stream), map(str.strip, en_stream))
data = [(xx, en) for xx, en in tqdm.tqdm(data, leave=False, desc=f'read {lang}') if xx and en] # drop empty lines
random.shuffle(data)
- en_side_out = f'{lang}-en/valid.{lang}-en.en.sp'
- xx_side_out = f'{lang}-en/valid.{lang}-en.{lang}.sp'
+ pathlib.Path(output_dir).mkdir(exist_ok=True)
+ en_side_out = f'{output_dir}/valid.{lang}-en.en.sp'
+ xx_side_out = f'{output_dir}/valid.{lang}-en.{lang}.sp'
with open(xx_side_out, 'w') as xx_stream, open(en_side_out, 'w') as en_stream:
for xx, en in tqdm.tqdm(data[:1000], leave=False, desc=f'valid {lang}'):
print(*spm.encode(xx, out_type=str), file=xx_stream)
print(*spm.encode(en, out_type=str), file=en_stream)
- en_side_out = f'{lang}-en/train.{lang}-en.en.sp'
- xx_side_out = f'{lang}-en/train.{lang}-en.{lang}.sp'
+ en_side_out = f'{output_dir}/train.{lang}-en.en.sp'
+ xx_side_out = f'{output_dir}/train.{lang}-en.{lang}.sp'
with open(xx_side_out, 'w') as xx_stream, open(en_side_out, 'w') as en_stream:
for xx, en in tqdm.tqdm(data[1000:], leave=False, desc=f'train {lang}'):
print(*spm.encode(xx, out_type=str), file=xx_stream)
print(*spm.encode(en, out_type=str), file=en_stream)
+The script will produce encoded datasets in europarl_data/encoded
that you can further use for the training.
+
UNPC consists of manually translated UN documents from the last 25 years (1990 to 2014) for the six official UN languages, Arabic, Chinese, English, French, Russian, and Spanish. +We preprocess the data. You can download the processed data by:
+wget https://mammoth-share.a3s.fi/unpc.tar
+
Or you can use the scripts provided by the tarball to process the data yourself.
+For references, please cite this reference: Ziemski, M., Junczys-Dowmunt, M., and Pouliquen, B., (2016), The United Nations Parallel Corpus, Language Resources and Evaluation (LREC’16), Portorož, Slovenia, May 2016.
To get started, download the opus 100 dataset from OPUS 100
+In this guideline, we will also create our custom sentencepiece tokenizer.
+To do that, you will also need to compile a sentencepiece installation in your environment (not just pip install). +Follow the instructions on sentencepiece github.
+After that, download the opus 100 dataset from OPUS 100
SP_PATH=your/sentencepiece/path/build/src
diff --git a/quickstart.html b/quickstart.html
index f47e47ed..59053c24 100644
--- a/quickstart.html
+++ b/quickstart.html
@@ -90,9 +90,11 @@
MAMMOTH is specifically designed for distributed training of modular systems in multi-GPUs SLURM environments.
+In the example below, we will show you how to configure Mammoth to train a machine translation model with language-specific encoders and decoders.
git clone https://github.com/Helsinki-NLP/mammoth.git
-cd mammoth
-pip3 install -e .
-pip3 install sentencepiece==0.1.97 sacrebleu==2.3.1
+pip install mammoth-nlp
Check out the installation guide to install in specific clusters.
Prepare the data for training. You can refer to the data preparation tutorial for more details.
+Before running the training, we will download data for chosen pairs of languages and create a sentencepiece tokenizer for the model.
+Refer to the data preparation tutorial for more details.
+In the following steps, we assume that you already have an encoded dataset containing *.sp
file for europarl
dataset, and languages cs
and bg
. Thus, your data directory europarl_data/encoded
should contain 8 files in the format {train/valid}.{cs/bg}-en.{cs/bg/en}.sp
. If you use other datasets, please update the paths in the configurations below.
You will need to configure your training settings. -Below is a list of configuration examples:
+Mammoth uses configurations to build a new transformer model and configure your training settings, such as which modules are trained with the data from which languages.
+Below are a few examples of training configurations that will work for you out of the box in a one-node, two-GPU environment.
tasks:
+Task-specific encoders and decoders
In this example, we create a model with encoders and decoders unshared for the specified languages. This is defined by enc_sharing_group
and dec_sharing_group
.
+Note that the configs expect you to have access to 2 GPUs.
+# TRAINING CONFIG
+world_size: 2
+gpu_ranks: [0, 1]
+
+batch_type: tokens
+batch_size: 4096
+
+# INPUT/OUTPUT VOCABULARY CONFIG
+
+src_vocab:
+ bg: vocab/opusTC.mul.vocab.onmt
+ cs: vocab/opusTC.mul.vocab.onmt
+ en: vocab/opusTC.mul.vocab.onmt
+tgt_vocab:
+ cs: vocab/opusTC.mul.vocab.onmt
+ en: vocab/opusTC.mul.vocab.onmt
+
+# MODEL CONFIG
+
+model_dim: 512
+
+tasks:
train_bg-en:
src_tgt: bg-en
enc_sharing_group: [bg]
dec_sharing_group: [en]
node_gpu: "0:0"
- path_src: /path/to/train.bg-en.bg
- path_tgt: /path/to/train.bg-en.en
+ path_src: europarl_data/encoded/train.bg-en.bg.sp
+ path_tgt: europarl_data/encoded/train.bg-en.en.sp
train_cs-en:
src_tgt: cs-en
enc_sharing_group: [cs]
dec_sharing_group: [en]
node_gpu: "0:1"
- path_src: /path/to/train.cs-en.cs
- path_tgt: /path/to/train.cs-en.en
+ path_src: europarl_data/encoded/train.cs-en.cs.sp
+ path_tgt: europarl_data/encoded/train.cs-en.en.sp
train_en-cs:
src_tgt: en-cs
enc_sharing_group: [en]
dec_sharing_group: [cs]
node_gpu: "0:1"
- path_src: /path/to/train.cs-en.en
- path_tgt: /path/to/train.cs-en.cs
+ path_src: europarl_data/encoded/train.cs-en.en.sp
+ path_tgt: europarl_data/encoded/train.cs-en.cs.sp
enc_layers: [6]
dec_layers: [6]
-Arbitrarily shared layers in encoders and task-specific decoders
tasks:
+Arbitrarily shared layers in encoders and task-specific decoders
The training and vocab config is the same as in the previous example.
+# TRAINING CONFIG
+world_size: 2
+gpu_ranks: [0, 1]
+
+batch_type: tokens
+batch_size: 4096
+
+# INPUT/OUTPUT VOCABULARY CONFIG
+
+src_vocab:
+ bg: vocab/opusTC.mul.vocab.onmt
+ cs: vocab/opusTC.mul.vocab.onmt
+ en: vocab/opusTC.mul.vocab.onmt
+tgt_vocab:
+ cs: vocab/opusTC.mul.vocab.onmt
+ en: vocab/opusTC.mul.vocab.onmt
+
+# MODEL CONFIG
+
+model_dim: 512
+
+tasks:
train_bg-en:
src_tgt: bg-en
enc_sharing_group: [bg, all]
dec_sharing_group: [en]
node_gpu: "0:0"
- path_src: /path/to/train.bg-en.bg
- path_tgt: /path/to/train.bg-en.en
+ path_src: europarl_data/encoded/train.bg-en.bg.sp
+ path_tgt: europarl_data/encoded/train.bg-en.en.sp
train_cs-en:
src_tgt: cs-en
enc_sharing_group: [cs, all]
dec_sharing_group: [en]
node_gpu: "0:1"
- path_src: /path/to/train.cs-en.cs
- path_tgt: /path/to/train.cs-en.en
+ path_src: europarl_data/encoded/train.cs-en.cs.sp
+ path_tgt: europarl_data/encoded/train.cs-en.en.sp
train_en-cs:
src_tgt: en-cs
enc_sharing_group: [en, all]
dec_sharing_group: [cs]
node_gpu: "0:1"
- path_src: /path/to/train.cs-en.en
- path_tgt: /path/to/train.cs-en.cs
+ path_src: europarl_data/encoded/train.cs-en.en.sp
+ path_tgt: europarl_data/encoded/train.cs-en.cs.sp
enc_layers: [4, 4]
dec_layers: [4]
-Non-modular multilingual system
tasks:
+Non-modular multilingual system
In this example, we share the input/output vocabulary over all languages. Hence, we define a vocabulary for an all
language, that we use in the definition of the model.
+# TRAINING CONFIG
+world_size: 2
+gpu_ranks: [0, 1]
+
+batch_type: tokens
+batch_size: 4096
+
+# INPUT/OUTPUT VOCABULARY CONFIG
+
+src_vocab:
+ all: vocab/opusTC.mul.vocab.onmt
+tgt_vocab:
+ all: vocab/opusTC.mul.vocab.onmt
+
+# MODEL CONFIG
+
+model_dim: 512
+
+tasks:
train_bg-en:
src_tgt: all-all
- enc_sharing_group: [all]
- dec_sharing_group: [all]
+ enc_sharing_group: [shared_enc]
+ dec_sharing_group: [shared_dec]
node_gpu: "0:0"
- path_src: /path/to/train.bg-en.bg
- path_tgt: /path/to/train.bg-en.en
+ path_src: europarl_data/encoded/train.bg-en.bg.sp
+ path_tgt: europarl_data/encoded/train.bg-en.en.sp
train_cs-en:
src_tgt: all-all
- enc_sharing_group: [all]
- dec_sharing_group: [all]
+ enc_sharing_group: [shared_enc]
+ dec_sharing_group: [shared_dec]
node_gpu: "0:1"
- path_src: /path/to/train.cs-en.cs
- path_tgt: /path/to/train.cs-en.en
+ path_src: europarl_data/encoded/train.cs-en.cs.sp
+ path_tgt: europarl_data/encoded/train.cs-en.en.sp
train_en-cs:
src_tgt: all-all
- enc_sharing_group: [all]
- dec_sharing_group: [all]
+ enc_sharing_group: [shared_enc]
+ dec_sharing_group: [shared_dec]
node_gpu: "0:1"
- path_src: /path/to/train.cs-en.en
- path_tgt: /path/to/train.cs-en.cs
+ path_src: europarl_data/encoded/train.cs-en.en.sp
+ path_tgt: europarl_data/encoded/train.cs-en.cs.sp
enc_layers: [6]
dec_layers: [6]
-We recommend our automatic configuration generation tool for generating your configurations.
+To proceed, copy-paste one of these configurations into a new file named my_config.yaml
.
+For further information, check out the documentation of all parameters in train.py.
+For more complex scenarios, we recommend our automatic configuration generation tool for generating your configurations.
Step 3: Start training¶
-Now that you’ve prepared your data and configured the settings, it’s time to initiate the training of your multilingual machine translation model using Mammoth. Follow these steps to launch the training script, for example, through the Slurm manager:
-python -u "$@" --node_rank $SLURM_NODEID -u ${PATH_TO_MAMMOTH}/train.py \
- -config ${CONFIG_DIR}/your_config.yml \
- -save_model ${SAVE_DIR}/models/${EXP_ID} \
- -master_port 9974 -master_ip $SLURMD_NODENAME \
- -tensorboard -tensorboard_log_dir ${LOG_DIR}/${EXP_ID}
+You can start your training on a single machine, by simply running a python script train.py
, possibly with a definition of your desired GPUs.
+Note that the example config above assumes two GPUs available on one machine.
+CUDA_VISIBLE_DEVICES=0,1 python3 train.py -config my_config.yaml -save_model output_dir -tensorboard -tensorboard_log_dir log_dir
-Explanation of Command:
-
-python -u "$@"
: Initiates the training script using Python.
---node_rank $SLURM_NODEID
: Specifies the node rank using the environment variable provided by Slurm.
--u ${PATH_TO_MAMMOTH}/train.py
: Specifies the path to the Mammoth training script.
--config ${CONFIG_DIR}/your_config.yml
: Specifies the path to your configuration file.
--save_model ${SAVE_DIR}/models/${EXP_ID}
: Defines the directory to save the trained models, incorporating an experiment identifier (${EXP_ID}
).
--master_port 9974 -master_ip $SLURMD_NODENAME
: Sets the master port and IP for communication.
--tensorboard -tensorboard_log_dir ${LOG_DIR}/${EXP_ID}
: Enables TensorBoard logging, specifying the directory for TensorBoard logs.
-
-Your training process has been initiated through the Slurm manager, leveraging the specified configuration settings. Monitor the progress through the provided logging and visualization tools. Adjust parameters as needed for your specific training requirements. You can also run the command on other workstations by modifying the parameters accordingly.
-
+Note that when running train.py
, you can use all the parameters from train.py as cmd arguments. In the case of duplicate arguments, the cmd parameters override the ones found in your config.yaml.
-Step 4: Translate¶
+Step 4: Translate¶
Now that you have successfully trained your multilingual machine translation model using Mammoth, it’s time to put it to use for translation.
python3 -u $MAMMOTH/translate.py \
- --config "${CONFIG_DIR}/your_config.yml" \
+ --config "my_config.yaml" \
--model "$model_checkpoint" \
--task_id "train_$src_lang-$tgt_lang" \
--src "$path_to_src_language/$lang_pair.$src_lang.sp" \
@@ -339,7 +394,7 @@ Step 4: Translate
-Configuration File: --config "${CONFIG_DIR}/your_config.yml"
+Configuration File: --config "my_config.yaml"
Model Checkpoint: --model "$model_checkpoint"
Translation Task: --task_id "train_$src_lang-$tgt_lang"
@@ -357,9 +412,10 @@ Step 4: Translate
-Further reading¶
+Further reading¶
A complete example of training on the Europarl dataset is available at MAMMOTH101, and a complete example for configuring different sharing schemes is available at MAMMOTH sharing schemes.
+
diff --git a/searchindex.js b/searchindex.js
index 4e0febab..46dbfcf7 100644
--- a/searchindex.js
+++ b/searchindex.js
@@ -1 +1 @@
-Search.setIndex({docnames:["CONTRIBUTING","FAQ","attention_bridges","config_config","examples/sharing_schemes","examples/train_mammoth_101","index","install","main","mammoth","mammoth.inputters","mammoth.modules","mammoth.translate.translation_server","mammoth.translation","modular_model","options/build_vocab","options/server","options/train","options/translate","prepare_data","quickstart","ref"],envversion:{"sphinx.domains.c":1,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":1,"sphinx.domains.index":1,"sphinx.domains.javascript":1,"sphinx.domains.math":2,"sphinx.domains.python":1,"sphinx.domains.rst":1,"sphinx.domains.std":1,"sphinx.ext.viewcode":1,sphinx:56},filenames:["CONTRIBUTING.md","FAQ.md","attention_bridges.md","config_config.md","examples/sharing_schemes.md","examples/train_mammoth_101.md","index.rst","install.md","main.md","mammoth.rst","mammoth.inputters.rst","mammoth.modules.rst","mammoth.translate.translation_server.rst","mammoth.translation.rst","modular_model.md","options/build_vocab.rst","options/server.rst","options/train.rst","options/translate.rst","prepare_data.md","quickstart.md","ref.rst"],objects:{"mammoth.Trainer":{train:[9,1,1,""],validate:[9,1,1,""]},"mammoth.inputters.dataloader":{DynamicDatasetIter:[10,0,1,""],InferenceBatcher:[10,0,1,""],LookAheadBucketing:[10,0,1,""]},"mammoth.inputters.dataloader.DynamicDatasetIter":{from_opts:[10,1,1,""]},"mammoth.inputters.dataloader.LookAheadBucketing":{bucket_is_empty:[10,1,1,""],is_empty:[10,1,1,""],maybe_replenish:[10,1,1,""]},"mammoth.inputters.dataset":{ParallelCorpus:[10,0,1,""]},"mammoth.inputters.vocab":{Vocab:[10,0,1,""]},"mammoth.inputters.vocab.Vocab":{merge:[10,1,1,""]},"mammoth.models":{NMTModel:[9,0,1,""]},"mammoth.models.NMTModel":{count_parameters:[9,1,1,""],forward:[9,1,1,""]},"mammoth.modules":{Embeddings:[11,0,1,""]},"mammoth.modules.Embeddings":{emb_luts:[11,1,1,""],forward:[11,1,1,""],load_pretrained_vectors:[11,1,1,""],word_lut:[11,1,1,""]},"mamm
oth.modules.attention_bridge":{AttentionBridge:[11,0,1,""]},"mammoth.modules.attention_bridge.AttentionBridge":{forward:[11,1,1,""],from_opts:[11,1,1,""]},"mammoth.modules.average_attn":{AverageAttention:[11,0,1,""]},"mammoth.modules.average_attn.AverageAttention":{cumulative_average:[11,1,1,""],cumulative_average_mask:[11,1,1,""],forward:[11,1,1,""]},"mammoth.modules.decoder":{DecoderBase:[11,0,1,""]},"mammoth.modules.decoder.DecoderBase":{from_opts:[11,1,1,""]},"mammoth.modules.decoder_ensemble":{EnsembleModel:[11,0,1,""]},"mammoth.modules.encoder":{EncoderBase:[11,0,1,""]},"mammoth.modules.encoder.EncoderBase":{forward:[11,1,1,""]},"mammoth.modules.layer_stack_decoder":{LayerStackDecoder:[11,0,1,""]},"mammoth.modules.layer_stack_decoder.LayerStackDecoder":{add_adapter:[11,1,1,""],forward:[11,1,1,""],from_opts:[11,1,1,""],from_trans_opt:[11,1,1,""],get_submodule:[11,1,1,""],init_state:[11,1,1,""]},"mammoth.modules.layer_stack_encoder":{LayerStackEncoder:[11,0,1,""]},"mammoth.modules.layer_stack_encoder.LayerStackEncoder":{add_adapter:[11,1,1,""],forward:[11,1,1,""],from_opts:[11,1,1,""],from_trans_opt:[11,1,1,""],get_submodule:[11,1,1,""]},"mammoth.modules.mean_encoder":{MeanEncoder:[11,0,1,""]},"mammoth.modules.mean_encoder.MeanEncoder":{forward:[11,1,1,""],from_opts:[11,1,1,""]},"mammoth.modules.multi_headed_attn":{MultiHeadedAttention:[11,0,1,""]},"mammoth.modules.multi_headed_attn.MultiHeadedAttention":{forward:[11,1,1,""]},"mammoth.modules.position_ffn":{PositionwiseFeedForward:[11,0,1,""]},"mammoth.modules.position_ffn.PositionwiseFeedForward":{forward:[11,1,1,""]},"mammoth.modules.transformer_decoder":{TransformerDecoder:[11,0,1,""]},"mammoth.modules.transformer_decoder.TransformerDecoder":{forward:[11,1,1,""]},"mammoth.modules.transformer_encoder":{TransformerEncoder:[11,0,1,""]},"mammoth.modules.transformer_encoder.TransformerEncoder":{forward:[11,1,1,""],from_opts:[11,1,1,""]},"mammoth.translate":{BeamSearch:[13,0,1,""],DecodeStrategy:[13,0,1,""],GNMTGlo
balScorer:[13,0,1,""],GreedySearch:[13,0,1,""],Translation:[13,0,1,""],TranslationBuilder:[13,0,1,""],Translator:[13,0,1,""]},"mammoth.translate.BeamSearch":{initialize:[13,1,1,""]},"mammoth.translate.DecodeStrategy":{advance:[13,1,1,""],block_ngram_repeats:[13,1,1,""],initialize:[13,1,1,""],maybe_update_forbidden_tokens:[13,1,1,""],maybe_update_target_prefix:[13,1,1,""],target_prefixing:[13,1,1,""],update_finished:[13,1,1,""]},"mammoth.translate.GreedySearch":{advance:[13,1,1,""],initialize:[13,1,1,""],update_finished:[13,1,1,""]},"mammoth.translate.Translation":{log:[13,1,1,""]},"mammoth.translate.Translator":{translate_batch:[13,1,1,""]},"mammoth.translate.greedy_search":{sample_with_temperature:[13,2,1,""]},"mammoth.translate.penalties":{PenaltyBuilder:[13,0,1,""]},"mammoth.translate.penalties.PenaltyBuilder":{coverage_none:[13,1,1,""],coverage_summary:[13,1,1,""],coverage_wu:[13,1,1,""],length_average:[13,1,1,""],length_none:[13,1,1,""],length_wu:[13,1,1,""]},"mammoth.translate.translation_server":{ServerModel:[12,0,1,""],ServerModelError:[12,3,1,""],Timer:[12,0,1,""],TranslationServer:[12,0,1,""]},"mammoth.translate.translation_server.ServerModel":{build_tokenizer:[12,1,1,""],detokenize:[12,1,1,""],do_timeout:[12,1,1,""],maybe_convert_align:[12,1,1,""],maybe_detokenize:[12,1,1,""],maybe_detokenize_with_align:[12,1,1,""],maybe_postprocess:[12,1,1,""],maybe_preprocess:[12,1,1,""],maybe_tokenize:[12,1,1,""],parse_opt:[12,1,1,""],postprocess:[12,1,1,""],preprocess:[12,1,1,""],rebuild_seg_packages:[12,1,1,""],to_gpu:[12,1,1,""],tokenize:[12,1,1,""],tokenizer_marker:[12,1,1,""]},"mammoth.translate.translation_server.TranslationServer":{clone_model:[12,1,1,""],list_models:[12,1,1,""],load_model:[12,1,1,""],preload_model:[12,1,1,""],run:[12,1,1,""],start:[12,1,1,""],unload_model:[12,1,1,""]},"mammoth.utils":{Optimizer:[9,0,1,""],Statistics:[9,0,1,""]},"mammoth.utils.Optimizer":{amp:[9,1,1,""],backward:[9,1,1,""],from_opts:[9,1,1,""],learning_rate:[9,1,1,""],step:[9,1,
1,""],training_step:[9,1,1,""],zero_grad:[9,1,1,""]},"mammoth.utils.Statistics":{accuracy:[9,1,1,""],all_gather_stats:[9,1,1,""],all_gather_stats_list:[9,1,1,""],elapsed_time:[9,1,1,""],log_tensorboard:[9,1,1,""],output:[9,1,1,""],ppl:[9,1,1,""],update:[9,1,1,""],xent:[9,1,1,""]},"mammoth.utils.loss":{LossComputeBase:[9,0,1,""]},mammoth:{Trainer:[9,0,1,""]}},objnames:{"0":["py","class","Python class"],"1":["py","method","Python method"],"2":["py","function","Python function"],"3":["py","exception","Python exception"]},objtypes:{"0":"py:class","1":"py:method","2":"py:function","3":"py:exception"},terms:{"1node":5,"24th":8,"25g":7,"4gpu":5,"64k":[5,19],"abstract":11,"boolean":[9,13],"break":[14,19],"byte":17,"case":[4,6],"class":[0,6,9,10,11,12],"default":[12,15,16,17,18,19],"export":[4,5,7],"final":[2,5,11,13],"float":[3,11,13],"function":[0,2,3,4,6,9,11,12,13,17],"import":[0,19],"int":[9,10,11,12,13],"long":0,"new":[0,2],"portoro\u017e":19,"public":7,"return":[0,9,11,12,13],"short":14,"static":[9,17],"t\u00e4ttar":14,"throw":11,"true":[3,5,9,10,11,13,17,18,19],"try":[0,7,10],"while":[3,6,11,14],And:0,Are:6,But:7,EOS:13,For:[0,3,4,5,7,11,13,14,17,19],IDs:13,IFS:19,Its:1,LPs:3,Not:0,One:3,The:[2,4,5,7,9,11,12,13,14,17,19],Then:[0,5,19],There:[2,3],These:[2,3,4,13],Use:[3,17],Used:13,Will:3,__init__:12,_compute_loss:9,a3s:[4,5,19,20],aan:[11,17],aan_useffn:[11,17],ab_fixed_length:17,ab_lay:17,ab_layer_norm:17,abbrevi:0,abil:11,about:[0,6],abov:[0,11,13],abs:[2,17,18,21],acceler:[11,21],accept:[0,3,5,13,19],access:[2,3,7],accord:[3,4],accordingli:20,account:[3,7],accross:9,accum:[9,10],accum_count:[5,9,17],accum_step:[9,17],accumul:[9,17],accuraci:[5,9,13],achiev:[3,11,14],achin:21,acl:[8,21],aclantholog:[8,17],aclweb:17,across:[4,5,14],action:[11,13,17],activ:[1,2,7,11,17],activation_fn:11,activationfunct:11,actual:13,adadelta:17,adafactor:[5,17],adagrad:17,adagrad_accumulator_init:17,adam:17,adam_beta1:17,adam_beta2:17,adamoptim:17,adamw:17,adapt:[1,4,6,8,10,11,14],ad
apter_group:11,adapter_nam:3,add:[0,10,11],add_adapt:11,add_argu:19,added:3,adding:[0,1],addit:[0,4,11,15,17,18],addition:[4,11],address:[8,13],adjust:[3,4,20],adopt:[1,17],advanc:[13,17],advic:0,aforement:4,after:[0,2,4,13,17,18],afterward:11,again:0,against:11,aidan:21,alessandro:8,alexand:8,algorithm:[6,21],align:[6,9,11,12,13,18],align_debug:18,alignment_head:[11,17],alignment_lay:[11,17],aliv:13,alive_attn:13,alive_seq:13,all:[0,3,4,9,10,11,13,14,15,17,18,20,21],all_gather_stat:9,all_gather_stats_list:9,all_preprocess:12,allennlp:0,alloc:[3,5,14],allow:[0,1,2,3,4,14,17],almost:[13,17],alon:0,alpha:[13,18],alphabet:3,alreadi:[5,15,17,18],also:[0,3,4,5,7,9,11,14,17,20],altern:[4,11],although:11,alwai:[0,3,11],amd:7,among:[4,14],amp:[9,17],anatomi:6,ancestor:8,ancestri:14,ani:[0,3,6,13,15,17,18],anoth:[0,2,9],antholog:17,apex:17,apex_opt_level:17,api:[0,6],api_doc:17,appear:3,append:[7,19],appl:14,appli:[2,3,11,13,15,17,18],applic:18,approach:[6,14],appropri:[4,13],approxim:17,arab:[4,19],arbitrarili:20,architectur:[1,2,14],arg:[0,11,12],argmax:18,argpars:[12,19],argument:[0,5,6,19],argumentpars:19,arn:8,arxiv:[0,2,17,18,21],ashish:21,ask:6,aspect:14,assign:[3,5,18],assum:[11,13],att_typ:2,attend:2,attent:[0,6,9,13,14,18,21],attention_bridg:[9,11],attention_dropout:[11,17],attentionbridg:11,attentionbridgenorm:2,attn:[11,13,18],attn_debug:[13,18],attn_typ:11,attr:12,attribut:[13,14],attributeerror:11,augment:21,author:8,autodoc:0,autogener:17,automat:[1,5,20],avail:[4,9,12,14,17,18,20],available_model:16,averag:[11,17,18,21],average_attn:11,average_decai:[9,17],average_everi:[9,17],average_output:11,averageattent:11,avg:[11,18],avg_raw_prob:18,avoid:[0,3],aws:7,axi:13,back:9,backend:17,backward:9,bahdanau:17,balanc:14,ban_unk_token:[13,18],bank:11,barri:21,bart:[5,15,17,18],base:[0,1,2,3,4,5,7,8,9,10,11,12,13,14,15,17,18,20],baselin:17,basemodel:9,basenam:19,bash:7,batch:[2,5,9,10,11,13,17,18,20],batch_siz:[4,5,10,11,13,17,18,20],batch_size_multipl:[10,17],batch_t
yp:[5,10,17,18],beam:[6,13],beam_search:13,beam_siz:[13,18],beamsearch:13,beamsearchbas:13,becaus:[3,18],becom:3,been:[13,15,17,18,20],befor:[0,4,12,13,17,18],begin:[9,13],behavior:14,below:[0,4,14,20],ben:3,bengali:3,best:[13,18],beta1:17,beta2:17,beta:[13,18],better:[0,4,7,15,17,18],between:[2,4,6,11,14,15,17,18,21],beyond:9,bia:11,biao:21,bib:0,bibtex:0,bibtext:0,bidir_edg:17,bidirect:17,bin:[7,17],binari:11,bit:18,bitext:14,blank:0,block:[13,18],block_ngram_repeat:[13,18],boggia:8,booktitl:8,bool:[9,10,11,12,13],bos:13,both:[3,4,13,14,17,18],both_embed:[17,18],bound:11,boundari:[15,17,18],bpe:[15,17,18],bptt:[9,17],bridg:[6,21],bridge_extra_nod:17,broad:14,broadcast:14,browser:0,bucket:10,bucket_fn:10,bucket_is_empti:10,buffer:9,build:[0,1,6,9,11,12,13,14,18,19],build_token:12,build_vocab:15,built:[0,9],bulgarian:5,bytetensor:13,cach:11,calcul:[2,9,13],call:[5,11,13],callabl:13,callback:9,can:[2,3,4,5,6,7,9,12,13,14,15,17,18,19,20],cancel:12,candid:[3,15,17,18],cao:21,capabl:[6,14],capit:0,captur:2,care:11,cat:19,catalan:14,categor:13,categori:13,celebr:5,central:14,challeng:[5,14,19],chang:[0,3,9,17],channel:2,charact:0,character:14,character_coverag:19,characterist:14,check:[0,5,8,10,11,20],checklist:0,checkpoint:[4,9,17,20],chen:21,chines:[4,19],chmod:7,choic:[0,10,11,15,17,18],choos:[0,1,4,5,15,17,18],chosen:13,chronopoul:14,citat:[0,6],cite:[0,5,8,19],classmethod:[9,10,11],clean:[5,19],clear:0,clip:5,clone:[8,12,20],clone_model:12,close:0,cls:9,cluster:[3,7,8,14,20],clutter:0,code:[0,3,7],code_dir:7,codebas:[5,7],coder:14,cohes:14,collect:10,column:3,com:[8,20],combin:[14,18],comma:3,command:[6,20],commenc:4,comment:0,common:[0,6,14],commun:[0,1,5,14,20],compar:8,complet:[4,5,13,20],complex:[3,13],compon:[2,3,6,18],composit:17,comput:[3,4,8,9,11,14,17,18],concat:[11,17],conclus:14,condit:[13,17,18],conf:16,confer:8,config:[4,5,6,12,15,16,17,18,20],config_dir:[5,20],config_fil:12,configur:[1,3,4,6,14],congratul:20,connect:2,consid:[1,3,14,19],consider:17,con
sist:[0,4,19],constant:3,constructor:[0,11],consum:17,contain:[3,4,5,7,11,12,13,14,19],content:[0,18],context:[2,11,17],context_attn:11,continu:0,contribut:[0,2,6],contributor:6,control:[3,9],conv2d:11,conv:11,conveni:[3,4],convent:0,convers:13,convert:[10,12],coordin:14,copi:[0,3,7,11,17,18],copy_attn:[11,13,17],copy_attn_forc:17,copy_attn_typ:17,copy_loss_by_seqlength:17,core:[2,6,9],corpora:[10,14],corpora_info:10,corpu:[3,5,10,14,15,17,18,19],corr:[0,21],correct:[3,4],correspand:12,correspond:[4,10,14,18],could:[13,14],count:[3,9,10,13,15,17,18],count_paramet:9,cov:13,cov_pen:13,coverag:[13,17,18],coverage_attn:17,coverage_non:13,coverage_penalti:[13,18],coverage_summari:13,coverage_wu:13,cpu:[10,12,17,18],crai:7,crayon:17,creat:[3,5,7,9],creation:3,criteria:[14,17],criterion:[5,9],critic:[17,18],cross:[9,11,17],csc:[14,19],csv:3,ct2_model:12,ct2_translate_batch_arg:12,ct2_translator_arg:12,ctrl:0,cuda_visible_devic:5,cumbersom:3,cumul:[11,13,18],cumulative_averag:11,cumulative_average_mask:11,cur_dir:19,cur_len:13,current:[3,9,10,11,13,14,17],curricula:3,curriculum:3,custom:[4,6,12,17],custom_opt:12,customiz:4,cut:[0,19],cutoff:13,d_ff:11,d_model:11,dai:21,data:[1,2,3,4,6,9,13,14,21],data_path:19,data_typ:[9,10,13,17,18],databas:14,dataload:10,datapoint:14,dataset:[6,15,17,18,19,20],dataset_adapt:10,datasetadapt:10,datastructur:12,dblp:0,ddress:21,deal:[3,14],debug:[16,17,18],dec:[3,14],dec_lay:[5,17,20],dec_sharing_group:[4,5,20],decai:17,decay_method:[5,17],decay_step:17,decod:[2,3,6,9,14,20],decode_strategi:13,decoder_ensembl:11,decoder_typ:[5,17],decoderbas:[9,11],decodestrategi:13,deep:14,def:0,defin:[3,5,11,14,15,17,18,20],definit:[11,14],degre:11,delai:3,delet:[15,17,18],delimit:18,delv:4,deng:8,denois:[3,5,6],denoising_object:[15,17,18],denot:[2,4],depend:[0,3,7,9,12],deprec:[17,18],depth:14,desc:19,describ:[2,11,12,17,20],descript:0,design:[4,14],desir:[3,4],detail:[1,4,5,7,8,15,17,18,20],determin:[3,14,18],detoken:12,dev:[7,19],develop:[0,6],devic:[3,
4,10,11,13,14,18],device_context:9,deyi:21,diagon:3,diagram:11,dict:[3,9,10,12,13,15,17,18],dict_kei:17,dictionari:[9,11,13,17],differ:[0,1,2,3,4,11,12,14,18,20],dim:11,dimens:[5,11,13,17],dimension:2,dir:19,direct:[0,3,8,13,14],directli:[0,5,18,19],directori:[3,4,7,12,17,20],disabl:17,discard:[15,17,18],discourag:17,disk:[17,18],displai:9,dist:9,distanc:[11,17],distinct:[4,14],distribut:[3,5,9,11,13,14,15,17,18],dive:4,divers:[15,17,18],divid:[2,3,17,18],divis:11,dmon:5,do_timeout:12,doc:0,document:[0,4,6,8,19],doe:[3,6,18],doesn:19,doi:8,doing:[3,18],domain:6,don:0,done:[5,13,14,19],dot:[2,11,17],dotprod:17,dowmunt:19,down:[13,14,15],download:[4,6,7],dozen:8,drop:19,dropout:[5,9,11,15,17,18],dropout_step:[9,17],due:17,dummi:[5,11],dump:[15,17,18],dump_beam:[13,18],dump_sampl:15,dump_transform:[17,18],dure:[11,12,14,17,18],dynam:[6,10,11,18],dynamicdatasetit:10,each:[3,4,5,11,13,14,15,17,18,19],earli:[5,17],earlier:[2,15,17,18],early_stop:[5,17],early_stopping_criteria:[5,17],earlystopp:9,eas:[3,14],easi:0,easili:3,echo:[5,19],ecosystem:1,edg:17,effect:[2,3,12,15],effici:[6,9,14,21],eight:[],either:[13,17],elaps:9,elapsed_tim:9,element:[2,3],els:19,emb_fil:11,emb_lut:11,embed:[2,6,14,15],embedding_s:11,embeddingless:17,embeddings_typ:[17,18],emerg:2,emploi:[2,4,9],empti:[5,10,11,13,15,17,18,19],en_side_in:19,en_side_out:19,en_stream:19,enabl:[1,5,11,14,17,18,20],enable_embeddingless:[11,17],enc:[3,14],enc_hidden:11,enc_lay:[5,17,20],enc_output:11,enc_sharing_group:[4,5,20],encod:[2,3,6,9,13,14,19,20],encoder_typ:[5,17],encoderbas:[9,11],encordec:[15,17,18],encount:[15,17,18],encout:[10,15,17,18],end:13,eng:3,english:[3,4,5,19],enhanc:[2,6,14],ensembl:18,ensemblemodel:11,ensur:[2,4,14],entir:[4,19],entri:0,entropi:9,env_dir:7,environ:[4,7,20],eos:13,epoch:17,epsilon:17,equal:[13,17],equat:11,equival:17,error:[0,11,15,17,18],especi:3,essenti:13,establish:2,etal:8,etc:5,eural:21,europarl:[5,6,20],european:[5,19],evalu:[9,19],even:3,event:13,everi:[9,11,17,18],everyth:
14,exactli:0,exampl:[0,3,4,5,7,10,11,14,15,17,18,20],exce:17,except:[0,12,15,17,18],exclusion_token:13,execut:[4,14,15,17,18],exist:[5,11,15,17,18,19],exp:17,exp_host:17,exp_id:[5,20],expand:[5,19],expans:14,expect:[3,13],experi:[4,5,15,17,18,20],experiment:17,expert:1,explan:20,explicitli:14,explor:4,exponenti:17,extend:0,extern:0,extra:[7,11,17],extract:[5,19],facilit:[2,14],fail:13,fairseq:0,fals:[5,9,10,11,12,13,15,16,17,18,19],familiar:8,faro:8,faster:17,favorit:5,feat_0:18,feat_1:18,feat_dim_expon:11,feat_merg:[11,17],feat_merge_s:17,feat_padding_idx:11,feat_vec_expon:[11,17],feat_vec_s:[11,17],feat_vocab_s:11,feats0:18,feats1:18,featur:[2,6,9,11,14,15,18,21],fed:2,feed:[3,11,14,17],feedforward:[2,17],feedforwardattentionbridgelay:6,feel:[0,4],few:0,ffn:[11,17],field:10,fifth:5,figur:[11,14],file:[0,3,4,5,10,12,15,17,18,19,20],filenam:17,filter:[5,6,19],filterfeat:[15,17,18],filternonzeronumer:[15,17,18],filterrepetit:[15,17,18],filterterminalpunct:[15,17,18],filtertoolong:[3,5,15,17,18],filterwordratio:[15,17,18],find:[0,5],finer:14,firefox:0,first:[0,3,5,7,11,13,17],five:2,fix:[0,13,17],flag:[3,4,9],flake8:0,flexibl:[1,4],floattensor:[9,11,13],flow:[2,14],fnn:11,focu:[0,2],focus:14,folder:0,follow:[0,2,3,4,18,20],foo:0,forbidden:13,forbidden_token:13,forc:[13,18],format:[0,12,15,17,18,19],former:11,forward:[3,9,11,14,17],fotranmt:8,found:19,foundat:[2,4],four:[],fp16:[17,18],fp32:[9,17,18],frac:2,fraction:[15,17,18],framework:[4,6,17],free:[0,4,5,12,19],freez:[11,17],freeze_word_vec:11,freeze_word_vecs_dec:17,freeze_word_vecs_enc:17,french:[4,19],frequenc:[15,17,18],frequent:6,friendli:1,from:[1,2,3,5,7,9,10,11,13,14,17,18,19],from_opt:[9,10,11],from_trans_opt:11,from_xxx:3,frozenset:13,full:[0,3,11,12,14,15,17,18,19],full_context_align:[11,17],fulli:[1,3,11,14],further:[5,6,15,17,18,19],fusedadam:17,futur:[6,7],gao:21,gap:21,garg:17,gather:9,gating_output:11,gelu:17,gener:[0,1,2,3,4,5,6,9,10,13,18,19,20],generator_funct:17,ger:14,get:[6,7],get_submodul:11,g
it:[8,20],github:[8,17,20],give:[3,17,18],given:[2,3,11,12],global_attent:17,global_attention_funct:17,global_scor:13,glove:[17,18],gnmt:13,gnmtglobalscor:13,going:13,gold:13,gold_scor:13,gold_sent:13,gomez:21,gone:17,good:[0,17],googl:[0,13,18,21],gpu:[3,4,5,7,12,13,14,17,18,20],gpu_backend:17,gpu_load:5,gpu_rank:[5,17],gpu_verbose_level:[9,17],gpuid:17,grad:9,gradient:[5,9,14,17],graham:21,grain:14,gram:13,graph:17,gre:7,greater:13,greedy_search:13,greedysearch:13,group:[4,5,11,14,17,18],groupwis:[3,14],grow:13,gtx1080:18,guid:[4,5,8,11,20],guidelin:6,guillaum:8,hack:14,had:18,haddow:21,hand:[3,14],handl:[0,9,14],happen:13,hardwar:14,has:[2,3,4,6,11,13,14,15,17,18,20],has_cov_pen:13,has_len_pen:13,has_tgt:13,have:[0,3,11,13,14,17,18,20],head:[2,5,11,17],head_count:11,help:[0,2,18],helsinki:[8,20],here:[4,5,7,13,19],hidden:[9,11,17],hidden_ab_s:17,hidden_dim:2,hieu:21,high:[3,5,19],higher:[13,17,18],highest:18,hold:13,hook:11,hoorai:5,host:7,hour:5,how:[0,6,11],howev:[0,9],html:[0,17],http:[2,4,5,7,8,17,18,19,20,21],huge:17,human:[3,21],hyp:[4,20],hyper:1,hyphen:3,identifi:[5,18,20],idl:3,ids:3,ignor:[5,11,15,17,18],ignore_when_block:[13,18],iii:14,illia:21,illustr:14,ilya:21,imag:9,impact:17,implement:[2,9,14,17],improv:[1,5,11,13,17,19,21],in_config:3,in_featur:11,includ:[0,1,3,4,5,11,14,15,17,18],incompat:[15,17,18],incorpor:[17,20],increas:[3,14],independ:14,index:[7,11,17],indic:[4,5,9,11,13,15,17,18],individu:[3,11],inf:13,infer:[10,13,14],inferencebatch:10,inferfeat:6,info:[5,10,17,18],inform:[2,3,5,14,17,18],infrastructur:14,ingredi:13,init:17,init_st:[9,11],initi:[4,5,6,9,11,12,13,20],initial_accumulator_valu:17,initil:10,inner:11,innov:6,inp:13,inp_seq_len:13,inproceed:8,input:[2,6,9,10,11,12,13,15,17,18,19,21],input_len:11,input_sentence_s:19,inputs_len:11,inputt:[10,13],insert:[15,17,18],insert_ratio:[15,17,18],insid:5,instal:[0,5,6],instanc:[9,11,13],instanti:9,instead:[0,3,7,11,15,17,18],instruct:[7,17],int8:18,integ:13,integr:0,interact:7,interfac:[9
,11],intermedi:2,intermediate_output:2,intern:12,interv:17,introduc:[2,3,6],introduct:3,invalid:[11,15,17,18],involv:[2,14],is_empti:10,is_finish:13,is_normform:11,is_on_top:11,is_train:10,island:8,isn:13,issu:6,item:[10,11],iter:[9,10],iterabledataset:10,its:[0,3,6,14],itself:[3,11],jakob:21,jean:8,jinsong:21,job:[4,7],johnson:14,joiner:[15,17,18],jone:21,journal:0,json:16,junczi:19,kaiser:21,keep:[12,13,17],keep_checkpoint:[5,17],keep_stat:17,keep_topk:13,keep_topp:13,kei:[6,11,14],kera:17,kernel_s:11,key_len:11,kim:8,klau:21,klein:8,koehn:5,krikun:21,kwarg:11,label:[5,17],label_smooth:[5,17],lakew:14,lambda:[15,17,18],lambda_align:17,lambda_coverag:17,lang:[3,5,19],lang_a:3,lang_b:3,lang_pair:[3,20],languag:[2,4,5,14,15,17,18,19,20],language_pair:19,larg:14,last:[3,4,17,18,19],latter:11,launch:20,layer:[2,5,11,14,17,18,20],layer_cach:11,layer_norm_modul:11,layer_stack_decod:11,layer_stack_encod:11,layer_stack_index:11,layer_type_to_cl:2,layernorm:17,layerstack:3,layerstackdecod:11,layerstackencod:11,layerwis:14,lead:13,learn:[2,5,9,14,17],learning_r:[5,9,17],learning_rate_decai:17,learning_rate_decay_fn:9,least:0,leav:[3,17,19],len:[9,11,13],length:[3,9,11,13,15,17,18,19],length_averag:13,length_non:13,length_pen:13,length_penalti:[13,18],length_wu:13,less:3,let:[3,4,11,14],level:[5,6,10,15,17,18,19],leverag:[6,14,20],lib:7,librari:[8,17],lightweight:14,like:[0,4,11,13,14,18],limit:18,lin:[2,14,17],linattentionbridgelay:6,line:[0,5,10,15,17,18,19],linear:[2,11],linear_warmup:17,linguist:[8,11,14,21],link:[0,2,7],list:[0,3,9,11,12,13,15,17,18,20],list_model:12,literatur:17,llion:21,load:[6,7,9,11,12,14,17,18],load_model:12,load_pretrained_vector:11,loader:6,local:[0,3],localhost:17,locat:5,log:[5,6,9,13,20],log_dir:[5,20],log_fil:[17,18],log_file_level:[17,18],log_prob:13,log_tensorboard:9,logger:13,login:7,logit:[13,18],logsumexp:13,longer:18,longest:13,longitudin:14,longtensor:[9,11,13],look:[0,8,10,11,18],look_ahead_s:10,lookaheadbucket:10,loop:9,loppi:8,loss:[
6,17],loss_scal:17,losscomputebas:9,love:[0,1],low:14,lower:[3,17,18],lrec:19,lsl:[13,21],lstm:17,lua:12,lukasz:21,lumi:6,luong:[17,21],lust:7,lustrep1:7,lustrep2:7,macherei:21,machin:[1,5,8,11,13,14,17,18,20,21],made:[3,5,6],magic:13,mahti:6,mai:[3,5,8,9,10,12,13,15,17,19],main:[0,5,8,9,15,17,18,19],maintain:[4,13],make:[0,4,7,9,10,14,15,17,18],make_shard_st:9,mammoth101:[5,19,20],mammoth:[0,3,6,7,8,9,10,11,12,13,14,17,19],mammoth_transform:[15,17,18],manag:[4,5,9,14,20],mani:[9,13,17],manipul:9,manual:[4,5,12,13,19],map:[3,9,19],marian:17,mark:17,marker:12,mask:[11,15,17,18],mask_length:[15,17,18],mask_or_step:11,mask_ratio:[5,15,17,18],mass:[15,17,18],massiv:[1,3,8],master:[5,17,20],master_ip:[4,17,20],master_port:[4,5,17,20],match:12,mathbb:2,mathbf:2,matric:2,matrix:[2,11,17],max:[9,11,13,19],max_generator_batch:[5,17],max_grad_norm:[5,9,17],max_length:[13,18],max_relative_posit:[11,17],max_sent_length:18,max_sentence_length:19,max_siz:9,maxim:21,maximum:[15,17,18],maybe_convert_align:12,maybe_detoken:12,maybe_detokenize_with_align:12,maybe_postprocess:12,maybe_preprocess:12,maybe_replenish:10,maybe_token:12,maybe_update_forbidden_token:13,maybe_update_target_prefix:13,mean:[3,4,11,12,14,17,18],mean_encod:11,meanencod:11,mechan:[1,2,3,14],mem:7,memori:[11,12,17],memory_bank:[11,13],memory_length:11,merg:[10,11,17],meta:3,metadata:9,method:[9,11,17],metric:18,mi250:7,michel:8,micku:8,mike:21,million:8,min_length:[13,18],minh:21,minim:14,minimum:[15,17,18],mirror:17,mix:9,mixer:10,mixingstrategi:10,mixtur:1,mkdir:[5,7,19],mlp:[11,17],mnmt:14,mode:[3,15,17,18],model:[1,2,3,4,6,11,13,15,20],model_checkpoint:20,model_dim:[5,11,17],model_dtyp:[9,17],model_fil:19,model_id:12,model_kwarg:12,model_prefix:19,model_root:12,model_sav:9,model_task:17,model_typ:[5,17],modelsaverbas:9,modif:[6,9],modifi:[0,4,13,20],modul:[0,2,6,7,9,14,17],modular:[1,6,8,20],module_id:11,mohammad:21,moment:5,monitor:[5,20],monolingu:3,month:8,more:[0,1,3,5,7,10,13,14,15,17,18,19,20],most:[13,1
4,18],mostli:9,move:[12,17],moving_averag:[9,17],much:17,mul:[5,19],multi:[0,2,5,11],multi_headed_attn:11,multiheadedattent:[2,11],multilingu:[1,3,5,8,19,20],multinod:5,multipl:[0,2,3,9,10,11,14,17,18],multipli:[2,10],multplic:0,must:[3,11,12,14,17],mymodul:7,n_batch:9,n_best:[12,13,18],n_bucket:[10,17],n_correct:9,n_edge_typ:17,n_node:[5,17],n_sampl:[15,17,18],n_seg:12,n_src_word:9,n_step:17,n_word:9,naacl:17,name:[0,3,5,6,11,13,15,17,18,19],named_modul:11,namespac:12,napoleon:0,narg:19,nation:19,nccl:17,necessari:[0,4,5,7,9,13,17,18,20],necessit:3,need:[0,3,4,5,9,11,14,17,20,21],neg:[12,17],nest:11,net_b:11,net_c:11,network:[1,11,21],neubig:21,neural:[1,8,11,13,14,21],never:13,next:[3,9,13,18],nfeat:11,ngram:[13,18],nightmar:3,niki:[8,21],nlp:[8,20],nmt:[9,13,17,18],nmtmodel:[9,11],noam:[17,21],noamwd:17,nodalida:8,node:[3,5,7,9,14,17,20],node_gpu:[5,20],node_rank:[4,5,17,20],nois:3,non:[11,13,17,20],none:[5,9,10,11,12,13,15,17,18],nonetyp:13,nonzero_threshold:[15,17,18],nordic:8,norm:[11,17],norm_method:9,normal:[2,3,5,9,11,17],normalz:9,normform:[11,17],norouzi:21,note:[0,3,7,13],noth:[0,9],notset:[17,18],now:[1,4,5,20],nroo:8,ntask:7,nucleu:18,num_lay:11,num_step:9,num_thread:15,number:[3,9,10,11,13,14,15,17,18],numel_fn:10,numer:[15,17,18],nvidia:[5,7,17],obj:[0,9],object:[0,9,10,12,13,15,17,18,19],occur:4,oder:3,off:17,offer:[4,14],offici:[4,19],offset:10,ofi:7,often:[15,17,18],on_timemout:12,on_timeout:12,onc:[13,17],one:[0,2,3,4,5,9,10,11,15,17,18,19],onli:[3,4,9,10,13,14,15,17,18],onmt:[5,19],onmt_token:[15,17,18],onmttok:6,open:[1,8,19],opennmt:[0,3,6,8,9,16],oper:14,operatornam:2,opt:[5,9,10,11,12,17,18],opt_level:17,optim:[1,5,6,14],option:[0,3,4,5,7,9,10,11,12,13,15,17,18,19,20],opu:[5,6],opus100:19,opustc:[5,19],ord:21,order:[3,17],org:[2,7,8,17,18,21],organ:[10,14],origin:[5,6,17,19],oriol:21,other:[2,4,5,7,9,13,15,17,18,19,20,21],other_lang:19,otherwis:[3,11,17,18],our:[5,7,13,20],our_stat:9,out:[3,8,9,20],out_config:3,out_featur:11,out_fil:13,out_p
ath:[4,20],out_typ:19,output:[2,3,4,5,9,11,12,13,15,17,18,19,20],output_model:18,over:[0,3,9,13,17,18,19],overal:2,overcom:14,overhead:14,overrid:[11,13,15,17,18],overridden:11,overse:14,overview:6,overwrit:[5,7,15,17,18],own:[6,9,14],ownership:9,p17:8,p18:17,packag:[5,7,12],pad:[9,11,13],page:[5,8],pair:[3,4,9,12,14,17,19],paper:[0,2,5,17],parallel:[4,5,6,11,13,15,17,19],parallel_path:13,parallelcorpu:[10,13],param:9,param_init:[5,17],param_init_glorot:[5,17],paramet:[1,4,5,6,8,9,10,11,12,13,15,17,18,20],parenthes:0,parliament:[5,19],parmar:21,pars:[10,12],parse_arg:19,parse_opt:12,parser:19,part:[2,13],partial:14,particular:[0,3],particularli:6,partit:7,pass:[2,3,9,11,12,17],past:[0,17],path:[3,4,5,7,10,11,12,13,15,17,18,20],path_src:[5,20],path_tgt:[5,20],path_to_codebas:5,path_to_europarl:5,path_to_mammoth:20,path_to_src_languag:20,path_to_vocab:5,path_to_your_env:5,path_valid_src:5,path_valid_tgt:5,patienc:[5,9],pattern:[3,15,17,18],pdf:17,pen:13,penalti:[6,13,15,17],penaltybuild:13,peopl:7,per:[0,3,15,17,18],perceiv:[2,17],perceiverattentionbridgelay:6,percentag:[15,17,18],perfom:17,perform:[2,11,17],permut:[15,17,18],permute_sent_ratio:[15,17,18],perplex:9,pertain:14,pfs:7,pham:21,philipp:5,phrase_t:[13,18],phylogenet:14,pip3:[7,8,20],pip:[0,7],pipelin:[15,17,18],plain:10,plan:6,platform:7,pleas:[0,5,8,19],plu:17,point:[5,20],pointer:5,poisson:[15,17,18],poisson_lambda:[15,17,18],polosukhin:21,pool:[11,17],pool_siz:[10,17],port:[5,16,17,20],portal:8,pos_ffn_activation_fn:[11,17],posit:[5,11,17,19],position_encod:[5,11,17],position_ffn:11,positionalencod:11,positionwisefeedforward:[11,17],possibl:[3,9,11,12,13,15,17,18],postprocess:12,postprocess_opt:12,potenti:13,pouliquen:19,pouta:19,ppid:5,ppl:9,pre:[9,12,13],pre_word_vecs_dec:17,pre_word_vecs_enc:17,preced:3,precis:9,pred:18,pred_scor:13,pred_sent:13,predict:[9,13,18],prefer:0,prefix:[3,9,15,17,18],prefix_seq_len:13,preload:12,preload_model:12,prepar:[6,13],preprint:21,preprocess:[4,12,14,19],preprocess_op
t:12,presenc:3,present:5,presum:13,pretrain:[11,17],prevent:[13,18],previou:[3,11,13],previous:2,primari:3,prime:2,print:[9,17,18,19],prior_token:[15,17,18],priori:14,prob:13,proba:18,probabl:[11,13,15,17,18],problem:[13,14],proc:[8,21],procedur:3,proceed:[5,8,19],process:[2,4,5,9,12,14,15,17,19,20],processed_data:4,processu:12,produc:[2,13,15,17,18],product:2,progress:[5,20],projappl:7,project:[0,2,5,6,7,8],project_462000125:7,propag:9,proper:12,properli:7,properti:[9,11],proport:[3,15,17,18],provid:[4,5,7,8,18,19,20],prune:6,pty:7,publish:8,puhti:6,pull_request_chk:0,punct_threshold:[15,17,18],punctuat:[0,15,17,18],purason:14,put:[13,20],pwd:19,pyonmttok:[15,17,18],python3:[3,4,7,20],python:[0,3,4,5,7,17,20],pythonpath:7,pythonuserbas:[5,7],pytorch:[0,1,7],qin:21,qualifi:11,quantiz:18,queri:11,query_len:11,question:6,queue:[15,17],queue_siz:17,quickstart:[6,8],quoc:21,quot:0,raganato:8,rais:[11,15,17,18],random:[6,15,17,19],random_ratio:[15,17,18],random_sampling_temp:[13,18],random_sampling_topk:[13,18],random_sampling_topp:[13,18],randomli:[5,13,19],rang:18,rank:[5,13,14,17,20],ranslat:21,rare:13,rate:[5,6,9],rather:0,ratio:[13,15,17,18],raw:[10,13,18],raw_prob:11,rccl:7,reach:13,read:[0,3,5,6,12,19],readabl:[0,3,17,18],readm:17,real:11,rebuild:12,rebuild_seg_packag:12,receiv:3,recent:17,recip:11,recommend:[4,7,17,20],recommonmark:0,rectifi:2,recurr:11,reduc:14,redund:3,ref:0,refer:[0,1,2,4,6,7,11,19,20],referenc:11,regardless:[3,4],regist:11,regular:[15,17,18],rel:[11,14,17],relat:[5,14,15,17,18],relationship:[2,6],releas:[5,7,19],relev:[11,13,14],relu:[2,11,17],rememb:[0,4],remov:[3,5,15,17,18,19],renorm:17,reorder:13,rep_max_len:[15,17,18],rep_min_len:[15,17,18],rep_threshold:[15,17,18],repeat:[13,15,17,18],repetit:18,replac:[4,13,15,17,18],replace_length:[5,15,17,18],replace_unk:[13,18],replic:11,report:[6,8,9,17,18],report_align:[13,18],report_everi:[5,17],report_manag:9,report_scor:13,report_stats_from_paramet:[9,17],report_tim:[13,18],reportmgrbas:9,repos
itori:6,repres:[5,9],represent:[2,11,14,17],reproduc:6,requir:[0,4,9,11,14,17,20],research:[5,8],reservoir:10,reset:9,reset_optim:17,resett:17,residu:[11,14],resolv:11,resourc:[3,5,14,19],respect:[2,3],respons:9,rest:16,restrict:[15,17,18],result:[11,12,17],return_attent:13,reus:17,reuse_copy_attn:17,revers:[15,17,18],reversible_token:[15,17,18],rico:21,right:0,rmsnorm:17,rnn:[9,17],roblem:21,rocm5:7,rocm:7,root:[2,3],rotat:[15,17,18],rotate_ratio:[15,17,18],roundrobin:17,row:3,rshavn:8,rsqrt:17,rst:0,run:[0,3,5,9,11,12,17,18,20],runtim:11,rush:8,russian:[4,19],s_idx:10,sacrebleu:[7,8,20],sai:[3,11,14],samantao:7,same:[0,3,11,12,14,17],sampl:[6,13,15,17,19],sample_with_temperatur:13,sampling_temp:13,save:[5,9,14,15,17,18,19,20],save_all_gpu:[5,17],save_checkpoint_step:[5,9,17],save_config:[15,17,18],save_data:[15,17,18],save_dir:[5,20],save_model:[5,17,20],saver:9,scalabl:[1,14],scale:[11,13,14,17],scenario:1,schedul:[4,9,17],scheme:[5,6,14,20],schuster:21,score:[6,12,15,17,18],scorer:13,scratch:[1,7],script:[0,4,5,6,7,19,20],script_dir:5,seamless:4,search:[0,3,6,13],second:[2,3,11,12],section:[5,7],secur:[10,15,17,18],see:[3,5,11,12,13,15,17,18],seed:[5,13,15,17,18],seemingli:17,seen:2,segment:[3,12,15,17,18],select:[11,13],select_index:13,selector:3,self:[2,11,12,13,17],self_attn_typ:[11,17],send:[0,17],senellart:8,sennrich:21,sensibl:0,sent:[9,10,17,18],sent_numb:13,sentenc:[13,15,17,18,19],sentencepiec:[3,6,7,8,15,17,18,20],sentencepieceprocessor:19,separ:[3,5,11,14,19],seper:12,seq2seq:[13,17],seq:13,seq_len:[2,13],sequenc:[1,2,3,9,11,12,13,14,15,17,18],seri:14,serial:11,serv:[2,4],server:[6,17,19],servermodel:12,servermodelerror:12,session:7,set:[1,2,3,4,5,7,9,11,12,13,14,15,17,18,20],sever:[3,5,11,13],sgd:17,sh16:[11,21],shaham:17,shallow:14,shape:[0,11,13],shard:[9,17,18],shard_siz:[4,9,18,20],share:[1,5,6,7,8,11,15,17,18,19,20],share_decoder_embed:17,share_embed:17,share_vocab:[15,17,18],shazeer:21,shortest:13,shot:3,should:[3,5,11,13,17,18],show:11,shuf:19
,shuffl:[5,19],shuffle_input_sent:19,side:[3,9,12,15,17,18],side_a:3,side_b:3,sign:[15,17,18],signifi:4,silent:[5,11,15,17,18],similar:[2,3,11,17],simpl:[2,9,11,17],simpleattentionbridgelay:6,simpli:11,simulatan:11,sin:17,sinc:11,singl:[0,5,12,14,17],single_pass:17,singular:7,site:7,six:[4,19],size:[3,5,9,10,11,13,15,17,18,19,20],skip:[3,5,15,17,18,19],skip_embed:11,skip_empty_level:[5,10,15,17,18],slovenia:19,slow:[15,18],slurm:[3,4,5,7,20],slurm_nodeid:[4,5,20],slurmd_nodenam:[4,20],smaller:[14,15,17,18],smi:5,smooth:[5,15,17,18],softmax:[2,17,18],solid:4,some:[0,3,9,11,18],someth:[0,11],sometim:0,soon:5,sort:[12,19],sorted_pair:3,sourc:[0,1,3,4,5,7,8,9,10,11,12,13,14,15,17,18,20],sp_path:19,space:[0,2,14,17],spacer:[15,17,18],span:[4,15,17,18],spanish:[4,19],spars:[1,11],special:10,specif:[2,3,4,5,6,8,13,14,15,17,18,20],specifi:[4,5,11,15,17,18,19,20],sphinx:0,sphinx_rtd_them:0,sphinxcontrib:0,spill:0,spm:[5,19],spm_encod:19,spm_train:19,sqrt:2,squar:[2,3],src:[3,4,9,11,12,13,15,17,18,19,20],src_embed:[17,18],src_feat:18,src_feats_vocab:[15,17,18],src_file:10,src_file_path:13,src_ggnn_size:17,src_group:3,src_lang:[3,20],src_languag:3,src_len:[9,11],src_length:13,src_map:13,src_onmttok_kwarg:[15,17,18],src_raw:13,src_seq_length:[5,15,17,18],src_seq_length_trunc:[17,18],src_subword_alpha:[15,17,18],src_subword_model:[15,17,18],src_subword_nbest:[15,17,18],src_subword_typ:[5,15,17,18],src_subword_vocab:[15,17,18],src_tgt:[5,20],src_vocab:[5,10,13,15,17,18],src_vocab_s:[5,17,18],src_vocab_threshold:[15,17,18],src_words_min_frequ:[17,18],srun:[4,5,7],stabl:2,stack:[11,14,17],stage:2,stand:0,standalon:4,standard:[11,17,18],start:[3,4,5,6,7,9,12,17,19],start_decay_step:17,stat:[9,17],stat_list:9,state:[9,11,13,14,17],state_dict:17,state_dim:17,statist:[5,9,17],statu:6,stdout:9,step:[2,3,6,9,11,13,17,18],stepwis:11,stepwise_penalti:[13,18],stig:8,still:0,stop:[5,15,17,18],store:17,str:[0,9,10,11,12,13,19],strategi:[1,6,9,10,14,17],streamlin:14,stride:[10,11],string:[9,11
,15,17,18],strip:19,structur:[2,4,6,17,18],structured_log_fil:[17,18],style:[0,10,11,14,15,17,18],styleguid:0,sub_id:11,subclass:[9,11,13],subcompon:3,subdirectori:7,sublay:6,submodul:11,subsequ:2,subset:19,substitut:3,substr:[15,17,18],subword:[3,6,14],successfulli:20,suggest:17,suit:[4,6],sum:[9,11,13,17],sume:9,summari:[0,5,13,18,19],summit:5,superclass:0,supercomput:7,supervis:[3,11,17],support:[0,1,3,17],suppos:19,sure:[4,7,13],sutskev:21,swahili:14,switchout:[6,21],switchout_temperatur:[15,17,18],symmetr:3,sync:14,synchron:14,system:[6,13,14,17,20,21],t_idx:10,tab:[15,17,18],tabl:[11,18],tag:10,tailor:4,take:[2,3,5,8,11,15,17,18],tar:[4,5,19,20],tarbal:19,target:[3,4,5,9,11,12,13,14,15,17,18],target_prefix:13,tartu:8,task2gpu:14,task:[3,6,9,10,11,13,14,20],task_distribution_strategi:17,task_id:[4,18,20],task_queue_manag:[9,10,11],tatoeba:[3,5,19],tau:[15,17,18],team:7,technic:8,temperatur:[3,13,15,17,18],templat:3,tensor:[0,9,10,11,13],tensorboard:[5,9,17,20],tensorboard_log_dir:[5,17,20],tensorflow:17,termin:[15,17,18],test:[0,1,7],text:[5,9,10,13,17,18,19,20],tgt:[3,9,11,12,15,17,18],tgt_embed:[17,18],tgt_file:10,tgt_file_path:13,tgt_group:3,tgt_lang:[3,20],tgt_languag:3,tgt_len:9,tgt_onmttok_kwarg:[15,17,18],tgt_pad_mask:11,tgt_prefix:13,tgt_sent:13,tgt_seq_length:[5,15,17,18],tgt_seq_length_trunc:[17,18],tgt_subword_alpha:[15,17,18],tgt_subword_model:[15,17,18],tgt_subword_nbest:[15,17,18],tgt_subword_typ:[5,15,17,18],tgt_subword_vocab:[15,17,18],tgt_vocab:[5,9,10,15,17,18],tgt_vocab_s:[5,17,18],tgt_vocab_threshold:[15,17,18],tgt_words_min_frequ:[17,18],than:[0,13,14,17,19],thang:21,thank:14,thant:13,thei:[2,5,13,14],them:[3,4,11],therefor:14,thi:[0,2,3,4,5,7,8,9,10,11,13,14,15,17,18,19,20],thin:9,thing:[0,3],thread:15,three:2,threshold:[15,17,18],through:[2,3,4,9,14,20],thu:9,tic:0,tick:0,tiedemann:8,time:[2,3,7,9,13,14,15,17,18,20],timeout:12,timer:12,timothe:8,titl:8,to_cpu:12,to_gpu:12,to_yyi:3,tok:12,token:[3,5,9,10,12,13,14,15,17,18],token_drop:6,tok
en_mask:6,tokendrop:[15,17,18],tokendrop_temperatur:[15,17,18],tokenizer_mark:12,tokenizer_opt:12,tokenmask:[15,17,18],tokenmask_temperatur:[15,17,18],too:13,tool:[1,5,6,20],toolkit:[8,14],top:[0,2,13,18],topk_id:13,topk_scor:13,torch:[0,7,9,10,11,17],torchtext:9,total:[3,9,17],tqdm:19,trail:0,train:[1,3,6,7,8,9,10,11,14],train_:[4,5,20],train_ar:4,train_bg:[5,20],train_c:[5,20],train_d:5,train_da:5,train_el:5,train_en:[5,20],train_et:5,train_extremely_large_corpu:19,train_fi:5,train_fr:5,train_from:17,train_hu:5,train_it:[5,9],train_loss:9,train_loss_md:9,train_lt:5,train_lv:5,train_nl:5,train_pl:5,train_pt:5,train_ro:5,train_sk:5,train_sl:5,train_step:[9,17],train_sv:5,trainabl:9,trainer:6,training_step:9,transfer:14,transform:[1,2,5,6,9,10,11,14,21],transformer_decod:11,transformer_encod:11,transformer_ff:[5,17],transformerattentionbridgelay:6,transformerdecod:11,transformerdecoderbas:11,transformerencod:11,transformerencoderlay:2,transforms_cl:10,transit:11,translat:[1,3,4,5,6,8,9,11,12,14,16,21],translate_batch:13,translation_serv:12,translationbuild:13,translationserv:12,transvers:14,travi:0,trg:3,triangl:3,trichotomi:14,trick:[6,11],trivial:11,trunc_siz:9,truncat:[9,17,18],truncated_decod:17,trust:19,turn:[11,17],tutori:[4,6,7,20],two:[2,3,5,7,8,11,14],txt:[0,18,19],type:[0,2,3,6,8,9,10,11,12,13,15,18],typic:[9,17],typolog:14,under:[1,3,7,17,18],undergo:2,undergon:2,underli:13,understand:4,uniform:17,unigram:[15,17,18],union:0,uniqu:[4,6],unit:[2,14,19],unittest:0,univers:8,unk:[13,18],unknown:13,unless:3,unload:12,unload_model:12,unmodifi:13,unnecessari:[0,3],unpc:[4,6],unset:3,unshar:[],until:[13,18],unwieldi:3,updat:[7,9,12,13,17],update_finish:13,update_learning_r:17,update_n_src_word:9,update_vocab:17,upgrad:7,upon:1,upper:3,url:[7,8,21],url_root:16,usag:[5,6,15,16,17,18],use:[0,3,4,5,6,7,9,11,12,13,14,15,17,18,19,20],used:[2,3,5,9,10,11,12,13,14,15,17,18,19],useful:[1,9],user:[4,5,6,7,9,12],uses:[0,3,5,6,7,11,13,17],using:[0,1,2,3,4,5,8,11,12,13,15,17,1
8,19,20],usual:7,uszkoreit:21,util:[2,4,5,9,10,14],valid:[5,9,15,17,18],valid_batch_s:[5,17],valid_it:9,valid_loss:9,valid_loss_md:9,valid_step:[5,9,17],valu:[2,3,4,5,9,11,12,13,15,17,18,19],variabl:[3,4,7,10,13,20],variat:0,variou:[4,14],vaswani:21,vaswanispujgkp17:0,vector:[11,14,17],venv:7,verbos:[13,17,18],veri:[0,14,18],versatil:14,version:[1,5,12,13,19],via:[11,21],vinyal:21,virtual:7,visit:0,visual:[14,17,20],vocab:[4,5,6,9,13,14],vocab_path:[15,17,18],vocab_s:[13,17,19],vocab_sample_queue_s:15,vocab_size_multipl:[17,18],vocabs_dict:10,vocabulari:[3,4,5,9,14,15,17,18,19],vsp:[11,21],wai:[3,5,13],wait:[3,5],wang:21,want:[3,18],warmup:17,warmup_step:[5,17],warn:[10,15,17,18],websit:[5,19],weight:[2,3,11,17,18],weight_decai:17,weighted_sampl:17,well:[0,6,17],wget:[4,5,19,20],what:[3,6,9,12],when:[0,3,8,10,11,13,15,17,18,19],where:[2,3,6,7,11,13,14,15,17,18],wherea:[13,17],whether:[9,11,12,13,15,17,18],which:[1,3,4,11,13,14,17],whl:7,whole:13,whose:18,why:2,wiki:17,wikipedia:17,window:[15,17,18],with_align:9,within:[2,11,12],without:[0,7,17],wojciech:21,wolfgang:21,word2vec:[17,18],word:[2,11,13,14,15,17,18],word_align:13,word_lut:11,word_padding_idx:11,word_ratio_threshold:[15,17,18],word_vec_s:11,word_vocab_s:11,work:[0,3,7,13,17],workflow:8,workstat:20,world_siz:[5,17],would:[3,11,13,17],wpdn18:[15,17,18,21],wrap:[11,12],wrapper:[4,5,9],writabl:3,write:[3,5,9,19],writer:9,wsc:[13,21],www:17,xavier_uniform:17,xent:9,xinyi:21,xiong:21,xx_side_in:19,xx_side_out:19,xx_stream:19,xzvf:19,yaml:[4,5,15,17,18],year:[4,8,19],yet:13,yield:10,yml:[0,5,20],yonghui:21,yoon:8,you:[0,3,4,5,7,11,17,18,19,20,21],your:[0,3,4,6,7,19,20],your_config:20,your_path:5,your_project_name_is_your_account:7,your_venv_nam:7,your_vevn_nam:7,yourself:[8,19],yuan:21,yuntian:8,yyi:3,zaremba:21,zero:[3,9,11,13,15,17,18],zero_grad:9,zhang:21,zhifeng:21,ziemski:19,zihang:21,zip:19,zquez:8,zxs18:[11,21]},titles:["Contributors","About MAMMOTH","Attention Bridge","Config-config Tool","MAMMOTH 
Sharing Schemes","Training MAMMOTH 101","Contents","Installation","Overview","Framework","Data Loaders","Modules","Server","Translation","Component-level Modularity","Build Vocab","Server","Train","Translate","Prepare Data","Quickstart","References"],titleterms:{"case":1,"class":13,"function":1,"while":1,Are:1,The:3,about:1,actual:3,adapt:[3,17],adapter_config:3,ae_path:3,ae_transform:3,algorithm:1,align:17,allocate_devic:3,altern:3,anatomi:14,ani:1,approach:1,argument:16,attent:[2,11,17],autoencod:3,beam:18,between:1,bridg:[2,11,14,17],build:15,can:1,capabl:1,citat:8,cluster_languag:3,command:[3,4],common:[15,17,18],complete_language_pair:3,compon:14,config:3,config_al:3,config_config:3,configur:[5,15,17,18,20],content:6,contribut:1,contributor:0,core:12,corpora:3,corpora_schedul:3,creation:5,custom:14,data:[5,10,15,17,18,19,20],dataset:[4,10],dec_sharing_group:3,decod:[4,11,13,17,18],denois:[15,17,18],develop:1,direct:19,directori:5,distanc:3,distance_matrix:3,docstr:0,document:1,doe:1,domain:1,download:[5,19],dynam:17,effici:18,embed:[11,17,18],enc_sharing_group:3,encod:[4,11,17],enhanc:1,environ:5,europarl:19,featur:[1,17],feedforwardattentionbridgelay:2,filter:[15,17,18],flexibl:14,framework:9,fulli:4,further:20,futur:1,gener:17,get:19,group:3,guidelin:0,has:1,how:1,inferfeat:[15,17,18],initi:17,innov:1,input:3,instal:[7,8,20],introduc:1,issu:1,its:1,job:5,kei:[1,3],languag:3,level:[3,14],leverag:1,linattentionbridgelay:2,line:3,load:10,loader:10,log:[17,18],loss:9,lumi:7,made:1,mahti:7,mammoth:[1,4,5,20],manual:3,matrix:3,model:[5,9,12,14,17,18,19],modif:1,modul:11,modular:[4,14],n_gpus_per_nod:3,n_group:3,n_node:3,name:16,note:4,onmttok:[15,17,18],opennmt:1,optim:[9,17],opu:19,origin:1,other:3,overrid:3,overview:[4,8],own:1,parallel:14,paramet:[3,14],pars:19,particularli:1,path:19,penalti:18,perceiverattentionbridgelay:2,plan:1,prepar:[5,19,20],project:1,prune:[17,18],puhti:7,quickstart:20,random:18,rate:17,read:20,refer:21,relationship:1,relev:19,remove_temp
orary_kei:3,report:1,repositori:1,reproduc:[15,17,18],run:7,sampl:18,scheme:4,score:13,search:18,sentencepiec:[5,19],separ:4,server:[12,16],set:19,set_transform:3,setup:[4,5],share:[3,4,14],sharing_group:3,shot:19,simpleattentionbridgelay:2,specif:1,specifi:3,src_path:3,stage:3,start:20,statu:1,step:[5,19,20],strategi:13,structur:14,sublay:11,submiss:5,subword:[15,17,18],suit:1,supervis:19,switchout:[15,17,18],system:4,task:[5,15,17,18],test:[4,19],tgt_path:3,than:3,token:19,token_drop:[15,17,18],token_mask:[15,17,18],tool:3,top:3,train:[4,5,17,19,20],trainer:9,transform:[3,15,17,18],transformerattentionbridgelay:2,translat:[13,18,19,20],translation_config:3,translation_config_dir:3,trick:18,type:17,uniqu:1,unpc:19,unshar:4,usag:3,use:1,use_introduce_at_training_step:3,use_src_lang_token:3,use_weight:3,user:1,uses:1,valid:19,variabl:[5,19],vocab:[10,15,17,18,19],well:1,what:1,where:1,yaml:3,your:5,zero:19,zero_shot:3}})
\ No newline at end of file
+Search.setIndex({docnames:["CONTRIBUTING","FAQ","attention_bridges","config_config","examples/sharing_schemes","examples/train_mammoth_101","index","install","main","mammoth","mammoth.inputters","mammoth.modules","mammoth.translate.translation_server","mammoth.translation","modular_model","options/build_vocab","options/server","options/train","options/translate","prepare_data","quickstart","ref"],envversion:{"sphinx.domains.c":1,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":1,"sphinx.domains.index":1,"sphinx.domains.javascript":1,"sphinx.domains.math":2,"sphinx.domains.python":1,"sphinx.domains.rst":1,"sphinx.domains.std":1,"sphinx.ext.viewcode":1,sphinx:56},filenames:["CONTRIBUTING.md","FAQ.md","attention_bridges.md","config_config.md","examples/sharing_schemes.md","examples/train_mammoth_101.md","index.rst","install.md","main.md","mammoth.rst","mammoth.inputters.rst","mammoth.modules.rst","mammoth.translate.translation_server.rst","mammoth.translation.rst","modular_model.md","options/build_vocab.rst","options/server.rst","options/train.rst","options/translate.rst","prepare_data.md","quickstart.md","ref.rst"],objects:{"mammoth.Trainer":{train:[9,1,1,""],validate:[9,1,1,""]},"mammoth.inputters.dataloader":{DynamicDatasetIter:[10,0,1,""],InferenceBatcher:[10,0,1,""],LookAheadBucketing:[10,0,1,""]},"mammoth.inputters.dataloader.DynamicDatasetIter":{from_opts:[10,1,1,""]},"mammoth.inputters.dataloader.LookAheadBucketing":{bucket_is_empty:[10,1,1,""],is_empty:[10,1,1,""],maybe_replenish:[10,1,1,""]},"mammoth.inputters.dataset":{ParallelCorpus:[10,0,1,""]},"mammoth.inputters.vocab":{Vocab:[10,0,1,""]},"mammoth.inputters.vocab.Vocab":{merge:[10,1,1,""]},"mammoth.models":{NMTModel:[9,0,1,""]},"mammoth.models.NMTModel":{count_parameters:[9,1,1,""],forward:[9,1,1,""]},"mammoth.modules":{Embeddings:[11,0,1,""]},"mammoth.modules.Embeddings":{emb_luts:[11,1,1,""],forward:[11,1,1,""],load_pretrained_vectors:[11,1,1,""],word_lut:[11,1,1,""]},"mamm
oth.modules.attention_bridge":{AttentionBridge:[11,0,1,""]},"mammoth.modules.attention_bridge.AttentionBridge":{forward:[11,1,1,""],from_opts:[11,1,1,""]},"mammoth.modules.average_attn":{AverageAttention:[11,0,1,""]},"mammoth.modules.average_attn.AverageAttention":{cumulative_average:[11,1,1,""],cumulative_average_mask:[11,1,1,""],forward:[11,1,1,""]},"mammoth.modules.decoder":{DecoderBase:[11,0,1,""]},"mammoth.modules.decoder.DecoderBase":{from_opts:[11,1,1,""]},"mammoth.modules.decoder_ensemble":{EnsembleModel:[11,0,1,""]},"mammoth.modules.encoder":{EncoderBase:[11,0,1,""]},"mammoth.modules.encoder.EncoderBase":{forward:[11,1,1,""]},"mammoth.modules.layer_stack_decoder":{LayerStackDecoder:[11,0,1,""]},"mammoth.modules.layer_stack_decoder.LayerStackDecoder":{add_adapter:[11,1,1,""],forward:[11,1,1,""],from_opts:[11,1,1,""],from_trans_opt:[11,1,1,""],get_submodule:[11,1,1,""],init_state:[11,1,1,""]},"mammoth.modules.layer_stack_encoder":{LayerStackEncoder:[11,0,1,""]},"mammoth.modules.layer_stack_encoder.LayerStackEncoder":{add_adapter:[11,1,1,""],forward:[11,1,1,""],from_opts:[11,1,1,""],from_trans_opt:[11,1,1,""],get_submodule:[11,1,1,""]},"mammoth.modules.mean_encoder":{MeanEncoder:[11,0,1,""]},"mammoth.modules.mean_encoder.MeanEncoder":{forward:[11,1,1,""],from_opts:[11,1,1,""]},"mammoth.modules.multi_headed_attn":{MultiHeadedAttention:[11,0,1,""]},"mammoth.modules.multi_headed_attn.MultiHeadedAttention":{forward:[11,1,1,""]},"mammoth.modules.position_ffn":{PositionwiseFeedForward:[11,0,1,""]},"mammoth.modules.position_ffn.PositionwiseFeedForward":{forward:[11,1,1,""]},"mammoth.modules.transformer_decoder":{TransformerDecoder:[11,0,1,""]},"mammoth.modules.transformer_decoder.TransformerDecoder":{forward:[11,1,1,""]},"mammoth.modules.transformer_encoder":{TransformerEncoder:[11,0,1,""]},"mammoth.modules.transformer_encoder.TransformerEncoder":{forward:[11,1,1,""],from_opts:[11,1,1,""]},"mammoth.translate":{BeamSearch:[13,0,1,""],DecodeStrategy:[13,0,1,""],GNMTGlo
balScorer:[13,0,1,""],GreedySearch:[13,0,1,""],Translation:[13,0,1,""],TranslationBuilder:[13,0,1,""],Translator:[13,0,1,""]},"mammoth.translate.BeamSearch":{initialize:[13,1,1,""]},"mammoth.translate.DecodeStrategy":{advance:[13,1,1,""],block_ngram_repeats:[13,1,1,""],initialize:[13,1,1,""],maybe_update_forbidden_tokens:[13,1,1,""],maybe_update_target_prefix:[13,1,1,""],target_prefixing:[13,1,1,""],update_finished:[13,1,1,""]},"mammoth.translate.GreedySearch":{advance:[13,1,1,""],initialize:[13,1,1,""],update_finished:[13,1,1,""]},"mammoth.translate.Translation":{log:[13,1,1,""]},"mammoth.translate.Translator":{translate_batch:[13,1,1,""]},"mammoth.translate.greedy_search":{sample_with_temperature:[13,2,1,""]},"mammoth.translate.penalties":{PenaltyBuilder:[13,0,1,""]},"mammoth.translate.penalties.PenaltyBuilder":{coverage_none:[13,1,1,""],coverage_summary:[13,1,1,""],coverage_wu:[13,1,1,""],length_average:[13,1,1,""],length_none:[13,1,1,""],length_wu:[13,1,1,""]},"mammoth.translate.translation_server":{ServerModel:[12,0,1,""],ServerModelError:[12,3,1,""],Timer:[12,0,1,""],TranslationServer:[12,0,1,""]},"mammoth.translate.translation_server.ServerModel":{build_tokenizer:[12,1,1,""],detokenize:[12,1,1,""],do_timeout:[12,1,1,""],maybe_convert_align:[12,1,1,""],maybe_detokenize:[12,1,1,""],maybe_detokenize_with_align:[12,1,1,""],maybe_postprocess:[12,1,1,""],maybe_preprocess:[12,1,1,""],maybe_tokenize:[12,1,1,""],parse_opt:[12,1,1,""],postprocess:[12,1,1,""],preprocess:[12,1,1,""],rebuild_seg_packages:[12,1,1,""],to_gpu:[12,1,1,""],tokenize:[12,1,1,""],tokenizer_marker:[12,1,1,""]},"mammoth.translate.translation_server.TranslationServer":{clone_model:[12,1,1,""],list_models:[12,1,1,""],load_model:[12,1,1,""],preload_model:[12,1,1,""],run:[12,1,1,""],start:[12,1,1,""],unload_model:[12,1,1,""]},"mammoth.utils":{Optimizer:[9,0,1,""],Statistics:[9,0,1,""]},"mammoth.utils.Optimizer":{amp:[9,1,1,""],backward:[9,1,1,""],from_opts:[9,1,1,""],learning_rate:[9,1,1,""],step:[9,1,
1,""],training_step:[9,1,1,""],zero_grad:[9,1,1,""]},"mammoth.utils.Statistics":{accuracy:[9,1,1,""],all_gather_stats:[9,1,1,""],all_gather_stats_list:[9,1,1,""],elapsed_time:[9,1,1,""],log_tensorboard:[9,1,1,""],output:[9,1,1,""],ppl:[9,1,1,""],update:[9,1,1,""],xent:[9,1,1,""]},"mammoth.utils.loss":{LossComputeBase:[9,0,1,""]},mammoth:{Trainer:[9,0,1,""]}},objnames:{"0":["py","class","Python class"],"1":["py","method","Python method"],"2":["py","function","Python function"],"3":["py","exception","Python exception"]},objtypes:{"0":"py:class","1":"py:method","2":"py:function","3":"py:exception"},terms:{"1node":5,"24th":8,"25g":7,"30gb":19,"3lang":19,"4gpu":5,"64k":[5,19],"666m":19,"abstract":11,"boolean":[9,13],"break":[14,19],"byte":17,"case":[4,6,20],"class":[0,6,9,10,11,12],"default":[12,15,16,17,18,19],"export":[4,5,7],"final":[2,5,11,13],"float":[3,11,13],"function":[0,2,3,4,6,9,11,12,13,17],"import":[0,19],"int":[9,10,11,12,13],"long":0,"new":[0,2,20],"portoro\u017e":19,"public":7,"return":[0,9,11,12,13],"short":14,"static":[9,17],"t\u00e4ttar":14,"throw":11,"true":[3,5,9,10,11,13,17,18,19],"try":[0,7,10],"while":[3,6,11,14],And:0,Are:6,But:7,EOS:13,For:[0,3,4,5,7,11,13,14,17,19,20],IDs:13,IFS:19,Its:1,LPs:3,Not:0,One:3,The:[2,4,5,7,9,11,12,13,14,17,19,20],Then:[0,5,19],There:[2,3],These:[2,3,4,13],Use:[3,17],Used:13,Will:3,__init__:12,_compute_loss:9,a3s:[4,5,19,20],aan:[11,17],aan_useffn:[11,17],ab_fixed_length:17,ab_lay:17,ab_layer_norm:17,abbrevi:0,abil:11,about:[0,6],abov:[0,11,13,20],abs:[2,17,18,21],acceler:[11,21],accept:[0,3,5,13],access:[2,3,7,20],accord:[3,4],accordingli:[],account:[3,7],accross:9,accum:[9,10],accum_count:[5,9,17],accum_step:[9,17],accumul:[9,17],accuraci:[5,9,13],achiev:[3,11,14],achin:21,acl:[8,21],aclantholog:[8,17],aclweb:17,across:[4,5,14],action:[11,13,17],activ:[1,2,7,11,17],activation_fn:11,activationfunct:11,actual:13,adadelta:17,adafactor:[5,17],adagrad:17,adagrad_accumulator_init:17,adam:17,adam_beta1:17,adam_beta2:17,ada
moptim:17,adamw:17,adapt:[1,4,6,8,10,11,14],adapter_group:11,adapter_nam:3,add:[0,10,11],add_adapt:11,add_argu:[],added:3,adding:[0,1],addit:[0,4,11,15,17,18],addition:[4,11],address:[8,13],adjust:[3,4,20],adopt:[1,17],advanc:[13,17],advic:0,aforement:4,after:[0,2,4,13,17,18,19],afterward:11,again:0,against:11,aidan:21,alessandro:8,alexand:8,algorithm:[6,21],align:[6,9,11,12,13,18],align_debug:18,alignment_head:[11,17],alignment_lay:[11,17],aliv:13,alive_attn:13,alive_seq:13,all:[0,3,4,9,10,11,13,14,15,17,18,20,21],all_gather_stat:9,all_gather_stats_list:9,all_preprocess:12,allennlp:0,alloc:[3,5,14],allow:[0,1,2,3,4,14,17],almost:[13,17],alon:0,alpha:[13,18],alphabet:3,alreadi:[5,15,17,18,20],also:[0,3,4,5,7,9,11,14,17,19],altern:[4,11,19],although:11,alwai:[0,3,11],amd:7,among:[4,14],amp:[9,17],anatomi:6,ancestor:8,ancestri:14,ani:[0,3,6,13,15,17,18],anoth:[0,2,9],antholog:17,apex:17,apex_opt_level:17,api:[0,6],api_doc:17,appear:3,append:[7,19],appl:14,appli:[2,3,11,13,15,17,18],applic:18,approach:[6,14],appropri:[4,13],approxim:17,arab:[4,19],arbitrarili:20,architectur:[1,2,14],arg:[0,11,12],argmax:18,argpars:12,argument:[0,5,6,20],argumentpars:[],arn:8,around:19,arxiv:[0,2,17,18,21],ashish:21,ask:6,aspect:14,assign:[3,5,18],assum:[11,13,19,20],att_typ:2,attend:2,attent:[0,6,9,13,14,18,21],attention_bridg:[9,11],attention_dropout:[11,17],attentionbridg:11,attentionbridgenorm:2,attn:[11,13,18],attn_debug:[13,18],attn_typ:11,attr:12,attribut:[13,14],attributeerror:11,augment:21,author:8,autodoc:0,autogener:17,automat:[1,5,20],avail:[4,9,12,14,17,18,20],available_model:16,averag:[11,17,18,21],average_attn:11,average_decai:[9,17],average_everi:[9,17],average_output:11,averageattent:11,avg:[11,18],avg_raw_prob:18,avoid:[0,3],aws:7,axi:13,back:9,backend:17,backward:9,bahdanau:17,balanc:14,ban_unk_token:[13,18],bank:11,barri:21,bart:[5,15,17,18],base:[0,1,2,3,4,5,7,8,9,10,11,12,13,14,15,17,18,20],baselin:17,basemodel:9,basenam:19,bash:7,batch:[2,5,9,10,11,13,17,18,20],ba
tch_siz:[4,5,10,11,13,17,18,20],batch_size_multipl:[10,17],batch_typ:[5,10,17,18,20],beam:[6,13],beam_search:13,beam_siz:[13,18],beamsearch:13,beamsearchbas:13,becaus:[3,18],becom:3,been:[13,15,17,18],befor:[0,4,12,13,17,18,19,20],begin:[9,13],behavior:14,below:[0,4,14,19,20],ben:3,bengali:3,best:[13,18],beta1:17,beta2:17,beta:[13,18],better:[0,4,7,15,17,18],between:[2,4,6,11,14,15,17,18,21],beyond:9,bia:11,biao:21,bib:0,bibtex:0,bibtext:0,bidir_edg:17,bidirect:17,bin:[7,17],binari:11,bit:18,bitext:14,blank:0,block:[13,18],block_ngram_repeat:[13,18],boggia:8,booktitl:8,bool:[9,10,11,12,13],bos:13,both:[3,4,13,14,17,18],both_embed:[17,18],bound:11,boundari:[15,17,18],box:20,bpe:[15,17,18],bptt:[9,17],bridg:[6,21],bridge_extra_nod:17,broad:14,broadcast:14,browser:0,bucket:10,bucket_fn:10,bucket_is_empti:10,buffer:9,build:[0,1,6,9,11,12,13,14,18,19,20],build_token:12,build_vocab:15,built:[0,9],bulgarian:5,bytetensor:13,cach:11,calcul:[2,9,13],call:[5,11,13],callabl:13,callback:9,can:[2,3,4,5,6,7,9,12,13,14,15,17,18,19,20],cancel:12,candid:[3,15,17,18],cao:21,capabl:[6,14],capit:0,captur:2,care:11,cat:19,catalan:14,categor:13,categori:13,celebr:5,central:14,challeng:[5,14,19],chang:[0,3,9,17],channel:2,charact:0,character:14,character_coverag:19,characterist:14,check:[0,5,8,10,11,20],checklist:0,checkpoint:[4,9,17,20],chen:21,chines:[4,19],chmod:7,choic:[0,10,11,15,17,18],choos:[0,1,4,5,15,17,18],chosen:[13,20],chronopoul:14,citat:[0,6],cite:[0,5,8,19],classmethod:[9,10,11],clean:[5,19],clear:0,clip:5,clone:12,clone_model:12,close:0,cls:9,cluster:[3,7,8,14,20],clutter:0,cmd:20,code:[0,3,7],code_dir:7,codebas:[5,7],coder:14,cohes:14,collect:10,column:3,com:[],combin:[14,18],comma:3,command:6,commenc:4,comment:0,common:[0,6,14],commun:[0,1,5,14],compar:8,compil:19,complet:[4,5,13,20],complex:[3,13,20],compon:[2,3,6,18],composit:17,comput:[3,4,8,9,11,14,17,18],concat:[11,17],conclus:14,condit:[13,17,18],conf:16,confer:8,config:[4,5,6,12,15,16,17,18,20],config_dir:5,config_
fil:12,configur:[1,3,4,6,14],congratul:20,connect:2,consid:[1,3,14,19],consider:17,consist:[0,4,19],constant:3,constructor:[0,11],consum:17,contain:[3,4,5,7,11,12,13,14,19,20],content:[0,18],context:[2,11,17],context_attn:11,continu:0,contribut:[0,2,6],contributor:6,control:[3,9],conv2d:11,conv:11,conveni:[3,4],convent:0,convers:13,convert:[10,12],coordin:14,copi:[0,3,7,11,17,18,20],copy_attn:[11,13,17],copy_attn_forc:17,copy_attn_typ:17,copy_loss_by_seqlength:17,core:[2,6,9],corpora:[10,14],corpora_info:10,corpu:[3,5,10,14,15,17,18,19],corr:[0,21],correct:[3,4],correspand:12,correspond:[4,10,14,18],could:[13,14],count:[3,9,10,13,15,17,18],count_paramet:9,cov:13,cov_pen:13,coverag:[13,17,18],coverage_attn:17,coverage_non:13,coverage_penalti:[13,18],coverage_summari:13,coverage_wu:13,cpu:[10,12,17,18],crai:7,crayon:17,creat:[3,5,7,9,19,20],creation:3,criteria:[14,17],criterion:[5,9],critic:[17,18],cross:[9,11,17],csc:[14,19],csv:3,ct2_model:12,ct2_translate_batch_arg:12,ct2_translator_arg:12,ctrl:0,cuda_visible_devic:[5,20],cumbersom:3,cumul:[11,13,18],cumulative_averag:11,cumulative_average_mask:11,cur_dir:19,cur_len:13,current:[3,9,10,11,13,14,17],curricula:3,curriculum:3,custom:[4,6,12,17,19],custom_opt:12,customiz:4,cut:[0,19],cutoff:13,d_ff:11,d_model:11,dai:21,data:[1,2,3,4,6,9,13,14,21],data_path:19,data_typ:[9,10,13,17,18],databas:14,dataload:10,datapoint:14,dataset:[6,15,17,18,19,20],dataset_adapt:10,datasetadapt:10,datastructur:12,dblp:0,ddress:21,deal:[3,14],debug:[16,17,18],dec:[3,14],dec_lay:[5,17,20],dec_sharing_group:[4,5,20],decai:17,decay_method:[5,17],decay_step:17,decod:[2,3,6,9,14,20],decode_strategi:13,decoder_ensembl:11,decoder_typ:[5,17],decoderbas:[9,11],decodestrategi:13,deep:14,def:0,defin:[3,5,11,14,15,17,18,20],definit:[11,14,20],degre:11,delai:3,delet:[15,17,18],delimit:18,delv:4,deng:8,denois:[3,5,6],denoising_object:[15,17,18],denot:[2,4],depend:[0,3,7,9,12,19],deprec:[17,18],depth:14,desc:19,describ:[2,11,12,17,20],descript:0,design:[4
,14,20],desir:[3,4,20],detail:[1,4,5,7,8,15,17,18,20],determin:[3,14,18],detoken:12,dev:[7,19],develop:[0,6],devic:[3,4,10,11,13,14,18],device_context:9,deyi:21,diagon:3,diagram:11,dict:[3,9,10,12,13,15,17,18],dict_kei:17,dictionari:[9,11,13,17],differ:[0,1,2,3,4,11,12,14,18,20],dim:11,dimens:[5,11,13,17],dimension:2,dir:19,direct:[0,3,8,13,14],directli:[0,5,18,19],directori:[3,4,7,12,17,20],disabl:17,discard:[15,17,18],discourag:17,disk:[17,18],displai:9,dist:9,distanc:[11,17],distinct:[4,14],distribut:[3,5,9,11,13,14,15,17,18,20],dive:4,divers:[15,17,18],divid:[2,3,17,18],divis:11,dmon:5,do_timeout:12,doc:0,document:[0,4,6,8,19,20],doe:[3,6,18],doesn:19,doi:8,doing:[3,18],domain:6,don:0,done:[5,13,14,19],dot:[2,11,17],dotprod:17,dowmunt:19,down:[13,14,15],download:[4,6,7,20],dozen:8,drop:19,dropout:[5,9,11,15,17,18],dropout_step:[9,17],due:17,dummi:[5,11],dump:[15,17,18],dump_beam:[13,18],dump_sampl:15,dump_transform:[17,18],duplic:20,dure:[11,12,14,17,18],dynam:[6,10,11,18],dynamicdatasetit:10,each:[3,4,5,11,13,14,15,17,18,19],earli:[5,17],earlier:[2,15,17,18],early_stop:[5,17],early_stopping_criteria:[5,17],earlystopp:9,eas:[3,14],easi:0,easili:3,echo:[5,19],ecosystem:1,edg:17,effect:[2,3,12,15],effici:[6,9,14,21],eight:[],either:[13,17],elaps:9,elapsed_tim:9,element:[2,3],els:19,emb_fil:11,emb_lut:11,embed:[2,6,14,15],embedding_s:11,embeddingless:17,embeddings_typ:[17,18],emerg:2,emploi:[2,4,9],empti:[5,10,11,13,15,17,18,19],en_side_in:19,en_side_out:19,en_stream:19,enabl:[1,5,11,14,17,18],enable_embeddingless:[11,17],enc:[3,14],enc_hidden:11,enc_lay:[5,17,20],enc_output:11,enc_sharing_group:[4,5,20],encod:[2,3,6,9,13,14,19,20],encoder_typ:[5,17],encoderbas:[9,11],encordec:[15,17,18],encount:[15,17,18],encout:[10,15,17,18],end:13,eng:3,english:[3,4,5,19],enhanc:[2,6,14],ensembl:18,ensemblemodel:11,ensur:[2,4,14],entir:[4,19],entri:0,entropi:9,env_dir:7,environ:[4,7,19,20],eos:13,epoch:17,epsilon:17,equal:[13,17],equat:11,equival:17,error:[0,11,15,17,18],especi:
3,essenti:13,establish:2,etal:8,etc:5,eural:21,europarl:[5,6,20],europarl_data:[19,20],european:[5,19],evalu:[9,19],even:3,event:13,everi:[9,11,17,18],everyth:14,exactli:0,exampl:[0,3,4,5,7,10,11,14,15,17,18,19,20],exce:17,except:[0,12,15,17,18],exclusion_token:13,execut:[4,14,15,17,18],exist:[5,11,15,17,18,19],exist_ok:19,exp:17,exp_host:17,exp_id:5,expand:[5,19],expans:14,expect:[3,13,20],experi:[4,5,15,17,18],experiment:17,expert:1,explan:[],explicitli:14,explor:4,exponenti:17,extend:0,extern:0,extra:[7,11,17],extract:[5,19],facilit:[2,14],fail:13,fairseq:0,fals:[5,9,10,11,12,13,15,16,17,18,19],familiar:8,faro:8,faster:17,favorit:5,feat_0:18,feat_1:18,feat_dim_expon:11,feat_merg:[11,17],feat_merge_s:17,feat_padding_idx:11,feat_vec_expon:[11,17],feat_vec_s:[11,17],feat_vocab_s:11,feats0:18,feats1:18,featur:[2,6,9,11,14,15,18,21],fed:2,feed:[3,11,14,17],feedforward:[2,17],feedforwardattentionbridgelay:6,feel:[0,4],few:[0,20],ffn:[11,17],field:10,fifth:5,figur:[11,14],file:[0,3,4,5,10,12,15,17,18,19,20],filenam:17,filter:[5,6,19],filterfeat:[15,17,18],filternonzeronumer:[15,17,18],filterrepetit:[15,17,18],filterterminalpunct:[15,17,18],filtertoolong:[3,5,15,17,18],filterwordratio:[15,17,18],find:[0,5],finer:14,firefox:0,first:[0,3,5,7,11,13,17],five:2,fix:[0,13,17],flag:[3,4,9],flake8:0,flexibl:[1,4],floattensor:[9,11,13],flow:[2,14],fnn:11,focu:[0,2],focus:14,folder:0,follow:[0,2,3,4,18,19,20],foo:0,forbidden:13,forbidden_token:13,forc:[13,18],format:[0,12,15,17,18,19,20],former:11,forward:[3,9,11,14,17],fotranmt:8,found:[19,20],foundat:[2,4],four:[],fp16:[17,18],fp32:[9,17,18],frac:2,fraction:[15,17,18],framework:[4,6,17],free:[0,4,5,12,19],freez:[11,17],freeze_word_vec:11,freeze_word_vecs_dec:17,freeze_word_vecs_enc:17,french:[4,19],frequenc:[15,17,18],frequent:6,friendli:1,from:[1,2,3,5,7,9,10,11,13,14,17,18,19,20],from_opt:[9,10,11],from_trans_opt:11,from_xxx:3,frozenset:13,full:[0,3,11,12,14,15,17,18,19],full_context_align:[11,17],fulli:[1,3,11,14],further:[5,
15,17,18,19],fusedadam:17,futur:[6,7],gao:21,gap:21,garg:17,gather:9,gating_output:11,gelu:17,gener:[0,1,2,3,4,5,6,9,10,13,18,19,20],generator_funct:17,ger:14,get:[6,7],get_submodul:11,git:[],github:[17,19],give:[3,17,18],given:[2,3,11,12],global_attent:17,global_attention_funct:17,global_scor:13,glove:[17,18],gnmt:13,gnmtglobalscor:13,going:13,gold:13,gold_scor:13,gold_sent:13,gomez:21,gone:17,good:[0,17],googl:[0,13,18,21],gpu:[3,4,5,7,12,13,14,17,18,20],gpu_backend:17,gpu_load:5,gpu_rank:[5,17,20],gpu_verbose_level:[9,17],gpuid:17,grad:9,gradient:[5,9,14,17],graham:21,grain:14,gram:13,graph:17,gre:7,greater:13,greedy_search:13,greedysearch:13,group:[4,5,11,14,17,18],groupwis:[3,14],grow:13,gtx1080:18,guid:[4,5,8,11,20],guidelin:[6,19],guillaum:8,hack:14,had:18,haddow:21,hand:[3,14],handl:[0,9,14],happen:13,hardwar:14,has:[2,3,4,6,11,13,14,15,17,18],has_cov_pen:13,has_len_pen:13,has_tgt:13,have:[0,3,11,13,14,17,18,19,20],head:[2,5,11,17],head_count:11,help:[0,2,18],helsinki:8,henc:20,here:[4,5,7,13,19],hidden:[9,11,17],hidden_ab_s:17,hidden_dim:2,hieu:21,high:[3,5,19],higher:[13,17,18],highest:18,hold:13,hook:11,hoorai:5,host:7,hour:5,how:[0,6,11,20],howev:[0,9],html:[0,17],http:[2,4,5,7,8,17,18,19,20,21],huge:17,human:[3,21],hyp:[4,20],hyper:1,hyphen:3,identifi:[5,18],idl:3,ids:3,ignor:[5,11,15,17,18],ignore_when_block:[13,18],iii:14,illia:21,illustr:14,ilya:21,imag:9,impact:17,implement:[2,9,14,17],improv:[1,5,11,13,17,19,21],in_config:3,in_featur:11,includ:[0,1,3,4,5,11,14,15,17,18,19],incompat:[15,17,18],incorpor:17,increas:[3,14],independ:14,index:[7,11,17],indic:[4,5,9,11,13,15,17,18],individu:[3,11],inf:13,infer:[10,13,14],inferencebatch:10,inferfeat:6,info:[5,10,17,18],inform:[2,3,5,14,17,18,20],infrastructur:14,ingredi:13,init:17,init_st:[9,11],initi:[4,5,6,9,11,12,13],initial_accumulator_valu:17,initil:10,inner:11,innov:6,inp:13,inp_seq_len:13,inproceed:8,input:[2,6,9,10,11,12,13,15,17,18,19,20,21],input_dir:19,input_len:11,input_sentence_s:19,inputs_len
:11,inputt:[10,13],insert:[15,17,18],insert_ratio:[15,17,18],insid:5,instal:[0,5,6,19],instanc:[9,11,13],instanti:9,instead:[0,3,7,11,15,17,18],instruct:[7,17,19],int8:18,integ:13,integr:0,interact:7,interfac:[9,11],intermedi:2,intermediate_output:2,intern:12,interv:17,introduc:[2,3,6],introduct:3,invalid:[11,15,17,18],involv:[2,14],is_empti:10,is_finish:13,is_normform:11,is_on_top:11,is_train:10,island:8,isn:13,issu:6,item:[10,11],iter:[9,10],iterabledataset:10,its:[0,3,6,14],itself:[3,11],jakob:21,jean:8,jinsong:21,job:[4,7],johnson:14,joiner:[15,17,18],jone:21,journal:0,json:16,junczi:19,just:19,kaiser:21,keep:[12,13,17],keep_checkpoint:[5,17],keep_stat:17,keep_topk:13,keep_topp:13,kei:[6,11,14],kera:17,kernel_s:11,key_len:11,kim:8,klau:21,klein:8,koehn:5,krikun:21,kwarg:11,label:[5,17],label_smooth:[5,17],lakew:14,lambda:[15,17,18],lambda_align:17,lambda_coverag:17,lang:[3,5,19],lang_a:3,lang_b:3,lang_pair:[3,20],languag:[2,4,5,14,15,17,18,19,20],language_pair:19,larg:14,last:[3,4,17,18,19],latter:11,launch:[],layer:[2,5,11,14,17,18,20],layer_cach:11,layer_norm_modul:11,layer_stack_decod:11,layer_stack_encod:11,layer_stack_index:11,layer_type_to_cl:2,layernorm:17,layerstack:3,layerstackdecod:11,layerstackencod:11,layerwis:14,lead:13,learn:[2,5,9,14,17],learning_r:[5,9,17],learning_rate_decai:17,learning_rate_decay_fn:9,least:0,leav:[3,17,19],len:[9,11,13],length:[3,9,11,13,15,17,18,19],length_averag:13,length_non:13,length_pen:13,length_penalti:[13,18],length_wu:13,less:3,let:[3,4,11,14],level:[5,6,10,15,17,18,19],leverag:[6,14],lib:7,librari:[8,17],lightweight:14,like:[0,4,11,13,14,18,19],limit:18,lin:[2,14,17],linattentionbridgelay:6,line:[0,5,10,15,17,18,19],linear:[2,11],linear_warmup:17,linguist:[8,11,14,21],link:[0,2,7],list:[0,3,9,11,12,13,15,17,18],list_model:12,literatur:17,llion:21,load:[6,7,9,11,12,14,17,18],load_model:12,load_pretrained_vector:11,loader:6,local:[0,3],localhost:17,locat:5,log:[5,6,9,13],log_dir:[5,20],log_fil:[17,18],log_file_level:[1
7,18],log_prob:13,log_tensorboard:9,logger:13,login:7,logit:[13,18],logsumexp:13,longer:18,longest:13,longitudin:14,longtensor:[9,11,13],look:[0,8,10,11,18,19],look_ahead_s:10,lookaheadbucket:10,loop:9,loppi:8,loss:[6,17],loss_scal:17,losscomputebas:9,love:[0,1],low:14,lower:[3,17,18],lrec:19,lsl:[13,21],lstm:17,lua:12,lukasz:21,lumi:6,luong:[17,21],lust:7,lustrep1:7,lustrep2:7,macherei:21,machin:[1,5,8,11,13,14,17,18,20,21],made:[3,5,6],magic:13,mahti:6,mai:[3,5,8,9,10,12,13,15,17,19],main:[0,5,8,9,15,17,18,19],maintain:[4,13],make:[0,4,7,9,10,14,15,17,18,19],make_shard_st:9,mammoth101:[5,19,20],mammoth:[0,3,6,7,8,9,10,11,12,13,14,17,19],mammoth_transform:[15,17,18],mamooth:[19,20],manag:[4,5,9,14],mani:[9,13,17],manipul:9,manual:[4,5,12,13,19],map:[3,9,19],marian:17,mark:17,marker:12,mask:[11,15,17,18],mask_length:[15,17,18],mask_or_step:11,mask_ratio:[5,15,17,18],mass:[15,17,18],massiv:[1,3,8],master:[5,17],master_ip:[4,17],master_port:[4,5,17],match:12,mathbb:2,mathbf:2,matric:2,matrix:[2,11,17],max:[9,11,13,19],max_generator_batch:[5,17],max_grad_norm:[5,9,17],max_length:[13,18],max_relative_posit:[11,17],max_sent_length:18,max_sentence_length:19,max_siz:9,maxim:21,maximum:[15,17,18],maybe_convert_align:12,maybe_detoken:12,maybe_detokenize_with_align:12,maybe_postprocess:12,maybe_preprocess:12,maybe_replenish:10,maybe_token:12,maybe_update_forbidden_token:13,maybe_update_target_prefix:13,mean:[3,4,11,12,14,17,18],mean_encod:11,meanencod:11,mechan:[1,2,3,14],mem:7,memori:[11,12,17,19],memory_bank:[11,13],memory_length:11,merg:[10,11,17],meta:3,metadata:9,method:[9,11,17],metric:18,mi250:7,michel:8,micku:8,mike:21,million:8,min_length:[13,18],minh:21,minim:14,minimum:[15,17,18],mirror:17,mix:9,mixer:10,mixingstrategi:10,mixtur:1,mkdir:[5,7,19],mlp:[11,17],mnmt:14,mode:[3,15,17,18],model:[1,2,3,4,6,11,13,15,20],model_checkpoint:20,model_dim:[5,11,17,20],model_dtyp:[9,17],model_fil:19,model_id:12,model_kwarg:12,model_prefix:19,model_root:12,model_sav:9,model_task:1
7,model_typ:[5,17],modelsaverbas:9,modif:[6,9],modifi:[0,4,13],modul:[0,2,6,7,9,14,17,20],modular:[1,6,8,20],module_id:11,mohammad:21,moment:5,monitor:5,monolingu:3,month:8,more:[0,1,3,5,7,10,13,14,15,17,18,20],most:[13,14,18],mostli:9,move:[12,17],moving_averag:[9,17],much:17,mul:[5,19,20],multi:[0,2,5,11,20],multi_headed_attn:11,multiheadedattent:[2,11],multilingu:[1,3,5,8,19,20],multinod:5,multipl:[0,2,3,9,10,11,14,17,18],multipli:[2,10],multplic:0,must:[3,11,12,14,17],my_config:20,mymodul:7,n_batch:9,n_best:[12,13,18],n_bucket:[10,17],n_correct:9,n_edge_typ:17,n_node:[5,17],n_sampl:[15,17,18],n_seg:12,n_src_word:9,n_step:17,n_word:9,naacl:17,name:[0,3,5,6,11,13,15,17,18,19,20],named_modul:11,namespac:12,napoleon:0,narg:[],nation:19,nccl:17,necessari:[0,4,5,7,9,13,17,18,20],necessit:3,need:[0,3,4,5,9,11,14,17,19,20,21],neg:[12,17],nest:11,net_b:11,net_c:11,network:[1,11,21],neubig:21,neural:[1,8,11,13,14,21],never:13,next:[3,9,13,18],nfeat:11,ngram:[13,18],nightmar:3,niki:[8,21],nlp:[8,20],nmt:[9,13,17,18],nmtmodel:[9,11],noam:[17,21],noamwd:17,nodalida:8,node:[3,5,7,9,14,17,20],node_gpu:[5,20],node_rank:[4,5,17],nois:3,non:[11,13,17,20],none:[5,9,10,11,12,13,15,17,18],nonetyp:13,nonzero_threshold:[15,17,18],nordic:8,norm:[11,17],norm_method:9,normal:[2,3,5,9,11,17],normalz:9,normform:[11,17],norouzi:21,note:[0,3,7,13,19,20],noth:[0,9],notset:[17,18],now:[1,4,5,20],nroo:8,ntask:7,nucleu:18,num_lay:11,num_step:9,num_thread:15,number:[3,9,10,11,13,14,15,17,18],numel_fn:10,numer:[15,17,18],nvidia:[5,7,17],obj:[0,9],object:[0,9,10,12,13,15,17,18,19],occur:4,oder:3,off:17,offer:[4,14],offici:[4,19],offset:10,ofi:7,often:[15,17,18],on_timemout:12,on_timeout:12,onc:[13,17],one:[0,2,3,4,5,9,10,11,15,17,18,20],ones:20,onli:[3,4,9,10,13,14,15,17,18,19],onmt:[5,19,20],onmt_token:[15,17,18],onmttok:6,open:[1,8,19],opennmt:[0,3,6,8,9,16],oper:14,operatornam:2,opt:[5,9,10,11,12,17,18],opt_level:17,optim:[1,5,6,14],option:[0,3,4,5,7,9,10,11,12,13,15,17,18,19,20],opu:[5,6],opus1
00:19,opustc:[5,19,20],ord:21,order:[3,17],org:[2,7,8,17,18,21],organ:[10,14],origin:[5,6,17,19],oriol:21,other:[2,4,5,7,9,13,15,17,18,19,20,21],other_lang:19,otherwis:[3,11,17,18],our:[5,7,13,19,20],our_stat:9,out:[3,8,9,20],out_config:3,out_featur:11,out_fil:13,out_path:[4,20],out_typ:19,output:[2,3,4,5,9,11,12,13,15,17,18,19,20],output_dir:[19,20],output_model:18,over:[0,3,9,13,17,18,19,20],overal:2,overcom:14,overhead:14,overrid:[11,13,15,17,18,20],overridden:11,overse:14,overview:6,overwrit:[5,7,15,17,18],own:[6,9,14],ownership:9,p17:8,p18:17,packag:[5,7,12],pad:[9,11,13],page:[5,8],pair:[3,4,9,12,14,17,19,20],paper:[0,2,5,17],parallel:[4,5,6,11,13,15,17,19],parallel_path:13,parallelcorpu:[10,13],param:9,param_init:[5,17],param_init_glorot:[5,17],paramet:[1,4,5,6,8,9,10,11,12,13,15,17,18,20],parenthes:0,parliament:[5,19],parmar:21,pars:[10,12],parse_arg:[],parse_opt:12,parser:[],part:[2,13],partial:14,particular:[0,3],particularli:6,partit:7,pass:[2,3,9,11,12,17],past:[0,17,20],path:[3,4,5,7,10,11,12,13,15,17,18,20],path_src:[5,20],path_tgt:[5,20],path_to_codebas:5,path_to_europarl:5,path_to_mammoth:[],path_to_src_languag:20,path_to_vocab:5,path_to_your_env:5,path_valid_src:5,path_valid_tgt:5,pathlib:19,patienc:[5,9],pattern:[3,15,17,18],pdf:17,pen:13,penalti:[6,13,15,17],penaltybuild:13,peopl:7,per:[0,3,15,17,18],perceiv:[2,17],perceiverattentionbridgelay:6,percentag:[15,17,18],perfom:17,perform:[2,11,17],permut:[15,17,18],permute_sent_ratio:[15,17,18],perplex:9,pertain:14,pfs:7,pham:21,philipp:5,phrase_t:[13,18],phylogenet:14,pip3:7,pip:[0,7,8,19,20],pipelin:[15,17,18],plain:10,plan:6,platform:7,pleas:[0,5,8,19,20],plu:17,point:[5,20],pointer:5,poisson:[15,17,18],poisson_lambda:[15,17,18],polosukhin:21,pool:[11,17],pool_siz:[10,17],port:[5,16,17],portal:8,pos_ffn_activation_fn:[11,17],posit:[5,11,17],position_encod:[5,11,17],position_ffn:11,positionalencod:11,positionwisefeedforward:[11,17],possibl:[3,9,11,12,13,15,17,18,20],postprocess:12,postprocess_opt:12,
potenti:13,pouliquen:19,pouta:19,ppid:5,ppl:9,pre:[9,12,13],pre_word_vecs_dec:17,pre_word_vecs_enc:17,preced:3,precis:9,pred:18,pred_scor:13,pred_sent:13,predict:[9,13,18],prefer:0,prefix:[3,9,15,17,18],prefix_seq_len:13,preload:12,preload_model:12,prepar:[6,13],preprint:21,preprocess:[4,12,14,19],preprocess_opt:12,presenc:3,present:5,presum:13,pretrain:[11,17],prevent:[13,18],previou:[3,11,13,20],previous:2,primari:3,prime:2,print:[9,17,18,19],prior_token:[15,17,18],priori:14,prob:13,proba:18,probabl:[11,13,15,17,18],problem:[13,14],proc:[8,21],proce:20,procedur:3,proceed:[5,8,19],process:[2,4,5,9,12,14,15,17,19],processed_data:4,processu:12,produc:[2,13,15,17,18,19],product:2,progress:5,projappl:7,project:[0,2,5,6,7,8],project_462000125:7,propag:9,proper:12,properli:7,properti:[9,11],proport:[3,15,17,18],provid:[4,5,7,8,18,19,20],prune:6,pty:7,publish:8,puhti:6,pull_request_chk:0,punct_threshold:[15,17,18],punctuat:[0,15,17,18],purason:14,put:[13,20],pwd:19,pyonmttok:[15,17,18],python3:[3,4,7,20],python:[0,3,4,5,7,17,20],pythonpath:7,pythonuserbas:[5,7],pytorch:[0,1,7],qin:21,qualifi:11,quantiz:18,queri:11,query_len:11,question:6,queue:[15,17],queue_siz:17,quickstart:[6,8],quoc:21,quot:0,raganato:8,rais:[11,15,17,18],random:[6,15,17,19],random_ratio:[15,17,18],random_sampling_temp:[13,18],random_sampling_topk:[13,18],random_sampling_topp:[13,18],randomli:[5,13,19],rang:18,rank:[5,13,14,17],ranslat:21,rare:13,rate:[5,6,9],rather:0,ratio:[13,15,17,18],raw:[10,13,18],raw_prob:11,rccl:7,reach:13,read:[0,3,5,12,19],readabl:[0,3,17,18],readm:17,real:11,rebuild:12,rebuild_seg_packag:12,receiv:3,recent:17,recip:11,recommend:[4,7,17,20],recommonmark:0,rectifi:2,recurr:11,reduc:14,redund:3,ref:0,refer:[0,1,2,4,6,7,11,19,20],referenc:11,regardless:[3,4],regist:11,regular:[15,17,18],rel:[11,14,17],relat:[5,14,15,17,18],relationship:[2,6],releas:[5,7,19],relev:[11,13,14],relu:[2,11,17],rememb:[0,4],remov:[3,5,15,17,18,19],renorm:17,reorder:13,rep_max_len:[15,17,18],rep_min_len
:[15,17,18],rep_threshold:[15,17,18],repeat:[13,15,17,18],repetit:18,replac:[4,13,15,17,18],replace_length:[5,15,17,18],replace_unk:[13,18],replic:11,report:[6,8,9,17,18],report_align:[13,18],report_everi:[5,17],report_manag:9,report_scor:13,report_stats_from_paramet:[9,17],report_tim:[13,18],reportmgrbas:9,repositori:6,repres:[5,9],represent:[2,11,14,17],reproduc:6,requir:[0,4,9,11,14,17,19,20],research:[5,8],reservoir:10,reset:9,reset_optim:17,resett:17,residu:[11,14],resolv:11,resourc:[3,5,14,19],respect:[2,3],respons:9,rest:16,restrict:[15,17,18],result:[11,12,17],return_attent:13,reus:17,reuse_copy_attn:17,revers:[15,17,18],reversible_token:[15,17,18],rico:21,right:0,rmsnorm:17,rnn:[9,17],roblem:21,rocm5:7,rocm:7,root:[2,3],rotat:[15,17,18],rotate_ratio:[15,17,18],roundrobin:17,row:3,rshavn:8,rsqrt:17,rst:0,run:[0,3,4,5,9,11,12,17,18,19,20],runtim:11,rush:8,russian:[4,19],s_idx:10,sacrebleu:7,sai:[3,11,14],samantao:7,same:[0,3,11,12,14,17,20],sampl:[6,13,15,17,19],sample_with_temperatur:13,sampling_temp:13,save:[5,9,14,15,17,18,19,20],save_all_gpu:[5,17],save_checkpoint_step:[5,9,17],save_config:[15,17,18],save_data:[15,17,18],save_dir:5,save_model:[5,17,20],saver:9,scalabl:[1,14],scale:[11,13,14,17],scenario:[1,20],schedul:[4,9,17],scheme:[5,6,14,20],schuster:21,score:[6,12,15,17,18],scorer:13,scratch:[1,7],script:[0,4,5,6,7,19,20],script_dir:5,seamless:4,search:[0,3,6,13],second:[2,3,11,12],section:[5,7],secur:[10,15,17,18],see:[3,5,11,12,13,15,17,18],seed:[5,13,15,17,18],seemingli:17,seen:2,segment:[3,12,15,17,18],select:[11,13],select_index:13,selector:3,self:[2,11,12,13,17],self_attn_typ:[11,17],send:[0,17],senellart:8,sennrich:21,sensibl:0,sent:[9,10,17,18],sent_numb:13,sentenc:[13,15,17,18,19],sentencepiec:[3,6,7,15,17,18,20],sentencepieceprocessor:19,separ:[3,5,11,14,19],seper:12,seq2seq:[13,17],seq:13,seq_len:[2,13],sequenc:[1,2,3,9,11,12,13,14,15,17,18],seri:14,serial:11,serv:[2,4],server:[6,17,19],servermodel:12,servermodelerror:12,session:7,set:[1,2
,3,4,5,7,9,11,12,13,14,15,17,18,20],sever:[3,5,11,13],sgd:17,sh16:[11,21],shaham:17,shallow:14,shape:[0,11,13],shard:[9,17,18],shard_siz:[4,9,18,20],share:[1,5,6,7,8,11,15,17,18,19,20],share_decoder_embed:17,share_embed:17,share_vocab:[15,17,18],shared_dec:20,shared_enc:20,shazeer:21,shortest:13,shot:3,should:[3,5,11,13,17,18,20],show:[11,20],shuf:19,shuffl:[5,19],shuffle_input_sent:19,side:[3,9,12,15,17,18],side_a:3,side_b:3,sign:[15,17,18],signifi:4,silent:[5,11,15,17,18],similar:[2,3,11,17],simpl:[2,9,11,17],simpleattentionbridgelay:6,simpli:[11,20],simulatan:11,sin:17,sinc:11,singl:[0,4,5,12,14,17,20],single_pass:17,singular:7,site:7,six:[4,19],size:[3,5,9,10,11,13,15,17,18,19,20],skip:[3,5,15,17,18,19],skip_embed:11,skip_empty_level:[5,10,15,17,18],slovenia:19,slow:[15,18],slurm:[3,4,5,7,20],slurm_nodeid:[4,5],slurmd_nodenam:4,smaller:[14,15,17,18],smi:5,smooth:[5,15,17,18],softmax:[2,17,18],solid:4,some:[0,3,9,11,18],someth:[0,11],sometim:0,soon:5,sort:[12,19],sorted_pair:3,sourc:[0,1,3,4,5,7,8,9,10,11,12,13,14,15,17,18,20],sp_path:19,space:[0,2,14,17],spacer:[15,17,18],span:[4,15,17,18],spanish:[4,19],spars:[1,11],special:10,specif:[2,3,4,5,6,8,13,14,15,17,18,20],specifi:[4,5,11,15,17,18,20],sphinx:0,sphinx_rtd_them:0,sphinxcontrib:0,spill:0,spm:[5,19],spm_encod:19,spm_train:19,sqrt:2,squar:[2,3],src:[3,4,9,11,12,13,15,17,18,19,20],src_embed:[17,18],src_feat:18,src_feats_vocab:[15,17,18],src_file:10,src_file_path:13,src_ggnn_size:17,src_group:3,src_lang:[3,20],src_languag:3,src_len:[9,11],src_length:13,src_map:13,src_onmttok_kwarg:[15,17,18],src_raw:13,src_seq_length:[5,15,17,18],src_seq_length_trunc:[17,18],src_subword_alpha:[15,17,18],src_subword_model:[15,17,18],src_subword_nbest:[15,17,18],src_subword_typ:[5,15,17,18],src_subword_vocab:[15,17,18],src_tgt:[5,20],src_vocab:[5,10,13,15,17,18,20],src_vocab_s:[5,17,18],src_vocab_threshold:[15,17,18],src_words_min_frequ:[17,18],srun:[4,5,7],stabl:2,stack:[11,14,17],stage:2,stand:0,standalon:4,standard:[11,17,18
],start:[3,4,5,6,7,9,12,17,19],start_decay_step:17,stat:[9,17],stat_list:9,state:[9,11,13,14,17],state_dict:17,state_dim:17,statist:[5,9,17],statu:6,stdout:9,step:[2,3,6,9,11,13,17,18],stepwis:11,stepwise_penalti:[13,18],stig:8,still:0,stop:[5,15,17,18],store:17,str:[0,9,10,11,12,13,19],strategi:[1,6,9,10,14,17],streamlin:14,stride:[10,11],string:[9,11,15,17,18],strip:19,structur:[2,4,6,17,18],structured_log_fil:[17,18],style:[0,10,11,14,15,17,18],styleguid:0,sub_id:11,subclass:[9,11,13],subcompon:3,subdirectori:7,sublay:6,submodul:11,subsequ:2,subset:19,substitut:3,substr:[15,17,18],subword:[3,6,14],successfulli:20,suggest:17,suit:[4,6],sum:[9,11,13,17],sume:9,summari:[0,5,13,18,19],summit:5,superclass:0,supercomput:7,supervis:[3,11,17],support:[0,1,3,17],suppos:19,sure:[4,7,13,19],sutskev:21,swahili:14,switchout:[6,21],switchout_temperatur:[15,17,18],symmetr:3,sync:14,synchron:14,system:[6,13,14,17,20,21],t_idx:10,tab:[15,17,18],tabl:[11,18],tag:10,tailor:4,take:[2,3,5,8,11,15,17,18,19],tar:[4,5,19,20],tarbal:19,target:[3,4,5,9,11,12,13,14,15,17,18],target_prefix:13,tartu:8,task2gpu:14,task:[3,6,9,10,11,13,14,20],task_distribution_strategi:17,task_id:[4,18,20],task_queue_manag:[9,10,11],tatoeba:[3,5,19],tau:[15,17,18],team:7,technic:8,temperatur:[3,13,15,17,18],templat:3,tensor:[0,9,10,11,13],tensorboard:[5,9,17,20],tensorboard_log_dir:[5,17,20],tensorflow:17,termin:[15,17,18],test:[0,1,4,7],text:[5,9,10,13,17,18,19,20],tgt:[3,9,11,12,15,17,18],tgt_embed:[17,18],tgt_file:10,tgt_file_path:13,tgt_group:3,tgt_lang:[3,20],tgt_languag:3,tgt_len:9,tgt_onmttok_kwarg:[15,17,18],tgt_pad_mask:11,tgt_prefix:13,tgt_sent:13,tgt_seq_length:[5,15,17,18],tgt_seq_length_trunc:[17,18],tgt_subword_alpha:[15,17,18],tgt_subword_model:[15,17,18],tgt_subword_nbest:[15,17,18],tgt_subword_typ:[5,15,17,18],tgt_subword_vocab:[15,17,18],tgt_vocab:[5,9,10,15,17,18,20],tgt_vocab_s:[5,17,18],tgt_vocab_threshold:[15,17,18],tgt_words_min_frequ:[17,18],than:[0,13,14,17,19],thang:21,thank:14,thant:
13,thei:[2,5,13,14],them:[3,4,11],therefor:14,thi:[0,2,3,4,5,7,8,9,10,11,13,14,15,17,18,19,20],thin:9,thing:[0,3],thread:15,three:[2,19],threshold:[15,17,18],through:[2,3,4,9,14],thu:[9,20],tic:0,tick:0,tiedemann:8,time:[2,3,7,9,13,14,15,17,18,20],timeout:12,timer:12,timothe:8,titl:8,to_cpu:12,to_gpu:12,to_yyi:3,tok:12,token:[3,5,9,10,12,13,14,15,17,18,20],token_drop:6,token_mask:6,tokendrop:[15,17,18],tokendrop_temperatur:[15,17,18],tokenizer_mark:12,tokenizer_opt:12,tokenmask:[15,17,18],tokenmask_temperatur:[15,17,18],too:13,tool:[1,5,6,20],toolkit:[8,14],top:[0,2,13,18],topk_id:13,topk_scor:13,torch:[0,7,9,10,11,17],torchtext:9,total:[3,9,17],tqdm:19,trail:0,train:[1,3,6,7,8,9,10,11,14],train_:[4,5,20],train_ar:4,train_bg:[5,20],train_c:[5,20],train_d:5,train_da:5,train_el:5,train_en:[5,20],train_et:5,train_extremely_large_corpu:19,train_fi:5,train_fr:5,train_from:17,train_hu:5,train_it:[5,9],train_loss:9,train_loss_md:9,train_lt:5,train_lv:5,train_nl:5,train_pl:5,train_pt:5,train_ro:5,train_sk:5,train_sl:5,train_step:[9,17],train_sv:5,trainabl:9,trainer:6,training_step:9,transfer:14,transform:[1,2,5,6,9,10,11,14,20,21],transformer_decod:11,transformer_encod:11,transformer_ff:[5,17],transformerattentionbridgelay:6,transformerdecod:11,transformerdecoderbas:11,transformerencod:11,transformerencoderlay:2,transforms_cl:10,transit:11,translat:[1,3,4,5,6,8,9,11,12,14,16,21],translate_batch:13,translation_serv:12,translationbuild:13,translationserv:12,transvers:14,travi:0,trg:3,triangl:3,trichotomi:14,trick:[6,11],trivial:11,trunc_siz:9,truncat:[9,17,18],truncated_decod:17,trust:19,turn:[11,17],tutori:[4,6,7,19,20],two:[2,3,5,7,8,11,14,20],txt:[0,18,19],type:[0,2,3,6,8,9,10,11,12,13,15,18],typic:[9,17],typolog:14,under:[1,3,7,17,18],undergo:2,undergon:2,underli:13,understand:4,uniform:17,unigram:[15,17,18],union:0,uniqu:[4,6],unit:[2,14,19],unittest:0,univers:8,unk:[13,18],unknown:13,unless:3,unload:12,unload_model:12,unmodifi:13,unnecessari:[0,3],unpc:[4,6],unset:3,uns
har:20,until:[13,18],unwieldi:3,updat:[7,9,12,13,17,20],update_finish:13,update_learning_r:17,update_n_src_word:9,update_vocab:17,upgrad:7,upon:1,upper:3,url:[7,8,21],url_root:16,usag:[5,6,15,16,17,18],use:[0,3,4,5,6,7,9,11,12,13,14,15,17,18,19,20],used:[2,3,5,9,10,11,12,13,14,15,17,18],useful:[1,9],user:[4,5,6,7,9,12],uses:[0,3,5,6,7,11,13,17,20],using:[0,1,2,3,4,5,8,11,12,13,15,17,18,19,20],usual:7,uszkoreit:21,util:[2,4,5,9,10,14],valid:[5,9,15,17,18,20],valid_batch_s:[5,17],valid_it:9,valid_loss:9,valid_loss_md:9,valid_step:[5,9,17],valu:[2,3,4,5,9,11,12,13,15,17,18],variabl:[3,4,7,10,13],variat:0,variou:[4,14],vaswani:21,vaswanispujgkp17:0,vector:[11,14,17],venv:7,verbos:[13,17,18],veri:[0,14,18],versatil:14,version:[1,5,12,13,19],via:[11,21],vinyal:21,virtual:7,visit:0,visual:[14,17],vocab:[4,5,6,9,13,14,20],vocab_path:[15,17,18],vocab_s:[13,17,19],vocab_sample_queue_s:15,vocab_size_multipl:[17,18],vocabs_dict:10,vocabulari:[3,4,5,9,14,15,17,18,19,20],vsp:[11,21],wai:[3,5,13],wait:[3,5],wang:21,want:[3,18],warmup:17,warmup_step:[5,17],warn:[10,15,17,18],websit:[5,19],weight:[2,3,11,17,18],weight_decai:17,weighted_sampl:17,well:[0,6,17],wget:[4,5,19,20],what:[3,6,9,12],when:[0,3,8,10,11,13,15,17,18,19,20],where:[2,3,6,7,11,13,14,15,17,18],wherea:[13,17],whether:[9,11,12,13,15,17,18],which:[1,3,4,11,13,14,17,19,20],whl:7,whole:13,whose:18,why:2,wiki:17,wikipedia:17,window:[15,17,18],with_align:9,within:[2,11,12],without:[0,7,17],wojciech:21,wolfgang:21,word2vec:[17,18],word:[2,11,13,14,15,17,18],word_align:13,word_lut:11,word_padding_idx:11,word_ratio_threshold:[15,17,18],word_vec_s:11,word_vocab_s:11,work:[0,3,7,13,17,20],workflow:8,workstat:[],world_siz:[5,17,20],would:[3,11,13,17,19],wouldn:4,wpdn18:[15,17,18,21],wrap:[11,12],wrapper:[4,5,9],writabl:3,write:[3,5,9,19],writer:9,wsc:[13,21],www:17,xavier_uniform:17,xent:9,xinyi:21,xiong:21,xvzf:19,xx_side_in:19,xx_side_out:19,xx_stream:19,xzvf:19,yaml:[4,5,15,17,18,20],year:[4,8,19],yet:13,yield:10,yml:[0,5,20]
,yonghui:21,yoon:8,you:[0,3,4,5,7,11,17,18,19,20,21],your:[0,3,4,6,7,19,20],your_config:[],your_path:5,your_project_name_is_your_account:7,your_venv_nam:7,your_vevn_nam:7,yourself:[8,19],yuan:21,yuntian:8,yyi:3,zaremba:21,zero:[3,9,11,13,15,17,18],zero_grad:9,zhang:21,zhifeng:21,ziemski:19,zihang:21,zip:19,zquez:8,zxs18:[11,21]},titles:["Contributors","About MAMMOTH","Attention Bridge","Config-config Tool","MAMMOTH Sharing Schemes","Training MAMMOTH 101","Contents","Installation","Overview","Framework","Data Loaders","Modules","Server","Translation","Component-level Modularity","Build Vocab","Server","Train","Translate","Prepare Data","Quickstart","References"],titleterms:{"case":1,"class":13,"function":1,"while":1,Are:1,The:3,about:1,actual:3,adapt:[3,17],adapter_config:3,ae_path:3,ae_transform:3,algorithm:1,align:17,allocate_devic:3,altern:3,anatomi:14,ani:1,approach:1,argument:16,attent:[2,11,17],autoencod:3,beam:18,between:1,bridg:[2,11,14,17],build:15,can:1,capabl:1,citat:8,cluster_languag:3,command:[3,4],common:[15,17,18],complete_language_pair:3,compon:14,config:3,config_al:3,config_config:3,configur:[5,15,17,18,20],content:6,contribut:1,contributor:0,core:12,corpora:3,corpora_schedul:3,creation:5,custom:14,data:[5,10,15,17,18,19,20],dataset:[4,10],dec_sharing_group:3,decod:[4,11,13,17,18],denois:[15,17,18],develop:1,direct:19,directori:5,distanc:3,distance_matrix:3,docstr:0,document:1,doe:1,domain:1,download:[5,19],dynam:17,effici:18,embed:[11,17,18],enc_sharing_group:3,encod:[4,11,17],enhanc:1,environ:5,europarl:19,featur:[1,17],feedforwardattentionbridgelay:2,filter:[15,17,18],flexibl:14,framework:9,fulli:4,further:20,futur:1,gener:17,get:19,group:3,guidelin:0,has:1,how:1,infer:4,inferfeat:[15,17,18],initi:17,innov:1,input:3,instal:[7,8,20],introduc:1,issu:1,its:1,job:5,kei:[1,3],languag:3,level:[3,14],leverag:1,linattentionbridgelay:2,line:3,load:10,loader:10,log:[17,18],loss:9,lumi:7,made:1,mahti:7,mammoth:[1,4,5,20],manual:3,matrix:3,model:[5,9,12,14,17
,18,19],modif:1,modul:11,modular:[4,14],n_gpus_per_nod:3,n_group:3,n_node:3,name:16,note:4,onmttok:[15,17,18],opennmt:1,optim:[9,17],opu:19,origin:1,other:3,overrid:3,overview:[4,8],own:1,parallel:14,paramet:[3,14],pars:19,particularli:1,path:19,penalti:18,perceiverattentionbridgelay:2,plan:1,prepar:[5,19,20],project:1,prune:[17,18],puhti:7,quickstart:[19,20],random:18,rate:17,read:20,refer:21,relationship:1,relev:19,remove_temporary_kei:3,report:1,repositori:1,reproduc:[15,17,18],run:7,sampl:18,scheme:4,score:13,search:18,sentencepiec:[5,19],separ:4,server:[12,16],set:19,set_transform:3,setup:[4,5],share:[3,4,14],sharing_group:3,shot:19,simpleattentionbridgelay:2,specif:1,specifi:3,src_path:3,stage:3,start:20,statu:1,step:[5,19,20],strategi:13,structur:14,sublay:11,submiss:5,subword:[15,17,18],suit:1,supervis:19,switchout:[15,17,18],system:4,task:[5,15,17,18],test:19,tgt_path:3,than:3,token:19,token_drop:[15,17,18],token_mask:[15,17,18],tool:3,top:3,train:[4,5,17,19,20],trainer:9,transform:[3,15,17,18],transformerattentionbridgelay:2,translat:[13,18,19,20],translation_config:3,translation_config_dir:3,trick:18,type:17,uniqu:1,unpc:19,unshar:4,usag:3,use:1,use_introduce_at_training_step:3,use_src_lang_token:3,use_weight:3,user:1,uses:1,valid:19,variabl:[5,19],vocab:[10,15,17,18,19],well:1,what:1,where:1,yaml:3,your:5,zero:19,zero_shot:3}})
\ No newline at end of file