diff --git a/_modules/onmt/trainer.html b/_modules/onmt/trainer.html index 53e16c0b..971bd762 100644 --- a/_modules/onmt/trainer.html +++ b/_modules/onmt/trainer.html @@ -558,12 +558,12 @@

Source code for onmt.trainer

                         valid_stats=valid_stats,
                     )
 
-                # Run patience mechanism
-                if self.earlystopper is not None:
-                    self.earlystopper(valid_stats, step)
-                    # If the patience has reached the limit, stop training
-                    if self.earlystopper.has_stopped():
-                        break
+            #     # Run patience mechanism
+            #     if self.earlystopper is not None:
+            #         self.earlystopper(valid_stats, step)
+            #         # If the patience has reached the limit, stop training
+            #         if self.earlystopper.has_stopped():
+            #             break
 
             if self.model_saver is not None and (save_checkpoint_steps != 0 and step % save_checkpoint_steps == 0):
                 self.model_saver.save(step, moving_average=self.moving_average)
@@ -625,10 +625,6 @@ 

Source code for onmt.trainer

         # Set model back to training mode.
         valid_model.train()
 
-        for p in self.model.parameters():
-            if hasattr(p, 'has_grad'):
-                p.has_grad = False
-
         return stats
def _gradient_accumulation_over_lang_pairs( @@ -643,7 +639,7 @@

Source code for onmt.trainer

             seen_comm_batches.add(comm_batch)
             if self.norm_method == "tokens":
                 num_tokens = (
-                    batch.labels[1:, :, 0].ne(self.train_loss_md[f'trainloss{metadata.tgt_lang}'].padding_idx).sum()
+                    batch.tgt[1:, :, 0].ne(self.train_loss_md[f'trainloss{metadata.tgt_lang}'].padding_idx).sum()
                 )
                 normalization += num_tokens.item()
             else:
@@ -663,9 +659,6 @@ 

Source code for onmt.trainer

             if src_lengths is not None:
                 report_stats.n_src_words += src_lengths.sum().item()
 
-            # tgt_outer corresponds to the target-side input. The expected
-            # decoder output will be read directly from the batch:
-            # cf. `onmt.utils.loss.CommonLossCompute._make_shard_state`
             tgt_outer = batch.tgt
 
             bptt = False
diff --git a/_modules/onmt/utils/loss.html b/_modules/onmt/utils/loss.html
index 003c46ea..10e968a6 100644
--- a/_modules/onmt/utils/loss.html
+++ b/_modules/onmt/utils/loss.html
@@ -357,19 +357,19 @@ 

Source code for onmt.utils.loss

             batch_stats.update(stats)
         return None, batch_stats
 
-    def _stats(self, loss, scores, labels):
+    def _stats(self, loss, scores, target):
         """
         Args:
             loss (:obj:`FloatTensor`): the loss computed by the loss criterion.
             scores (:obj:`FloatTensor`): a score for each possible output
-            labels (:obj:`FloatTensor`): true targets
+            target (:obj:`FloatTensor`): true targets
 
         Returns:
             :obj:`onmt.utils.Statistics` : statistics for this batch.
         """
         pred = scores.max(1)[1]
-        non_padding = labels.ne(self.padding_idx)
-        num_correct = pred.eq(labels).masked_select(non_padding).sum().item()
+        non_padding = target.ne(self.padding_idx)
+        num_correct = pred.eq(target).masked_select(non_padding).sum().item()
         num_non_padding = non_padding.sum().item()
         return onmt.utils.Statistics(loss.item(), num_non_padding, num_correct)
 
@@ -399,14 +399,14 @@ 

Source code for onmt.utils.loss

 
         self.confidence = 1.0 - label_smoothing
 
-    def forward(self, output, labels):
+    def forward(self, output, target):
         """
         output (FloatTensor): batch_size x n_classes
-        labels (LongTensor): batch_size
+        target (LongTensor): batch_size
         """
-        model_prob = self.one_hot.repeat(labels.size(0), 1)
-        model_prob.scatter_(1, labels.unsqueeze(1), self.confidence)
-        model_prob.masked_fill_((labels == self.ignore_index).unsqueeze(1), 0)
+        model_prob = self.one_hot.repeat(target.size(0), 1)
+        model_prob.scatter_(1, target.unsqueeze(1), self.confidence)
+        model_prob.masked_fill_((target == self.ignore_index).unsqueeze(1), 0)
 
         return F.kl_div(output, model_prob, reduction='sum')
 
@@ -440,14 +440,12 @@ 

Source code for onmt.utils.loss

         )
         shard_state.update({"std_attn": attns.get("std"), "coverage_attn": coverage})
 
-    def _compute_loss(
-        self, batch, output, target, labels, std_attn=None, coverage_attn=None, align_head=None, ref_align=None
-    ):
+    def _compute_loss(self, batch, output, target, std_attn=None, coverage_attn=None, align_head=None, ref_align=None):
 
         bottled_output = self._bottle(output)
 
         scores = self.generator(bottled_output)
-        gtruth = labels.view(-1)
+        gtruth = target.view(-1)
 
         loss = self.criterion(scores, gtruth)
         if self.lambda_coverage != 0.0:
@@ -507,9 +505,7 @@ 

Source code for onmt.utils.loss

         range_end = range_[1]
         shard_state = {
             "output": output,
-            # TODO: target here is likely unnecessary, as it now corresponds to target-side input
             "target": batch.tgt[range_start:range_end, :, 0],
-            "labels": batch.labels[range_start:range_end, :, 0],
         }
         if self.lambda_coverage != 0.0:
             self._add_coverage_shard_state(shard_state, attns)
diff --git a/_sources/config_config.md.txt b/_sources/config_config.md.txt
index d64773b1..650439de 100644
--- a/_sources/config_config.md.txt
+++ b/_sources/config_config.md.txt
@@ -34,7 +34,7 @@ The meta-parameters under the `config_config` key:
 Path templates for source and target corpora, respectively.
 The path templates can contain the following variables that will be substituted by `config_config`:
 
-- Directional corpus mode
+- Directional corpus mode 
   - `{src_lang}`: The source language of the task
   - `{tgt_lang}`: The target language of the task
   - `{lang_pair}`: `{src_lang}-{tgt_lang}` for convenience
@@ -99,7 +99,7 @@ Generate translation configs for zero-shot directions.
 #### `transforms` and `ae_transforms`
 
 A list of transforms, for translation tasks and autoencoder tasks, respectively.
-Use this to apply subword segmentation, e.g. using `sentencepiece`, and `denoising` noise for autoencoder.
+Use this to apply subword segmentation, e.g. using `sentencepiece`, and `bart` noise for autoencoder.
 Both of these may change the sequence length, necessitating a `filtertoolong` transform.
 
 #### `enc_sharing_groups` and `dec_sharing_groups`
diff --git a/config_config.html b/config_config.html
index c5cd31bf..9c38542f 100644
--- a/config_config.html
+++ b/config_config.html
@@ -321,7 +321,7 @@ 

zero_shot

transforms and ae_transforms

A list of transforms, for translation tasks and autoencoder tasks, respectively. -Use this to apply subword segmentation, e.g. using sentencepiece, and denoising noise for autoencoder. +Use this to apply subword segmentation, e.g. using sentencepiece, and bart noise for autoencoder. Both of these may change the sequence length, necessitating a filtertoolong transform.

diff --git a/index.html b/index.html index 2de8cc8b..d5511b43 100644 --- a/index.html +++ b/index.html @@ -241,14 +241,18 @@

ContentsConfiguration
  • Data
  • Vocab
  • +
  • Transform/BART
  • +
  • Transform/Filter
  • +
  • Transform/Filter
  • +
  • Transform/Filter
  • +
  • Transform/Filter
  • +
  • Transform/Filter
  • Transform/InferFeats
  • Transform/SwitchOut
  • Transform/Token_Drop
  • Transform/Token_Mask
  • Transform/Subword/Common
  • Transform/Subword/ONMTTOK
  • -
  • Transform/Filter
  • -
  • Transform/Denoising AE
  • Reproducibility
  • @@ -258,14 +262,18 @@

    ContentsVocab
  • Pruning
  • Embeddings
  • +
  • Transform/BART
  • +
  • Transform/Filter
  • +
  • Transform/Filter
  • +
  • Transform/Filter
  • +
  • Transform/Filter
  • +
  • Transform/Filter
  • Transform/InferFeats
  • Transform/SwitchOut
  • Transform/Token_Drop
  • Transform/Token_Mask
  • Transform/Subword/Common
  • Transform/Subword/ONMTTOK
  • -
  • Transform/Filter
  • -
  • Transform/Denoising AE
  • Model-Embeddings
  • Model-Embedding Features
  • Model- Task
  • @@ -295,14 +303,18 @@

    ContentsDecoding tricks
  • Logging
  • Efficiency
  • +
  • Transform/BART
  • +
  • Transform/Filter
  • +
  • Transform/Filter
  • +
  • Transform/Filter
  • +
  • Transform/Filter
  • +
  • Transform/Filter
  • Transform/InferFeats
  • Transform/SwitchOut
  • Transform/Token_Drop
  • Transform/Token_Mask
  • Transform/Subword/Common
  • Transform/Subword/ONMTTOK
  • -
  • Transform/Filter
  • -
  • Transform/Denoising AE
  • Source and Target Languages
  • diff --git a/options/build_vocab.html b/options/build_vocab.html index a54d1cbc..a678e8b7 100644 --- a/options/build_vocab.html +++ b/options/build_vocab.html @@ -107,14 +107,18 @@
  • Configuration
  • Data
  • Vocab
  • +
  • Transform/BART
  • +
  • Transform/Filter
  • +
  • Transform/Filter
  • +
  • Transform/Filter
  • +
  • Transform/Filter
  • +
  • Transform/Filter
  • Transform/InferFeats
  • Transform/SwitchOut
  • Transform/Token_Drop
  • Transform/Token_Mask
  • Transform/Subword/Common
  • Transform/Subword/ONMTTOK
  • -
  • Transform/Filter
  • -
  • Transform/Denoising AE
  • Reproducibility
  • @@ -199,13 +203,27 @@

    Build Vocab
    usage: build_vocab.py [-h] [-config CONFIG] [-save_config SAVE_CONFIG] -data
                           DATA [-skip_empty_level {silent,warning,error}]
    -                      [-transforms {filterfeats,inferfeats,switchout,tokendrop,tokenmask,sentencepiece,bpe,onmt_tokenize,filtertoolong,prefix,denoising} [{filterfeats,inferfeats,switchout,tokendrop,tokenmask,sentencepiece,bpe,onmt_tokenize,filtertoolong,prefix,denoising} ...]]
    +                      [-transforms {bart,filtertoolong,filterwordratio,filterrepetitions,filterterminalpunct,filternonzeronumerals,filterfeats,inferfeats,switchout,tokendrop,tokenmask,sentencepiece,bpe,onmt_tokenize,prefix} [{bart,filtertoolong,filterwordratio,filterrepetitions,filterterminalpunct,filternonzeronumerals,filterfeats,inferfeats,switchout,tokendrop,tokenmask,sentencepiece,bpe,onmt_tokenize,prefix} ...]]
                           -save_data SAVE_DATA [-overwrite] [-n_sample N_SAMPLE]
                           [-dump_samples] [-num_threads NUM_THREADS]
                           [-vocab_sample_queue_size VOCAB_SAMPLE_QUEUE_SIZE]
                           -src_vocab SRC_VOCAB [-tgt_vocab TGT_VOCAB]
                           [-share_vocab] [-vocab_paths VOCAB_PATHS]
                           [-src_feats_vocab SRC_FEATS_VOCAB]
    +                      [--permute_sent_ratio PERMUTE_SENT_RATIO]
    +                      [--rotate_ratio ROTATE_RATIO]
    +                      [--insert_ratio INSERT_RATIO]
    +                      [--random_ratio RANDOM_RATIO] [--mask_ratio MASK_RATIO]
    +                      [--mask_length {subword,word,span-poisson}]
    +                      [--poisson_lambda POISSON_LAMBDA]
    +                      [--replace_length {-1,0,1}]
    +                      [--src_seq_length SRC_SEQ_LENGTH]
    +                      [--tgt_seq_length TGT_SEQ_LENGTH]
    +                      [--word_ratio_threshold WORD_RATIO_THRESHOLD]
    +                      [--rep_threshold REP_THRESHOLD]
    +                      [--rep_min_len REP_MIN_LEN] [--rep_max_len REP_MAX_LEN]
    +                      [--punct_threshold PUNCT_THRESHOLD]
    +                      [--nonzero_threshold NONZERO_THRESHOLD]
                           [--reversible_tokenization {joiner,spacer}]
                           [--prior_tokenization]
                           [-switchout_temperature SWITCHOUT_TEMPERATURE]
    @@ -224,17 +242,7 @@ 

    Build Vocab[-src_subword_type {none,sentencepiece,bpe}] [-tgt_subword_type {none,sentencepiece,bpe}] [-src_onmttok_kwargs SRC_ONMTTOK_KWARGS] - [-tgt_onmttok_kwargs TGT_ONMTTOK_KWARGS] - [--src_seq_length SRC_SEQ_LENGTH] - [--tgt_seq_length TGT_SEQ_LENGTH] - [--permute_sent_ratio PERMUTE_SENT_RATIO] - [--rotate_ratio ROTATE_RATIO] - [--insert_ratio INSERT_RATIO] - [--random_ratio RANDOM_RATIO] [--mask_ratio MASK_RATIO] - [--mask_length {subword,word,span-poisson}] - [--poisson_lambda POISSON_LAMBDA] - [--replace_length {-1,0,1}] - [--denoising_objective {bart,mass}] [--seed SEED] + [-tgt_onmttok_kwargs TGT_ONMTTOK_KWARGS] [--seed SEED]

    +
    +

    Transform/BART

    +
    +

    Caution

    +

    This transform will not take effect when building vocabulary.

    +
    +
    +
    --permute_sent_ratio, -permute_sent_ratio
    +

    Permute this proportion of sentences (boundaries defined by [‘.’, ‘?’, ‘!’]) in all inputs.

    +

    Default: 0.0

    +
    +
    --rotate_ratio, -rotate_ratio
    +

    Rotate this proportion of inputs.

    +

    Default: 0.0

    +
    +
    --insert_ratio, -insert_ratio
    +

    Insert this percentage of additional random tokens.

    +

    Default: 0.0

    +
    +
    --random_ratio, -random_ratio
    +

    Instead of using <mask>, use random token this often.

    +

    Default: 0.0

    +
    +
    --mask_ratio, -mask_ratio
    +

    Fraction of words/subwords that will be masked.

    +

    Default: 0.0

    +
    +
    --mask_length, -mask_length
    +

    Possible choices: subword, word, span-poisson

    +

    Length of masking window to apply.

    +

    Default: “subword”

    +
    +
    --poisson_lambda, -poisson_lambda
    +

    Lambda for Poisson distribution to sample span length if -mask_length set to span-poisson.

    +

    Default: 3.0

    +
    +
    --replace_length, -replace_length
    +

    Possible choices: -1, 0, 1

    +

    When masking N tokens, replace with 0, 1, or N tokens. (use -1 for N)

    +

    Default: -1

    +
    +
    +
    +
    +

    Transform/Filter

    +
    +
    --src_seq_length, -src_seq_length
    +

    Maximum source sequence length.

    +

    Default: 200

    +
    +
    --tgt_seq_length, -tgt_seq_length
    +

    Maximum target sequence length.

    +

    Default: 200

    +
    +
    +
    +
    +

    Transform/Filter

    +
    +
    --word_ratio_threshold, -word_ratio_threshold
    +

    Threshold for discarding sentences based on word ratio.

    +

    Default: 3

    +
    +
    +
    +
    +

    Transform/Filter

    +
    +
    --rep_threshold, -rep_threshold
    +

    Number of times the substring is repeated.

    +

    Default: 2

    +
    +
    --rep_min_len, -rep_min_len
    +

    Minimum length of the repeated pattern.

    +

    Default: 3

    +
    +
    --rep_max_len, -rep_max_len
    +

    Maximum length of the repeated pattern.

    +

    Default: 100

    +
    +
    +
    +
    +

    Transform/Filter

    +
    +
    --punct_threshold, -punct_threshold
    +

    Minimum penalty score for discarding sentences based on their terminal punctuation signs

    +

    Default: -2

    +
    +
    +
    +
    +

    Transform/Filter

    +
    +
    --nonzero_threshold, -nonzero_threshold
    +

    Threshold for discarding sentences based on numerals between the segments with zeros removed

    +

    Default: 0.5

    +
    +
    +

    Transform/InferFeats

    @@ -425,63 +533,6 @@

    Transform/Subword/ONMTTOK -

    Transform/Filter

    -
    -
    --src_seq_length, -src_seq_length
    -

    Maximum source sequence length.

    -

    Default: 200

    -
    -
    --tgt_seq_length, -tgt_seq_length
    -

    Maximum target sequence length.

    -

    Default: 200

    -
    -
    -

    -
    -

    Transform/Denoising AE

    -
    -
    --permute_sent_ratio, -permute_sent_ratio
    -

    Permute this proportion of sentences (boundaries defined by [‘.’, ‘?’, ‘!’]) in all inputs.

    -

    Default: 0.0

    -
    -
    --rotate_ratio, -rotate_ratio
    -

    Rotate this proportion of inputs.

    -

    Default: 0.0

    -
    -
    --insert_ratio, -insert_ratio
    -

    Insert this percentage of additional random tokens.

    -

    Default: 0.0

    -
    -
    --random_ratio, -random_ratio
    -

    Instead of using <mask>, use random token this often. Incompatible with MASS

    -

    Default: 0.0

    -
    -
    --mask_ratio, -mask_ratio
    -

    Fraction of words/subwords that will be masked.

    -

    Default: 0.0

    -
    -
    --mask_length, -mask_length
    -

    Possible choices: subword, word, span-poisson

    -

    Length of masking window to apply.

    -

    Default: “subword”

    -
    -
    --poisson_lambda, -poisson_lambda
    -

    Lambda for Poisson distribution to sample span length if -mask_length set to span-poisson.

    -

    Default: 3.0

    -
    -
    --replace_length, -replace_length
    -

    Possible choices: -1, 0, 1

    -

    When masking N tokens, replace with 0, 1, or N tokens. (use -1 for N)

    -

    Default: -1

    -
    -
    --denoising_objective
    -

    Possible choices: bart, mass

    -

    choose between BART-style or MASS-style denoising objectives

    -

    Default: “bart”

    -
    -
    -

    Reproducibility

    diff --git a/options/train.html b/options/train.html index 129969dc..b47a45bc 100644 --- a/options/train.html +++ b/options/train.html @@ -110,14 +110,18 @@
  • Vocab
  • Pruning
  • Embeddings
  • +
  • Transform/BART
  • +
  • Transform/Filter
  • +
  • Transform/Filter
  • +
  • Transform/Filter
  • +
  • Transform/Filter
  • +
  • Transform/Filter
  • Transform/InferFeats
  • Transform/SwitchOut
  • Transform/Token_Drop
  • Transform/Token_Mask
  • Transform/Subword/Common
  • Transform/Subword/ONMTTOK
  • -
  • Transform/Filter
  • -
  • Transform/Denoising AE
  • Model-Embeddings
  • Model-Embedding Features
  • Model- Task
  • @@ -216,7 +220,7 @@

    Train

    usage: train.py [-h] [-config CONFIG] [-save_config SAVE_CONFIG] -data DATA
                     [-skip_empty_level {silent,warning,error}]
    -                [-transforms {filterfeats,inferfeats,switchout,tokendrop,tokenmask,sentencepiece,bpe,onmt_tokenize,filtertoolong,prefix,denoising} [{filterfeats,inferfeats,switchout,tokendrop,tokenmask,sentencepiece,bpe,onmt_tokenize,filtertoolong,prefix,denoising} ...]]
    +                [-transforms {bart,filtertoolong,filterwordratio,filterrepetitions,filterterminalpunct,filternonzeronumerals,filterfeats,inferfeats,switchout,tokendrop,tokenmask,sentencepiece,bpe,onmt_tokenize,prefix} [{bart,filtertoolong,filterwordratio,filterrepetitions,filterterminalpunct,filternonzeronumerals,filterfeats,inferfeats,switchout,tokendrop,tokenmask,sentencepiece,bpe,onmt_tokenize,prefix} ...]]
                     [-save_data SAVE_DATA] [-overwrite] [-n_sample N_SAMPLE]
                     [-dump_transforms] -src_vocab SRC_VOCAB [-tgt_vocab TGT_VOCAB]
                     [-share_vocab] [-vocab_paths VOCAB_PATHS]
    @@ -232,6 +236,18 @@ 

    Train [-src_embeddings SRC_EMBEDDINGS] [-tgt_embeddings TGT_EMBEDDINGS] [-embeddings_type {GloVe,word2vec}] + [--permute_sent_ratio PERMUTE_SENT_RATIO] + [--rotate_ratio ROTATE_RATIO] [--insert_ratio INSERT_RATIO] + [--random_ratio RANDOM_RATIO] [--mask_ratio MASK_RATIO] + [--mask_length {subword,word,span-poisson}] + [--poisson_lambda POISSON_LAMBDA] [--replace_length {-1,0,1}] + [--src_seq_length SRC_SEQ_LENGTH] + [--tgt_seq_length TGT_SEQ_LENGTH] + [--word_ratio_threshold WORD_RATIO_THRESHOLD] + [--rep_threshold REP_THRESHOLD] [--rep_min_len REP_MIN_LEN] + [--rep_max_len REP_MAX_LEN] + [--punct_threshold PUNCT_THRESHOLD] + [--nonzero_threshold NONZERO_THRESHOLD] [--reversible_tokenization {joiner,spacer}] [--prior_tokenization] [-switchout_temperature SWITCHOUT_TEMPERATURE] @@ -251,14 +267,6 @@

    Train [-tgt_subword_type {none,sentencepiece,bpe}] [-src_onmttok_kwargs SRC_ONMTTOK_KWARGS] [-tgt_onmttok_kwargs TGT_ONMTTOK_KWARGS] - [--src_seq_length SRC_SEQ_LENGTH] - [--tgt_seq_length TGT_SEQ_LENGTH] - [--permute_sent_ratio PERMUTE_SENT_RATIO] - [--rotate_ratio ROTATE_RATIO] [--insert_ratio INSERT_RATIO] - [--random_ratio RANDOM_RATIO] [--mask_ratio MASK_RATIO] - [--mask_length {subword,word,span-poisson}] - [--poisson_lambda POISSON_LAMBDA] [--replace_length {-1,0,1}] - [--denoising_objective {bart,mass}] [--src_word_vec_size SRC_WORD_VEC_SIZE] [--tgt_word_vec_size TGT_WORD_VEC_SIZE] [--word_vec_size WORD_VEC_SIZE] [--share_decoder_embeddings] @@ -377,7 +385,7 @@

    Data

    Default: “warning”

    -transforms, --transforms
    -

    Possible choices: filterfeats, inferfeats, switchout, tokendrop, tokenmask, sentencepiece, bpe, onmt_tokenize, filtertoolong, prefix, denoising

    +

    Possible choices: bart, filtertoolong, filterwordratio, filterrepetitions, filterterminalpunct, filternonzeronumerals, filterfeats, inferfeats, switchout, tokendrop, tokenmask, sentencepiece, bpe, onmt_tokenize, prefix

    Default transform pipeline to apply to data. Can be specified in each corpus of data to override.

    Default: []

    @@ -468,6 +476,102 @@

    Embeddings +

    Transform/BART

    +
    +
    --permute_sent_ratio, -permute_sent_ratio
    +

    Permute this proportion of sentences (boundaries defined by [‘.’, ‘?’, ‘!’]) in all inputs.

    +

    Default: 0.0

    +
    +
    --rotate_ratio, -rotate_ratio
    +

    Rotate this proportion of inputs.

    +

    Default: 0.0

    +
    +
    --insert_ratio, -insert_ratio
    +

    Insert this percentage of additional random tokens.

    +

    Default: 0.0

    +
    +
    --random_ratio, -random_ratio
    +

    Instead of using <mask>, use random token this often.

    +

    Default: 0.0

    +
    +
    --mask_ratio, -mask_ratio
    +

    Fraction of words/subwords that will be masked.

    +

    Default: 0.0

    +
    +
    --mask_length, -mask_length
    +

    Possible choices: subword, word, span-poisson

    +

    Length of masking window to apply.

    +

    Default: “subword”

    +
    +
    --poisson_lambda, -poisson_lambda
    +

    Lambda for Poisson distribution to sample span length if -mask_length set to span-poisson.

    +

    Default: 3.0

    +
    +
    --replace_length, -replace_length
    +

    Possible choices: -1, 0, 1

    +

    When masking N tokens, replace with 0, 1, or N tokens. (use -1 for N)

    +

    Default: -1

    +
    +
    +

    +
    +

    Transform/Filter

    +
    +
    --src_seq_length, -src_seq_length
    +

    Maximum source sequence length.

    +

    Default: 200

    +
    +
    --tgt_seq_length, -tgt_seq_length
    +

    Maximum target sequence length.

    +

    Default: 200

    +
    +
    +
    +
    +

    Transform/Filter

    +
    +
    --word_ratio_threshold, -word_ratio_threshold
    +

    Threshold for discarding sentences based on word ratio.

    +

    Default: 3

    +
    +
    +
    +
    +

    Transform/Filter

    +
    +
    --rep_threshold, -rep_threshold
    +

    Number of times the substring is repeated.

    +

    Default: 2

    +
    +
    --rep_min_len, -rep_min_len
    +

    Minimum length of the repeated pattern.

    +

    Default: 3

    +
    +
    --rep_max_len, -rep_max_len
    +

    Maximum length of the repeated pattern.

    +

    Default: 100

    +
    +
    +
    +
    +

    Transform/Filter

    +
    +
    --punct_threshold, -punct_threshold
    +

    Minimum penalty score for discarding sentences based on their terminal punctuation signs

    +

    Default: -2

    +
    +
    +
    +
    +

    Transform/Filter

    +
    +
    --nonzero_threshold, -nonzero_threshold
    +

    Threshold for discarding sentences based on numerals between the segments with zeros removed

    +

    Default: 0.5

    +
    +
    +

    Transform/InferFeats

    @@ -579,63 +683,6 @@

    Transform/Subword/ONMTTOK -

    Transform/Filter

    -
    -
    --src_seq_length, -src_seq_length
    -

    Maximum source sequence length.

    -

    Default: 200

    -
    -
    --tgt_seq_length, -tgt_seq_length
    -

    Maximum target sequence length.

    -

    Default: 200

    -
    -
    -

    -
    -

    Transform/Denoising AE

    -
    -
    --permute_sent_ratio, -permute_sent_ratio
    -

    Permute this proportion of sentences (boundaries defined by [‘.’, ‘?’, ‘!’]) in all inputs.

    -

    Default: 0.0

    -
    -
    --rotate_ratio, -rotate_ratio
    -

    Rotate this proportion of inputs.

    -

    Default: 0.0

    -
    -
    --insert_ratio, -insert_ratio
    -

    Insert this percentage of additional random tokens.

    -

    Default: 0.0

    -
    -
    --random_ratio, -random_ratio
    -

    Instead of using <mask>, use random token this often. Incompatible with MASS

    -

    Default: 0.0

    -
    -
    --mask_ratio, -mask_ratio
    -

    Fraction of words/subwords that will be masked.

    -

    Default: 0.0

    -
    -
    --mask_length, -mask_length
    -

    Possible choices: subword, word, span-poisson

    -

    Length of masking window to apply.

    -

    Default: “subword”

    -
    -
    --poisson_lambda, -poisson_lambda
    -

    Lambda for Poisson distribution to sample span length if -mask_length set to span-poisson.

    -

    Default: 3.0

    -
    -
    --replace_length, -replace_length
    -

    Possible choices: -1, 0, 1

    -

    When masking N tokens, replace with 0, 1, or N tokens. (use -1 for N)

    -

    Default: -1

    -
    -
    --denoising_objective
    -

    Possible choices: bart, mass

    -

    choose between BART-style or MASS-style denoising objectives

    -

    Default: “bart”

    -
    -
    -

    Model-Embeddings

    diff --git a/options/translate.html b/options/translate.html index f583e4cb..5bbccc07 100644 --- a/options/translate.html +++ b/options/translate.html @@ -116,14 +116,18 @@
  • Decoding tricks
  • Logging
  • Efficiency
  • +
  • Transform/BART
  • +
  • Transform/Filter
  • +
  • Transform/Filter
  • +
  • Transform/Filter
  • +
  • Transform/Filter
  • +
  • Transform/Filter
  • Transform/InferFeats
  • Transform/SwitchOut
  • Transform/Token_Drop
  • Transform/Token_Mask
  • Transform/Subword/Common
  • Transform/Subword/ONMTTOK
  • -
  • Transform/Filter
  • -
  • Transform/Denoising AE
  • Source and Target Languages
  • @@ -228,7 +232,21 @@

    Translate[--dump_beam DUMP_BEAM] [--n_best N_BEST] [--batch_size BATCH_SIZE] [--batch_type {sents,tokens}] [--gpu GPU] - [-transforms {filterfeats,inferfeats,switchout,tokendrop,tokenmask,sentencepiece,bpe,onmt_tokenize,filtertoolong,prefix,denoising} [{filterfeats,inferfeats,switchout,tokendrop,tokenmask,sentencepiece,bpe,onmt_tokenize,filtertoolong,prefix,denoising} ...]] + [-transforms {bart,filtertoolong,filterwordratio,filterrepetitions,filterterminalpunct,filternonzeronumerals,filterfeats,inferfeats,switchout,tokendrop,tokenmask,sentencepiece,bpe,onmt_tokenize,prefix} [{bart,filtertoolong,filterwordratio,filterrepetitions,filterterminalpunct,filternonzeronumerals,filterfeats,inferfeats,switchout,tokendrop,tokenmask,sentencepiece,bpe,onmt_tokenize,prefix} ...]] + [--permute_sent_ratio PERMUTE_SENT_RATIO] + [--rotate_ratio ROTATE_RATIO] + [--insert_ratio INSERT_RATIO] + [--random_ratio RANDOM_RATIO] [--mask_ratio MASK_RATIO] + [--mask_length {subword,word,span-poisson}] + [--poisson_lambda POISSON_LAMBDA] + [--replace_length {-1,0,1}] + [--src_seq_length SRC_SEQ_LENGTH] + [--tgt_seq_length TGT_SEQ_LENGTH] + [--word_ratio_threshold WORD_RATIO_THRESHOLD] + [--rep_threshold REP_THRESHOLD] + [--rep_min_len REP_MIN_LEN] [--rep_max_len REP_MAX_LEN] + [--punct_threshold PUNCT_THRESHOLD] + [--nonzero_threshold NONZERO_THRESHOLD] [--reversible_tokenization {joiner,spacer}] [--prior_tokenization] [-switchout_temperature SWITCHOUT_TEMPERATURE] @@ -247,18 +265,8 @@

    Translate[-src_subword_type {none,sentencepiece,bpe}] [-tgt_subword_type {none,sentencepiece,bpe}] [-src_onmttok_kwargs SRC_ONMTTOK_KWARGS] - [-tgt_onmttok_kwargs TGT_ONMTTOK_KWARGS] - [--src_seq_length SRC_SEQ_LENGTH] - [--tgt_seq_length TGT_SEQ_LENGTH] - [--permute_sent_ratio PERMUTE_SENT_RATIO] - [--rotate_ratio ROTATE_RATIO] - [--insert_ratio INSERT_RATIO] - [--random_ratio RANDOM_RATIO] [--mask_ratio MASK_RATIO] - [--mask_length {subword,word,span-poisson}] - [--poisson_lambda POISSON_LAMBDA] - [--replace_length {-1,0,1}] - [--denoising_objective {bart,mass}] --src_lang SRC_LANG - --tgt_lang TGT_LANG --stack STACK + [-tgt_onmttok_kwargs TGT_ONMTTOK_KWARGS] --src_lang + SRC_LANG --tgt_lang TGT_LANG --stack STACK [--output_model OUTPUT_MODEL]

    @@ -499,12 +507,108 @@

    Efficiency +

    Transform/BART

    +
    +
    --permute_sent_ratio, -permute_sent_ratio
    +

    Permute this proportion of sentences (boundaries defined by [‘.’, ‘?’, ‘!’]) in all inputs.

    +

    Default: 0.0

    +
    +
    --rotate_ratio, -rotate_ratio
    +

    Rotate this proportion of inputs.

    +

    Default: 0.0

    +
    +
    --insert_ratio, -insert_ratio
    +

    Insert this percentage of additional random tokens.

    +

    Default: 0.0

    +
    +
    --random_ratio, -random_ratio
    +

    Instead of using <mask>, use random token this often.

    +

    Default: 0.0

    +
    +
    --mask_ratio, -mask_ratio
    +

    Fraction of words/subwords that will be masked.

    +

    Default: 0.0

    +
    +
    --mask_length, -mask_length
    +

    Possible choices: subword, word, span-poisson

    +

    Length of masking window to apply.

    +

    Default: “subword”

    +
    +
    --poisson_lambda, -poisson_lambda
    +

    Lambda for Poisson distribution to sample span length if -mask_length set to span-poisson.

    +

    Default: 3.0

    +
    +
    --replace_length, -replace_length
    +

    Possible choices: -1, 0, 1

    +

    When masking N tokens, replace with 0, 1, or N tokens. (use -1 for N)

    +

    Default: -1

    +
    +
    +

    +
    +

    Transform/Filter

    +
    +
    --src_seq_length, -src_seq_length
    +

    Maximum source sequence length.

    +

    Default: 200

    +
    +
    --tgt_seq_length, -tgt_seq_length
    +

    Maximum target sequence length.

    +

    Default: 200

    +
    +
    +
    +
    +

    Transform/Filter

    +
    +
    --word_ratio_threshold, -word_ratio_threshold
    +

    Threshold for discarding sentences based on word ratio.

    +

    Default: 3

    +
    +
    +
    +
    +

    Transform/Filter

    +
    +
    --rep_threshold, -rep_threshold
    +

    Number of times the substring is repeated.

    +

    Default: 2

    +
    +
    --rep_min_len, -rep_min_len
    +

    Minimum length of the repeated pattern.

    +

    Default: 3

    +
    +
    --rep_max_len, -rep_max_len
    +

    Maximum length of the repeated pattern.

    +

    Default: 100

    +
    +
    +
    +
    +

    Transform/Filter

    +
    +
    --punct_threshold, -punct_threshold
    +

    Minimum penalty score for discarding sentences based on their terminal punctuation signs

    +

    Default: -2

    +
    +
    +
    +
    +

    Transform/Filter

    +
    +
    --nonzero_threshold, -nonzero_threshold
    +

    Threshold for discarding sentences based on numerals between the segments with zeros removed

    +

    Default: 0.5

    +
    +
    +

    Transform/InferFeats

    @@ -616,63 +720,6 @@

    Transform/Subword/ONMTTOK -

    Transform/Filter

    -
    -
    --src_seq_length, -src_seq_length
    -

    Maximum source sequence length.

    -

    Default: 200

    -
    -
    --tgt_seq_length, -tgt_seq_length
    -

    Maximum target sequence length.

    -

    Default: 200

    -
    -
    -

    -
    -

    Transform/Denoising AE

    -
    -
    --permute_sent_ratio, -permute_sent_ratio
    -

    Permute this proportion of sentences (boundaries defined by [‘.’, ‘?’, ‘!’]) in all inputs.

    -

    Default: 0.0

    -
    -
    --rotate_ratio, -rotate_ratio
    -

    Rotate this proportion of inputs.

    -

    Default: 0.0

    -
    -
    --insert_ratio, -insert_ratio
    -

    Insert this percentage of additional random tokens.

    -

    Default: 0.0

    -
    -
    --random_ratio, -random_ratio
    -

    Instead of using <mask>, use random token this often. Incompatible with MASS.

    -

    Default: 0.0

    -
    -
    --mask_ratio, -mask_ratio
    -

    Fraction of words/subwords that will be masked.

    -

    Default: 0.0

    -
    -
    --mask_length, -mask_length
    -

    Possible choices: subword, word, span-poisson

    -

    Length of masking window to apply.

    -

    Default: “subword”

    -
    -
    --poisson_lambda, -poisson_lambda
    -

    Lambda for Poisson distribution to sample span length if -mask_length set to span-poisson.

    -

    Default: 3.0

    -
    -
    --replace_length, -replace_length
    -

    Possible choices: -1, 0, 1

    -

    When masking N tokens, replace with 0, 1, or N tokens. (use -1 for N)

    -

    Default: -1

    -
    -
    --denoising_objective
    -

    Possible choices: bart, mass

    -

    Choose between BART-style and MASS-style denoising objectives.

    -

    Default: “bart”

    -
    -
    -

    Source and Target Languages

    diff --git a/searchindex.js b/searchindex.js index 7e3f7143..859f8a1a 100644 --- a/searchindex.js +++ b/searchindex.js @@ -1 +1 @@ -Search.setIndex({docnames:["CONTRIBUTING","FAQ","attention_bridges","config_config","examples/Translation","index","install","main","onmt","onmt.inputters","onmt.modules","onmt.translate.translation_server","onmt.translation","options/build_vocab","options/server","options/train","options/translate","prepare_data","quickstart","ref"],envversion:{"sphinx.domains.c":1,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":1,"sphinx.domains.index":1,"sphinx.domains.javascript":1,"sphinx.domains.math":2,"sphinx.domains.python":1,"sphinx.domains.rst":1,"sphinx.domains.std":1,"sphinx.ext.viewcode":1,sphinx:56},filenames:["CONTRIBUTING.md","FAQ.md","attention_bridges.md","config_config.md","examples/Translation.md","index.rst","install.md","main.md","onmt.rst","onmt.inputters.rst","onmt.modules.rst","onmt.translate.translation_server.rst","onmt.translation.rst","options/build_vocab.rst","options/server.rst","options/train.rst","options/translate.rst","prepare_data.md","quickstart.md","ref.rst"],objects:{"onmt.Trainer":{train:[8,1,1,""],validate:[8,1,1,""]},"onmt.decoders":{CNNDecoder:[10,0,1,""],DecoderBase:[10,0,1,""],InputFeedRNNDecoder:[10,0,1,""],StdRNNDecoder:[10,0,1,""],TransformerDecoder:[10,0,1,""]},"onmt.decoders.CNNDecoder":{forward:[10,1,1,""],from_opt:[10,1,1,""],init_state:[10,1,1,""]},"onmt.decoders.DecoderBase":{from_opt:[10,1,1,""]},"onmt.decoders.TransformerDecoder":{forward:[10,1,1,""]},"onmt.decoders.decoder":{RNNDecoderBase:[10,0,1,""]},"onmt.decoders.decoder.RNNDecoderBase":{forward:[10,1,1,""],from_opt:[10,1,1,""],init_state:[10,1,1,""]},"onmt.encoders":{CNNEncoder:[10,0,1,""],EncoderBase:[10,0,1,""],MeanEncoder:[10,0,1,""],RNNEncoder:[10,0,1,""],TransformerEncoder:[10,0,1,""]},"onmt.encoders.CNNEncoder":{forward:[10,1,1,""],from_opt:[10,1,1,""]},"onmt.encoders.EncoderBase":{forward:[10,1,1,""]}
,"onmt.encoders.MeanEncoder":{forward:[10,1,1,""],from_opt:[10,1,1,""]},"onmt.encoders.RNNEncoder":{forward:[10,1,1,""],from_opt:[10,1,1,""]},"onmt.encoders.TransformerEncoder":{forward:[10,1,1,""],from_opt:[10,1,1,""]},"onmt.models":{NMTModel:[8,0,1,""]},"onmt.models.NMTModel":{count_parameters:[8,1,1,""],forward:[8,1,1,""]},"onmt.models.sru":{SRU:[10,0,1,""]},"onmt.models.sru.SRU":{forward:[10,1,1,""]},"onmt.modules":{AverageAttention:[10,0,1,""],ConvMultiStepAttention:[10,0,1,""],CopyGenerator:[10,0,1,""],Embeddings:[10,0,1,""],GlobalAttention:[10,0,1,""],MultiHeadedAttention:[10,0,1,""],PositionalEncoding:[10,0,1,""],WeightNormConv2d:[10,0,1,""]},"onmt.modules.AverageAttention":{cumulative_average:[10,1,1,""],cumulative_average_mask:[10,1,1,""],forward:[10,1,1,""]},"onmt.modules.ConvMultiStepAttention":{apply_mask:[10,1,1,""],forward:[10,1,1,""]},"onmt.modules.CopyGenerator":{forward:[10,1,1,""]},"onmt.modules.Embeddings":{emb_luts:[10,1,1,""],forward:[10,1,1,""],load_pretrained_vectors:[10,1,1,""],word_lut:[10,1,1,""]},"onmt.modules.GlobalAttention":{forward:[10,1,1,""],score:[10,1,1,""]},"onmt.modules.MultiHeadedAttention":{forward:[10,1,1,""],training:[10,2,1,""],update_dropout:[10,1,1,""]},"onmt.modules.PositionalEncoding":{forward:[10,1,1,""]},"onmt.modules.WeightNormConv2d":{forward:[10,1,1,""]},"onmt.modules.position_ffn":{PositionwiseFeedForward:[10,0,1,""]},"onmt.modules.position_ffn.PositionwiseFeedForward":{forward:[10,1,1,""]},"onmt.modules.structured_attention":{MatrixTree:[10,0,1,""]},"onmt.modules.structured_attention.MatrixTree":{forward:[10,1,1,""]},"onmt.translate":{BeamSearch:[12,0,1,""],DecodeStrategy:[12,0,1,""],GNMTGlobalScorer:[12,0,1,""],GreedySearch:[12,0,1,""],Translation:[12,0,1,""],TranslationBuilder:[12,0,1,""],Translator:[12,0,1,""]},"onmt.translate.BeamSearch":{initialize:[12,1,1,""]},"onmt.translate.DecodeStrategy":{advance:[12,1,1,""],block_ngram_repeats:[12,1,1,""],initialize:[12,1,1,""],maybe_update_forbidden_tokens:[12,1,1,""]
,maybe_update_target_prefix:[12,1,1,""],target_prefixing:[12,1,1,""],update_finished:[12,1,1,""]},"onmt.translate.GreedySearch":{advance:[12,1,1,""],initialize:[12,1,1,""],update_finished:[12,1,1,""]},"onmt.translate.Translation":{log:[12,1,1,""]},"onmt.translate.Translator":{translate_batch:[12,1,1,""]},"onmt.translate.greedy_search":{sample_with_temperature:[12,3,1,""]},"onmt.translate.penalties":{PenaltyBuilder:[12,0,1,""]},"onmt.translate.penalties.PenaltyBuilder":{coverage_none:[12,1,1,""],coverage_summary:[12,1,1,""],coverage_wu:[12,1,1,""],length_average:[12,1,1,""],length_none:[12,1,1,""],length_wu:[12,1,1,""]},"onmt.translate.translation_server":{ServerModel:[11,0,1,""],ServerModelError:[11,4,1,""],Timer:[11,0,1,""],TranslationServer:[11,0,1,""]},"onmt.translate.translation_server.ServerModel":{build_tokenizer:[11,1,1,""],detokenize:[11,1,1,""],do_timeout:[11,1,1,""],maybe_convert_align:[11,1,1,""],maybe_detokenize:[11,1,1,""],maybe_detokenize_with_align:[11,1,1,""],maybe_postprocess:[11,1,1,""],maybe_preprocess:[11,1,1,""],maybe_tokenize:[11,1,1,""],parse_opt:[11,1,1,""],postprocess:[11,1,1,""],preprocess:[11,1,1,""],rebuild_seg_packages:[11,1,1,""],to_gpu:[11,1,1,""],tokenize:[11,1,1,""],tokenizer_marker:[11,1,1,""]},"onmt.translate.translation_server.TranslationServer":{clone_model:[11,1,1,""],list_models:[11,1,1,""],load_model:[11,1,1,""],preload_model:[11,1,1,""],run:[11,1,1,""],start:[11,1,1,""],unload_model:[11,1,1,""]},"onmt.utils":{Optimizer:[8,0,1,""],Statistics:[8,0,1,""]},"onmt.utils.Optimizer":{amp:[8,1,1,""],backward:[8,1,1,""],from_opt:[8,1,1,""],learning_rate:[8,1,1,""],step:[8,1,1,""],training_step:[8,1,1,""],zero_grad:[8,1,1,""]},"onmt.utils.Statistics":{accuracy:[8,1,1,""],all_gather_stats:[8,1,1,""],all_gather_stats_list:[8,1,1,""],elapsed_time:[8,1,1,""],log_tensorboard:[8,1,1,""],output:[8,1,1,""],ppl:[8,1,1,""],update:[8,1,1,""],xent:[8,1,1,""]},"onmt.utils.loss":{LossComputeBase:[8,0,1,""]},onmt:{Trainer:[8,0,1,""]}},objnames:{"0":["
py","class","Python class"],"1":["py","method","Python method"],"2":["py","attribute","Python attribute"],"3":["py","function","Python function"],"4":["py","exception","Python exception"]},objtypes:{"0":"py:class","1":"py:method","2":"py:attribute","3":"py:function","4":"py:exception"},terms:{"0473v3":19,"25g":6,"abstract":10,"boolean":[8,12],"break":17,"class":[0,5,8,10,11],"default":[11,13,14,15,16,17],"export":6,"final":[2,4,10,12],"float":[3,10,12],"function":[0,1,2,3,8,10,11,12,15],"import":0,"int":[8,10,11,12],"long":0,"new":[0,2,4],"public":6,"return":[0,8,10,11,12],"static":[8,15],"true":[3,4,8,10,12,15,16,17],"try":[0,6],"while":[3,10],And:[0,10],But:1,EOS:12,For:[0,1,3,12,15,18],IDs:12,IFS:17,LPs:3,Not:0,One:3,The:[1,2,4,8,10,11,12,15,16],Then:[0,4,10],There:[2,3],These:[2,3,10,12],Use:[3,15,16],Used:12,Will:[1,3],__init__:11,_compute_loss:8,a_j:10,aan:[10,15],aan_useffn:[10,15],ab_fixed_length:15,ab_lay:15,ab_layer_norm:15,abbrevi:0,abigail:19,abil:10,about:0,abov:[0,12],abs:[2,15,16,19],acceler:[10,19],accept:[0,3,12],access:[2,3,6],accord:3,account:[3,6],accross:8,accum:8,accum_count:[4,8,15],accum_step:[4,8,15],accumul:[8,15],accuraci:[8,12],achiev:3,achin:19,acl:[7,19],aclweb:15,act:1,action:[10,12,15],activ:[1,2,6,10,15],activation_fn:10,activationfunct:10,actual:12,adadelta:15,adafactor:15,adagrad:15,adagrad_accumulator_init:15,adam:[4,15],adam_beta1:15,adam_beta2:[4,15],adamoptim:15,adamw:15,adapt:[5,7],adapter_nam:3,add:[0,4,10],added:3,adding:0,addit:[0,10,13,15,16],addition:10,address:12,adjust:3,adopt:15,adpot:10,advanc:[12,15],advic:0,after:[0,2,10,12,15],afterward:10,again:0,agnost:1,aidan:19,alexand:7,algorithm:19,align:[5,8,10,11,12,16,19],align_debug:16,alignment_head:[10,15],alignment_lay:[10,15],aliv:12,alive_attn:12,alive_seq:12,all:[0,3,8,10,12,13,15,16,19],all_gather_stat:8,all_gather_stats_list:8,all_preprocess:11,allennlp:0,alloc:3,allow:[0,2,3,15],almost:[12,15],alon:0,along:2,alpha:[2,12,16],alphabet:3,alreadi:[13,15,16],also:[0,3
,6,8,10,15],altern:10,although:10,alwai:[0,3],amp:[8,15],ani:[0,1,3,12,13,15],annurev:19,anoth:[0,2,8],antholog:15,apex:15,apex_opt_level:15,api:[0,5],api_doc:15,appear:3,append:[6,17],appli:[2,3,10,12,13,15,16],applic:16,apply_mask:10,approach:10,appropri:12,approxim:15,architectur:[2,5],arg:[0,10,11],argmax:16,argpars:11,argument:[0,5],around:10,artzi:19,arxiv:[0,2,15,16,19],ashish:19,assig:3,assign:[3,16],assing:3,assum:[10,12],att_typ:2,attend:2,attent:[0,1,5,8,12,16,19],attention_bridg:8,attention_dropout:[4,10,15],attentionbridgenorm:2,attet:10,attn:[10,12,16],attn_debug:[12,16],attn_func:10,attn_typ:10,attr:11,attribut:12,augment:19,auli:19,author:[7,10],autodoc:0,autogener:15,avail:[8,10,11,15,16],available_model:14,averag:[10,15,16,19],average_decai:[4,8,15],average_everi:[8,15],average_output:10,averageattent:10,avg:[10,16],avg_raw_prob:16,avoid:[0,3],aws:6,axi:12,back:8,backend:15,backward:8,bahdanau:[10,15,19],ban_unk_token:[12,16],bank:[5,10],barri:19,bart:[13,15,16],base:[0,2,3,4,6,7,8,10,11,12,13,15,16,19],base_target_emb:10,baselin:15,basemodel:8,basenam:[4,17],bash:6,batch:[2,4,8,10,12,15,16],batch_siz:[4,10,12,15,16],batch_size_multipl:[4,15],batch_typ:[4,15,16],bcb14:[10,19],beam:[5,12],beam_search:12,beam_siz:[4,12,16],beamsearch:12,beamsearchbas:12,becaus:[3,16],becom:3,been:[10,12,13,15,16],befor:[0,4,11,12,15,16],begin:[8,12],behind:5,below:0,ben:3,benefit:1,bengali:3,bengio:19,best:[12,16],beta1:15,beta2:15,beta:[12,16],better:[0,13,15,16],between:[2,5,10,13,15,16,19],beyond:8,biao:19,bib:0,bibtex:0,bibtext:0,bidir_edg:15,bidirect:[10,15],bidirectional_encod:10,bin:[6,15],binari:[4,10],bit:16,blank:0,bleu:4,blob:10,block:[12,16],block_ngram_repeat:[12,16],booktitl:7,bool:[8,10,11,12],bos:12,both:[3,12,15],both_embed:15,boundari:[13,15,16],bpe:[13,15,16],bptt:[8,15],bridg:[1,5,19],bridge_extra_nod:15,bring:1,brnn:15,browser:0,bucket_s:[4,15],buffer:8,build:[0,5,8,10,11,12,16,17],build_token:11,build_vocab:13,built:8,bytetensor:12,cach:10,calcu
l:[2,8,10,12],call:[10,12],callabl:12,callback:8,can:[2,3,4,6,8,11,12,13,15,16],cancel:11,candid:[3,13,15,16],cao:19,capit:0,captur:2,care:10,cat:17,categor:12,categori:12,challeng:5,chang:[0,3,8,15],channel:2,charact:[0,16],character_coverag:17,check:[0,7,18],checklist:0,checkpoint:[4,8,15],chen:19,chmod:[4,6],cho:19,choic:[0,10,13,15,16],choos:[0,13,15,16],chosen:12,christoph:19,citat:[0,5],cite:[0,7],classmethod:[8,10],clear:0,clone:[7,11,18],clone_model:11,close:0,cls:8,cluster:[3,7,18],clutter:0,cnn:[10,15,19],cnn_kernel_width:[10,15],cnndecod:10,cnnencod:10,code:[0,3,6,16],code_dir:6,codebas:6,column:3,com:[7,10,18],combin:[10,16],comma:3,command:[4,5],comment:0,commentari:4,common:[0,5],commoncrawl:4,commun:0,complet:12,complex:[3,12],compon:[2,3],composit:15,comput:[2,3,4,8,10,15,16],concat:[10,15],concaten:15,condit:[12,15,16],conf:[14,16],config:[4,5,11,13,14,15,16],config_fil:11,configur:[3,4,5],connect:2,consid:[3,10,17],consider:15,consist:[0,10],constant:3,construct:10,constructor:[0,10],consum:15,contain:[1,3,10,11,12],content:[0,16],context:[2,10,15],context_attn:10,context_g:[10,15],contextg:10,continu:0,contribut:[0,2,10],contributor:5,control:[3,8],conv2conv:5,conv2d:10,conv:[10,15],conveni:3,convent:0,convers:12,convert:11,convex:10,convmultistepattent:10,convolut:[10,19],copi:[0,3,5,6,15,16],copy_attn:[10,12,15],copy_attn_forc:15,copy_attn_typ:[10,15],copy_loss_by_seqlength:15,copygener:10,core:[2,5,8],corpora:4,corpu:[3,4,13,15,17],corr:[0,19],correct:3,correspand:11,correspond:[2,16],could:12,count:[3,8,12,13,15,16],count_paramet:8,cov:12,cov_pen:12,coverag:[10,12,15,16],coverage_attn:[10,15],coverage_non:12,coverage_penalti:[12,16],coverage_summari:12,coverage_wu:12,cpu:[11,15,16],crai:6,crayon:15,creat:[3,6,8],creation:3,criteria:15,criterion:8,critic:[15,16],cross:[8,10,15],cross_queri:1,csc:17,csv:3,ct2_model:11,ct2_translate_batch_arg:11,ct2_translator_arg:11,ctrl:0,cuda_funct:10,cudnn:10,cumbersom:3,cumul:[10,12,16],cumulative_averag:10,
cumulative_average_mask:10,cur_dir:17,cur_len:12,current:[3,8,10,12,15],curricula:3,curriculum:3,custom:[11,15],custom_opt:11,cut:[0,17],cutoff:12,d_ff:10,d_model:10,dai:19,data:[2,3,5,8,12,19],data_path:17,data_typ:[8,12,15,16],dataset:[4,5,13,15,16,17],datastructur:11,dauphin:19,david:19,dblp:0,ddress:19,deal:3,debug:[14,15,16],dec:3,dec_lay:[4,15],dec_out:10,dec_rnn_siz:15,decai:15,decay_method:[4,15],decay_step:15,decod:[2,3,5,8],decode_strategi:12,decoder_typ:[4,15],decoderbas:[8,10],decodestrategi:12,def:0,defin:[3,4,10,13,15,16],definit:10,delai:3,delet:[13,15,16],delimit:16,deng:7,deni:19,denois:[3,5],denoising_object:[13,15,16],denot:2,depend:[0,1,3,6,8,10,11],deprec:[15,16],describ:[2,10,11,15],descript:0,desir:[3,4],detail:[7,13,15],determin:3,detoken:[4,11],dev:[6,17],develop:0,devic:[3,10,12,16],device_context:8,deyi:19,diagon:3,dict:[3,8,10,11,12,13,15,16],dict_kei:15,dictionari:[8,10,12,15],differ:[0,2,3,5,10,11,16],dilat:10,dim:10,dimens:[2,10,12,15],dimension:[2,10],dir:17,direct:[0,3,12],directli:[0,10,16],directori:[3,6,11,15],disabl:15,discard:15,discourag:15,disk:15,displai:8,dist:8,distanc:[10,15],distribtut:10,distribut:[3,8,10,12,13,15,16],divers:[2,13,15,16],divid:[2,3,15,16],divis:10,do_timeout:11,doc:0,document:[0,7],doe:[1,3,16],doesn:17,doi:[7,19],doing:[3,16],don:0,done:[4,12,17],dot:[2,10,15],dotprod:15,down:[12,13],download:6,dropout:[4,8,10,13,15,16],dropout_step:[4,8,15],due:15,dump:[13,15,16],dump_beam:[12,16],dump_sampl:13,dump_transform:15,dure:[11,15,16],dynam:[5,10,16],dzmitri:19,each:[2,3,10,12,13,15,16],earli:15,earlier:[2,13,15,16],early_stop:15,early_stopping_criteria:15,earlystopp:8,eas:3,easi:0,easili:3,echo:[4,17],edg:15,effect:[2,10,11,13],effici:[5,8,19],either:[12,15],elaps:8,elapsed_tim:8,element:[2,3],els:[1,17],emb:10,emb_fil:10,emb_lut:10,embed:[2,5,10,13],embedding_s:10,embeddings_typ:15,emerg:2,emnlp:19,emploi:[2,8],empti:[4,10,12,13,15],enabl:[10,16],enc:3,enc_hidden:10,enc_lay:[4,15],enc_rnn_siz:15,encapsul:2,
encod:[2,3,5,8,12],encoder_fin:10,encoder_out_combin:10,encoder_out_top:10,encoder_output:5,encoder_typ:[4,15],encoderbas:[8,10],encordec:[13,15],encount:[13,15],encout:[13,15],end:12,eng:3,english:[1,3,4,17],enhanc:[1,2],ensembl:16,ensur:2,entir:17,entri:0,entropi:8,env_dir:6,environ:6,eos:12,epoch:15,eps:10,epsilon:15,equal:[12,15],equat:10,equival:15,error:[0,13,15,16],especi:3,essenti:12,establish:2,eural:19,europarl:4,evalu:8,even:3,event:12,everi:[8,10,15,16],exactli:0,exampl:[0,1,3,4,13,15,18],exce:15,except:[0,11,13,15,16],exclusion_token:12,execut:[4,13,15],exist:[13,15,16,17],exp:15,exp_host:15,expand:1,expect:[3,12],experi:[13,15,16],experiment:15,exponenti:15,extend:[0,10],extern:0,extra:[6,10,15],extra_word:10,extract:17,facilit:2,fail:12,fairseq:0,fals:[8,10,11,12,13,14,15,16],familiar:7,fast:[10,19],faster:[10,15],feat_0:16,feat_1:16,feat_dim_expon:10,feat_merg:[10,15],feat_merge_s:15,feat_padding_idx:10,feat_vec_expon:[10,15],feat_vec_s:[10,15],feat_vocab_s:10,feats0:16,feats1:16,featur:[2,5,8,10,13,16,19],fed:2,feed:[3,10,15],feedforward:[2,15],feedforwardattentionbridgelay:5,feel:0,few:0,ffectiv:19,ffn:[10,15],figur:10,file:[0,3,11,13,15,16,17],filenam:15,filter:[4,5,17],filterfeat:[13,15,16],filtertoolong:[3,4,13,15,16],find:0,firefox:0,first:[0,3,10,12,15],five:2,fix:[0,5,12,15],flag:8,flake8:0,floattensor:[8,10,12],flow:2,fly:4,fnn:10,focu:[0,2],folder:0,follow:[0,2,3,4,16,18],foo:0,forbidden:12,forbidden_token:12,forc:[12,16],format:[0,11,13,15,16,17],former:10,forward:[3,8,10,15],fotran:3,found:17,foundat:2,fp16:[15,16],fp32:[4,8,15,16],frac:2,fraction:[13,15,16],framework:[5,15],free:[0,11],freez:[10,15],freeze_word_vec:10,freeze_word_vecs_dec:15,freeze_word_vecs_enc:15,french:1,frequenc:[13,15,16],from:[2,3,8,10,12,15,16,17],from_opt:[8,10],frozenset:12,full:[0,3,10,11,13,15,16,17],full_context_align:[10,15],fulli:[3,10],further:[13,15],fusedadam:15,gag:[10,19],gao:19,gap:19,garg:15,gate:15,gather:8,gating_output:10,gehr:19,gelu:15,gener:[0,
2,3,4,5,8,10,12,16,19],generator_funct:15,german:4,get:[5,6,19],ggnn:15,git:[7,18],github:[7,10,15,18],give:[3,15,16],given:[2,3,11],global:10,global_attent:15,global_attention_funct:15,global_scor:12,globalattent:10,glove:15,gnmt:12,gnmtglobalscor:12,going:12,gold:12,gold_scor:12,gold_sent:12,gomez:19,gone:15,good:[0,15],googl:[0,12,16,19],gpu:[3,4,6,11,12,15,16],gpu_backend:15,gpu_rank:[4,15],gpu_verbose_level:[8,15],gpuid:15,grad:8,gradient:[8,15],graham:19,gram:12,grangier:19,graph:15,gre:6,greater:12,greedy_search:12,greedysearch:12,group:[10,15,16],groupwis:3,grow:12,gru:[10,15],gtx1080:16,guid:[7,10,18],guidelin:5,guillaum:7,h_j:10,h_s:10,h_t:10,had:16,haddow:19,hand:3,handl:[0,8],happen:12,has:[2,3,12,13,15,16],has_cov_pen:12,has_len_pen:12,has_tgt:12,have:[0,3,4,10,12,15,16],head:[2,4,10,15],head_count:10,help:[0,2,16],helsinki:[7,18],here:[2,12,17],hidden:[1,8,10,15],hidden_ab_s:15,hidden_dim:2,hidden_s:10,hieu:19,high:3,higher:[12,15,16],highest:16,hold:12,hook:10,hop:[1,2],host:6,how:[0,10],howev:[0,8,10],html:[0,15],http:[2,6,7,10,15,16,17,18,19],huge:15,human:[3,19],hyp_:4,hyperbol:2,hyphen:3,hypothesi:4,iclr:19,identifi:16,idl:3,ids:3,ignor:[4,10,13,15,16],ignore_when_block:[12,16],illia:19,ilya:19,imag:8,impact:15,implement:[2,8,10,15],impli:[2,10],improv:[10,12,15,19],in_channel:10,in_config:3,includ:[0,3,10,13,15,16],incompat:[13,15,16],incorpor:15,increas:3,independ:1,index:[6,10,15],indic:[2,8,10,12,13,15,16],individu:3,inf:12,infer:12,inferfeat:5,info:[15,16],inform:[1,2,3,15,16],ingredi:12,init:[10,15],init_scal:10,init_st:[8,10],initi:[5,8,10,11,12],initial_accumulator_valu:15,inner:10,inp:12,inp_seq_len:12,inproceed:7,input:[2,5,8,10,11,12,13,15,16,17,19],input_fe:15,input_feed:10,input_format:4,input_from_dec:10,input_len:10,input_s:10,input_sentence_s:17,inputfeedrnndecod:10,inputs_len:10,inputt:12,insert:[13,15,16],insert_ratio:[13,15,16],instal:[0,4,5],instanc:[8,10,12],instanti:8,instead:[0,3,6,10,13,15,16],instruct:15,int8:16,integ:12,i
ntegr:0,interact:6,interfac:[8,10],intermedi:[1,2],intermediate_output:[2,5],intern:11,interv:15,introduc:[2,3],introduct:3,intuit:5,invalid:[13,15,16],involv:2,is_finish:12,is_on_top:10,isn:12,item:10,iter:8,its:[0,3,10],itself:3,jakob:19,jean:7,jinsong:19,job:6,joiner:[13,15,16],jointli:[10,19],jona:19,jone:19,journal:0,json:14,kaiser:19,keep:[11,12,15],keep_checkpoint:[4,15],keep_stat:15,keep_topk:12,keep_topp:12,kei:10,kera:15,kernel_s:[10,15],key_len:10,kim:7,klau:19,klein:7,krikun:19,kwarg:10,kyunghyun:19,label:15,label_smooth:[4,15],lambda:[13,15,16],lambda_align:15,lambda_coverag:15,lang:3,lang_a:3,lang_b:3,lang_pair:[3,16],languag:[1,2,5,13,15,17],language_pair:17,lapata:19,last:[3,10,15,16],latent_arrai:1,lattent_arrai:1,latter:10,layer:[1,2,10,15,16],layer_cach:10,layer_norm_modul:10,layer_type_to_cl:2,layernorm:15,layerstack:3,lead:12,learn:[2,8,10,15,19],learning_r:[4,8,15],learning_rate_decai:15,learning_rate_decay_fn:8,least:0,leav:[3,15],left:2,lei:19,len:[8,10,12],length:[3,5,8,10,12,13,15,16,17],length_averag:12,length_non:12,length_pen:12,length_penalti:[12,16],length_wu:12,less:3,let:[3,4],level:[13,15],lib:6,librari:15,like:[0,12,16],limit:16,lin:[1,2,15],linattentionbridgelay:5,line:[0,4,13,15,16],linear:[1,2],linear_warmup:15,linguist:[10,19],link:[0,2,6],list:[0,3,8,10,11,12,13,15,16],list_model:11,liter:1,literatur:15,liu:19,ll17:[10,19],llion:19,load:[6,8,10,11,15],load_model:11,load_pretrained_vector:10,loader:5,local:[0,3],localhost:15,log:[5,8,12],log_fil:[15,16],log_file_level:[15,16],log_prob:12,log_tensorboard:8,logger:12,login:6,logit:[12,16],logsumexp:12,longer:[1,16],longest:12,longtensor:[8,10,12],look:[0,7,10,16],loop:8,loss:[5,15],loss_scal:15,losscomputebas:8,love:0,lower:[1,3,15],lpm15:[10,19],lsl:[12,19],lstm:[10,15],lua:11,lukasz:19,luong:[10,15,19],lustrep1:6,lustrep2:6,lza17:[10,19],macherei:19,machin:[7,10,12,19],made:3,magic:12,mai:[3,8,11,12,13,15],main:[0,7,8,13,15,16],maintain:12,make:[0,6,8,13,15,16],make_shard_st:8,
mammoth:[5,6,7],man:19,manag:8,mani:[8,12,15],manipul:8,manual:[11,12],many2on:1,map:[3,8,10],margin:10,marian:15,mark:15,marker:11,mask:[10,13,15,16],mask_length:[13,15,16],mask_or_step:10,mask_ratio:[13,15,16],mass:[13,15,16],massiv:[3,7],master:[10,15],master_ip:15,master_port:15,match:11,mathbb:2,mathbf:2,mathemat:2,matric:2,matrix:[2,10,15],matrixtre:10,max:[8,10,12,17],max_generator_batch:[4,15],max_grad_norm:[4,8,15],max_len:10,max_length:[12,16],max_relative_posit:[10,15],max_sent_length:16,max_sentence_length:17,max_siz:8,maxim:19,maximum:[13,15,16],maybe_convert_align:11,maybe_detoken:11,maybe_detokenize_with_align:11,maybe_postprocess:11,maybe_preprocess:11,maybe_token:11,maybe_update_forbidden_token:12,maybe_update_target_prefix:12,mean:[3,10,11,15,16],meanencod:10,mechan:[2,3,10],mem:6,memori:[5,10,11,15],memory_bank:[10,12],memory_length:10,merg:[10,15],meta:3,metadata:8,method:[8,10,15],metric:16,mi250:6,michael:19,mike:19,min_length:[12,16],minh:19,minimum:16,mirella:19,mirror:15,mix:8,mkdir:[6,17],mlp:[10,15],mode:[3,13,15,16],model:[2,3,5,10,12,13],model_dim:10,model_dtyp:[4,8,15],model_id:11,model_kwarg:11,model_prefix:17,model_root:11,model_sav:8,model_step:4,model_task:15,model_typ:15,modelsaverbas:8,modif:8,modifi:[0,12],modul:[0,1,2,5,6,8,15,16],modular:7,mohammad:19,monolingu:3,more:[0,1,3,12,13,15,16],most:[12,16],mostli:8,move:[11,15],moving_averag:[8,15],much:15,multi:[0,1,2,10],multiheadedattent:[2,10],multilingu:[3,7],multipl:[0,2,3,8,10,15,16],multipli:2,multplic:0,must:[3,10,11,15],mymodul:6,n_batch:8,n_best:[11,12,16],n_bucket:15,n_correct:8,n_edge_typ:15,n_node:15,n_sampl:[4,13,15],n_seg:11,n_src_word:8,n_step:15,n_word:8,name:[0,3,5,12,13,15,17],namespac:11,napoleon:0,nccl:15,necessari:[0,4,6,8,12,15,16],necessit:3,need:[0,3,4,8,10,15,19],neg:[11,15],network:[10,19],neubig:19,neural:[7,10,12,19],neuro:19,never:12,news_commentari:4,next:[3,8,12,16],nfeat:10,ngram:[12,16],nightmar:3,niki:19,nlp:[7,18],nmt:[8,12,15,16],nmtmodel:[8,10],
noam:[4,15,19],noamwd:15,node:[3,6,8,15],node_rank:15,nois:3,non:[10,12,15],none:[1,8,10,11,12,13,15,16],nonetyp:[10,12],norm:[10,15],norm_method:8,normal:[2,4,8,15],normalz:8,norouzi:19,note:[0,3,4,6,12],noth:[0,8],notset:[15,16],ntask:6,nucleu:16,num_lay:10,num_step:8,num_thread:13,number:[2,3,8,10,12,13,15,16],nvidia:15,obj:[0,8],object:[0,8,11,12,13,15,16,17],oder:3,off:15,ofi:6,often:[13,15,16],on_timemout:11,on_timeout:11,onc:[12,15],one2mani:1,one:[0,2,3,8,10,13,15,16],onli:[3,8,12,13,15,16],onmt:[0,8,10,11,12,15,17],onmt_build_vocab:4,onmt_token:[13,15,16],onmt_transl:4,onmttok:5,open:7,opennmt:[0,3,6,7,8,14],oper:[2,10],operatornam:2,opt:[4,8,10,11,15,16],opt_level:15,optim:[4,5],option:[0,3,4,6,8,10,11,12,13,15,16,17],opu:5,opus100:[17,18],ord:19,order:[3,15],org:[2,6,7,15,16,19],origin:[2,15,17],oriol:19,other:[2,6,8,12,13,15,16,17,19],other_lang:17,otherwis:[3,10,15,16],our:[6,12],our_stat:8,out:[2,3,7,8,18],out_channel:10,out_config:3,out_fil:12,outcom:2,output:[1,2,3,4,8,10,11,12,13,15,16],output_model:16,output_s:10,over:[0,1,3,4,8,10,12,15,16,17],overal:[1,2],overrid:[10,12,13,15],overridden:10,overview:5,overwrit:[6,13,15],own:[8,16],ownership:8,p17:7,p18:15,packag:[6,11],pad:[8,10,12],pad_idx:10,pair:[3,8,11,15,16,17],paper:[0,2,10,15],parallel:[10,12,13,15],parallel_path:12,parallelcorpu:12,param:8,param_init:[4,15],param_init_glorot:[4,15],paramet:[1,4,8,10,11,12,13,15,16],parameter:10,parenthes:0,parmar:19,pars:[10,11],parse_opt:11,part:[1,2,12],partial:1,particular:[0,3,10],partit:6,pass:[2,3,8,10,11,15],past:[0,15],path:[3,6,10,11,12,13,15,16],path_src:4,path_tgt:4,patienc:8,pattern:3,pdf:15,pen:12,penalti:[5,12,15],penaltybuild:12,peopl:6,per:[0,3,13,15,16],perceiv:[2,15],perceiverattentionbridgelay:[1,5],percentag:[13,15,16],perfom:15,perform:[2,10,15],permut:[13,15,16],permute_sent_ratio:[13,15,16],perplex:8,peter:19,pfs:6,pham:19,phrase_t:[12,16],piec:4,pip3:[6,7,18],pip:[0,6],pipelin:[13,15,16],pleas:[0,7],plu:15,point:19,pointer:[10,19],
poisson:[13,15,16],poisson_lambda:[13,15,16],polosukhin:19,polyak_decai:10,pool:[1,10,15],port:[14,15],portal:7,pos_ffn_activation_fn:[10,15],posit:[10,15],position_encod:[10,15],position_ffn:10,positionalencod:10,positionwisefeedforward:[10,15],possibl:[3,8,10,11,12,13,15,16],postprocess:11,postprocess_opt:11,potenti:12,pouta:17,ppl:8,pproach:19,pre:[8,11,12],pre_word_vecs_dec:15,pre_word_vecs_enc:15,preced:3,precis:8,pred:16,pred_scor:12,pred_sent:12,predict:[8,12,16],prefer:0,prefix:[3,8,13,15,16],prefix_seq_len:12,preliminari:4,preload:11,preload_model:11,prepar:[5,12],prepare_wmt_data:4,preprint:19,preprocess:11,preprocess_opt:11,presenc:3,presum:12,pretrain:[10,15],prevent:[12,16],previou:[2,3,10,12],previous:2,primari:3,prime:2,print:[8,15,16],prior:4,prior_token:[13,15,16],prob:12,proba:16,probabl:[10,12,13,15,16],probil:10,problem:12,proc:[7,19],procedur:3,process:[2,8,11,13,15],processu:11,produc:[1,2,12,13,15,16],product:2,projappl:6,project:[0,1,2,6,7,10],project_2005099:6,project_462000125:6,propag:8,proper:11,properli:6,properti:[8,10],proport:[3,13,15,16],provid:[7,16],prune:5,pty:6,pull_request_chk:0,punctuat:0,push:1,put:12,pwd:17,pyonmttok:[13,15,16],python3:[3,6],python:[0,3,6,15],pythonpath:6,pythonuserbas:6,pytorch:[0,6,10],qin:19,quantiz:16,queri:10,query_len:10,question:5,queue:[13,15],queue_siz:[4,15],quickstart:[5,7],quoc:19,quot:0,rais:[13,15],random:[5,13,15],random_ratio:[13,15,16],random_sampling_temp:[12,16],random_sampling_topk:[12,16],random_sampling_topp:[12,16],randomli:12,rang:16,rank:[12,15],ranslat:19,rare:12,rate:[5,8],rather:0,ratio:[12,16],raw:[10,12,16],rccl:6,reach:12,read:[0,3,11,17],readabl:[0,3],reader:5,readm:15,rebuild:11,rebuild_seg_packag:11,receiv:3,recent:15,recip:10,recommend:15,recommonmark:0,rectifi:2,recurr:10,redund:3,ref:0,refer:[0,2,5],regardless:3,regist:10,regular:[13,15,16],rel:[10,15],relat:[4,13,15,16],relationship:2,relev:[10,12],relu:[2,10,15],rememb:0,remov:3,renorm:15,reorder:12,repeat:[12,16],repeti
t:16,replac:[1,12,13,15,16],replace_length:[13,15,16],replace_unk:[12,16],replic:10,report:[7,8,15,16],report_align:[12,16],report_everi:[4,15],report_manag:8,report_scor:12,report_stats_from_paramet:[8,15],report_tim:[12,16],reportmgrbas:8,repres:[2,8],represent:[1,2,10,15,19],reproduc:5,requir:[0,8,10,15],research:7,reset:8,reset_optim:15,resett:15,residu:10,resourc:3,respect:[2,3],respons:8,rest:14,restrict:[13,15,16],result:[2,11,15],return_attent:12,return_hidden:10,reus:[1,10,15],reuse_copy_attn:[10,15],revers:[13,15,16],reversible_token:[13,15,16],rico:19,right:[0,2],rmsnorm:15,rnn:[8,10,15,19],rnn_dropout:10,rnn_size:[4,15],rnn_type:[10,15],rnndecoderbas:10,rnnencod:10,roblem:19,rocm5:6,rocm:6,root:[2,3],rotat:[13,15,16],rotate_ratio:[13,15,16],roundrobin:15,row:3,rsqrt:15,rst:0,run:[0,3,4,8,10,11,15,16],rush:7,sacrebleu:[4,6,7,18],sai:3,samantao:6,same:[0,3,4,10,11,15],sampl:[5,12,13,15,17],sample_with_temperatur:12,sampling_temp:12,saniti:16,save:[5,8,13,15,16,17],save_all_gpu:15,save_checkpoint_step:[4,8,15],save_config:[13,15,16],save_data:[4,13,15],save_model:[4,15],saver:8,scale:[10,12,15],schedul:[8,15],schuster:19,score:[5,10,11,16],scorer:12,scratch:6,script:[0,4,5,6],search:[0,3,5,12],second:[2,3,10,11],secur:[13,15],see:[3,10,11,12,13,15,19],seed:[4,12,13,15,16],seem:1,seemingli:15,seen:2,segment:[3,11,16],select:[10,12,15],select_index:12,self:[1,2,10,11,12,15],self_attn_typ:[10,15],send:[0,15],senellart:7,sennrich:19,sensibl:0,sent:[8,15,16],sent_numb:12,sentenc:[1,12,13,15,16,17],sentencepiec:[3,4,6,7,13,15,16,18],separ:[3,10],seper:11,seq2seq:[12,15],seq:12,seq_len:[2,10,12],seqlength:10,sequenc:[1,2,3,8,10,11,12,13,15,16,19],serial:10,serv:2,server:[5,15,17],servermodel:11,servermodelerror:11,session:6,set:[2,3,4,6,8,10,11,12,13,15,16],setup:[4,10],sever:[3,10,12],sgd:15,sh16:[10,19],shape:[0,1,10,12],shard:[8,15,16],shard_siz:[8,16],share:[1,6,13,15,16],share_decoder_embed:[4,15],share_embed:[4,15],share_vocab:[13,15],shazeer:19,shortest:12,
shot:3,should:[3,4,10,12,15],shuf:17,shuffle_input_sent:17,side:[3,8,11,13,15,16],side_a:3,side_b:3,silent:[4,10,13,15],similar:[2,3,10,15],simpl:[2,8,15],simpleattentionbridgelay:5,simpli:10,simulatan:10,sin:15,sinc:10,singl:[0,11,15],single_pass:15,sinusoid:10,site:6,size:[3,8,10,12,13,15,16,17],skip:[3,13,15],skip_embed:10,skip_empty_level:[4,13,15],slen:10,slm17:[10,19],slow:[13,16],slurm:[3,6],smaller:[13,15,16],smooth:[13,15,16],softmax:[1,2,10,15,16],some:[0,1,3,8,16],someth:0,sometim:0,sort:[11,17],sorted_pair:3,sourc:[0,1,3,5,6,7,8,10,11,12,13,15],sp_path:17,space:[0,2,15],spacer:[13,15,16],span:[13,15,16],spars:10,sparseadam:15,sparsemax:[10,15],sparsesoftmax:1,specif:[1,2,3,7,12,13,15,18],specifi:[2,10,13,15,16],sphinx:0,sphinx_rtd_them:0,sphinxcontrib:0,spill:0,spm_decod:4,spm_encod:[4,17],spm_train:17,sqrt:2,squar:[2,3],src:[3,4,8,10,11,12,13,15,16,17],src_embed:15,src_feat:16,src_feats_vocab:[13,15],src_file_path:12,src_ggnn_size:15,src_group:3,src_lang:[3,16],src_languag:3,src_len:[8,10],src_length:12,src_map:[10,12],src_onmttok_kwarg:[13,15,16],src_raw:12,src_seq_length:[4,13,15,16],src_seq_length_trunc:15,src_subword_alpha:[4,13,15,16],src_subword_model:[4,13,15,16],src_subword_nbest:[4,13,15,16],src_subword_typ:[13,15,16],src_subword_vocab:[13,15,16],src_vocab:[4,12,13,15],src_vocab_s:15,src_vocab_threshold:[13,15,16],src_word_vec_s:15,src_words_min_frequ:15,sru:[5,15],srun:6,stabl:2,stack:[1,10,15,16],stage:2,stand:0,standard:[10,15,16],start:[3,5,6,8,11,15,17],start_decay_step:15,stat:[8,15],stat_list:8,state:[8,10,12,15],state_dict:15,state_dim:15,statist:[8,15],stdout:8,stdrnndecod:10,step:[2,3,5,8,10,12,15,16],stepwis:10,stepwise_penalti:[12,16],still:0,stop:[13,15,16],store:15,str:[0,8,10,11,12],strategi:[5,8,15],stride:10,string:[8,10,13,15,16],structur:[1,2,5,19],structured_attent:10,style:[0,10,13,15,16],styleguid:0,subclass:[8,10,12],subcompon:3,subdirectori:6,subsequ:2,subset:17,substitut:3,subword:[3,5],suggest:15,sum:[8,10,12,15],sum_:
10,sume:8,summar:19,summari:[0,12,16],superclass:0,supervis:[3,10,15],support:[0,3,10,15],suppos:17,sure:[6,12],sutskev:19,switchout:[5,19],switchout_temperatur:[13,15,16],symmetr:3,system:[12,15,19],tab:[13,15],tabl:[10,16],take:[2,3,7,10,13,15,16],taken:10,tangent:2,tanh:[2,10],tao:19,taolei87:10,tar:17,target:[3,5,8,10,11,12,13,15],target_prefix:12,task:[3,4,5,8,12],task_distribution_strategi:15,task_queue_manag:8,tatoeba:[3,5],tau:[13,15,16],technic:7,temperatur:[3,12,13,15,16],templat:3,tend:1,tensor:[0,8,10,12],tensorboard:[8,15],tensorboard_log_dir:15,tensorflow:15,term:[2,10],test:[0,4,6,10],testset:4,text:[8,10,12,15,16,19],tgt:[3,4,8,10,11,13,15,16],tgt_dict:10,tgt_embed:15,tgt_file_path:12,tgt_group:3,tgt_lang:[3,16],tgt_languag:3,tgt_len:[8,10],tgt_onmttok_kwarg:[13,15,16],tgt_pad_mask:10,tgt_prefix:[12,16],tgt_sent:12,tgt_seq_length:[4,13,15,16],tgt_seq_length_trunc:15,tgt_subword_alpha:[4,13,15,16],tgt_subword_model:[4,13,15,16],tgt_subword_nbest:[4,13,15,16],tgt_subword_typ:[13,15,16],tgt_subword_vocab:[13,15,16],tgt_vocab:[4,8,13,15],tgt_vocab_s:15,tgt_vocab_threshold:[13,15,16],tgt_word_vec_s:15,tgt_words_min_frequ:15,than:[0,1,12,15,17],thang:19,thant:12,thei:[2,10,12],them:[3,10],theorem:10,thi:[0,2,3,4,6,7,8,10,12,13,15,16],thin:8,thing:[0,3],thoroughli:10,thread:13,three:[2,10],through:[2,3,8],thu:8,tic:0,tick:0,time:[2,3,6,8,12,15,16],timeout:11,timer:11,titl:7,tlen:10,to_cpu:11,to_gpu:11,todo:[6,10,17],tok:11,token:[4,8,10,11,12,13,15,16],token_drop:5,token_mask:5,tokendrop:[13,15,16],tokendrop_temperatur:[13,15,16],tokenizer_mark:11,tokenizer_opt:11,tokenmask:[13,15,16],tokenmask_temperatur:[13,15,16],too:12,tool:5,toolkit:7,top:[2,10,12,16],topk_id:12,topk_scor:12,torch:[0,6,8,10,15],torchtext:8,total:[3,8,15],trail:0,train:[3,5,6,7,8,10,19],train_extremely_large_corpu:17,train_from:15,train_it:8,train_loss:8,train_loss_md:8,train_step:[4,8,15],trainabl:[1,8],trainer:5,training_step:8,transform:[1,2,4,5,8,19],transformer_ff:[4,15],transforme
r_lm:15,transformerattentionbridgelay:5,transformerdecod:10,transformerdecoderbas:10,transformerencod:10,transformerencoderlay:2,translat:[1,3,5,7,8,10,11,14,19],translate_batch:12,translation_serv:11,translationbuild:12,translationserv:11,transpos:1,travi:0,tree:10,trg:3,triang:3,trick:[5,10],trivial:10,trunc_siz:8,truncat:[8,15],truncated_decod:15,trust:17,ttention:19,turn:[10,15],tutori:[5,18],two:[2,3,10],txt:[0,16,17],type:[0,2,3,5,8,10,11,12,13,16],typic:[8,15],u_a:10,under:[3,15,16],undergo:2,undergon:2,underli:12,uniform:15,unigram:[13,15,16],union:0,unit:[2,10],unittest:0,unk:[12,16],unknown:12,unless:3,unload:11,unload_model:11,unmodifi:12,unnecessari:[0,1,3],unnorm:10,unset:3,unsqueez:1,until:[12,16],unwieldli:3,updat:[6,8,11,12,15],update_dropout:10,update_finish:12,update_learning_r:15,update_n_src_word:8,update_vocab:15,upgrad:6,upper:3,url:[6,7,19],url_root:14,usag:[5,13,14,15,16],use:[0,2,3,4,6,8,10,11,12,13,15,16,17],use_bridg:10,use_relu:10,use_tanh:10,used:[1,2,3,4,8,10,11,12,13,15,16],useful:8,user:[6,8,10,11],uses:[0,3,10,12,15],using:[0,2,3,7,10,11,12,13,15,16],uszkoreit:19,util:[2,8],v11:4,v_a:10,valid:[4,8,13,15,16],valid_batch_s:[4,15],valid_it:8,valid_loss:8,valid_loss_md:8,valid_step:[4,8,15],valu:[2,3,8,10,11,12,13,15,16],variabl:[3,6,12],variat:0,vaswani:19,vaswanispujgkp17:0,vector:[10,15],venv:6,verbos:[12,15,16],veri:[0,16],version:[10,11,12],via:[1,10,15,19],view:1,vinyal:19,virtual:6,visit:0,visual:15,vocab:[4,5,8,10,12],vocab_path:[13,15],vocab_s:[12,15,17],vocab_sample_queue_s:13,vocab_size_multipl:15,vocabulari:[3,8,10,13,15,16,17],vsp:[10,19],w_a:10,wai:[3,12],wait:3,wang:19,want:[3,16],warmup:15,warmup_step:[4,15],warn:[13,15,16],weight:[2,3,4,10,15,16],weight_decai:15,weighted_sampl:15,weightnormconv2d:10,well:[0,15],wget:17,what:[3,5,8,11],when:[0,3,7,10,12,13,15,16,17],where:[1,2,4,6,10,12,13,15,16],wherea:[12,15],whether:[8,10,11,12,13,15,16],which:[1,3,10,12,15],whl:6,whole:[4,12],whose:16,why:2,wiki:15,wikipedia:15,window
:[13,15,16],wise:2,with_align:8,within:[2,10,11],without:[0,10,15],wmt14_en_d:4,wmt:4,wmtend:4,wojciech:19,wolfgang:19,word2vec:15,word:[2,10,12,13,15,16],word_align:12,word_lut:10,word_padding_idx:10,word_vec_s:[4,10,15],word_vocab_s:10,work:[0,3,12,15],workflow:7,world_siz:[4,15],would:[3,12,15],wpdn18:[13,15,16,19],wrap:11,wrapper:8,writabl:3,write:[3,8],writer:8,written:4,wsc:[12,19],www:15,xavier_uniform:15,xent:8,xinyi:19,xiong:19,xzvf:17,yaml:[4,13,15,16],yang:19,yann:19,yarat:19,year:7,yet:[10,12],yml:0,yoav:19,yonghui:19,yoon:7,yoshua:19,you:[0,3,4,6,10,15,16,19],your:[0,3,6,16,17],your_venv_nam:6,your_vevn_nam:6,yourself:7,yuan:19,yuntian:7,zaremba:19,zero:[3,8,10,12,15,16],zero_grad:8,zhang:19,zhifeng:19,zihang:19,zxs18:[10,19]},titles:["Contributors","Questions","Attention Bridge","Config-config tool","Translation","Contents","Installation","Overview","Framework","Data Loaders","Modules","Server","Translation","Build Vocab","Server","Train","Translate","Prepare Data","Quickstart","References"],titleterms:{"class":12,The:3,actual:3,adapt:[3,15],adapter_config:3,ae_path:3,ae_transform:3,align:15,allocate_devic:3,altern:3,architectur:10,argument:14,attent:[2,10,15],autoencod:3,bank:1,beam:16,behind:1,between:1,bridg:[2,15],build:[4,13],challeng:17,citat:7,cluster_languag:3,command:3,common:[13,15,16],complete_language_pair:3,config:3,config_al:3,config_config:3,configur:[13,15,16],content:5,contributor:0,conv2conv:10,copi:10,core:[10,11],corpora:3,corpora_schedul:3,data:[4,9,13,15,16,17,18],dataset:9,dec_sharing_group:3,decod:[1,10,12,15,16],denois:[13,15,16],differ:1,direct:17,distanc:3,distance_matrix:3,docstr:0,download:[4,17],dynam:15,effici:16,embed:15,enc_sharing_group:3,encod:[1,10,15],encoder_output:1,evalu:4,featur:15,feedforwardattentionbridgelay:2,filter:[13,15,16],fix:1,framework:8,gener:[1,15],get:17,group:3,guidelin:0,inferfeat:[13,15,16],initi:15,input:3,instal:[6,7,18],intermediate_output:1,intuit:1,kei:3,languag:[3,16],length:1,level:3,lina
ttentionbridgelay:2,line:3,loader:9,log:[15,16],loss:8,lumi:6,mahti:6,mammoth:18,manual:3,matrix:3,memori:1,model:[1,4,8,11,15,16,17],modul:10,n_gpus_per_nod:3,n_group:3,n_node:3,name:14,need:1,onmttok:[13,15,16],optim:[8,15],opu:17,other:3,overrid:3,overview:7,paramet:3,pars:17,path:17,penalti:16,perceiverattentionbridgelay:2,prepar:[4,17,18],prune:15,puhti:6,question:1,quickstart:18,random:16,rate:15,reader:9,refer:19,relev:17,remove_temporary_kei:3,reproduc:[13,15,16],run:6,sampl:16,save:1,score:12,search:16,sentencepiec:17,separ:1,server:[11,14],set:17,set_transform:3,share:3,sharing_group:3,shot:17,simpleattentionbridgelay:2,sourc:16,specifi:3,src_path:3,sru:10,stage:3,step:[4,17,18],strategi:12,structur:10,subword:[4,13,15,16],supervis:17,switchout:[13,15,16],target:16,task:15,tatoeba:17,test:17,tgt_path:3,than:3,token_drop:[13,15,16],token_mask:[13,15,16],tool:3,top:3,train:[4,15,17],trainer:8,transform:[3,10,13,15,16],transformerattentionbridgelay:2,translat:[4,12,16,17],translation_config:3,translation_config_dir:3,trick:16,type:15,usag:3,use_introduce_at_training_step:3,use_weight:3,valid:17,variabl:17,vocab:[13,15,17],vocabulari:4,what:1,why:1,yaml:3,zero:17,zero_shot:3}}) \ No newline at end of file 
+Search.setIndex({docnames:["CONTRIBUTING","FAQ","attention_bridges","config_config","examples/Translation","index","install","main","onmt","onmt.inputters","onmt.modules","onmt.translate.translation_server","onmt.translation","options/build_vocab","options/server","options/train","options/translate","prepare_data","quickstart","ref"],envversion:{"sphinx.domains.c":1,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":1,"sphinx.domains.index":1,"sphinx.domains.javascript":1,"sphinx.domains.math":2,"sphinx.domains.python":1,"sphinx.domains.rst":1,"sphinx.domains.std":1,"sphinx.ext.viewcode":1,sphinx:56},filenames:["CONTRIBUTING.md","FAQ.md","attention_bridges.md","config_config.md","examples/Translation.md","index.rst","install.md","main.md","onmt.rst","onmt.inputters.rst","onmt.modules.rst","onmt.translate.translation_server.rst","onmt.translation.rst","options/build_vocab.rst","options/server.rst","options/train.rst","options/translate.rst","prepare_data.md","quickstart.md","ref.rst"],objects:{"onmt.Trainer":{train:[8,1,1,""],validate:[8,1,1,""]},"onmt.decoders":{CNNDecoder:[10,0,1,""],DecoderBase:[10,0,1,""],InputFeedRNNDecoder:[10,0,1,""],StdRNNDecoder:[10,0,1,""],TransformerDecoder:[10,0,1,""]},"onmt.decoders.CNNDecoder":{forward:[10,1,1,""],from_opt:[10,1,1,""],init_state:[10,1,1,""]},"onmt.decoders.DecoderBase":{from_opt:[10,1,1,""]},"onmt.decoders.TransformerDecoder":{forward:[10,1,1,""]},"onmt.decoders.decoder":{RNNDecoderBase:[10,0,1,""]},"onmt.decoders.decoder.RNNDecoderBase":{forward:[10,1,1,""],from_opt:[10,1,1,""],init_state:[10,1,1,""]},"onmt.encoders":{CNNEncoder:[10,0,1,""],EncoderBase:[10,0,1,""],MeanEncoder:[10,0,1,""],RNNEncoder:[10,0,1,""],TransformerEncoder:[10,0,1,""]},"onmt.encoders.CNNEncoder":{forward:[10,1,1,""],from_opt:[10,1,1,""]},"onmt.encoders.EncoderBase":{forward:[10,1,1,""]},"onmt.encoders.MeanEncoder":{forward:[10,1,1,""],from_opt:[10,1,1,""]},"onmt.encoders.RNNEncoder":{forward:[10,1,1,""],from_opt:[10,1,
1,""]},"onmt.encoders.TransformerEncoder":{forward:[10,1,1,""],from_opt:[10,1,1,""]},"onmt.models":{NMTModel:[8,0,1,""]},"onmt.models.NMTModel":{count_parameters:[8,1,1,""],forward:[8,1,1,""]},"onmt.models.sru":{SRU:[10,0,1,""]},"onmt.models.sru.SRU":{forward:[10,1,1,""]},"onmt.modules":{AverageAttention:[10,0,1,""],ConvMultiStepAttention:[10,0,1,""],CopyGenerator:[10,0,1,""],Embeddings:[10,0,1,""],GlobalAttention:[10,0,1,""],MultiHeadedAttention:[10,0,1,""],PositionalEncoding:[10,0,1,""],WeightNormConv2d:[10,0,1,""]},"onmt.modules.AverageAttention":{cumulative_average:[10,1,1,""],cumulative_average_mask:[10,1,1,""],forward:[10,1,1,""]},"onmt.modules.ConvMultiStepAttention":{apply_mask:[10,1,1,""],forward:[10,1,1,""]},"onmt.modules.CopyGenerator":{forward:[10,1,1,""]},"onmt.modules.Embeddings":{emb_luts:[10,1,1,""],forward:[10,1,1,""],load_pretrained_vectors:[10,1,1,""],word_lut:[10,1,1,""]},"onmt.modules.GlobalAttention":{forward:[10,1,1,""],score:[10,1,1,""]},"onmt.modules.MultiHeadedAttention":{forward:[10,1,1,""],training:[10,2,1,""],update_dropout:[10,1,1,""]},"onmt.modules.PositionalEncoding":{forward:[10,1,1,""]},"onmt.modules.WeightNormConv2d":{forward:[10,1,1,""]},"onmt.modules.position_ffn":{PositionwiseFeedForward:[10,0,1,""]},"onmt.modules.position_ffn.PositionwiseFeedForward":{forward:[10,1,1,""]},"onmt.modules.structured_attention":{MatrixTree:[10,0,1,""]},"onmt.modules.structured_attention.MatrixTree":{forward:[10,1,1,""]},"onmt.translate":{BeamSearch:[12,0,1,""],DecodeStrategy:[12,0,1,""],GNMTGlobalScorer:[12,0,1,""],GreedySearch:[12,0,1,""],Translation:[12,0,1,""],TranslationBuilder:[12,0,1,""],Translator:[12,0,1,""]},"onmt.translate.BeamSearch":{initialize:[12,1,1,""]},"onmt.translate.DecodeStrategy":{advance:[12,1,1,""],block_ngram_repeats:[12,1,1,""],initialize:[12,1,1,""],maybe_update_forbidden_tokens:[12,1,1,""],maybe_update_target_prefix:[12,1,1,""],target_prefixing:[12,1,1,""],update_finished:[12,1,1,""]},"onmt.translate.GreedySearch":{advanc
e:[12,1,1,""],initialize:[12,1,1,""],update_finished:[12,1,1,""]},"onmt.translate.Translation":{log:[12,1,1,""]},"onmt.translate.Translator":{translate_batch:[12,1,1,""]},"onmt.translate.greedy_search":{sample_with_temperature:[12,3,1,""]},"onmt.translate.penalties":{PenaltyBuilder:[12,0,1,""]},"onmt.translate.penalties.PenaltyBuilder":{coverage_none:[12,1,1,""],coverage_summary:[12,1,1,""],coverage_wu:[12,1,1,""],length_average:[12,1,1,""],length_none:[12,1,1,""],length_wu:[12,1,1,""]},"onmt.translate.translation_server":{ServerModel:[11,0,1,""],ServerModelError:[11,4,1,""],Timer:[11,0,1,""],TranslationServer:[11,0,1,""]},"onmt.translate.translation_server.ServerModel":{build_tokenizer:[11,1,1,""],detokenize:[11,1,1,""],do_timeout:[11,1,1,""],maybe_convert_align:[11,1,1,""],maybe_detokenize:[11,1,1,""],maybe_detokenize_with_align:[11,1,1,""],maybe_postprocess:[11,1,1,""],maybe_preprocess:[11,1,1,""],maybe_tokenize:[11,1,1,""],parse_opt:[11,1,1,""],postprocess:[11,1,1,""],preprocess:[11,1,1,""],rebuild_seg_packages:[11,1,1,""],to_gpu:[11,1,1,""],tokenize:[11,1,1,""],tokenizer_marker:[11,1,1,""]},"onmt.translate.translation_server.TranslationServer":{clone_model:[11,1,1,""],list_models:[11,1,1,""],load_model:[11,1,1,""],preload_model:[11,1,1,""],run:[11,1,1,""],start:[11,1,1,""],unload_model:[11,1,1,""]},"onmt.utils":{Optimizer:[8,0,1,""],Statistics:[8,0,1,""]},"onmt.utils.Optimizer":{amp:[8,1,1,""],backward:[8,1,1,""],from_opt:[8,1,1,""],learning_rate:[8,1,1,""],step:[8,1,1,""],training_step:[8,1,1,""],zero_grad:[8,1,1,""]},"onmt.utils.Statistics":{accuracy:[8,1,1,""],all_gather_stats:[8,1,1,""],all_gather_stats_list:[8,1,1,""],elapsed_time:[8,1,1,""],log_tensorboard:[8,1,1,""],output:[8,1,1,""],ppl:[8,1,1,""],update:[8,1,1,""],xent:[8,1,1,""]},"onmt.utils.loss":{LossComputeBase:[8,0,1,""]},onmt:{Trainer:[8,0,1,""]}},objnames:{"0":["py","class","Python class"],"1":["py","method","Python method"],"2":["py","attribute","Python attribute"],"3":["py","function","Python 
function"],"4":["py","exception","Python exception"]},objtypes:{"0":"py:class","1":"py:method","2":"py:attribute","3":"py:function","4":"py:exception"},terms:{"0473v3":19,"25g":6,"abstract":10,"boolean":[8,12],"break":17,"class":[0,5,8,10,11],"default":[11,13,14,15,16,17],"export":6,"final":[2,4,10,12],"float":[3,10,12],"function":[0,1,2,3,8,10,11,12,15],"import":0,"int":[8,10,11,12],"long":0,"new":[0,2,4],"public":6,"return":[0,8,10,11,12],"static":[8,15],"true":[3,4,8,10,12,15,16,17],"try":[0,6],"while":[3,10],And:[0,10],But:1,EOS:12,For:[0,1,3,12,15,18],IDs:12,IFS:17,LPs:3,Not:0,One:3,The:[1,2,4,8,10,11,12,15,16],Then:[0,4,10],There:[2,3],These:[2,3,10,12],Use:[3,15,16],Used:12,Will:[1,3],__init__:11,_compute_loss:8,a_j:10,aan:[10,15],aan_useffn:[10,15],ab_fixed_length:15,ab_lay:15,ab_layer_norm:15,abbrevi:0,abigail:19,abil:10,about:0,abov:[0,12],abs:[2,15,16,19],acceler:[10,19],accept:[0,3,12],access:[2,3,6],accord:3,account:[3,6],accross:8,accum:8,accum_count:[4,8,15],accum_step:[4,8,15],accumul:[8,15],accuraci:[8,12],achiev:3,achin:19,acl:[7,19],aclweb:15,act:1,action:[10,12,15],activ:[1,2,6,10,15],activation_fn:10,activationfunct:10,actual:12,adadelta:15,adafactor:15,adagrad:15,adagrad_accumulator_init:15,adam:[4,15],adam_beta1:15,adam_beta2:[4,15],adamoptim:15,adamw:15,adapt:[5,7],adapter_nam:3,add:[0,4,10],added:3,adding:0,addit:[0,10,13,15,16],addition:10,address:12,adjust:3,adopt:15,adpot:10,advanc:[12,15],advic:0,after:[0,2,10,12,15],afterward:10,again:0,agnost:1,aidan:19,alexand:7,algorithm:19,align:[5,8,10,11,12,16,19],align_debug:16,alignment_head:[10,15],alignment_lay:[10,15],aliv:12,alive_attn:12,alive_seq:12,all:[0,3,8,10,12,13,15,16,19],all_gather_stat:8,all_gather_stats_list:8,all_preprocess:11,allennlp:0,alloc:3,allow:[0,2,3,15],almost:[12,15],alon:0,along:2,alpha:[2,12,16],alphabet:3,alreadi:[13,15,16],also:[0,3,6,8,10,15],altern:10,although:10,alwai:[0,3],amp:[8,15],ani:[0,1,3,12,13,15],annurev:19,anoth:[0,2,8],antholog:15,apex:15,apex_opt_lev
el:15,api:[0,5],api_doc:15,appear:3,append:[6,17],appli:[2,3,10,12,13,15,16],applic:16,apply_mask:10,approach:10,appropri:12,approxim:15,architectur:[2,5],arg:[0,10,11],argmax:16,argpars:11,argument:[0,5],around:10,artzi:19,arxiv:[0,2,15,16,19],ashish:19,assig:3,assign:[3,16],assing:3,assum:[10,12],att_typ:2,attend:2,attent:[0,1,5,8,12,16,19],attention_bridg:8,attention_dropout:[4,10,15],attentionbridgenorm:2,attet:10,attn:[10,12,16],attn_debug:[12,16],attn_func:10,attn_typ:10,attr:11,attribut:12,augment:19,auli:19,author:[7,10],autodoc:0,autogener:15,avail:[8,10,11,15,16],available_model:14,averag:[10,15,16,19],average_decai:[4,8,15],average_everi:[8,15],average_output:10,averageattent:10,avg:[10,16],avg_raw_prob:16,avoid:[0,3],aws:6,axi:12,back:8,backend:15,backward:8,bahdanau:[10,15,19],ban_unk_token:[12,16],bank:[5,10],barri:19,bart:[3,5],base:[0,2,3,4,6,7,8,10,11,12,13,15,16,19],base_target_emb:10,baselin:15,basemodel:8,basenam:[4,17],bash:6,batch:[2,4,8,10,12,15,16],batch_siz:[4,10,12,15,16],batch_size_multipl:[4,15],batch_typ:[4,15,16],bcb14:[10,19],beam:[5,12],beam_search:12,beam_siz:[4,12,16],beamsearch:12,beamsearchbas:12,becaus:[3,16],becom:3,been:[10,12,13,15,16],befor:[0,4,11,12,15,16],begin:[8,12],behind:5,below:0,ben:3,benefit:1,bengali:3,bengio:19,best:[12,16],beta1:15,beta2:15,beta:[12,16],better:[0,13,15,16],between:[2,5,10,13,15,16,19],beyond:8,biao:19,bib:0,bibtex:0,bibtext:0,bidir_edg:15,bidirect:[10,15],bidirectional_encod:10,bin:[6,15],binari:[4,10],bit:16,blank:0,bleu:4,blob:10,block:[12,16],block_ngram_repeat:[12,16],booktitl:7,bool:[8,10,11,12],bos:12,both:[3,12,15],both_embed:15,boundari:[13,15,16],bpe:[13,15,16],bptt:[8,15],bridg:[1,5,19],bridge_extra_nod:15,bring:1,brnn:15,browser:0,bucket_s:[4,15],buffer:8,build:[0,5,8,10,11,12,16,17],build_token:11,build_vocab:13,built:8,bytetensor:12,cach:10,calcul:[2,8,10,12],call:[10,12],callabl:12,callback:8,can:[2,3,4,6,8,11,12,13,15,16],cancel:11,candid:[3,13,15,16],cao:19,capit:0,captur:2,care:1
0,cat:17,categor:12,categori:12,challeng:5,chang:[0,3,8,15],channel:2,charact:[0,16],character_coverag:17,check:[0,7,18],checklist:0,checkpoint:[4,8,15],chen:19,chmod:[4,6],cho:19,choic:[0,10,13,15,16],choos:0,chosen:12,christoph:19,citat:[0,5],cite:[0,7],classmethod:[8,10],clear:0,clone:[7,11,18],clone_model:11,close:0,cls:8,cluster:[3,7,18],clutter:0,cnn:[10,15,19],cnn_kernel_width:[10,15],cnndecod:10,cnnencod:10,code:[0,3,6,16],code_dir:6,codebas:6,column:3,com:[7,10,18],combin:[10,16],comma:3,command:[4,5],comment:0,commentari:4,common:[0,5],commoncrawl:4,commun:0,complet:12,complex:[3,12],compon:[2,3],composit:15,comput:[2,3,4,8,10,15,16],concat:[10,15],concaten:15,condit:[12,15,16],conf:[14,16],config:[4,5,11,13,14,15,16],config_fil:11,configur:[3,4,5],connect:2,consid:[3,10,17],consider:15,consist:[0,10],constant:3,construct:10,constructor:[0,10],consum:15,contain:[1,3,10,11,12],content:[0,16],context:[2,10,15],context_attn:10,context_g:[10,15],contextg:10,continu:0,contribut:[0,2,10],contributor:5,control:[3,8],conv2conv:5,conv2d:10,conv:[10,15],conveni:3,convent:0,convers:12,convert:11,convex:10,convmultistepattent:10,convolut:[10,19],copi:[0,3,5,6,15,16],copy_attn:[10,12,15],copy_attn_forc:15,copy_attn_typ:[10,15],copy_loss_by_seqlength:15,copygener:10,core:[2,5,8],corpora:4,corpu:[3,4,13,15,17],corr:[0,19],correct:3,correspand:11,correspond:[2,16],could:12,count:[3,8,12,13,15,16],count_paramet:8,cov:12,cov_pen:12,coverag:[10,12,15,16],coverage_attn:[10,15],coverage_non:12,coverage_penalti:[12,16],coverage_summari:12,coverage_wu:12,cpu:[11,15,16],crai:6,crayon:15,creat:[3,6,8],creation:3,criteria:15,criterion:8,critic:[15,16],cross:[8,10,15],cross_queri:1,csc:17,csv:3,ct2_model:11,ct2_translate_batch_arg:11,ct2_translator_arg:11,ctrl:0,cuda_funct:10,cudnn:10,cumbersom:3,cumul:[10,12,16],cumulative_averag:10,cumulative_average_mask:10,cur_dir:17,cur_len:12,current:[3,8,10,12,15],curricula:3,curriculum:3,custom:[11,15],custom_opt:11,cut:[0,17],cutoff:12,d_ff
:10,d_model:10,dai:19,data:[2,3,5,8,12,19],data_path:17,data_typ:[8,12,15,16],dataset:[4,5,13,15,16,17],datastructur:11,dauphin:19,david:19,dblp:0,ddress:19,deal:3,debug:[14,15,16],dec:3,dec_lay:[4,15],dec_out:10,dec_rnn_siz:15,decai:15,decay_method:[4,15],decay_step:15,decod:[2,3,5,8],decode_strategi:12,decoder_typ:[4,15],decoderbas:[8,10],decodestrategi:12,def:0,defin:[3,4,10,13,15,16],definit:10,delai:3,delet:[13,15,16],delimit:16,deng:7,deni:19,denot:2,depend:[0,1,3,6,8,10,11],deprec:[15,16],describ:[2,10,11,15],descript:0,desir:[3,4],detail:[7,13,15],determin:3,detoken:[4,11],dev:[6,17],develop:0,devic:[3,10,12,16],device_context:8,deyi:19,diagon:3,dict:[3,8,10,11,12,13,15,16],dict_kei:15,dictionari:[8,10,12,15],differ:[0,2,3,5,10,11,16],dilat:10,dim:10,dimens:[2,10,12,15],dimension:[2,10],dir:17,direct:[0,3,12],directli:[0,10,16],directori:[3,6,11,15],disabl:15,discard:[13,15,16],discourag:15,disk:15,displai:8,dist:8,distanc:[10,15],distribtut:10,distribut:[3,8,10,12,13,15,16],divers:[2,13,15,16],divid:[2,3,15,16],divis:10,do_timeout:11,doc:0,document:[0,7],doe:[1,3,16],doesn:17,doi:[7,19],doing:[3,16],don:0,done:[4,12,17],dot:[2,10,15],dotprod:15,down:[12,13],download:6,dropout:[4,8,10,13,15,16],dropout_step:[4,8,15],due:15,dump:[13,15,16],dump_beam:[12,16],dump_sampl:13,dump_transform:15,dure:[11,15,16],dynam:[5,10,16],dzmitri:19,each:[2,3,10,12,13,15,16],earli:15,earlier:[2,13,15,16],early_stop:15,early_stopping_criteria:15,earlystopp:8,eas:3,easi:0,easili:3,echo:[4,17],edg:15,effect:[2,10,11,13],effici:[5,8,19],either:[12,15],elaps:8,elapsed_tim:8,element:[2,3],els:[1,17],emb:10,emb_fil:10,emb_lut:10,embed:[2,5,10,13],embedding_s:10,embeddings_typ:15,emerg:2,emnlp:19,emploi:[2,8],empti:[4,10,12,13,15],enabl:[10,16],enc:3,enc_hidden:10,enc_lay:[4,15],enc_rnn_siz:15,encapsul:2,encod:[2,3,5,8,12],encoder_fin:10,encoder_out_combin:10,encoder_out_top:10,encoder_output:5,encoder_typ:[4,15],encoderbas:[8,10],encordec:[13,15],encount:[13,15],encout:[13,15],end:12,
eng:3,english:[1,3,4,17],enhanc:[1,2],ensembl:16,ensur:2,entir:17,entri:0,entropi:8,env_dir:6,environ:6,eos:12,epoch:15,eps:10,epsilon:15,equal:[12,15],equat:10,equival:15,error:[0,13,15,16],especi:3,essenti:12,establish:2,eural:19,europarl:4,evalu:8,even:3,event:12,everi:[8,10,15,16],exactli:0,exampl:[0,1,3,4,13,15,18],exce:15,except:[0,11,13,15,16],exclusion_token:12,execut:[4,13,15],exist:[13,15,16,17],exp:15,exp_host:15,expand:1,expect:[3,12],experi:[13,15,16],experiment:15,exponenti:15,extend:[0,10],extern:0,extra:[6,10,15],extra_word:10,extract:17,facilit:2,fail:12,fairseq:0,fals:[8,10,11,12,13,14,15,16],familiar:7,fast:[10,19],faster:[10,15],feat_0:16,feat_1:16,feat_dim_expon:10,feat_merg:[10,15],feat_merge_s:15,feat_padding_idx:10,feat_vec_expon:[10,15],feat_vec_s:[10,15],feat_vocab_s:10,feats0:16,feats1:16,featur:[2,5,8,10,13,16,19],fed:2,feed:[3,10,15],feedforward:[2,15],feedforwardattentionbridgelay:5,feel:0,few:0,ffectiv:19,ffn:[10,15],figur:10,file:[0,3,11,13,15,16,17],filenam:15,filter:[4,5,17],filterfeat:[13,15,16],filternonzeronumer:[13,15,16],filterrepetit:[13,15,16],filterterminalpunct:[13,15,16],filtertoolong:[3,4,13,15,16],filterwordratio:[13,15,16],find:0,firefox:0,first:[0,3,10,12,15],five:2,fix:[0,5,12,15],flag:8,flake8:0,floattensor:[8,10,12],flow:2,fly:4,fnn:10,focu:[0,2],folder:0,follow:[0,2,3,4,16,18],foo:0,forbidden:12,forbidden_token:12,forc:[12,16],format:[0,11,13,15,16,17],former:10,forward:[3,8,10,15],fotran:3,found:17,foundat:2,fp16:[15,16],fp32:[4,8,15,16],frac:2,fraction:[13,15,16],framework:[5,15],free:[0,11],freez:[10,15],freeze_word_vec:10,freeze_word_vecs_dec:15,freeze_word_vecs_enc:15,french:1,frequenc:[13,15,16],from:[2,3,8,10,12,15,16,17],from_opt:[8,10],frozenset:12,full:[0,3,10,11,13,15,16,17],full_context_align:[10,15],fulli:[3,10],further:[13,15],fusedadam:15,gag:[10,19],gao:19,gap:19,garg:15,gate:15,gather:8,gating_output:10,gehr:19,gelu:15,gener:[0,2,3,4,5,8,10,12,16,19],generator_funct:15,german:4,get:[5,6,19],ggnn:15
,git:[7,18],github:[7,10,15,18],give:[3,15,16],given:[2,3,11],global:10,global_attent:15,global_attention_funct:15,global_scor:12,globalattent:10,glove:15,gnmt:12,gnmtglobalscor:12,going:12,gold:12,gold_scor:12,gold_sent:12,gomez:19,gone:15,good:[0,15],googl:[0,12,16,19],gpu:[3,4,6,11,12,15,16],gpu_backend:15,gpu_rank:[4,15],gpu_verbose_level:[8,15],gpuid:15,grad:8,gradient:[8,15],graham:19,gram:12,grangier:19,graph:15,gre:6,greater:12,greedy_search:12,greedysearch:12,group:[10,15,16],groupwis:3,grow:12,gru:[10,15],gtx1080:16,guid:[7,10,18],guidelin:5,guillaum:7,h_j:10,h_s:10,h_t:10,had:16,haddow:19,hand:3,handl:[0,8],happen:12,has:[2,3,12,13,15,16],has_cov_pen:12,has_len_pen:12,has_tgt:12,have:[0,3,4,10,12,15,16],head:[2,4,10,15],head_count:10,help:[0,2,16],helsinki:[7,18],here:[2,12,17],hidden:[1,8,10,15],hidden_ab_s:15,hidden_dim:2,hidden_s:10,hieu:19,high:3,higher:[12,15,16],highest:16,hold:12,hook:10,hop:[1,2],host:6,how:[0,10],howev:[0,8,10],html:[0,15],http:[2,6,7,10,15,16,17,18,19],huge:15,human:[3,19],hyp_:4,hyperbol:2,hyphen:3,hypothesi:4,iclr:19,identifi:16,idl:3,ids:3,ignor:[4,10,13,15,16],ignore_when_block:[12,16],illia:19,ilya:19,imag:8,impact:15,implement:[2,8,10,15],impli:[2,10],improv:[10,12,15,19],in_channel:10,in_config:3,includ:[0,3,10,13,15,16],incorpor:15,increas:3,independ:1,index:[6,10,15],indic:[2,8,10,12,13,15,16],individu:3,inf:12,infer:12,inferfeat:5,info:[15,16],inform:[1,2,3,15,16],ingredi:12,init:[10,15],init_scal:10,init_st:[8,10],initi:[5,8,10,11,12],initial_accumulator_valu:15,inner:10,inp:12,inp_seq_len:12,inproceed:7,input:[2,5,8,10,11,12,13,15,16,17,19],input_fe:15,input_feed:10,input_format:4,input_from_dec:10,input_len:10,input_s:10,input_sentence_s:17,inputfeedrnndecod:10,inputs_len:10,inputt:12,insert:[13,15,16],insert_ratio:[13,15,16],instal:[0,4,5],instanc:[8,10,12],instanti:8,instead:[0,3,6,10,13,15,16],instruct:15,int8:16,integ:12,integr:0,interact:6,interfac:[8,10],intermedi:[1,2],intermediate_output:[2,5],intern:11,inte
rv:15,introduc:[2,3],introduct:3,intuit:5,invalid:[13,15,16],involv:2,is_finish:12,is_on_top:10,isn:12,item:10,iter:8,its:[0,3,10],itself:3,jakob:19,jean:7,jinsong:19,job:6,joiner:[13,15,16],jointli:[10,19],jona:19,jone:19,journal:0,json:14,kaiser:19,keep:[11,12,15],keep_checkpoint:[4,15],keep_stat:15,keep_topk:12,keep_topp:12,kei:10,kera:15,kernel_s:[10,15],key_len:10,kim:7,klau:19,klein:7,krikun:19,kwarg:10,kyunghyun:19,label:15,label_smooth:[4,15],lambda:[13,15,16],lambda_align:15,lambda_coverag:15,lang:3,lang_a:3,lang_b:3,lang_pair:[3,16],languag:[1,2,5,13,15,17],language_pair:17,lapata:19,last:[3,10,15,16],latent_arrai:1,lattent_arrai:1,latter:10,layer:[1,2,10,15,16],layer_cach:10,layer_norm_modul:10,layer_type_to_cl:2,layernorm:15,layerstack:3,lead:12,learn:[2,8,10,15,19],learning_r:[4,8,15],learning_rate_decai:15,learning_rate_decay_fn:8,least:0,leav:[3,15],left:2,lei:19,len:[8,10,12],length:[3,5,8,10,12,13,15,16,17],length_averag:12,length_non:12,length_pen:12,length_penalti:[12,16],length_wu:12,less:3,let:[3,4],level:[13,15],lib:6,librari:15,like:[0,12,16],limit:16,lin:[1,2,15],linattentionbridgelay:5,line:[0,4,13,15,16],linear:[1,2],linear_warmup:15,linguist:[10,19],link:[0,2,6],list:[0,3,8,10,11,12,13,15,16],list_model:11,liter:1,literatur:15,liu:19,ll17:[10,19],llion:19,load:[6,8,10,11,15],load_model:11,load_pretrained_vector:10,loader:5,local:[0,3],localhost:15,log:[5,8,12],log_fil:[15,16],log_file_level:[15,16],log_prob:12,log_tensorboard:8,logger:12,login:6,logit:[12,16],logsumexp:12,longer:[1,16],longest:12,longtensor:[8,10,12],look:[0,7,10,16],loop:8,loss:[5,15],loss_scal:15,losscomputebas:8,love:0,lower:[1,3,15],lpm15:[10,19],lsl:[12,19],lstm:[10,15],lua:11,lukasz:19,luong:[10,15,19],lustrep1:6,lustrep2:6,lza17:[10,19],macherei:19,machin:[7,10,12,19],made:3,magic:12,mai:[3,8,11,12,13,15],main:[0,7,8,13,15,16],maintain:12,make:[0,6,8,13,15,16],make_shard_st:8,mammoth:[5,6,7],man:19,manag:8,mani:[8,12,15],manipul:8,manual:[11,12],many2on:1,map:[3,8,1
0],margin:10,marian:15,mark:15,marker:11,mask:[10,13,15,16],mask_length:[13,15,16],mask_or_step:10,mask_ratio:[13,15,16],massiv:[3,7],master:[10,15],master_ip:15,master_port:15,match:11,mathbb:2,mathbf:2,mathemat:2,matric:2,matrix:[2,10,15],matrixtre:10,max:[8,10,12,17],max_generator_batch:[4,15],max_grad_norm:[4,8,15],max_len:10,max_length:[12,16],max_relative_posit:[10,15],max_sent_length:16,max_sentence_length:17,max_siz:8,maxim:19,maximum:[13,15,16],maybe_convert_align:11,maybe_detoken:11,maybe_detokenize_with_align:11,maybe_postprocess:11,maybe_preprocess:11,maybe_token:11,maybe_update_forbidden_token:12,maybe_update_target_prefix:12,mean:[3,10,11,15,16],meanencod:10,mechan:[2,3,10],mem:6,memori:[5,10,11,15],memory_bank:[10,12],memory_length:10,merg:[10,15],meta:3,metadata:8,method:[8,10,15],metric:16,mi250:6,michael:19,mike:19,min_length:[12,16],minh:19,minimum:[13,15,16],mirella:19,mirror:15,mix:8,mkdir:[6,17],mlp:[10,15],mode:[3,13,15,16],model:[2,3,5,10,12,13],model_dim:10,model_dtyp:[4,8,15],model_id:11,model_kwarg:11,model_prefix:17,model_root:11,model_sav:8,model_step:4,model_task:15,model_typ:15,modelsaverbas:8,modif:8,modifi:[0,12],modul:[0,1,2,5,6,8,15,16],modular:7,mohammad:19,monolingu:3,more:[0,1,3,12,13,15,16],most:[12,16],mostli:8,move:[11,15],moving_averag:[8,15],much:15,multi:[0,1,2,10],multiheadedattent:[2,10],multilingu:[3,7],multipl:[0,2,3,8,10,15,16],multipli:2,multplic:0,must:[3,10,11,15],mymodul:6,n_batch:8,n_best:[11,12,16],n_bucket:15,n_correct:8,n_edge_typ:15,n_node:15,n_sampl:[4,13,15],n_seg:11,n_src_word:8,n_step:15,n_word:8,name:[0,3,5,12,13,15,17],namespac:11,napoleon:0,nccl:15,necessari:[0,4,6,8,12,15,16],necessit:3,need:[0,3,4,8,10,15,19],neg:[11,15],network:[10,19],neubig:19,neural:[7,10,12,19],neuro:19,never:12,news_commentari:4,next:[3,8,12,16],nfeat:10,ngram:[12,16],nightmar:3,niki:19,nlp:[7,18],nmt:[8,12,15,16],nmtmodel:[8,10],noam:[4,15,19],noamwd:15,node:[3,6,8,15],node_rank:15,nois:3,non:[10,12,15],none:[1,8,10,11,12,13,1
5,16],nonetyp:[10,12],nonzero_threshold:[13,15,16],norm:[10,15],norm_method:8,normal:[2,4,8,15],normalz:8,norouzi:19,note:[0,3,4,6,12],noth:[0,8],notset:[15,16],ntask:6,nucleu:16,num_lay:10,num_step:8,num_thread:13,number:[2,3,8,10,12,13,15,16],numer:[13,15,16],nvidia:15,obj:[0,8],object:[0,8,11,12,13,15,17],oder:3,off:15,ofi:6,often:[13,15,16],on_timemout:11,on_timeout:11,onc:[12,15],one2mani:1,one:[0,2,3,8,10,13,15,16],onli:[3,8,12,13,15,16],onmt:[0,8,10,11,12,15,17],onmt_build_vocab:4,onmt_token:[13,15,16],onmt_transl:4,onmttok:5,open:7,opennmt:[0,3,6,7,8,14],oper:[2,10],operatornam:2,opt:[4,8,10,11,15,16],opt_level:15,optim:[4,5],option:[0,3,4,6,8,10,11,12,13,15,16,17],opu:5,opus100:[17,18],ord:19,order:[3,15],org:[2,6,7,15,16,19],origin:[2,15,17],oriol:19,other:[2,6,8,12,13,15,16,17,19],other_lang:17,otherwis:[3,10,15,16],our:[6,12],our_stat:8,out:[2,3,7,8,18],out_channel:10,out_config:3,out_fil:12,outcom:2,output:[1,2,3,4,8,10,11,12,13,15,16],output_model:16,output_s:10,over:[0,1,3,4,8,10,12,15,16,17],overal:[1,2],overrid:[10,12,13,15],overridden:10,overview:5,overwrit:[6,13,15],own:[8,16],ownership:8,p17:7,p18:15,packag:[6,11],pad:[8,10,12],pad_idx:10,pair:[3,8,11,15,16,17],paper:[0,2,10,15],parallel:[10,12,13,15],parallel_path:12,parallelcorpu:12,param:8,param_init:[4,15],param_init_glorot:[4,15],paramet:[1,4,8,10,11,12,13,15,16],parameter:10,parenthes:0,parmar:19,pars:[10,11],parse_opt:11,part:[1,2,12],partial:1,particular:[0,3,10],partit:6,pass:[2,3,8,10,11,15],past:[0,15],path:[3,6,10,11,12,13,15,16],path_src:4,path_tgt:4,patienc:8,pattern:[3,13,15,16],pdf:15,pen:12,penalti:[5,12,13,15],penaltybuild:12,peopl:6,per:[0,3,13,15,16],perceiv:[2,15],perceiverattentionbridgelay:[1,5],percentag:[13,15,16],perfom:15,perform:[2,10,15],permut:[13,15,16],permute_sent_ratio:[13,15,16],perplex:8,peter:19,pfs:6,pham:19,phrase_t:[12,16],piec:4,pip3:[6,7,18],pip:[0,6],pipelin:[13,15,16],pleas:[0,7],plu:15,point:19,pointer:[10,19],poisson:[13,15,16],poisson_lambda:[13,15,1
6],polosukhin:19,polyak_decai:10,pool:[1,10,15],port:[14,15],portal:7,pos_ffn_activation_fn:[10,15],posit:[10,15],position_encod:[10,15],position_ffn:10,positionalencod:10,positionwisefeedforward:[10,15],possibl:[3,8,10,11,12,13,15,16],postprocess:11,postprocess_opt:11,potenti:12,pouta:17,ppl:8,pproach:19,pre:[8,11,12],pre_word_vecs_dec:15,pre_word_vecs_enc:15,preced:3,precis:8,pred:16,pred_scor:12,pred_sent:12,predict:[8,12,16],prefer:0,prefix:[3,8,13,15,16],prefix_seq_len:12,preliminari:4,preload:11,preload_model:11,prepar:[5,12],prepare_wmt_data:4,preprint:19,preprocess:11,preprocess_opt:11,presenc:3,presum:12,pretrain:[10,15],prevent:[12,16],previou:[2,3,10,12],previous:2,primari:3,prime:2,print:[8,15,16],prior:4,prior_token:[13,15,16],prob:12,proba:16,probabl:[10,12,13,15,16],probil:10,problem:12,proc:[7,19],procedur:3,process:[2,8,11,13,15],processu:11,produc:[1,2,12,13,15,16],product:2,projappl:6,project:[0,1,2,6,7,10],project_2005099:6,project_462000125:6,propag:8,proper:11,properli:6,properti:[8,10],proport:[3,13,15,16],provid:[7,16],prune:5,pty:6,pull_request_chk:0,punct_threshold:[13,15,16],punctuat:[0,13,15,16],push:1,put:12,pwd:17,pyonmttok:[13,15,16],python3:[3,6],python:[0,3,6,15],pythonpath:6,pythonuserbas:6,pytorch:[0,6,10],qin:19,quantiz:16,queri:10,query_len:10,question:5,queue:[13,15],queue_siz:[4,15],quickstart:[5,7],quoc:19,quot:0,rais:[13,15],random:[5,13,15],random_ratio:[13,15,16],random_sampling_temp:[12,16],random_sampling_topk:[12,16],random_sampling_topp:[12,16],randomli:12,rang:16,rank:[12,15],ranslat:19,rare:12,rate:[5,8],rather:0,ratio:[12,13,15,16],raw:[10,12,16],rccl:6,reach:12,read:[0,3,11,17],readabl:[0,3],reader:5,readm:15,rebuild:11,rebuild_seg_packag:11,receiv:3,recent:15,recip:10,recommend:15,recommonmark:0,rectifi:2,recurr:10,redund:3,ref:0,refer:[0,2,5],regardless:3,regist:10,regular:[13,15,16],rel:[10,15],relat:[4,13,15,16],relationship:2,relev:[10,12],relu:[2,10,15],rememb:0,remov:[3,13,15,16],renorm:15,reorder:12,rep_max_
len:[13,15,16],rep_min_len:[13,15,16],rep_threshold:[13,15,16],repeat:[12,13,15,16],repetit:16,replac:[1,12,13,15,16],replace_length:[13,15,16],replace_unk:[12,16],replic:10,report:[7,8,15,16],report_align:[12,16],report_everi:[4,15],report_manag:8,report_scor:12,report_stats_from_paramet:[8,15],report_tim:[12,16],reportmgrbas:8,repres:[2,8],represent:[1,2,10,15,19],reproduc:5,requir:[0,8,10,15],research:7,reset:8,reset_optim:15,resett:15,residu:10,resourc:3,respect:[2,3],respons:8,rest:14,restrict:[13,15,16],result:[2,11,15],return_attent:12,return_hidden:10,reus:[1,10,15],reuse_copy_attn:[10,15],revers:[13,15,16],reversible_token:[13,15,16],rico:19,right:[0,2],rmsnorm:15,rnn:[8,10,15,19],rnn_dropout:10,rnn_size:[4,15],rnn_type:[10,15],rnndecoderbas:10,rnnencod:10,roblem:19,rocm5:6,rocm:6,root:[2,3],rotat:[13,15,16],rotate_ratio:[13,15,16],roundrobin:15,row:3,rsqrt:15,rst:0,run:[0,3,4,8,10,11,15,16],rush:7,sacrebleu:[4,6,7,18],sai:3,samantao:6,same:[0,3,4,10,11,15],sampl:[5,12,13,15,17],sample_with_temperatur:12,sampling_temp:12,saniti:16,save:[5,8,13,15,16,17],save_all_gpu:15,save_checkpoint_step:[4,8,15],save_config:[13,15,16],save_data:[4,13,15],save_model:[4,15],saver:8,scale:[10,12,15],schedul:[8,15],schuster:19,score:[5,10,11,13,15,16],scorer:12,scratch:6,script:[0,4,5,6],search:[0,3,5,12],second:[2,3,10,11],secur:[13,15],see:[3,10,11,12,13,15,19],seed:[4,12,13,15,16],seem:1,seemingli:15,seen:2,segment:[3,11,13,15,16],select:[10,12,15],select_index:12,self:[1,2,10,11,12,15],self_attn_typ:[10,15],send:[0,15],senellart:7,sennrich:19,sensibl:0,sent:[8,15,16],sent_numb:12,sentenc:[1,12,13,15,16,17],sentencepiec:[3,4,6,7,13,15,16,18],separ:[3,10],seper:11,seq2seq:[12,15],seq:12,seq_len:[2,10,12],seqlength:10,sequenc:[1,2,3,8,10,11,12,13,15,16,19],serial:10,serv:2,server:[5,15,17],servermodel:11,servermodelerror:11,session:6,set:[2,3,4,6,8,10,11,12,13,15,16],setup:[4,10],sever:[3,10,12],sgd:15,sh16:[10,19],shape:[0,1,10,12],shard:[8,15,16],shard_siz:[8,16],share:[1
,6,13,15,16],share_decoder_embed:[4,15],share_embed:[4,15],share_vocab:[13,15],shazeer:19,shortest:12,shot:3,should:[3,4,10,12,15],shuf:17,shuffle_input_sent:17,side:[3,8,11,13,15,16],side_a:3,side_b:3,sign:[13,15,16],silent:[4,10,13,15],similar:[2,3,10,15],simpl:[2,8,15],simpleattentionbridgelay:5,simpli:10,simulatan:10,sin:15,sinc:10,singl:[0,11,15],single_pass:15,sinusoid:10,site:6,size:[3,8,10,12,13,15,16,17],skip:[3,13,15],skip_embed:10,skip_empty_level:[4,13,15],slen:10,slm17:[10,19],slow:[13,16],slurm:[3,6],smaller:[13,15,16],smooth:[13,15,16],softmax:[1,2,10,15,16],some:[0,1,3,8,16],someth:0,sometim:0,sort:[11,17],sorted_pair:3,sourc:[0,1,3,5,6,7,8,10,11,12,13,15],sp_path:17,space:[0,2,15],spacer:[13,15,16],span:[13,15,16],spars:10,sparseadam:15,sparsemax:[10,15],sparsesoftmax:1,specif:[1,2,3,7,12,13,15,18],specifi:[2,10,13,15,16],sphinx:0,sphinx_rtd_them:0,sphinxcontrib:0,spill:0,spm_decod:4,spm_encod:[4,17],spm_train:17,sqrt:2,squar:[2,3],src:[3,4,8,10,11,12,13,15,16,17],src_embed:15,src_feat:16,src_feats_vocab:[13,15],src_file_path:12,src_ggnn_size:15,src_group:3,src_lang:[3,16],src_languag:3,src_len:[8,10],src_length:12,src_map:[10,12],src_onmttok_kwarg:[13,15,16],src_raw:12,src_seq_length:[4,13,15,16],src_seq_length_trunc:15,src_subword_alpha:[4,13,15,16],src_subword_model:[4,13,15,16],src_subword_nbest:[4,13,15,16],src_subword_typ:[13,15,16],src_subword_vocab:[13,15,16],src_vocab:[4,12,13,15],src_vocab_s:15,src_vocab_threshold:[13,15,16],src_word_vec_s:15,src_words_min_frequ:15,sru:[5,15],srun:6,stabl:2,stack:[1,10,15,16],stage:2,stand:0,standard:[10,15,16],start:[3,5,6,8,11,15,17],start_decay_step:15,stat:[8,15],stat_list:8,state:[8,10,12,15],state_dict:15,state_dim:15,statist:[8,15],stdout:8,stdrnndecod:10,step:[2,3,5,8,10,12,15,16],stepwis:10,stepwise_penalti:[12,16],still:0,stop:[13,15,16],store:15,str:[0,8,10,11,12],strategi:[5,8,15],stride:10,string:[8,10,13,15,16],structur:[1,2,5,19],structured_attent:10,style:[0,10,15],styleguid:0,subclass:[8,1
0,12],subcompon:3,subdirectori:6,subsequ:2,subset:17,substitut:3,substr:[13,15,16],subword:[3,5],suggest:15,sum:[8,10,12,15],sum_:10,sume:8,summar:19,summari:[0,12,16],superclass:0,supervis:[3,10,15],support:[0,3,10,15],suppos:17,sure:[6,12],sutskev:19,switchout:[5,19],switchout_temperatur:[13,15,16],symmetr:3,system:[12,15,19],tab:[13,15],tabl:[10,16],take:[2,3,7,10,13,15,16],taken:10,tangent:2,tanh:[2,10],tao:19,taolei87:10,tar:17,target:[3,5,8,10,11,12,13,15],target_prefix:12,task:[3,4,5,8,12],task_distribution_strategi:15,task_queue_manag:8,tatoeba:[3,5],tau:[13,15,16],technic:7,temperatur:[3,12,13,15,16],templat:3,tend:1,tensor:[0,8,10,12],tensorboard:[8,15],tensorboard_log_dir:15,tensorflow:15,term:[2,10],termin:[13,15,16],test:[0,4,6,10],testset:4,text:[8,10,12,15,16,19],tgt:[3,4,8,10,11,13,15,16],tgt_dict:10,tgt_embed:15,tgt_file_path:12,tgt_group:3,tgt_lang:[3,16],tgt_languag:3,tgt_len:[8,10],tgt_onmttok_kwarg:[13,15,16],tgt_pad_mask:10,tgt_prefix:[12,16],tgt_sent:12,tgt_seq_length:[4,13,15,16],tgt_seq_length_trunc:15,tgt_subword_alpha:[4,13,15,16],tgt_subword_model:[4,13,15,16],tgt_subword_nbest:[4,13,15,16],tgt_subword_typ:[13,15,16],tgt_subword_vocab:[13,15,16],tgt_vocab:[4,8,13,15],tgt_vocab_s:15,tgt_vocab_threshold:[13,15,16],tgt_word_vec_s:15,tgt_words_min_frequ:15,than:[0,1,12,15,17],thang:19,thant:12,thei:[2,10,12],them:[3,10],theorem:10,thi:[0,2,3,4,6,7,8,10,12,13,15,16],thin:8,thing:[0,3],thoroughli:10,thread:13,three:[2,10],threshold:[13,15,16],through:[2,3,8],thu:8,tic:0,tick:0,time:[2,3,6,8,12,13,15,16],timeout:11,timer:11,titl:7,tlen:10,to_cpu:11,to_gpu:11,todo:[6,10,17],tok:11,token:[4,8,10,11,12,13,15,16],token_drop:5,token_mask:5,tokendrop:[13,15,16],tokendrop_temperatur:[13,15,16],tokenizer_mark:11,tokenizer_opt:11,tokenmask:[13,15,16],tokenmask_temperatur:[13,15,16],too:12,tool:5,toolkit:7,top:[2,10,12,16],topk_id:12,topk_scor:12,torch:[0,6,8,10,15],torchtext:8,total:[3,8,15],trail:0,train:[3,5,6,7,8,10,19],train_extremely_large_corpu:17,
train_from:15,train_it:8,train_loss:8,train_loss_md:8,train_step:[4,8,15],trainabl:[1,8],trainer:5,training_step:8,transform:[1,2,4,5,8,19],transformer_ff:[4,15],transformer_lm:15,transformerattentionbridgelay:5,transformerdecod:10,transformerdecoderbas:10,transformerencod:10,transformerencoderlay:2,translat:[1,3,5,7,8,10,11,14,19],translate_batch:12,translation_serv:11,translationbuild:12,translationserv:11,transpos:1,travi:0,tree:10,trg:3,triang:3,trick:[5,10],trivial:10,trunc_siz:8,truncat:[8,15],truncated_decod:15,trust:17,ttention:19,turn:[10,15],tutori:[5,18],two:[2,3,10],txt:[0,16,17],type:[0,2,3,5,8,10,11,12,13,16],typic:[8,15],u_a:10,under:[3,15,16],undergo:2,undergon:2,underli:12,uniform:15,unigram:[13,15,16],union:0,unit:[2,10],unittest:0,unk:[12,16],unknown:12,unless:3,unload:11,unload_model:11,unmodifi:12,unnecessari:[0,1,3],unnorm:10,unset:3,unsqueez:1,until:[12,16],unwieldli:3,updat:[6,8,11,12,15],update_dropout:10,update_finish:12,update_learning_r:15,update_n_src_word:8,update_vocab:15,upgrad:6,upper:3,url:[6,7,19],url_root:14,usag:[5,13,14,15,16],use:[0,2,3,4,6,8,10,11,12,13,15,16,17],use_bridg:10,use_relu:10,use_tanh:10,used:[1,2,3,4,8,10,11,12,13,15,16],useful:8,user:[6,8,10,11],uses:[0,3,10,12,15],using:[0,2,3,7,10,11,12,13,15,16],uszkoreit:19,util:[2,8],v11:4,v_a:10,valid:[4,8,13,15,16],valid_batch_s:[4,15],valid_it:8,valid_loss:8,valid_loss_md:8,valid_step:[4,8,15],valu:[2,3,8,10,11,12,13,15,16],variabl:[3,6,12],variat:0,vaswani:19,vaswanispujgkp17:0,vector:[10,15],venv:6,verbos:[12,15,16],veri:[0,16],version:[10,11,12],via:[1,10,15,19],view:1,vinyal:19,virtual:6,visit:0,visual:15,vocab:[4,5,8,10,12],vocab_path:[13,15],vocab_s:[12,15,17],vocab_sample_queue_s:13,vocab_size_multipl:15,vocabulari:[3,8,10,13,15,16,17],vsp:[10,19],w_a:10,wai:[3,12],wait:3,wang:19,want:[3,16],warmup:15,warmup_step:[4,15],warn:[13,15,16],weight:[2,3,4,10,15,16],weight_decai:15,weighted_sampl:15,weightnormconv2d:10,well:[0,15],wget:17,what:[3,5,8,11],when:[0,3,7,10,12
,13,15,16,17],where:[1,2,4,6,10,12,13,15,16],wherea:[12,15],whether:[8,10,11,12,13,15,16],which:[1,3,10,12,15],whl:6,whole:[4,12],whose:16,why:2,wiki:15,wikipedia:15,window:[13,15,16],wise:2,with_align:8,within:[2,10,11],without:[0,10,15],wmt14_en_d:4,wmt:4,wmtend:4,wojciech:19,wolfgang:19,word2vec:15,word:[2,10,12,13,15,16],word_align:12,word_lut:10,word_padding_idx:10,word_ratio_threshold:[13,15,16],word_vec_s:[4,10,15],word_vocab_s:10,work:[0,3,12,15],workflow:7,world_siz:[4,15],would:[3,12,15],wpdn18:[13,15,16,19],wrap:11,wrapper:8,writabl:3,write:[3,8],writer:8,written:4,wsc:[12,19],www:15,xavier_uniform:15,xent:8,xinyi:19,xiong:19,xzvf:17,yaml:[4,13,15,16],yang:19,yann:19,yarat:19,year:7,yet:[10,12],yml:0,yoav:19,yonghui:19,yoon:7,yoshua:19,you:[0,3,4,6,10,15,16,19],your:[0,3,6,16,17],your_venv_nam:6,your_vevn_nam:6,yourself:7,yuan:19,yuntian:7,zaremba:19,zero:[3,8,10,12,13,15,16],zero_grad:8,zhang:19,zhifeng:19,zihang:19,zxs18:[10,19]},titles:["Contributors","Questions","Attention Bridge","Config-config tool","Translation","Contents","Installation","Overview","Framework","Data Loaders","Modules","Server","Translation","Build Vocab","Server","Train","Translate","Prepare 
Data","Quickstart","References"],titleterms:{"class":12,The:3,actual:3,adapt:[3,15],adapter_config:3,ae_path:3,ae_transform:3,align:15,allocate_devic:3,altern:3,architectur:10,argument:14,attent:[2,10,15],autoencod:3,bank:1,bart:[13,15,16],beam:16,behind:1,between:1,bridg:[2,15],build:[4,13],challeng:17,citat:7,cluster_languag:3,command:3,common:[13,15,16],complete_language_pair:3,config:3,config_al:3,config_config:3,configur:[13,15,16],content:5,contributor:0,conv2conv:10,copi:10,core:[10,11],corpora:3,corpora_schedul:3,data:[4,9,13,15,16,17,18],dataset:9,dec_sharing_group:3,decod:[1,10,12,15,16],differ:1,direct:17,distanc:3,distance_matrix:3,docstr:0,download:[4,17],dynam:15,effici:16,embed:15,enc_sharing_group:3,encod:[1,10,15],encoder_output:1,evalu:4,featur:15,feedforwardattentionbridgelay:2,filter:[13,15,16],fix:1,framework:8,gener:[1,15],get:17,group:3,guidelin:0,inferfeat:[13,15,16],initi:15,input:3,instal:[6,7,18],intermediate_output:1,intuit:1,kei:3,languag:[3,16],length:1,level:3,linattentionbridgelay:2,line:3,loader:9,log:[15,16],loss:8,lumi:6,mahti:6,mammoth:18,manual:3,matrix:3,memori:1,model:[1,4,8,11,15,16,17],modul:10,n_gpus_per_nod:3,n_group:3,n_node:3,name:14,need:1,onmttok:[13,15,16],optim:[8,15],opu:17,other:3,overrid:3,overview:7,paramet:3,pars:17,path:17,penalti:16,perceiverattentionbridgelay:2,prepar:[4,17,18],prune:15,puhti:6,question:1,quickstart:18,random:16,rate:15,reader:9,refer:19,relev:17,remove_temporary_kei:3,reproduc:[13,15,16],run:6,sampl:16,save:1,score:12,search:16,sentencepiec:17,separ:1,server:[11,14],set:17,set_transform:3,share:3,sharing_group:3,shot:17,simpleattentionbridgelay:2,sourc:16,specifi:3,src_path:3,sru:10,stage:3,step:[4,17,18],strategi:12,structur:10,subword:[4,13,15,16],supervis:17,switchout:[13,15,16],target:16,task:15,tatoeba:17,test:17,tgt_path:3,than:3,token_drop:[13,15,16],token_mask:[13,15,16],tool:3,top:3,train:[4,15,17],trainer:8,transform:[3,10,13,15,16],transformerattentionbridgelay:2,translat:[4,12,16,
17],translation_config:3,translation_config_dir:3,trick:16,type:15,usag:3,use_introduce_at_training_step:3,use_weight:3,valid:17,variabl:17,vocab:[13,15,17],vocabulari:4,what:1,why:1,yaml:3,zero:17,zero_shot:3}}) \ No newline at end of file