diff --git a/_modules/onmt/trainer.html b/_modules/onmt/trainer.html index 53e16c0b..971bd762 100644 --- a/_modules/onmt/trainer.html +++ b/_modules/onmt/trainer.html @@ -558,12 +558,12 @@
valid_stats=valid_stats,
)
- # Run patience mechanism
- if self.earlystopper is not None:
- self.earlystopper(valid_stats, step)
- # If the patience has reached the limit, stop training
- if self.earlystopper.has_stopped():
- break
+ # # Run patience mechanism
+ # if self.earlystopper is not None:
+ # self.earlystopper(valid_stats, step)
+ # # If the patience has reached the limit, stop training
+ # if self.earlystopper.has_stopped():
+ # break
if self.model_saver is not None and (save_checkpoint_steps != 0 and step % save_checkpoint_steps == 0):
self.model_saver.save(step, moving_average=self.moving_average)
@@ -625,10 +625,6 @@ Source code for onmt.trainer
# Set model back to training mode.
valid_model.train()
- for p in self.model.parameters():
- if hasattr(p, 'has_grad'):
- p.has_grad = False
-
return stats
def _gradient_accumulation_over_lang_pairs(
@@ -643,7 +639,7 @@ Source code for onmt.trainer
seen_comm_batches.add(comm_batch)
if self.norm_method == "tokens":
num_tokens = (
- batch.labels[1:, :, 0].ne(self.train_loss_md[f'trainloss{metadata.tgt_lang}'].padding_idx).sum()
+ batch.tgt[1:, :, 0].ne(self.train_loss_md[f'trainloss{metadata.tgt_lang}'].padding_idx).sum()
)
normalization += num_tokens.item()
else:
@@ -663,9 +659,6 @@ Source code for onmt.trainer
if src_lengths is not None:
report_stats.n_src_words += src_lengths.sum().item()
- # tgt_outer corresponds to the target-side input. The expected
- # decoder output will be read directly from the batch:
- # cf. `onmt.utils.loss.CommonLossCompute._make_shard_state`
tgt_outer = batch.tgt
bptt = False
diff --git a/_modules/onmt/utils/loss.html b/_modules/onmt/utils/loss.html
index 003c46ea..10e968a6 100644
--- a/_modules/onmt/utils/loss.html
+++ b/_modules/onmt/utils/loss.html
@@ -357,19 +357,19 @@ Source code for onmt.utils.loss
batch_stats.update(stats)
return None, batch_stats
- def _stats(self, loss, scores, labels):
+ def _stats(self, loss, scores, target):
"""
Args:
loss (:obj:`FloatTensor`): the loss computed by the loss criterion.
scores (:obj:`FloatTensor`): a score for each possible output
- labels (:obj:`FloatTensor`): true targets
+ target (:obj:`FloatTensor`): true targets
Returns:
:obj:`onmt.utils.Statistics` : statistics for this batch.
"""
pred = scores.max(1)[1]
- non_padding = labels.ne(self.padding_idx)
- num_correct = pred.eq(labels).masked_select(non_padding).sum().item()
+ non_padding = target.ne(self.padding_idx)
+ num_correct = pred.eq(target).masked_select(non_padding).sum().item()
num_non_padding = non_padding.sum().item()
return onmt.utils.Statistics(loss.item(), num_non_padding, num_correct)
@@ -399,14 +399,14 @@ Source code for onmt.utils.loss
self.confidence = 1.0 - label_smoothing
- def forward(self, output, labels):
+ def forward(self, output, target):
"""
output (FloatTensor): batch_size x n_classes
- labels (LongTensor): batch_size
+ target (LongTensor): batch_size
"""
- model_prob = self.one_hot.repeat(labels.size(0), 1)
- model_prob.scatter_(1, labels.unsqueeze(1), self.confidence)
- model_prob.masked_fill_((labels == self.ignore_index).unsqueeze(1), 0)
+ model_prob = self.one_hot.repeat(target.size(0), 1)
+ model_prob.scatter_(1, target.unsqueeze(1), self.confidence)
+ model_prob.masked_fill_((target == self.ignore_index).unsqueeze(1), 0)
return F.kl_div(output, model_prob, reduction='sum')
@@ -440,14 +440,12 @@ Source code for onmt.utils.loss
)
shard_state.update({"std_attn": attns.get("std"), "coverage_attn": coverage})
- def _compute_loss(
- self, batch, output, target, labels, std_attn=None, coverage_attn=None, align_head=None, ref_align=None
- ):
+ def _compute_loss(self, batch, output, target, std_attn=None, coverage_attn=None, align_head=None, ref_align=None):
bottled_output = self._bottle(output)
scores = self.generator(bottled_output)
- gtruth = labels.view(-1)
+ gtruth = target.view(-1)
loss = self.criterion(scores, gtruth)
if self.lambda_coverage != 0.0:
@@ -507,9 +505,7 @@ Source code for onmt.utils.loss
range_end = range_[1]
shard_state = {
"output": output,
- # TODO: target here is likely unnecessary, as it now corresponds to target-side input
"target": batch.tgt[range_start:range_end, :, 0],
- "labels": batch.labels[range_start:range_end, :, 0],
}
if self.lambda_coverage != 0.0:
self._add_coverage_shard_state(shard_state, attns)
diff --git a/_sources/config_config.md.txt b/_sources/config_config.md.txt
index d64773b1..650439de 100644
--- a/_sources/config_config.md.txt
+++ b/_sources/config_config.md.txt
@@ -34,7 +34,7 @@ The meta-parameters under the `config_config` key:
Path templates for source and target corpora, respectively.
The path templates can contain the following variables that will be substituted by `config_config`:
-- Directional corpus mode
+- Directional corpus mode
- `{src_lang}`: The source language of the task
- `{tgt_lang}`: The target language of the task
- `{lang_pair}`: `{src_lang}-{tgt_lang}` for convenience
@@ -99,7 +99,7 @@ Generate translation configs for zero-shot directions.
#### `transforms` and `ae_transforms`
A list of transforms, for translation tasks and autoencoder tasks, respectively.
-Use this to apply subword segmentation, e.g. using `sentencepiece`, and `denoising` noise for autoencoder.
+Use this to apply subword segmentation, e.g. using `sentencepiece`, and `bart` noise for autoencoder.
Both of these may change the sequence length, necessitating a `filtertoolong` transform.
#### `enc_sharing_groups` and `dec_sharing_groups`
diff --git a/config_config.html b/config_config.html
index c5cd31bf..9c38542f 100644
--- a/config_config.html
+++ b/config_config.html
@@ -321,7 +321,7 @@ zero_shot
transforms
and ae_transforms
¶
A list of transforms, for translation tasks and autoencoder tasks, respectively.
-Use this to apply subword segmentation, e.g. using sentencepiece
, and denoising
noise for autoencoder.
+Use this to apply subword segmentation, e.g. using sentencepiece
, and bart
noise for autoencoder.
Both of these may change the sequence length, necessitating a filtertoolong
transform.
diff --git a/index.html b/index.html
index 2de8cc8b..d5511b43 100644
--- a/index.html
+++ b/index.html
@@ -241,14 +241,18 @@ ContentsConfiguration
Data
Vocab
+Transform/BART
+Transform/Filter
+Transform/Filter
+Transform/Filter
+Transform/Filter
+Transform/Filter
Transform/InferFeats
Transform/SwitchOut
Transform/Token_Drop
Transform/Token_Mask
Transform/Subword/Common
Transform/Subword/ONMTTOK
-Transform/Filter
-Transform/Denoising AE
Reproducibility
@@ -258,14 +262,18 @@ ContentsVocab
Pruning
Embeddings
+Transform/BART
+Transform/Filter
+Transform/Filter
+Transform/Filter
+Transform/Filter
+Transform/Filter
Transform/InferFeats
Transform/SwitchOut
Transform/Token_Drop
Transform/Token_Mask
Transform/Subword/Common
Transform/Subword/ONMTTOK
-Transform/Filter
-Transform/Denoising AE
Model-Embeddings
Model-Embedding Features
Model- Task
@@ -295,14 +303,18 @@ ContentsDecoding tricks
Logging
Efficiency
+Transform/BART
+Transform/Filter
+Transform/Filter
+Transform/Filter
+Transform/Filter
+Transform/Filter
Transform/InferFeats
Transform/SwitchOut
Transform/Token_Drop
Transform/Token_Mask
Transform/Subword/Common
Transform/Subword/ONMTTOK
-Transform/Filter
-Transform/Denoising AE
Source and Target Languages
diff --git a/options/build_vocab.html b/options/build_vocab.html
index a54d1cbc..a678e8b7 100644
--- a/options/build_vocab.html
+++ b/options/build_vocab.html
@@ -107,14 +107,18 @@
Configuration
Data
Vocab
+Transform/BART
+Transform/Filter
+Transform/Filter
+Transform/Filter
+Transform/Filter
+Transform/Filter
Transform/InferFeats
Transform/SwitchOut
Transform/Token_Drop
Transform/Token_Mask
Transform/Subword/Common
Transform/Subword/ONMTTOK
-Transform/Filter
-Transform/Denoising AE
Reproducibility
@@ -199,13 +203,27 @@ Build Vocabusage: build_vocab.py [-h] [-config CONFIG] [-save_config SAVE_CONFIG] -data
DATA [-skip_empty_level {silent,warning,error}]
- [-transforms {filterfeats,inferfeats,switchout,tokendrop,tokenmask,sentencepiece,bpe,onmt_tokenize,filtertoolong,prefix,denoising} [{filterfeats,inferfeats,switchout,tokendrop,tokenmask,sentencepiece,bpe,onmt_tokenize,filtertoolong,prefix,denoising} ...]]
+ [-transforms {bart,filtertoolong,filterwordratio,filterrepetitions,filterterminalpunct,filternonzeronumerals,filterfeats,inferfeats,switchout,tokendrop,tokenmask,sentencepiece,bpe,onmt_tokenize,prefix} [{bart,filtertoolong,filterwordratio,filterrepetitions,filterterminalpunct,filternonzeronumerals,filterfeats,inferfeats,switchout,tokendrop,tokenmask,sentencepiece,bpe,onmt_tokenize,prefix} ...]]
-save_data SAVE_DATA [-overwrite] [-n_sample N_SAMPLE]
[-dump_samples] [-num_threads NUM_THREADS]
[-vocab_sample_queue_size VOCAB_SAMPLE_QUEUE_SIZE]
-src_vocab SRC_VOCAB [-tgt_vocab TGT_VOCAB]
[-share_vocab] [-vocab_paths VOCAB_PATHS]
[-src_feats_vocab SRC_FEATS_VOCAB]
+ [--permute_sent_ratio PERMUTE_SENT_RATIO]
+ [--rotate_ratio ROTATE_RATIO]
+ [--insert_ratio INSERT_RATIO]
+ [--random_ratio RANDOM_RATIO] [--mask_ratio MASK_RATIO]
+ [--mask_length {subword,word,span-poisson}]
+ [--poisson_lambda POISSON_LAMBDA]
+ [--replace_length {-1,0,1}]
+ [--src_seq_length SRC_SEQ_LENGTH]
+ [--tgt_seq_length TGT_SEQ_LENGTH]
+ [--word_ratio_threshold WORD_RATIO_THRESHOLD]
+ [--rep_threshold REP_THRESHOLD]
+ [--rep_min_len REP_MIN_LEN] [--rep_max_len REP_MAX_LEN]
+ [--punct_threshold PUNCT_THRESHOLD]
+ [--nonzero_threshold NONZERO_THRESHOLD]
[--reversible_tokenization {joiner,spacer}]
[--prior_tokenization]
[-switchout_temperature SWITCHOUT_TEMPERATURE]
@@ -224,17 +242,7 @@ Build Vocab[-src_subword_type {none,sentencepiece,bpe}]
[-tgt_subword_type {none,sentencepiece,bpe}]
[-src_onmttok_kwargs SRC_ONMTTOK_KWARGS]
- [-tgt_onmttok_kwargs TGT_ONMTTOK_KWARGS]
- [--src_seq_length SRC_SEQ_LENGTH]
- [--tgt_seq_length TGT_SEQ_LENGTH]
- [--permute_sent_ratio PERMUTE_SENT_RATIO]
- [--rotate_ratio ROTATE_RATIO]
- [--insert_ratio INSERT_RATIO]
- [--random_ratio RANDOM_RATIO] [--mask_ratio MASK_RATIO]
- [--mask_length {subword,word,span-poisson}]
- [--poisson_lambda POISSON_LAMBDA]
- [--replace_length {-1,0,1}]
- [--denoising_objective {bart,mass}] [--seed SEED]
+ [-tgt_onmttok_kwargs TGT_ONMTTOK_KWARGS] [--seed SEED]
@@ -260,7 +268,7 @@ Data¶
Default: “warning”
-transforms, --transforms
-Possible choices: filterfeats, inferfeats, switchout, tokendrop, tokenmask, sentencepiece, bpe, onmt_tokenize, filtertoolong, prefix, denoising
+Possible choices: bart, filtertoolong, filterwordratio, filterrepetitions, filterterminalpunct, filternonzeronumerals, filterfeats, inferfeats, switchout, tokendrop, tokenmask, sentencepiece, bpe, onmt_tokenize, prefix
Default transform pipeline to apply to data. Can be specified in each corpus of data to override.
Default: []
@@ -310,6 +318,106 @@ Vocab
+
+Transform/BART¶
+
+Caution
+This transform will not take effect when building vocabulary.
+
+
+- --permute_sent_ratio, -permute_sent_ratio
+Permute this proportion of sentences (boundaries defined by [‘.’, ‘?’, ‘!’]) in all inputs.
+Default: 0.0
+
+- --rotate_ratio, -rotate_ratio
+Rotate this proportion of inputs.
+Default: 0.0
+
+- --insert_ratio, -insert_ratio
+Insert this percentage of additional random tokens.
+Default: 0.0
+
+- --random_ratio, -random_ratio
+Instead of using <mask>, use random token this often.
+Default: 0.0
+
+- --mask_ratio, -mask_ratio
+Fraction of words/subwords that will be masked.
+Default: 0.0
+
+- --mask_length, -mask_length
+Possible choices: subword, word, span-poisson
+Length of masking window to apply.
+Default: “subword”
+
+- --poisson_lambda, -poisson_lambda
+Lambda for Poisson distribution to sample span length if -mask_length set to span-poisson.
+Default: 3.0
+
+- --replace_length, -replace_length
+Possible choices: -1, 0, 1
+When masking N tokens, replace with 0, 1, or N tokens. (use -1 for N)
+Default: -1
+
+
+
+
+Transform/Filter¶
+
+- --src_seq_length, -src_seq_length
+Maximum source sequence length.
+Default: 200
+
+- --tgt_seq_length, -tgt_seq_length
+Maximum target sequence length.
+Default: 200
+
+
+
+
+Transform/Filter¶
+
+- --word_ratio_threshold, -word_ratio_threshold
+Threshold for discarding sentences based on word ratio.
+Default: 3
+
+
+
+
+Transform/Filter¶
+
+- --rep_threshold, -rep_threshold
+Number of times the substring is repeated.
+Default: 2
+
+- --rep_min_len, -rep_min_len
+Minimum length of the repeated pattern.
+Default: 3
+
+- --rep_max_len, -rep_max_len
+Maximum length of the repeated pattern.
+Default: 100
+
+
+
+
+Transform/Filter¶
+
+- --punct_threshold, -punct_threshold
+Minimum penalty score for discarding sentences based on their terminal punctuation signs
+Default: -2
+
+
+
+
+Transform/Filter¶
+
+- --nonzero_threshold, -nonzero_threshold
+Threshold for discarding sentences based on numerals between the segments with zeros removed
+Default: 0.5
+
+
+
Transform/InferFeats¶
-
-Transform/Denoising AE¶
-
-- --permute_sent_ratio, -permute_sent_ratio
-Permute this proportion of sentences (boundaries defined by [‘.’, ‘?’, ‘!’]) in all inputs.
-Default: 0.0
-
-- --rotate_ratio, -rotate_ratio
-Rotate this proportion of inputs.
-Default: 0.0
-
-- --insert_ratio, -insert_ratio
-Insert this percentage of additional random tokens.
-Default: 0.0
-
-- --random_ratio, -random_ratio
-Instead of using <mask>, use random token this often. Incompatible with MASS
-Default: 0.0
-
-- --mask_ratio, -mask_ratio
-Fraction of words/subwords that will be masked.
-Default: 0.0
-
-- --mask_length, -mask_length
-Possible choices: subword, word, span-poisson
-Length of masking window to apply.
-Default: “subword”
-
-- --poisson_lambda, -poisson_lambda
-Lambda for Poisson distribution to sample span length if -mask_length set to span-poisson.
-Default: 3.0
-
-- --replace_length, -replace_length
-Possible choices: -1, 0, 1
-When masking N tokens, replace with 0, 1, or N tokens. (use -1 for N)
-Default: -1
-
-- --denoising_objective
-Possible choices: bart, mass
-choose between BART-style or MASS-style denoising objectives
-Default: “bart”
-
-
-
Reproducibility¶
diff --git a/options/train.html b/options/train.html
index 129969dc..b47a45bc 100644
--- a/options/train.html
+++ b/options/train.html
@@ -110,14 +110,18 @@
Vocab
Pruning
Embeddings
+Transform/BART
+Transform/Filter
+Transform/Filter
+Transform/Filter
+Transform/Filter
+Transform/Filter
Transform/InferFeats
Transform/SwitchOut
Transform/Token_Drop
Transform/Token_Mask
Transform/Subword/Common
Transform/Subword/ONMTTOK
-Transform/Filter
-Transform/Denoising AE
Model-Embeddings
Model-Embedding Features
Model- Task
@@ -216,7 +220,7 @@ Train
usage: train.py [-h] [-config CONFIG] [-save_config SAVE_CONFIG] -data DATA
[-skip_empty_level {silent,warning,error}]
- [-transforms {filterfeats,inferfeats,switchout,tokendrop,tokenmask,sentencepiece,bpe,onmt_tokenize,filtertoolong,prefix,denoising} [{filterfeats,inferfeats,switchout,tokendrop,tokenmask,sentencepiece,bpe,onmt_tokenize,filtertoolong,prefix,denoising} ...]]
+ [-transforms {bart,filtertoolong,filterwordratio,filterrepetitions,filterterminalpunct,filternonzeronumerals,filterfeats,inferfeats,switchout,tokendrop,tokenmask,sentencepiece,bpe,onmt_tokenize,prefix} [{bart,filtertoolong,filterwordratio,filterrepetitions,filterterminalpunct,filternonzeronumerals,filterfeats,inferfeats,switchout,tokendrop,tokenmask,sentencepiece,bpe,onmt_tokenize,prefix} ...]]
[-save_data SAVE_DATA] [-overwrite] [-n_sample N_SAMPLE]
[-dump_transforms] -src_vocab SRC_VOCAB [-tgt_vocab TGT_VOCAB]
[-share_vocab] [-vocab_paths VOCAB_PATHS]
@@ -232,6 +236,18 @@ Train
[-src_embeddings SRC_EMBEDDINGS]
[-tgt_embeddings TGT_EMBEDDINGS]
[-embeddings_type {GloVe,word2vec}]
+ [--permute_sent_ratio PERMUTE_SENT_RATIO]
+ [--rotate_ratio ROTATE_RATIO] [--insert_ratio INSERT_RATIO]
+ [--random_ratio RANDOM_RATIO] [--mask_ratio MASK_RATIO]
+ [--mask_length {subword,word,span-poisson}]
+ [--poisson_lambda POISSON_LAMBDA] [--replace_length {-1,0,1}]
+ [--src_seq_length SRC_SEQ_LENGTH]
+ [--tgt_seq_length TGT_SEQ_LENGTH]
+ [--word_ratio_threshold WORD_RATIO_THRESHOLD]
+ [--rep_threshold REP_THRESHOLD] [--rep_min_len REP_MIN_LEN]
+ [--rep_max_len REP_MAX_LEN]
+ [--punct_threshold PUNCT_THRESHOLD]
+ [--nonzero_threshold NONZERO_THRESHOLD]
[--reversible_tokenization {joiner,spacer}]
[--prior_tokenization]
[-switchout_temperature SWITCHOUT_TEMPERATURE]
@@ -251,14 +267,6 @@ Train
[-tgt_subword_type {none,sentencepiece,bpe}]
[-src_onmttok_kwargs SRC_ONMTTOK_KWARGS]
[-tgt_onmttok_kwargs TGT_ONMTTOK_KWARGS]
- [--src_seq_length SRC_SEQ_LENGTH]
- [--tgt_seq_length TGT_SEQ_LENGTH]
- [--permute_sent_ratio PERMUTE_SENT_RATIO]
- [--rotate_ratio ROTATE_RATIO] [--insert_ratio INSERT_RATIO]
- [--random_ratio RANDOM_RATIO] [--mask_ratio MASK_RATIO]
- [--mask_length {subword,word,span-poisson}]
- [--poisson_lambda POISSON_LAMBDA] [--replace_length {-1,0,1}]
- [--denoising_objective {bart,mass}]
[--src_word_vec_size SRC_WORD_VEC_SIZE]
[--tgt_word_vec_size TGT_WORD_VEC_SIZE]
[--word_vec_size WORD_VEC_SIZE] [--share_decoder_embeddings]
@@ -377,7 +385,7 @@ Data¶
Default: “warning”
-transforms, --transforms
-Possible choices: filterfeats, inferfeats, switchout, tokendrop, tokenmask, sentencepiece, bpe, onmt_tokenize, filtertoolong, prefix, denoising
+Possible choices: bart, filtertoolong, filterwordratio, filterrepetitions, filterterminalpunct, filternonzeronumerals, filterfeats, inferfeats, switchout, tokendrop, tokenmask, sentencepiece, bpe, onmt_tokenize, prefix
Default transform pipeline to apply to data. Can be specified in each corpus of data to override.
Default: []
@@ -468,6 +476,102 @@ Embeddings
+Transform/BART¶
+
+- --permute_sent_ratio, -permute_sent_ratio
+Permute this proportion of sentences (boundaries defined by [‘.’, ‘?’, ‘!’]) in all inputs.
+Default: 0.0
+
+- --rotate_ratio, -rotate_ratio
+Rotate this proportion of inputs.
+Default: 0.0
+
+- --insert_ratio, -insert_ratio
+Insert this percentage of additional random tokens.
+Default: 0.0
+
+- --random_ratio, -random_ratio
+Instead of using <mask>, use random token this often.
+Default: 0.0
+
+- --mask_ratio, -mask_ratio
+Fraction of words/subwords that will be masked.
+Default: 0.0
+
+- --mask_length, -mask_length
+Possible choices: subword, word, span-poisson
+Length of masking window to apply.
+Default: “subword”
+
+- --poisson_lambda, -poisson_lambda
+Lambda for Poisson distribution to sample span length if -mask_length set to span-poisson.
+Default: 3.0
+
+- --replace_length, -replace_length
+Possible choices: -1, 0, 1
+When masking N tokens, replace with 0, 1, or N tokens. (use -1 for N)
+Default: -1
+
+
+
+
+Transform/Filter¶
+
+- --src_seq_length, -src_seq_length
+Maximum source sequence length.
+Default: 200
+
+- --tgt_seq_length, -tgt_seq_length
+Maximum target sequence length.
+Default: 200
+
+
+
+
+Transform/Filter¶
+
+- --word_ratio_threshold, -word_ratio_threshold
+Threshold for discarding sentences based on word ratio.
+Default: 3
+
+
+
+
+Transform/Filter¶
+
+- --rep_threshold, -rep_threshold
+Number of times the substring is repeated.
+Default: 2
+
+- --rep_min_len, -rep_min_len
+Minimum length of the repeated pattern.
+Default: 3
+
+- --rep_max_len, -rep_max_len
+Maximum length of the repeated pattern.
+Default: 100
+
+
+
+
+Transform/Filter¶
+
+- --punct_threshold, -punct_threshold
+Minimum penalty score for discarding sentences based on their terminal punctuation signs
+Default: -2
+
+
+
+
+Transform/Filter¶
+
+- --nonzero_threshold, -nonzero_threshold
+Threshold for discarding sentences based on numerals between the segments with zeros removed
+Default: 0.5
+
+
+
Transform/InferFeats¶
-
-Transform/Denoising AE¶
-
-- --permute_sent_ratio, -permute_sent_ratio
-Permute this proportion of sentences (boundaries defined by [‘.’, ‘?’, ‘!’]) in all inputs.
-Default: 0.0
-
-- --rotate_ratio, -rotate_ratio
-Rotate this proportion of inputs.
-Default: 0.0
-
-- --insert_ratio, -insert_ratio
-Insert this percentage of additional random tokens.
-Default: 0.0
-
-- --random_ratio, -random_ratio
-Instead of using <mask>, use random token this often. Incompatible with MASS
-Default: 0.0
-
-- --mask_ratio, -mask_ratio
-Fraction of words/subwords that will be masked.
-Default: 0.0
-
-- --mask_length, -mask_length
-Possible choices: subword, word, span-poisson
-Length of masking window to apply.
-Default: “subword”
-
-- --poisson_lambda, -poisson_lambda
-Lambda for Poisson distribution to sample span length if -mask_length set to span-poisson.
-Default: 3.0
-
-- --replace_length, -replace_length
-Possible choices: -1, 0, 1
-When masking N tokens, replace with 0, 1, or N tokens. (use -1 for N)
-Default: -1
-
-- --denoising_objective
-Possible choices: bart, mass
-choose between BART-style or MASS-style denoising objectives
-Default: “bart”
-
-
-
Model-Embeddings¶
diff --git a/options/translate.html b/options/translate.html
index f583e4cb..5bbccc07 100644
--- a/options/translate.html
+++ b/options/translate.html
@@ -116,14 +116,18 @@
Decoding tricks
Logging
Efficiency
+Transform/BART
+Transform/Filter
+Transform/Filter
+Transform/Filter
+Transform/Filter
+Transform/Filter
Transform/InferFeats
Transform/SwitchOut
Transform/Token_Drop
Transform/Token_Mask
Transform/Subword/Common
Transform/Subword/ONMTTOK
-Transform/Filter
-Transform/Denoising AE
Source and Target Languages
@@ -228,7 +232,21 @@ Translate[--dump_beam DUMP_BEAM] [--n_best N_BEST]
[--batch_size BATCH_SIZE] [--batch_type {sents,tokens}]
[--gpu GPU]
- [-transforms {filterfeats,inferfeats,switchout,tokendrop,tokenmask,sentencepiece,bpe,onmt_tokenize,filtertoolong,prefix,denoising} [{filterfeats,inferfeats,switchout,tokendrop,tokenmask,sentencepiece,bpe,onmt_tokenize,filtertoolong,prefix,denoising} ...]]
+ [-transforms {bart,filtertoolong,filterwordratio,filterrepetitions,filterterminalpunct,filternonzeronumerals,filterfeats,inferfeats,switchout,tokendrop,tokenmask,sentencepiece,bpe,onmt_tokenize,prefix} [{bart,filtertoolong,filterwordratio,filterrepetitions,filterterminalpunct,filternonzeronumerals,filterfeats,inferfeats,switchout,tokendrop,tokenmask,sentencepiece,bpe,onmt_tokenize,prefix} ...]]
+ [--permute_sent_ratio PERMUTE_SENT_RATIO]
+ [--rotate_ratio ROTATE_RATIO]
+ [--insert_ratio INSERT_RATIO]
+ [--random_ratio RANDOM_RATIO] [--mask_ratio MASK_RATIO]
+ [--mask_length {subword,word,span-poisson}]
+ [--poisson_lambda POISSON_LAMBDA]
+ [--replace_length {-1,0,1}]
+ [--src_seq_length SRC_SEQ_LENGTH]
+ [--tgt_seq_length TGT_SEQ_LENGTH]
+ [--word_ratio_threshold WORD_RATIO_THRESHOLD]
+ [--rep_threshold REP_THRESHOLD]
+ [--rep_min_len REP_MIN_LEN] [--rep_max_len REP_MAX_LEN]
+ [--punct_threshold PUNCT_THRESHOLD]
+ [--nonzero_threshold NONZERO_THRESHOLD]
[--reversible_tokenization {joiner,spacer}]
[--prior_tokenization]
[-switchout_temperature SWITCHOUT_TEMPERATURE]
@@ -247,18 +265,8 @@ Translate[-src_subword_type {none,sentencepiece,bpe}]
[-tgt_subword_type {none,sentencepiece,bpe}]
[-src_onmttok_kwargs SRC_ONMTTOK_KWARGS]
- [-tgt_onmttok_kwargs TGT_ONMTTOK_KWARGS]
- [--src_seq_length SRC_SEQ_LENGTH]
- [--tgt_seq_length TGT_SEQ_LENGTH]
- [--permute_sent_ratio PERMUTE_SENT_RATIO]
- [--rotate_ratio ROTATE_RATIO]
- [--insert_ratio INSERT_RATIO]
- [--random_ratio RANDOM_RATIO] [--mask_ratio MASK_RATIO]
- [--mask_length {subword,word,span-poisson}]
- [--poisson_lambda POISSON_LAMBDA]
- [--replace_length {-1,0,1}]
- [--denoising_objective {bart,mass}] --src_lang SRC_LANG
- --tgt_lang TGT_LANG --stack STACK
+ [-tgt_onmttok_kwargs TGT_ONMTTOK_KWARGS] --src_lang
+ SRC_LANG --tgt_lang TGT_LANG --stack STACK
[--output_model OUTPUT_MODEL]
@@ -499,12 +507,108 @@ Efficiency
+Transform/BART¶
+
+- --permute_sent_ratio, -permute_sent_ratio
+Permute this proportion of sentences (boundaries defined by [‘.’, ‘?’, ‘!’]) in all inputs.
+Default: 0.0
+
+- --rotate_ratio, -rotate_ratio
+Rotate this proportion of inputs.
+Default: 0.0
+
+- --insert_ratio, -insert_ratio
+Insert this percentage of additional random tokens.
+Default: 0.0
+
+- --random_ratio, -random_ratio
+Instead of using <mask>, use random token this often.
+Default: 0.0
+
+- --mask_ratio, -mask_ratio
+Fraction of words/subwords that will be masked.
+Default: 0.0
+
+- --mask_length, -mask_length
+Possible choices: subword, word, span-poisson
+Length of masking window to apply.
+Default: “subword”
+
+- --poisson_lambda, -poisson_lambda
+Lambda for Poisson distribution to sample span length if -mask_length set to span-poisson.
+Default: 3.0
+
+- --replace_length, -replace_length
+Possible choices: -1, 0, 1
+When masking N tokens, replace with 0, 1, or N tokens. (use -1 for N)
+Default: -1
+
+
+
+
+Transform/Filter¶
+
+- --src_seq_length, -src_seq_length
+Maximum source sequence length.
+Default: 200
+
+- --tgt_seq_length, -tgt_seq_length
+Maximum target sequence length.
+Default: 200
+
+
+
+
+Transform/Filter¶
+
+- --word_ratio_threshold, -word_ratio_threshold
+Threshold for discarding sentences based on word ratio.
+Default: 3
+
+
+
+
+Transform/Filter¶
+
+- --rep_threshold, -rep_threshold
+Number of times the substring is repeated.
+Default: 2
+
+- --rep_min_len, -rep_min_len
+Minimum length of the repeated pattern.
+Default: 3
+
+- --rep_max_len, -rep_max_len
+Maximum length of the repeated pattern.
+Default: 100
+
+
+
+
+Transform/Filter¶
+
+- --punct_threshold, -punct_threshold
+Minimum penalty score for discarding sentences based on their terminal punctuation signs
+Default: -2
+
+
+
+
+Transform/Filter¶
+
+- --nonzero_threshold, -nonzero_threshold
+Threshold for discarding sentences based on numerals between the segments with zeros removed
+Default: 0.5
+
+
+
Transform/InferFeats¶
-
-Transform/Denoising AE¶
-
-- --permute_sent_ratio, -permute_sent_ratio
-Permute this proportion of sentences (boundaries defined by [‘.’, ‘?’, ‘!’]) in all inputs.
-Default: 0.0
-
-- --rotate_ratio, -rotate_ratio
-Rotate this proportion of inputs.
-Default: 0.0
-
-- --insert_ratio, -insert_ratio
-Insert this percentage of additional random tokens.
-Default: 0.0
-
-- --random_ratio, -random_ratio
-Instead of using <mask>, use random token this often. Incompatible with MASS
-Default: 0.0
-
-- --mask_ratio, -mask_ratio
-Fraction of words/subwords that will be masked.
-Default: 0.0
-
-- --mask_length, -mask_length
-Possible choices: subword, word, span-poisson
-Length of masking window to apply.
-Default: “subword”
-
-- --poisson_lambda, -poisson_lambda
-Lambda for Poisson distribution to sample span length if -mask_length set to span-poisson.
-Default: 3.0
-
-- --replace_length, -replace_length
-Possible choices: -1, 0, 1
-When masking N tokens, replace with 0, 1, or N tokens. (use -1 for N)
-Default: -1
-
-- --denoising_objective
-Possible choices: bart, mass
-choose between BART-style or MASS-style denoising objectives
-Default: “bart”
-
-
-
Source and Target Languages¶
diff --git a/searchindex.js b/searchindex.js
index 7e3f7143..859f8a1a 100644
--- a/searchindex.js
+++ b/searchindex.js
@@ -1 +1 @@
-Search.setIndex({docnames:["CONTRIBUTING","FAQ","attention_bridges","config_config","examples/Translation","index","install","main","onmt","onmt.inputters","onmt.modules","onmt.translate.translation_server","onmt.translation","options/build_vocab","options/server","options/train","options/translate","prepare_data","quickstart","ref"],envversion:{"sphinx.domains.c":1,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":1,"sphinx.domains.index":1,"sphinx.domains.javascript":1,"sphinx.domains.math":2,"sphinx.domains.python":1,"sphinx.domains.rst":1,"sphinx.domains.std":1,"sphinx.ext.viewcode":1,sphinx:56},filenames:["CONTRIBUTING.md","FAQ.md","attention_bridges.md","config_config.md","examples/Translation.md","index.rst","install.md","main.md","onmt.rst","onmt.inputters.rst","onmt.modules.rst","onmt.translate.translation_server.rst","onmt.translation.rst","options/build_vocab.rst","options/server.rst","options/train.rst","options/translate.rst","prepare_data.md","quickstart.md","ref.rst"],objects:{"onmt.Trainer":{train:[8,1,1,""],validate:[8,1,1,""]},"onmt.decoders":{CNNDecoder:[10,0,1,""],DecoderBase:[10,0,1,""],InputFeedRNNDecoder:[10,0,1,""],StdRNNDecoder:[10,0,1,""],TransformerDecoder:[10,0,1,""]},"onmt.decoders.CNNDecoder":{forward:[10,1,1,""],from_opt:[10,1,1,""],init_state:[10,1,1,""]},"onmt.decoders.DecoderBase":{from_opt:[10,1,1,""]},"onmt.decoders.TransformerDecoder":{forward:[10,1,1,""]},"onmt.decoders.decoder":{RNNDecoderBase:[10,0,1,""]},"onmt.decoders.decoder.RNNDecoderBase":{forward:[10,1,1,""],from_opt:[10,1,1,""],init_state:[10,1,1,""]},"onmt.encoders":{CNNEncoder:[10,0,1,""],EncoderBase:[10,0,1,""],MeanEncoder:[10,0,1,""],RNNEncoder:[10,0,1,""],TransformerEncoder:[10,0,1,""]},"onmt.encoders.CNNEncoder":{forward:[10,1,1,""],from_opt:[10,1,1,""]},"onmt.encoders.EncoderBase":{forward:[10,1,1,""]},"onmt.encoders.MeanEncoder":{forward:[10,1,1,""],from_opt:[10,1,1,""]},"onmt.encoders.RNNEncoder":{forward:[10,1,1,""],from_opt:[10,1,1,""]},"onmt.encoders.TransformerEncoder":{forward:[10,1,1,""],from_opt:[10,1,1,""]},"onmt.models":{NMTModel:[8,0,1,""]},"onmt.models.NMTModel":{count_parameters:[8,1,1,""],forward:[8,1,1,""]},"onmt.models.sru":{SRU:[10,0,1,""]},"onmt.models.sru.SRU":{forward:[10,1,1,""]},"onmt.modules":{AverageAttention:[10,0,1,""],ConvMultiStepAttention:[10,0,1,""],CopyGenerator:[10,0,1,""],Embeddings:[10,0,1,""],GlobalAttention:[10,0,1,""],MultiHeadedAttention:[10,0,1,""],PositionalEncoding:[10,0,1,""],WeightNormConv2d:[10,0,1,""]},"onmt.modules.AverageAttention":{cumulative_average:[10,1,1,""],cumulative_average_mask:[10,1,1,""],forward:[10,1,1,""]},"onmt.modules.ConvMultiStepAttention":{apply_mask:[10,1,1,""],forward:[10,1,1,""]},"onmt.modules.CopyGenerator":{forward:[10,1,1,""]},"onmt.modules.Embeddings":{emb_luts:[10,1,1,""],forward:[10,1,1,""],load_pretrained_vectors:[10,1,1,""],word_lut:[10,1,1,""]},"onmt.modules.GlobalAttention":{forward:[10,1,1,""],score:[10,1,1,""]},"onmt.modules.MultiHeadedAttention":{forward:[10,1,1,""],training:[10,2,1,""],update_dropout:[10,1,1,""]},"onmt.modules.PositionalEncoding":{forward:[10,1,1,""]},"onmt.modules.WeightNormConv2d":{forward:[10,1,1,""]},"onmt.modules.position_ffn":{PositionwiseFeedForward:[10,0,1,""]},"onmt.modules.position_ffn.PositionwiseFeedForward":{forward:[10,1,1,""]},"onmt.modules.structured_attention":{MatrixTree:[10,0,1,""]},"onmt.modules.structured_attention.MatrixTree":{forward:[10,1,1,""]},"onmt.translate":{BeamSearch:[12,0,1,""],DecodeStrategy:[12,0,1,""],GNMTGlobalScorer:[12,0,1,""],GreedySearch:[12,0,1,""],Translation:[12,0,1,""],TranslationBuilder:[12,0,1,""],Translator:[12,0,1,""]},"onmt.translate.BeamSearch":{initialize:[12,1,1,""]},"onmt.translate.DecodeStrategy":{advance:[12,1,1,""],block_ngram_repeats:[12,1,1,""],initialize:[12,1,1,""],maybe_update_forbidden_tokens:[12,1,1,""],maybe_update_target_prefix:[12,1,1,""],target_prefixing:[12,1,1,""],update_finished:[12,1,1,""]},"onmt.translate.GreedySearch":{advance:[12,1,1,""],initialize:[12,1,1,""],update_finished:[12,1,1,""]},"onmt.translate.Translation":{log:[12,1,1,""]},"onmt.translate.Translator":{translate_batch:[12,1,1,""]},"onmt.translate.greedy_search":{sample_with_temperature:[12,3,1,""]},"onmt.translate.penalties":{PenaltyBuilder:[12,0,1,""]},"onmt.translate.penalties.PenaltyBuilder":{coverage_none:[12,1,1,""],coverage_summary:[12,1,1,""],coverage_wu:[12,1,1,""],length_average:[12,1,1,""],length_none:[12,1,1,""],length_wu:[12,1,1,""]},"onmt.translate.translation_server":{ServerModel:[11,0,1,""],ServerModelError:[11,4,1,""],Timer:[11,0,1,""],TranslationServer:[11,0,1,""]},"onmt.translate.translation_server.ServerModel":{build_tokenizer:[11,1,1,""],detokenize:[11,1,1,""],do_timeout:[11,1,1,""],maybe_convert_align:[11,1,1,""],maybe_detokenize:[11,1,1,""],maybe_detokenize_with_align:[11,1,1,""],maybe_postprocess:[11,1,1,""],maybe_preprocess:[11,1,1,""],maybe_tokenize:[11,1,1,""],parse_opt:[11,1,1,""],postprocess:[11,1,1,""],preprocess:[11,1,1,""],rebuild_seg_packages:[11,1,1,""],to_gpu:[11,1,1,""],tokenize:[11,1,1,""],tokenizer_marker:[11,1,1,""]},"onmt.translate.translation_server.TranslationServer":{clone_model:[11,1,1,""],list_models:[11,1,1,""],load_model:[11,1,1,""],preload_model:[11,1,1,""],run:[11,1,1,""],start:[11,1,1,""],unload_model:[11,1,1,""]},"onmt.utils":{Optimizer:[8,0,1,""],Statistics:[8,0,1,""]},"onmt.utils.Optimizer":{amp:[8,1,1,""],backward:[8,1,1,""],from_opt:[8,1,1,""],learning_rate:[8,1,1,""],step:[8,1,1,""],training_step:[8,1,1,""],zero_grad:[8,1,1,""]},"onmt.utils.Statistics":{accuracy:[8,1,1,""],all_gather_stats:[8,1,1,""],all_gather_stats_list:[8,1,1,""],elapsed_time:[8,1,1,""],log_tensorboard:[8,1,1,""],output:[8,1,1,""],ppl:[8,1,1,""],update:[8,1,1,""],xent:[8,1,1,""]},"onmt.utils.loss":{LossComputeBase:[8,0,1,""]},onmt:{Trainer:[8,0,1,""]}},objnames:{"0":["py","class","Python class"],"1":["py","method","Python method"],"2":["py","attribute","Python attribute"],"3":["py","function","Python function"],"4":["py","exception","Python exception"]},objtypes:{"0":"py:class","1":"py:method","2":"py:attribute","3":"py:function","4":"py:exception"},terms:{"0473v3":19,"25g":6,"abstract":10,"boolean":[8,12],"break":17,"class":[0,5,8,10,11],"default":[11,13,14,15,16,17],"export":6,"final":[2,4,10,12],"float":[3,10,12],"function":[0,1,2,3,8,10,11,12,15],"import":0,"int":[8,10,11,12],"long":0,"new":[0,2,4],"public":6,"return":[0,8,10,11,12],"static":[8,15],"true":[3,4,8,10,12,15,16,17],"try":[0,6],"while":[3,10],And:[0,10],But:1,EOS:12,For:[0,1,3,12,15,18],IDs:12,IFS:17,LPs:3,Not:0,One:3,The:[1,2,4,8,10,11,12,15,16],Then:[0,4,10],There:[2,3],These:[2,3,10,12],Use:[3,15,16],Used:12,Will:[1,3],__init__:11,_compute_loss:8,a_j:10,aan:[10,15],aan_useffn:[10,15],ab_fixed_length:15,ab_lay:15,ab_layer_norm:15,abbrevi:0,abigail:19,abil:10,about:0,abov:[0,12],abs:[2,15,16,19],acceler:[10,19],accept:[0,3,12],access:[2,3,6],accord:3,account:[3,6],accross:8,accum:8,accum_count:[4,8,15],accum_step:[4,8,15],accumul:[8,15],accuraci:[8,12],achiev:3,achin:19,acl:[7,19],aclweb:15,act:1,action:[10,12,15],activ:[1,2,6,10,15],activation_fn:10,activationfunct:10,actual:12,adadelta:15,adafactor:15,adagrad:15,adagrad_accumulator_init:15,adam:[4,15],adam_beta1:15,adam_beta2:[4,15],adamoptim:15,adamw:15,adapt:[5,7],adapter_nam:3,add:[0,4,10],added:3,adding:0,addit:[0,10,13,15,16],addition:10,address:12,adjust:3,adopt:15,adpot:10,advanc:[12,15],advic:0,after:[0,2,10,12,15],afterward:10,again:0,agnost:1,aidan:19,alexand:7,algorithm:19,align:[5,8,10,11,12,16,19],align_debug:16,alignment_head:[10,15],alignment_lay:[10,15],aliv:12,alive_attn:12,alive_seq:12,all:[0,3,8,10,12,13,15,16,19],all_gather_stat:8,all_gather_stats_list:8,all_preprocess:11,allennlp:0,alloc:3,allow:[0,2,3,15],almost:[12,15],alon:0,along:2,alpha:[2,12,16],alphabet:3,alreadi:[13,15,16],also:[0,3,6,8,10,15],altern:10,although:10,alwai:[0,3],amp:[8,15],ani:[0,1,3,12,13,15],annurev:19,anoth:[0,2,8],antholog:15,apex:15,apex_opt_level:15,api:[0,5],api_doc:15,appear:3,append:[6,17],appli:[2,3,10,12,13,15,16],applic:16,apply_mask:10,approach:10,appropri:12,approxim:15,architectur:[2,5],arg:[0,10,11],argmax:16,argpars:11,argument:[0,5],around:10,artzi:19,arxiv:[0,2,15,16,19],ashish:19,assig:3,assign:[3,16],assing:3,assum:[10,12],att_typ:2,attend:2,attent:[0,1,5,8,12,16,19],attention_bridg:8,attention_dropout:[4,10,15],attentionbridgenorm:2,attet:10,attn:[10,12,16],attn_debug:[12,16],attn_func:10,attn_typ:10,attr:11,attribut:12,augment:19,auli:19,author:[7,10],autodoc:0,autogener:15,avail:[8,10,11,15,16],available_model:14,averag:[10,15,16,19],average_decai:[4,8,15],average_everi:[8,15],average_output:10,averageattent:10,avg:[10,16],avg_raw_prob:16,avoid:[0,3],aws:6,axi:12,back:8,backend:15,backward:8,bahdanau:[10,15,19],ban_unk_token:[12,16],bank:[5,10],barri:19,bart:[13,15,16],base:[0,2,3,4,6,7,8,10,11,12,13,15,16,19],base_target_emb:10,baselin:15,basemodel:8,basenam:[4,17],bash:6,batch:[2,4,8,10,12,15,16],batch_siz:[4,10,12,15,16],batch_size_multipl:[4,15],batch_typ:[4,15,16],bcb14:[10,19],beam:[5,12],beam_search:12,beam_siz:[4,12,16],beamsearch:12,beamsearchbas:12,becaus:[3,16],becom:3,been:[10,12,13,15,16],befor:[0,4,11,12,15,16],begin:[8,12],behind:5,below:0,ben:3,benefit:1,bengali:3,bengio:19,best:[12,16],beta1:15,beta2:15,beta:[12,16],better:[0,13,15,16],between:[2,5,10,13,15,16,19],beyond:8,biao:19,bib:0,bibtex:0,bibtext:0,bidir_edg:15,bidirect:[10,15],bidirectional_encod:10,bin:[6,15],binari:[4,10],bit:16,blank:0,bleu:4,blob:10,block:[12,16],block_ngram_repeat:[12,16],booktitl:7,bool:[8,10,11,12],bos:12,both:[3,12,15],both_embed:15,boundari:[13,15,16],bpe:[13,15,16],bptt:[8,15],bridg:[1,5,19],bridge_extra_nod:15,bring:1,brnn:15,browser:0,bucket_s:[4,15],buffer:8,build:[0,5,8,10,11,12,16,17],build_token:11,build_vocab:13,built:8,bytetensor:12,cach:10,calcul:[2,8,10,12],call:[10,12],callabl:12,callback:8,can:[2,3,4,6,8,11,12,13,15,16],cancel:11,candid:[3,13,15,16],cao:19,capit:0,captur:2,care:10,cat:17,categor:12,categori:12,challeng:5,chang:[0,3,8,15],channel:2,charact:[0,16],character_coverag:17,check:[0,7,18],checklist:0,checkpoint:[4,8,15],chen:19,chmod:[4,6],cho:19,choic:[0,10,13,15,16],choos:[0,13,15,16],chosen:12,christoph:19,citat:[0,5],cite:[0,7],classmethod:[8,10],clear:0,clone:[7,11,18],clone_model:11,close:0,cls:8,cluster:[3,7,18],clutter:0,cnn:[10,15,19],cnn_kernel_width:[10,15],cnndecod:10,cnnencod:10,code:[0,3,6,16],code_dir:6,codebas:6,column:3,com:[7,10,18],combin:[10,16],comma:3,command:[4,5],comment:0,commentari:4,common:[0,5],commoncrawl:4,commun:0,complet:12,complex:[3,12],compon:[2,3],composit:15,comput:[2,3,4,8,10,15,16],concat:[10,15],concaten:15,condit:[12,15,16],conf:[14,16],config:[4,5,11,13,14,15,16],config_fil:11,configur:[3,4,5],connect:2,consid:[3,10,17],consider:15,consist:[0,10],constant:3,construct:10,constructor:[0,10],consum:15,contain:[1,3,10,11,12],content:[0,16],context:[2,10,15],context_attn:10,context_g:[10,15],contextg:10,continu:0,contribut:[0,2,10],contributor:5,control:[3,8],conv2conv:5,conv2d:10,conv:[10,15],conveni:3,convent:0,convers:12,convert:11,convex:10,convmultistepattent:10,convolut:[10,19],copi:[0,3,5,6,15,16],copy_attn:[10,12,15],copy_attn_forc:15,copy_attn_typ:[10,15],copy_loss_by_seqlength:15,copygener:10,core:[2,5,8],corpora:4,corpu:[3,4,13,15,17],corr:[0,19],correct:3,correspand:11,correspond:[2,16],could:12,count:[3,8,12,13,15,16],count_paramet:8,cov:12,cov_pen:12,coverag:[10,12,15,16],coverage_attn:[10,15],coverage_non:12,coverage_penalti:[12,16],coverage_summari:12,coverage_wu:12,cpu:[11,15,16],crai:6,crayon:15,creat:[3,6,8],creation:3,criteria:15,criterion:8,critic:[15,16],cross:[8,10,15],cross_queri:1,csc:17,csv:3,ct2_model:11,ct2_translate_batch_arg:11,ct2_translator_arg:11,ctrl:0,cuda_funct:10,cudnn:10,cumbersom:3,cumul:[10,12,16],cumulative_averag:10,cumulative_average_mask:10,cur_dir:17,cur_len:12,current:[3,8,10,12,15],curricula:3,curriculum:3,custom:[11,15],custom_opt:11,cut:[0,17],cutoff:12,d_ff:10,d_model:10,dai:19,data:[2,3,5,8,12,19],data_path:17,data_typ:[8,12,15,16],dataset:[4,5,13,15,16,17],datastructur:11,dauphin:19,david:19,dblp:0,ddress:19,deal:3,debug:[14,15,16],dec:3,dec_lay:[4,15],dec_out:10,dec_rnn_siz:15,decai:15,decay_method:[4,15],decay_step:15,decod:[2,3,5,8],decode_strategi:12,decoder_typ:[4,15],decoderbas:[8,10],decodestrategi:12,def:0,defin:[3,4,10,13,15,16],definit:10,delai:3,delet:[13,15,16],delimit:16,deng:7,deni:19,denois:[3,5],denoising_object:[13,15,16],denot:2,depend:[0,1,3,6,8,10,11],deprec:[15,16],describ:[2,10,11,15],descript:0,desir:[3,4],detail:[7,13,15],determin:3,detoken:[4,11],dev:[6,17],develop:0,devic:[3,10,12,16],device_context:8,deyi:19,diagon:3,dict:[3,8,10,11,12,13,15,16],dict_kei:15,dictionari:[8,10,12,15],differ:[0,2,3,5,10,11,16],dilat:10,dim:10,dimens:[2,10,12,15],dimension:[2,10],dir:17,direct:[0,3,12],directli:[0,10,16],directori:[3,6,11,15],disabl:15,discard:15,discourag:15,disk:15,displai:8,dist:8,distanc:[10,15],distribtut:10,distribut:[3,8,10,12,13,15,16],divers:[2,13,15,16],divid:[2,3,15,16],divis:10,do_timeout:11,doc:0,document:[0,7],doe:[1,3,16],doesn:17,doi:[7,19],doing:[3,16],don:0,done:[4,12,17],dot:[2,10,15],dotprod:15,down:[12,13],download:6,dropout:[4,8,10,13,15,16],dropout_step:[4,8,15],due:15,dump:[13,15,16],dump_beam:[12,16],dump_sampl:13,dump_transform:15,dure:[11,15,16],dynam:[5,10,16],dzmitri:19,each:[2,3,10,12,13,15,16],earli:15,earlier:[2,13,15,16],early_stop:15,early_stopping_criteria:15,earlystopp:8,eas:3,easi:0,easili:3,echo:[4,17],edg:15,effect:[2,10,11,13],effici:[5,8,19],either:[12,15],elaps:8,elapsed_tim:8,element:[2,3],els:[1,17],emb:10,emb_fil:10,emb_lut:10,embed:[2,5,10,13],embedding_s:10,embeddings_typ:15,emerg:2,emnlp:19,emploi:[2,8],empti:[4,10,12,13,15],enabl:[10,16],enc:3,enc_hidden:10,enc_lay:[4,15],enc_rnn_siz:15,encapsul:2,encod:[2,3,5,8,12],encoder_fin:10,encoder_out_combin:10,encoder_out_top:10,encoder_output:5,encoder_typ:[4,15],encoderbas:[8,10],encordec:[13,15],encount:[13,15],encout:[13,15],end:12,eng:3,english:[1,3,4,17],enhanc:[1,2],ensembl:16,ensur:2,entir:17,entri:0,entropi:8,env_dir:6,environ:6,eos:12,epoch:15,eps:10,epsilon:15,equal:[12,15],equat:10,equival:15,error:[0,13,15,16],especi:3,essenti:12,establish:2,eural:19,europarl:4,evalu:8,even:3,event:12,everi:[8,10,15,16],exactli:0,exampl:[0,1,3,4,13,15,18],exce:15,except:[0,11,13,15,16],exclusion_token:12,execut:[4,13,15],exist:[13,15,16,17],exp:15,exp_host:15,expand:1,expect:[3,12],experi:[13,15,16],experiment:15,exponenti:15,extend:[0,10],extern:0,extra:[6,10,15],extra_word:10,extract:17,facilit:2,fail:12,fairseq:0,fals:[8,10,11,12,13,14,15,16],familiar:7,fast:[10,19],faster:[10,15],feat_0:16,feat_1:16,feat_dim_expon:10,feat_merg:[10,15],feat_merge_s:15,feat_padding_idx:10,feat_vec_expon:[10,15],feat_vec_s:[10,15],feat_vocab_s:10,feats0:16,feats1:16,featur:[2,5,8,10,13,16,19],fed:2,feed:[3,10,15],feedforward:[2,15],feedforwardattentionbridgelay:5,feel:0,few:0,ffectiv:19,ffn:[10,15],figur:10,file:[0,3,11,13,15,16,17],filenam:15,filter:[4,5,17],filterfeat:[13,15,16],filtertoolong:[3,4,13,15,16],find:0,firefox:0,first:[0,3,10,12,15],five:2,fix:[0,5,12,15],flag:8,flake8:0,floattensor:[8,10,12],flow:2,fly:4,fnn:10,focu:[0,2],folder:0,follow:[0,2,3,4,16,18],foo:0,forbidden:12,forbidden_token:12,forc:[12,16],format:[0,11,13,15,16,17],former:10,forward:[3,8,10,15],fotran:3,found:17,foundat:2,fp16:[15,16],fp32:[4,8,15,16],frac:2,fraction:[13,15,16],framework:[5,15],free:[0,11],freez:[10,15],freeze_word_vec:10,freeze_word_vecs_dec:15,freeze_word_vecs_enc:15,french:1,frequenc:[13,15,16],from:[2,3,8,10,12,15,16,17],from_opt:[8,10],frozenset:12,full:[0,3,10,11,13,15,16,17],full_context_align:[10,15],fulli:[3,10],further:[13,15],fusedadam:15,gag:[10,19],gao:19,gap:19,garg:15,gate:15,gather:8,gating_output:10,gehr:19,gelu:15,gener:[0,2,3,4,5,8,10,12,16,19],generator_funct:15,german:4,get:[5,6,19],ggnn:15,git:[7,18],github:[7,10,15,18],give:[3,15,16],given:[2,3,11],global:10,global_attent:15,global_attention_funct:15,global_scor:12,globalattent:10,glove:15,gnmt:12,gnmtglobalscor:12,going:12,gold:12,gold_scor:12,gold_sent:12,gomez:19,gone:15,good:[0,15],googl:[0,12,16,19],gpu:[3,4,6,11,12,15,16],gpu_backend:15,gpu_rank:[4,15],gpu_verbose_level:[8,15],gpuid:15,grad:8,gradient:[8,15],graham:19,gram:12,grangier:19,graph:15,gre:6,greater:12,greedy_search:12,greedysearch:12,group:[10,15,16],groupwis:3,grow:12,gru:[10,15],gtx1080:16,guid:[7,10,18],guidelin:5,guillaum:7,h_j:10,h_s:10,h_t:10,had:16,haddow:19,hand:3,handl:[0,8],happen:12,has:[2,3,12,13,15,16],has_cov_pen:12,has_len_pen:12,has_tgt:12,have:[0,3,4,10,12,15,16],head:[2,4,10,15],head_count:10,help:[0,2,16],helsinki:[7,18],here:[2,12,17],hidden:[1,8,10,15],hidden_ab_s:15,hidden_dim:2,hidden_s:10,hieu:19,high:3,higher:[12,15,16],highest:16,hold:12,hook:10,hop:[1,2],host:6,how:[0,10],howev:[0,8,10],html:[0,15],http:[2,6,7,10,15,16,17,18,19],huge:15,human:[3,19],hyp_:4,hyperbol:2,hyphen:3,hypothesi:4,iclr:19,identifi:16,idl:3,ids:3,ignor:[4,10,13,15,16],ignore_when_block:[12,16],illia:19,ilya:19,imag:8,impact:15,implement:[2,8,10,15],impli:[2,10],improv:[10,12,15,19],in_channel:10,in_config:3,includ:[0,3,10,13,15,16],incompat:[13,15,16],incorpor:15,increas:3,independ:1,index:[6,10,15],indic:[2,8,10,12,13,15,16],individu:3,inf:12,infer:12,inferfeat:5,info:[15,16],inform:[1,2,3,15,16],ingredi:12,init:[10,15],init_scal:10,init_st:[8,10],initi:[5,8,10,11,12],initial_accumulator_valu:15,inner:10,inp:12,inp_seq_len:12,inproceed:7,input:[2,5,8,10,11,12,13,15,16,17,19],input_fe:15,input_feed:10,input_format:4,input_from_dec:10,input_len:10,input_s:10,input_sentence_s:17,inputfeedrnndecod:10,inputs_len:10,inputt:12,insert:[13,15,16],insert_ratio:[13,15,16],instal:[0,4,5],instanc:[8,10,12],instanti:8,instead:[0,3,6,10,13,15,16],instruct:15,int8:16,integ:12,integr:0,interact:6,interfac:[8,10],intermedi:[1,2],intermediate_output:[2,5],intern:11,interv:15,introduc:[2,3],introduct:3,intuit:5,invalid:[13,15,16],involv:2,is_finish:12,is_on_top:10,isn:12,item:10,iter:8,its:[0,3,10],itself:3,jakob:19,jean:7,jinsong:19,job:6,joiner:[13,15,16],jointli:[10,19],jona:19,jone:19,journal:0,json:14,kaiser:19,keep:[11,12,15],keep_checkpoint:[4,15],keep_stat:15,keep_topk:12,keep_topp:12,kei:10,kera:15,kernel_s:[10,15],key_len:10,kim:7,klau:19,klein:7,krikun:19,kwarg:10,kyunghyun:19,label:15,label_smooth:[4,15],lambda:[13,15,16],lambda_align:15,lambda_coverag:15,lang:3,lang_a:3,lang_b:3,lang_pair:[3,16],languag:[1,2,5,13,15,17],language_pair:17,lapata:19,last:[3,10,15,16],latent_arrai:1,lattent_arrai:1,latter:10,layer:[1,2,10,15,16],layer_cach:10,layer_norm_modul:10,layer_type_to_cl:2,layernorm:15,layerstack:3,lead:12,learn:[2,8,10,15,19],learning_r:[4,8,15],learning_rate_decai:15,learning_rate_decay_fn:8,least:0,leav:[3,15],left:2,lei:19,len:[8,10,12],length:[3,5,8,10,12,13,15,16,17],length_averag:12,length_non:12,length_pen:12,length_penalti:[12,16],length_wu:12,less:3,let:[3,4],level:[13,15],lib:6,librari:15,like:[0,12,16],limit:16,lin:[1,2,15],linattentionbridgelay:5,line:[0,4,13,15,16],linear:[1,2],linear_warmup:15,linguist:[10,19],link:[0,2,6],list:[0,3,8,10,11,12,13,15,16],list_model:11,liter:1,literatur:15,liu:19,ll17:[10,19],llion:19,load:[6,8,10,11,15],load_model:11,load_pretrained_vector:10,loader:5,local:[0,3],localhost:15,log:[5,8,12],log_fil:[15,16],log_file_level:[15,16],log_prob:12,log_tensorboard:8,logger:12,login:6,logit:[12,16],logsumexp:12,longer:[1,16],longest:12,longtensor:[8,10,12],look:[0,7,10,16],loop:8,loss:[5,15],loss_scal:15,losscomputebas:8,love:0,lower:[1,3,15],lpm15:[10,19],lsl:[12,19],lstm:[10,15],lua:11,lukasz:19,luong:[10,15,19],lustrep1:6,lustrep2:6,lza17:[10,19],macherei:19,machin:[7,10,12,19],made:3,magic:12,mai:[3,8,11,12,13,15],main:[0,7,8,13,15,16],maintain:12,make:[0,6,8,13,15,16],make_shard_st:8,mammoth:[5,6,7],man:19,manag:8,mani:[8,12,15],manipul:8,manual:[11,12],many2on:1,map:[3,8,10],margin:10,marian:15,mark:15,marker:11,mask:[10,13,15,16],mask_length:[13,15,16],mask_or_step:10,mask_ratio:[13,15,16],mass:[13,15,16],massiv:[3,7],master:[10,15],master_ip:15,master_port:15,match:11,mathbb:2,mathbf:2,mathemat:2,matric:2,matrix:[2,10,15],matrixtre:10,max:[8,10,12,17],max_generator_batch:[4,15],max_grad_norm:[4,8,15],max_len:10,max_length:[12,16],max_relative_posit:[10,15],max_sent_length:16,max_sentence_length:17,max_siz:8,maxim:19,maximum:[13,15,16],maybe_convert_align:11,maybe_detoken:11,maybe_detokenize_with_align:11,maybe_postprocess:11,maybe_preprocess:11,maybe_token:11,maybe_update_forbidden_token:12,maybe_update_target_prefix:12,mean:[3,10,11,15,16],meanencod:10,mechan:[2,3,10],mem:6,memori:[5,10,11,15],memory_bank:[10,12],memory_length:10,merg:[10,15],meta:3,metadata:8,method:[8,10,15],metric:16,mi250:6,michael:19,mike:19,min_length:[12,16],minh:19,minimum:16,mirella:19,mirror:15,mix:8,mkdir:[6,17],mlp:[10,15],mode:[3,13,15,16],model:[2,3,5,10,12,13],model_dim:10,model_dtyp:[4,8,15],model_id:11,model_kwarg:11,model_prefix:17,model_root:11,model_sav:8,model_step:4,model_task:15,model_typ:15,modelsaverbas:8,modif:8,modifi:[0,12],modul:[0,1,2,5,6,8,15,16],modular:7,mohammad:19,monolingu:3,more:[0,1,3,12,13,15,16],most:[12,16],mostli:8,move:[11,15],moving_averag:[8,15],much:15,multi:[0,1,2,10],multiheadedattent:[2,10],multilingu:[3,7],multipl:[0,2,3,8,10,15,16],multipli:2,multplic:0,must:[3,10,11,15],mymodul:6,n_batch:8,n_best:[11,12,16],n_bucket:15,n_correct:8,n_edge_typ:15,n_node:15,n_sampl:[4,13,15],n_seg:11,n_src_word:8,n_step:15,n_word:8,name:[0,3,5,12,13,15,17],namespac:11,napoleon:0,nccl:15,necessari:[0,4,6,8,12,15,16],necessit:3,need:[0,3,4,8,10,15,19],neg:[11,15],network:[10,19],neubig:19,neural:[7,10,12,19],neuro:19,never:12,news_commentari:4,next:[3,8,12,16],nfeat:10,ngram:[12,16],nightmar:3,niki:19,nlp:[7,18],nmt:[8,12,15,16],nmtmodel:[8,10],noam:[4,15,19],noamwd:15,node:[3,6,8,15],node_rank:15,nois:3,non:[10,12,15],none:[1,8,10,11,12,13,15,16],nonetyp:[10,12],norm:[10,15],norm_method:8,normal:[2,4,8,15],normalz:8,norouzi:19,note:[0,3,4,6,12],noth:[0,8],notset:[15,16],ntask:6,nucleu:16,num_lay:10,num_step:8,num_thread:13,number:[2,3,8,10,12,13,15,16],nvidia:15,obj:[0,8],object:[0,8,11,12,13,15,16,17],oder:3,off:15,ofi:6,often:[13,15,16],on_timemout:11,on_timeout:11,onc:[12,15],one2mani:1,one:[0,2,3,8,10,13,15,16],onli:[3,8,12,13,15,16],onmt:[0,8,10,11,12,15,17],onmt_build_vocab:4,onmt_token:[13,15,16],onmt_transl:4,onmttok:5,open:7,opennmt:[0,3,6,7,8,14],oper:[2,10],operatornam:2,opt:[4,8,10,11,15,16],opt_level:15,optim:[4,5],option:[0,3,4,6,8,10,11,12,13,15,16,17],opu:5,opus100:[17,18],ord:19,order:[3,15],org:[2,6,7,15,16,19],origin:[2,15,17],oriol:19,other:[2,6,8,12,13,15,16,17,19],other_lang:17,otherwis:[3,10,15,16],our:[6,12],our_stat:8,out:[2,3,7,8,18],out_channel:10,out_config:3,out_fil:12,outcom:2,output:[1,2,3,4,8,10,11,12,13,15,16],output_model:16,output_s:10,over:[0,1,3,4,8,10,12,15,16,17],overal:[1,2],overrid:[10,12,13,15],overridden:10,overview:5,overwrit:[6,13,15],own:[8,16],ownership:8,p17:7,p18:15,packag:[6,11],pad:[8,10,12],pad_idx:10,pair:[3,8,11,15,16,17],paper:[0,2,10,15],parallel:[10,12,13,15],parallel_path:12,parallelcorpu:12,param:8,param_init:[4,15],param_init_glorot:[4,15],paramet:[1,4,8,10,11,12,13,15,16],parameter:10,parenthes:0,parmar:19,pars:[10,11],parse_opt:11,part:[1,2,12],partial:1,particular:[0,3,10],partit:6,pass:[2,3,8,10,11,15],past:[0,15],path:[3,6,10,11,12,13,15,16],path_src:4,path_tgt:4,patienc:8,pattern:3,pdf:15,pen:12,penalti:[5,12,15],penaltybuild:12,peopl:6,per:[0,3,13,15,16],perceiv:[2,15],perceiverattentionbridgelay:[1,5],percentag:[13,15,16],perfom:15,perform:[2,10,15],permut:[13,15,16],permute_sent_ratio:[13,15,16],perplex:8,peter:19,pfs:6,pham:19,phrase_t:[12,16],piec:4,pip3:[6,7,18],pip:[0,6],pipelin:[13,15,16],pleas:[0,7],plu:15,point:19,pointer:[10,19],poisson:[13,15,16],poisson_lambda:[13,15,16],polosukhin:19,polyak_decai:10,pool:[1,10,15],port:[14,15],portal:7,pos_ffn_activation_fn:[10,15],posit:[10,15],position_encod:[10,15],position_ffn:10,positionalencod:10,positionwisefeedforward:[10,15],possibl:[3,8,10,11,12,13,15,16],postprocess:11,postprocess_opt:11,potenti:12,pouta:17,ppl:8,pproach:19,pre:[8,11,12],pre_word_vecs_dec:15,pre_word_vecs_enc:15,preced:3,precis:8,pred:16,pred_scor:12,pred_sent:12,predict:[8,12,16],prefer:0,prefix:[3,8,13,15,16],prefix_seq_len:12,preliminari:4,preload:11,preload_model:11,prepar:[5,12],prepare_wmt_data:4,preprint:19,preprocess:11,preprocess_opt:11,presenc:3,presum:12,pretrain:[10,15],prevent:[12,16],previou:[2,3,10,12],previous:2,primari:3,prime:2,print:[8,15,16],prior:4,prior_token:[13,15,16],prob:12,proba:16,probabl:[10,12,13,15,16],probil:10,problem:12,proc:[7,19],procedur:3,process:[2,8,11,13,15],processu:11,produc:[1,2,12,13,15,16],product:2,projappl:6,project:[0,1,2,6,7,10],project_2005099:6,project_462000125:6,propag:8,proper:11,properli:6,properti:[8,10],proport:[3,13,15,16],provid:[7,16],prune:5,pty:6,pull_request_chk:0,punctuat:0,push:1,put:12,pwd:17,pyonmttok:[13,15,16],python3:[3,6],python:[0,3,6,15],pythonpath:6,pythonuserbas:6,pytorch:[0,6,10],qin:19,quantiz:16,queri:10,query_len:10,question:5,queue:[13,15],queue_siz:[4,15],quickstart:[5,7],quoc:19,quot:0,rais:[13,15],random:[5,13,15],random_ratio:[13,15,16],random_sampling_temp:[12,16],random_sampling_topk:[12,16],random_sampling_topp:[12,16],randomli:12,rang:16,rank:[12,15],ranslat:19,rare:12,rate:[5,8],rather:0,ratio:[12,16],raw:[10,12,16],rccl:6,reach:12,read:[0,3,11,17],readabl:[0,3],reader:5,readm:15,rebuild:11,rebuild_seg_packag:11,receiv:3,recent:15,recip:10,recommend:15,recommonmark:0,rectifi:2,recurr:10,redund:3,ref:0,refer:[0,2,5],regardless:3,regist:10,regular:[13,15,16],rel:[10,15],relat:[4,13,15,16],relationship:2,relev:[10,12],relu:[2,10,15],rememb:0,remov:3,renorm:15,reorder:12,repeat:[12,16],repetit:16,replac:[1,12,13,15,16],replace_length:[13,15,16],replace_unk:[12,16],replic:10,report:[7,8,15,16],report_align:[12,16],report_everi:[4,15],report_manag:8,report_scor:12,report_stats_from_paramet:[8,15],report_tim:[12,16],reportmgrbas:8,repres:[2,8],represent:[1,2,10,15,19],reproduc:5,requir:[0,8,10,15],research:7,reset:8,reset_optim:15,resett:15,residu:10,resourc:3,respect:[2,3],respons:8,rest:14,restrict:[13,15,16],result:[2,11,15],return_attent:12,return_hidden:10,reus:[1,10,15],reuse_copy_attn:[10,15],revers:[13,15,16],reversible_token:[13,15,16],rico:19,right:[0,2],rmsnorm:15,rnn:[8,10,15,19],rnn_dropout:10,rnn_size:[4,15],rnn_type:[10,15],rnndecoderbas:10,rnnencod:10,roblem:19,rocm5:6,rocm:6,root:[2,3],rotat:[13,15,16],rotate_ratio:[13,15,16],roundrobin:15,row:3,rsqrt:15,rst:0,run:[0,3,4,8,10,11,15,16],rush:7,sacrebleu:[4,6,7,18],sai:3,samantao:6,same:[0,3,4,10,11,15],sampl:[5,12,13,15,17],sample_with_temperatur:12,sampling_temp:12,saniti:16,save:[5,8,13,15,16,17],save_all_gpu:15,save_checkpoint_step:[4,8,15],save_config:[13,15,16],save_data:[4,13,15],save_model:[4,15],saver:8,scale:[10,12,15],schedul:[8,15],schuster:19,score:[5,10,11,16],scorer:12,scratch:6,script:[0,4,5,6],search:[0,3,5,12],second:[2,3,10,11],secur:[13,15],see:[3,10,11,12,13,15,19],seed:[4,12,13,15,16],seem:1,seemingli:15,seen:2,segment:[3,11,16],select:[10,12,15],select_index:12,self:[1,2,10,11,12,15],self_attn_typ:[10,15],send:[0,15],senellart:7,sennrich:19,sensibl:0,sent:[8,15,16],sent_numb:12,sentenc:[1,12,13,15,16,17],sentencepiec:[3,4,6,7,13,15,16,18],separ:[3,10],seper:11,seq2seq:[12,15],seq:12,seq_len:[2,10,12],seqlength:10,sequenc:[1,2,3,8,10,11,12,13,15,16,19],serial:10,serv:2,server:[5,15,17],servermodel:11,servermodelerror:11,session:6,set:[2,3,4,6,8,10,11,12,13,15,16],setup:[4,10],sever:[3,10,12],sgd:15,sh16:[10,19],shape:[0,1,10,12],shard:[8,15,16],shard_siz:[8,16],share:[1,6,13,15,16],share_decoder_embed:[4,15],share_embed:[4,15],share_vocab:[13,15],shazeer:19,shortest:12,shot:3,should:[3,4,10,12,15],shuf:17,shuffle_input_sent:17,side:[3,8,11,13,15,16],side_a:3,side_b:3,silent:[4,10,13,15],similar:[2,3,10,15],simpl:[2,8,15],simpleattentionbridgelay:5,simpli:10,simulatan:10,sin:15,sinc:10,singl:[0,11,15],single_pass:15,sinusoid:10,site:6,size:[3,8,10,12,13,15,16,17],skip:[3,13,15],skip_embed:10,skip_empty_level:[4,13,15],slen:10,slm17:[10,19],slow:[13,16],slurm:[3,6],smaller:[13,15,16],smooth:[13,15,16],softmax:[1,2,10,15,16],some:[0,1,3,8,16],someth:0,sometim:0,sort:[11,17],sorted_pair:3,sourc:[0,1,3,5,6,7,8,10,11,12,13,15],sp_path:17,space:[0,2,15],spacer:[13,15,16],span:[13,15,16],spars:10,sparseadam:15,sparsemax:[10,15],sparsesoftmax:1,specif:[1,2,3,7,12,13,15,18],specifi:[2,10,13,15,16],sphinx:0,sphinx_rtd_them:0,sphinxcontrib:0,spill:0,spm_decod:4,spm_encod:[4,17],spm_train:17,sqrt:2,squar:[2,3],src:[3,4,8,10,11,12,13,15,16,17],src_embed:15,src_feat:16,src_feats_vocab:[13,15],src_file_path:12,src_ggnn_size:15,src_group:3,src_lang:[3,16],src_languag:3,src_len:[8,10],src_length:12,src_map:[10,12],src_onmttok_kwarg:[13,15,16],src_raw:12,src_seq_length:[4,13,15,16],src_seq_length_trunc:15,src_subword_alpha:[4,13,15,16],src_subword_model:[4,13,15,16],src_subword_nbest:[4,13,15,16],src_subword_typ:[13,15,16],src_subword_vocab:[13,15,16],src_vocab:[4,12,13,15],src_vocab_s:15,src_vocab_threshold:[13,15,16],src_word_vec_s:15,src_words_min_frequ:15,sru:[5,15],srun:6,stabl:2,stack:[1,10,15,16],stage:2,stand:0,standard:[10,15,16],start:[3,5,6,8,11,15,17],start_decay_step:15,stat:[8,15],stat_list:8,state:[8,10,12,15],state_dict:15,state_dim:15,statist:[8,15],stdout:8,stdrnndecod:10,step:[2,3,5,8,10,12,15,16],stepwis:10,stepwise_penalti:[12,16],still:0,stop:[13,15,16],store:15,str:[0,8,10,11,12],strategi:[5,8,15],stride:10,string:[8,10,13,15,16],structur:[1,2,5,19],structured_attent:10,style:[0,10,13,15,16],styleguid:0,subclass:[8,10,12],subcompon:3,subdirectori:6,subsequ:2,subset:17,substitut:3,subword:[3,5],suggest:15,sum:[8,10,12,15],sum_:10,sume:8,summar:19,summari:[0,12,16],superclass:0,supervis:[3,10,15],support:[0,3,10,15],suppos:17,sure:[6,12],sutskev:19,switchout:[5,19],switchout_temperatur:[13,15,16],symmetr:3,system:[12,15,19],tab:[13,15],tabl:[10,16],take:[2,3,7,10,13,15,16],taken:10,tangent:2,tanh:[2,10],tao:19,taolei87:10,tar:17,target:[3,5,8,10,11,12,13,15],target_prefix:12,task:[3,4,5,8,12],task_distribution_strategi:15,task_queue_manag:8,tatoeba:[3,5],tau:[13,15,16],technic:7,temperatur:[3,12,13,15,16],templat:3,tend:1,tensor:[0,8,10,12],tensorboard:[8,15],tensorboard_log_dir:15,tensorflow:15,term:[2,10],test:[0,4,6,10],testset:4,text:[8,10,12,15,16,19],tgt:[3,4,8,10,11,13,15,16],tgt_dict:10,tgt_embed:15,tgt_file_path:12,tgt_group:3,tgt_lang:[3,16],tgt_languag:3,tgt_len:[8,10],tgt_onmttok_kwarg:[13,15,16],tgt_pad_mask:10,tgt_prefix:[12,16],tgt_sent:12,tgt_seq_length:[4,13,15,16],tgt_seq_length_trunc:15,tgt_subword_alpha:[4,13,15,16],tgt_subword_model:[4,13,15,16],tgt_subword_nbest:[4,13,15,16],tgt_subword_typ:[13,15,16],tgt_subword_vocab:[13,15,16],tgt_vocab:[4,8,13,15],tgt_vocab_s:15,tgt_vocab_threshold:[13,15,16],tgt_word_vec_s:15,tgt_words_min_frequ:15,than:[0,1,12,15,17],thang:19,thant:12,thei:[2,10,12],them:[3,10],theorem:10,thi:[0,2,3,4,6,7,8,10,12,13,15,16],thin:8,thing:[0,3],thoroughli:10,thread:13,three:[2,10],through:[2,3,8],thu:8,tic:0,tick:0,time:[2,3,6,8,12,15,16],timeout:11,timer:11,titl:7,tlen:10,to_cpu:11,to_gpu:11,todo:[6,10,17],tok:11,token:[4,8,10,11,12,13,15,16],token_drop:5,token_mask:5,tokendrop:[13,15,16],tokendrop_temperatur:[13,15,16],tokenizer_mark:11,tokenizer_opt:11,tokenmask:[13,15,16],tokenmask_temperatur:[13,15,16],too:12,tool:5,toolkit:7,top:[2,10,12,16],topk_id:12,topk_scor:12,torch:[0,6,8,10,15],torchtext:8,total:[3,8,15],trail:0,train:[3,5,6,7,8,10,19],train_extremely_large_corpu:17,train_from:15,train_it:8,train_loss:8,train_loss_md:8,train_step:[4,8,15],trainabl:[1,8],trainer:5,training_step:8,transform:[1,2,4,5,8,19],transformer_ff:[4,15],transformer_lm:15,transformerattentionbridgelay:5,transformerdecod:10,transformerdecoderbas:10,transformerencod:10,transformerencoderlay:2,translat:[1,3,5,7,8,10,11,14,19],translate_batch:12,translation_serv:11,translationbuild:12,translationserv:11,transpos:1,travi:0,tree:10,trg:3,triang:3,trick:[5,10],trivial:10,trunc_siz:8,truncat:[8,15],truncated_decod:15,trust:17,ttention:19,turn:[10,15],tutori:[5,18],two:[2,3,10],txt:[0,16,17],type:[0,2,3,5,8,10,11,12,13,16],typic:[8,15],u_a:10,under:[3,15,16],undergo:2,undergon:2,underli:12,uniform:15,unigram:[13,15,16],union:0,unit:[2,10],unittest:0,unk:[12,16],unknown:12,unless:3,unload:11,unload_model:11,unmodifi:12,unnecessari:[0,1,3],unnorm:10,unset:3,unsqueez:1,until:[12,16],unwieldli:3,updat:[6,8,11,12,15],update_dropout:10,update_finish:12,update_learning_r:15,update_n_src_word:8,update_vocab:15,upgrad:6,upper:3,url:[6,7,19],url_root:14,usag:[5,13,14,15,16],use:[0,2,3,4,6,8,10,11,12,13,15,16,17],use_bridg:10,use_relu:10,use_tanh:10,used:[1,2,3,4,8,10,11,12,13,15,16],useful:8,user:[6,8,10,11],uses:[0,3,10,12,15],using:[0,2,3,7,10,11,12,13,15,16],uszkoreit:19,util:[2,8],v11:4,v_a:10,valid:[4,8,13,15,16],valid_batch_s:[4,15],valid_it:8,valid_loss:8,valid_loss_md:8,valid_step:[4,8,15],valu:[2,3,8,10,11,12,13,15,16],variabl:[3,6,12],variat:0,vaswani:19,vaswanispujgkp17:0,vector:[10,15],venv:6,verbos:[12,15,16],veri:[0,16],version:[10,11,12],via:[1,10,15,19],view:1,vinyal:19,virtual:6,visit:0,visual:15,vocab:[4,5,8,10,12],vocab_path:[13,15],vocab_s:[12,15,17],vocab_sample_queue_s:13,vocab_size_multipl:15,vocabulari:[3,8,10,13,15,16,17],vsp:[10,19],w_a:10,wai:[3,12],wait:3,wang:19,want:[3,16],warmup:15,warmup_step:[4,15],warn:[13,15,16],weight:[2,3,4,10,15,16],weight_decai:15,weighted_sampl:15,weightnormconv2d:10,well:[0,15],wget:17,what:[3,5,8,11],when:[0,3,7,10,12,13,15,16,17],where:[1,2,4,6,10,12,13,15,16],wherea:[12,15],whether:[8,10,11,12,13,15,16],which:[1,3,10,12,15],whl:6,whole:[4,12],whose:16,why:2,wiki:15,wikipedia:15,window:[13,15,16],wise:2,with_align:8,within:[2,10,11],without:[0,10,15],wmt14_en_d:4,wmt:4,wmtend:4,wojciech:19,wolfgang:19,word2vec:15,word:[2,10,12,13,15,16],word_align:12,word_lut:10,word_padding_idx:10,word_vec_s:[4,10,15],word_vocab_s:10,work:[0,3,12,15],workflow:7,world_siz:[4,15],would:[3,12,15],wpdn18:[13,15,16,19],wrap:11,wrapper:8,writabl:3,write:[3,8],writer:8,written:4,wsc:[12,19],www:15,xavier_uniform:15,xent:8,xinyi:19,xiong:19,xzvf:17,yaml:[4,13,15,16],yang:19,yann:19,yarat:19,year:7,yet:[10,12],yml:0,yoav:19,yonghui:19,yoon:7,yoshua:19,you:[0,3,4,6,10,15,16,19],your:[0,3,6,16,17],your_venv_nam:6,your_vevn_nam:6,yourself:7,yuan:19,yuntian:7,zaremba:19,zero:[3,8,10,12,15,16],zero_grad:8,zhang:19,zhifeng:19,zihang:19,zxs18:[10,19]},titles:["Contributors","Questions","Attention Bridge","Config-config tool","Translation","Contents","Installation","Overview","Framework","Data Loaders","Modules","Server","Translation","Build Vocab","Server","Train","Translate","Prepare Data","Quickstart","References"],titleterms:{"class":12,The:3,actual:3,adapt:[3,15],adapter_config:3,ae_path:3,ae_transform:3,align:15,allocate_devic:3,altern:3,architectur:10,argument:14,attent:[2,10,15],autoencod:3,bank:1,beam:16,behind:1,between:1,bridg:[2,15],build:[4,13],challeng:17,citat:7,cluster_languag:3,command:3,common:[13,15,16],complete_language_pair:3,config:3,config_al:3,config_config:3,configur:[13,15,16],content:5,contributor:0,conv2conv:10,copi:10,core:[10,11],corpora:3,corpora_schedul:3,data:[4,9,13,15,16,17,18],dataset:9,dec_sharing_group:3,decod:[1,10,12,15,16],denois:[13,15,16],differ:1,direct:17,distanc:3,distance_matrix:3,docstr:0,download:[4,17],dynam:15,effici:16,embed:15,enc_sharing_group:3,encod:[1,10,15],encoder_output:1,evalu:4,featur:15,feedforwardattentionbridgelay:2,filter:[13,15,16],fix:1,framework:8,gener:[1,15],get:17,group:3,guidelin:0,inferfeat:[13,15,16],initi:15,input:3,instal:[6,7,18],intermediate_output:1,intuit:1,kei:3,languag:[3,16],length:1,level:3,linattentionbridgelay:2,line:3,loader:9,log:[15,16],loss:8,lumi:6,mahti:6,mammoth:18,manual:3,matrix:3,memori:1,model:[1,4,8,11,15,16,17],modul:10,n_gpus_per_nod:3,n_group:3,n_node:3,name:14,need:1,onmttok:[13,15,16],optim:[8,15],opu:17,other:3,overrid:3,overview:7,paramet:3,pars:17,path:17,penalti:16,perceiverattentionbridgelay:2,prepar:[4,17,18],prune:15,puhti:6,question:1,quickstart:18,random:16,rate:15,reader:9,refer:19,relev:17,remove_temporary_kei:3,reproduc:[13,15,16],run:6,sampl:16,save:1,score:12,search:16,sentencepiec:17,separ:1,server:[11,14],set:17,set_transform:3,share:3,sharing_group:3,shot:17,simpleattentionbridgelay:2,sourc:16,specifi:3,src_path:3,sru:10,stage:3,step:[4,17,18],strategi:12,structur:10,subword:[4,13,15,16],supervis:17,switchout:[13,15,16],target:16,task:15,tatoeba:17,test:17,tgt_path:3,than:3,token_drop:[13,15,16],token_mask:[13,15,16],tool:3,top:3,train:[4,15,17],trainer:8,transform:[3,10,13,15,16],transformerattentionbridgelay:2,translat:[4,12,16,17],translation_config:3,translation_config_dir:3,trick:16,type:15,usag:3,use_introduce_at_training_step:3,use_weight:3,valid:17,variabl:17,vocab:[13,15,17],vocabulari:4,what:1,why:1,yaml:3,zero:17,zero_shot:3}})
\ No newline at end of file
+Search.setIndex({docnames:["CONTRIBUTING","FAQ","attention_bridges","config_config","examples/Translation","index","install","main","onmt","onmt.inputters","onmt.modules","onmt.translate.translation_server","onmt.translation","options/build_vocab","options/server","options/train","options/translate","prepare_data","quickstart","ref"],envversion:{"sphinx.domains.c":1,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":1,"sphinx.domains.index":1,"sphinx.domains.javascript":1,"sphinx.domains.math":2,"sphinx.domains.python":1,"sphinx.domains.rst":1,"sphinx.domains.std":1,"sphinx.ext.viewcode":1,sphinx:56},filenames:["CONTRIBUTING.md","FAQ.md","attention_bridges.md","config_config.md","examples/Translation.md","index.rst","install.md","main.md","onmt.rst","onmt.inputters.rst","onmt.modules.rst","onmt.translate.translation_server.rst","onmt.translation.rst","options/build_vocab.rst","options/server.rst","options/train.rst","options/translate.rst","prepare_data.md","quickstart.md","ref.rst"],objects:{"onmt.Trainer":{train:[8,1,1,""],validate:[8,1,1,""]},"onmt.decoders":{CNNDecoder:[10,0,1,""],DecoderBase:[10,0,1,""],InputFeedRNNDecoder:[10,0,1,""],StdRNNDecoder:[10,0,1,""],TransformerDecoder:[10,0,1,""]},"onmt.decoders.CNNDecoder":{forward:[10,1,1,""],from_opt:[10,1,1,""],init_state:[10,1,1,""]},"onmt.decoders.DecoderBase":{from_opt:[10,1,1,""]},"onmt.decoders.TransformerDecoder":{forward:[10,1,1,""]},"onmt.decoders.decoder":{RNNDecoderBase:[10,0,1,""]},"onmt.decoders.decoder.RNNDecoderBase":{forward:[10,1,1,""],from_opt:[10,1,1,""],init_state:[10,1,1,""]},"onmt.encoders":{CNNEncoder:[10,0,1,""],EncoderBase:[10,0,1,""],MeanEncoder:[10,0,1,""],RNNEncoder:[10,0,1,""],TransformerEncoder:[10,0,1,""]},"onmt.encoders.CNNEncoder":{forward:[10,1,1,""],from_opt:[10,1,1,""]},"onmt.encoders.EncoderBase":{forward:[10,1,1,""]},"onmt.encoders.MeanEncoder":{forward:[10,1,1,""],from_opt:[10,1,1,""]},"onmt.encoders.RNNEncoder":{forward:[10,1,1,""],from_opt:[10,1,1,""]},"onmt.encoders.TransformerEncoder":{forward:[10,1,1,""],from_opt:[10,1,1,""]},"onmt.models":{NMTModel:[8,0,1,""]},"onmt.models.NMTModel":{count_parameters:[8,1,1,""],forward:[8,1,1,""]},"onmt.models.sru":{SRU:[10,0,1,""]},"onmt.models.sru.SRU":{forward:[10,1,1,""]},"onmt.modules":{AverageAttention:[10,0,1,""],ConvMultiStepAttention:[10,0,1,""],CopyGenerator:[10,0,1,""],Embeddings:[10,0,1,""],GlobalAttention:[10,0,1,""],MultiHeadedAttention:[10,0,1,""],PositionalEncoding:[10,0,1,""],WeightNormConv2d:[10,0,1,""]},"onmt.modules.AverageAttention":{cumulative_average:[10,1,1,""],cumulative_average_mask:[10,1,1,""],forward:[10,1,1,""]},"onmt.modules.ConvMultiStepAttention":{apply_mask:[10,1,1,""],forward:[10,1,1,""]},"onmt.modules.CopyGenerator":{forward:[10,1,1,""]},"onmt.modules.Embeddings":{emb_luts:[10,1,1,""],forward:[10,1,1,""],load_pretrained_vectors:[10,1,1,""],word_lut:[10,1,1,""]},"onmt.modules.GlobalAttention":{forward:[10,1,1,""],score:[10,1,1,""]},"onmt.modules.MultiHeadedAttention":{forward:[10,1,1,""],training:[10,2,1,""],update_dropout:[10,1,1,""]},"onmt.modules.PositionalEncoding":{forward:[10,1,1,""]},"onmt.modules.WeightNormConv2d":{forward:[10,1,1,""]},"onmt.modules.position_ffn":{PositionwiseFeedForward:[10,0,1,""]},"onmt.modules.position_ffn.PositionwiseFeedForward":{forward:[10,1,1,""]},"onmt.modules.structured_attention":{MatrixTree:[10,0,1,""]},"onmt.modules.structured_attention.MatrixTree":{forward:[10,1,1,""]},"onmt.translate":{BeamSearch:[12,0,1,""],DecodeStrategy:[12,0,1,""],GNMTGlobalScorer:[12,0,1,""],GreedySearch:[12,0,1,""],Translation:[12,0,1,""],TranslationBuilder:[12,0,1,""],Translator:[12,0,1,""]},"onmt.translate.BeamSearch":{initialize:[12,1,1,""]},"onmt.translate.DecodeStrategy":{advance:[12,1,1,""],block_ngram_repeats:[12,1,1,""],initialize:[12,1,1,""],maybe_update_forbidden_tokens:[12,1,1,""],maybe_update_target_prefix:[12,1,1,""],target_prefixing:[12,1,1,""],update_finished:[12,1,1,""]},"onmt.translate.GreedySearch":{advance:[12,1,1,""],initialize:[12,1,1,""],update_finished:[12,1,1,""]},"onmt.translate.Translation":{log:[12,1,1,""]},"onmt.translate.Translator":{translate_batch:[12,1,1,""]},"onmt.translate.greedy_search":{sample_with_temperature:[12,3,1,""]},"onmt.translate.penalties":{PenaltyBuilder:[12,0,1,""]},"onmt.translate.penalties.PenaltyBuilder":{coverage_none:[12,1,1,""],coverage_summary:[12,1,1,""],coverage_wu:[12,1,1,""],length_average:[12,1,1,""],length_none:[12,1,1,""],length_wu:[12,1,1,""]},"onmt.translate.translation_server":{ServerModel:[11,0,1,""],ServerModelError:[11,4,1,""],Timer:[11,0,1,""],TranslationServer:[11,0,1,""]},"onmt.translate.translation_server.ServerModel":{build_tokenizer:[11,1,1,""],detokenize:[11,1,1,""],do_timeout:[11,1,1,""],maybe_convert_align:[11,1,1,""],maybe_detokenize:[11,1,1,""],maybe_detokenize_with_align:[11,1,1,""],maybe_postprocess:[11,1,1,""],maybe_preprocess:[11,1,1,""],maybe_tokenize:[11,1,1,""],parse_opt:[11,1,1,""],postprocess:[11,1,1,""],preprocess:[11,1,1,""],rebuild_seg_packages:[11,1,1,""],to_gpu:[11,1,1,""],tokenize:[11,1,1,""],tokenizer_marker:[11,1,1,""]},"onmt.translate.translation_server.TranslationServer":{clone_model:[11,1,1,""],list_models:[11,1,1,""],load_model:[11,1,1,""],preload_model:[11,1,1,""],run:[11,1,1,""],start:[11,1,1,""],unload_model:[11,1,1,""]},"onmt.utils":{Optimizer:[8,0,1,""],Statistics:[8,0,1,""]},"onmt.utils.Optimizer":{amp:[8,1,1,""],backward:[8,1,1,""],from_opt:[8,1,1,""],learning_rate:[8,1,1,""],step:[8,1,1,""],training_step:[8,1,1,""],zero_grad:[8,1,1,""]},"onmt.utils.Statistics":{accuracy:[8,1,1,""],all_gather_stats:[8,1,1,""],all_gather_stats_list:[8,1,1,""],elapsed_time:[8,1,1,""],log_tensorboard:[8,1,1,""],output:[8,1,1,""],ppl:[8,1,1,""],update:[8,1,1,""],xent:[8,1,1,""]},"onmt.utils.loss":{LossComputeBase:[8,0,1,""]},onmt:{Trainer:[8,0,1,""]}},objnames:{"0":["py","class","Python class"],"1":["py","method","Python method"],"2":["py","attribute","Python attribute"],"3":["py","function","Python function"],"4":["py","exception","Python exception"]},objtypes:{"0":"py:class","1":"py:method","2":"py:attribute","3":"py:function","4":"py:exception"},terms:{"0473v3":19,"25g":6,"abstract":10,"boolean":[8,12],"break":17,"class":[0,5,8,10,11],"default":[11,13,14,15,16,17],"export":6,"final":[2,4,10,12],"float":[3,10,12],"function":[0,1,2,3,8,10,11,12,15],"import":0,"int":[8,10,11,12],"long":0,"new":[0,2,4],"public":6,"return":[0,8,10,11,12],"static":[8,15],"true":[3,4,8,10,12,15,16,17],"try":[0,6],"while":[3,10],And:[0,10],But:1,EOS:12,For:[0,1,3,12,15,18],IDs:12,IFS:17,LPs:3,Not:0,One:3,The:[1,2,4,8,10,11,12,15,16],Then:[0,4,10],There:[2,3],These:[2,3,10,12],Use:[3,15,16],Used:12,Will:[1,3],__init__:11,_compute_loss:8,a_j:10,aan:[10,15],aan_useffn:[10,15],ab_fixed_length:15,ab_lay:15,ab_layer_norm:15,abbrevi:0,abigail:19,abil:10,about:0,abov:[0,12],abs:[2,15,16,19],acceler:[10,19],accept:[0,3,12],access:[2,3,6],accord:3,account:[3,6],accross:8,accum:8,accum_count:[4,8,15],accum_step:[4,8,15],accumul:[8,15],accuraci:[8,12],achiev:3,achin:19,acl:[7,19],aclweb:15,act:1,action:[10,12,15],activ:[1,2,6,10,15],activation_fn:10,activationfunct:10,actual:12,adadelta:15,adafactor:15,adagrad:15,adagrad_accumulator_init:15,adam:[4,15],adam_beta1:15,adam_beta2:[4,15],adamoptim:15,adamw:15,adapt:[5,7],adapter_nam:3,add:[0,4,10],added:3,adding:0,addit:[0,10,13,15,16],addition:10,address:12,adjust:3,adopt:15,adpot:10,advanc:[12,15],advic:0,after:[0,2,10,12,15],afterward:10,again:0,agnost:1,aidan:19,alexand:7,algorithm:19,align:[5,8,10,11,12,16,19],align_debug:16,alignment_head:[10,15],alignment_lay:[10,15],aliv:12,alive_attn:12,alive_seq:12,all:[0,3,8,10,12,13,15,16,19],all_gather_stat:8,all_gather_stats_list:8,all_preprocess:11,allennlp:0,alloc:3,allow:[0,2,3,15],almost:[12,15],alon:0,along:2,alpha:[2,12,16],alphabet:3,alreadi:[13,15,16],also:[0,3,6,8,10,15],altern:10,although:10,alwai:[0,3],amp:[8,15],ani:[0,1,3,12,13,15],annurev:19,anoth:[0,2,8],antholog:15,apex:15,apex_opt_level:15,api:[0,5],api_doc:15,appear:3,append:[6,17],appli:[2,3,10,12,13,15,16],applic:16,apply_mask:10,approach:10,appropri:12,approxim:15,architectur:[2,5],arg:[0,10,11],argmax:16,argpars:11,argument:[0,5],around:10,artzi:19,arxiv:[0,2,15,16,19],ashish:19,assig:3,assign:[3,16],assing:3,assum:[10,12],att_typ:2,attend:2,attent:[0,1,5,8,12,16,19],attention_bridg:8,attention_dropout:[4,10,15],attentionbridgenorm:2,attet:10,attn:[10,12,16],attn_debug:[12,16],attn_func:10,attn_typ:10,attr:11,attribut:12,augment:19,auli:19,author:[7,10],autodoc:0,autogener:15,avail:[8,10,11,15,16],available_model:14,averag:[10,15,16,19],average_decai:[4,8,15],average_everi:[8,15],average_output:10,averageattent:10,avg:[10,16],avg_raw_prob:16,avoid:[0,3],aws:6,axi:12,back:8,backend:15,backward:8,bahdanau:[10,15,19],ban_unk_token:[12,16],bank:[5,10],barri:19,bart:[3,5],base:[0,2,3,4,6,7,8,10,11,12,13,15,16,19],base_target_emb:10,baselin:15,basemodel:8,basenam:[4,17],bash:6,batch:[2,4,8,10,12,15,16],batch_siz:[4,10,12,15,16],batch_size_multipl:[4,15],batch_typ:[4,15,16],bcb14:[10,19],beam:[5,12],beam_search:12,beam_siz:[4,12,16],beamsearch:12,beamsearchbas:12,becaus:[3,16],becom:3,been:[10,12,13,15,16],befor:[0,4,11,12,15,16],begin:[8,12],behind:5,below:0,ben:3,benefit:1,bengali:3,bengio:19,best:[12,16],beta1:15,beta2:15,beta:[12,16],better:[0,13,15,16],between:[2,5,10,13,15,16,19],beyond:8,biao:19,bib:0,bibtex:0,bibtext:0,bidir_edg:15,bidirect:[10,15],bidirectional_encod:10,bin:[6,15],binari:[4,10],bit:16,blank:0,bleu:4,blob:10,block:[12,16],block_ngram_repeat:[12,16],booktitl:7,bool:[8,10,11,12],bos:12,both:[3,12,15],both_embed:15,boundari:[13,15,16],bpe:[13,15,16],bptt:[8,15],bridg:[1,5,19],bridge_extra_nod:15,bring:1,brnn:15,browser:0,bucket_s:[4,15],buffer:8,build:[0,5,8,10,11,12,16,17],build_token:11,build_vocab:13,built:8,bytetensor:12,cach:10,calcul:[2,8,10,12],call:[10,12],callabl:12,callback:8,can:[2,3,4,6,8,11,12,13,15,16],cancel:11,candid:[3,13,15,16],cao:19,capit:0,captur:2,care:10,cat:17,categor:12,categori:12,challeng:5,chang:[0,3,8,15],channel:2,charact:[0,16],character_coverag:17,check:[0,7,18],checklist:0,checkpoint:[4,8,15],chen:19,chmod:[4,6],cho:19,choic:[0,10,13,15,16],choos:0,chosen:12,christoph:19,citat:[0,5],cite:[0,7],classmethod:[8,10],clear:0,clone:[7,11,18],clone_model:11,close:0,cls:8,cluster:[3,7,18],clutter:0,cnn:[10,15,19],cnn_kernel_width:[10,15],cnndecod:10,cnnencod:10,code:[0,3,6,16],code_dir:6,codebas:6,column:3,com:[7,10,18],combin:[10,16],comma:3,command:[4,5],comment:0,commentari:4,common:[0,5],commoncrawl:4,commun:0,complet:12,complex:[3,12],compon:[2,3],composit:15,comput:[2,3,4,8,10,15,16],concat:[10,15],concaten:15,condit:[12,15,16],conf:[14,16],config:[4,5,11,13,14,15,16],config_fil:11,configur:[3,4,5],connect:2,consid:[3,10,17],consider:15,consist:[0,10],constant:3,construct:10,constructor:[0,10],consum:15,contain:[1,3,10,11,12],content:[0,16],context:[2,10,15],context_attn:10,context_g:[10,15],contextg:10,continu:0,contribut:[0,2,10],contributor:5,control:[3,8],conv2conv:5,conv2d:10,conv:[10,15],conveni:3,convent:0,convers:12,convert:11,convex:10,convmultistepattent:10,convolut:[10,19],copi:[0,3,5,6,15,16],copy_attn:[10,12,15],copy_attn_forc:15,copy_attn_typ:[10,15],copy_loss_by_seqlength:15,copygener:10,core:[2,5,8],corpora:4,corpu:[3,4,13,15,17],corr:[0,19],correct:3,correspand:11,correspond:[2,16],could:12,count:[3,8,12,13,15,16],count_paramet:8,cov:12,cov_pen:12,coverag:[10,12,15,16],coverage_attn:[10,15],coverage_non:12,coverage_penalti:[12,16],coverage_summari:12,coverage_wu:12,cpu:[11,15,16],crai:6,crayon:15,creat:[3,6,8],creation:3,criteria:15,criterion:8,critic:[15,16],cross:[8,10,15],cross_queri:1,csc:17,csv:3,ct2_model:11,ct2_translate_batch_arg:11,ct2_translator_arg:11,ctrl:0,cuda_funct:10,cudnn:10,cumbersom:3,cumul:[10,12,16],cumulative_averag:10,cumulative_average_mask:10,cur_dir:17,cur_len:12,current:[3,8,10,12,15],curricula:3,curriculum:3,custom:[11,15],custom_opt:11,cut:[0,17],cutoff:12,d_ff:10,d_model:10,dai:19,data:[2,3,5,8,12,19],data_path:17,data_typ:[8,12,15,16],dataset:[4,5,13,15,16,17],datastructur:11,dauphin:19,david:19,dblp:0,ddress:19,deal:3,debug:[14,15,16],dec:3,dec_lay:[4,15],dec_out:10,dec_rnn_siz:15,decai:15,decay_method:[4,15],decay_step:15,decod:[2,3,5,8],decode_strategi:12,decoder_typ:[4,15],decoderbas:[8,10],decodestrategi:12,def:0,defin:[3,4,10,13,15,16],definit:10,delai:3,delet:[13,15,16],delimit:16,deng:7,deni:19,denot:2,depend:[0,1,3,6,8,10,11],deprec:[15,16],describ:[2,10,11,15],descript:0,desir:[3,4],detail:[7,13,15],determin:3,detoken:[4,11],dev:[6,17],develop:0,devic:[3,10,12,16],device_context:8,deyi:19,diagon:3,dict:[3,8,10,11,12,13,15,16],dict_kei:15,dictionari:[8,10,12,15],differ:[0,2,3,5,10,11,16],dilat:10,dim:10,dimens:[2,10,12,15],dimension:[2,10],dir:17,direct:[0,3,12],directli:[0,10,16],directori:[3,6,11,15],disabl:15,discard:[13,15,16],discourag:15,disk:15,displai:8,dist:8,distanc:[10,15],distribtut:10,distribut:[3,8,10,12,13,15,16],divers:[2,13,15,16],divid:[2,3,15,16],divis:10,do_timeout:11,doc:0,document:[0,7],doe:[1,3,16],doesn:17,doi:[7,19],doing:[3,16],don:0,done:[4,12,17],dot:[2,10,15],dotprod:15,down:[12,13],download:6,dropout:[4,8,10,13,15,16],dropout_step:[4,8,15],due:15,dump:[13,15,16],dump_beam:[12,16],dump_sampl:13,dump_transform:15,dure:[11,15,16],dynam:[5,10,16],dzmitri:19,each:[2,3,10,12,13,15,16],earli:15,earlier:[2,13,15,16],early_stop:15,early_stopping_criteria:15,earlystopp:8,eas:3,easi:0,easili:3,echo:[4,17],edg:15,effect:[2,10,11,13],effici:[5,8,19],either:[12,15],elaps:8,elapsed_tim:8,element:[2,3],els:[1,17],emb:10,emb_fil:10,emb_lut:10,embed:[2,5,10,13],embedding_s:10,embeddings_typ:15,emerg:2,emnlp:19,emploi:[2,8],empti:[4,10,12,13,15],enabl:[10,16],enc:3,enc_hidden:10,enc_lay:[4,15],enc_rnn_siz:15,encapsul:2,encod:[2,3,5,8,12],encoder_fin:10,encoder_out_combin:10,encoder_out_top:10,encoder_output:5,encoder_typ:[4,15],encoderbas:[8,10],encordec:[13,15],encount:[13,15],encout:[13,15],end:12,eng:3,english:[1,3,4,17],enhanc:[1,2],ensembl:16,ensur:2,entir:17,entri:0,entropi:8,env_dir:6,environ:6,eos:12,epoch:15,eps:10,epsilon:15,equal:[12,15],equat:10,equival:15,error:[0,13,15,16],especi:3,essenti:12,establish:2,eural:19,europarl:4,evalu:8,even:3,event:12,everi:[8,10,15,16],exactli:0,exampl:[0,1,3,4,13,15,18],exce:15,except:[0,11,13,15,16],exclusion_token:12,execut:[4,13,15],exist:[13,15,16,17],exp:15,exp_host:15,expand:1,expect:[3,12],experi:[13,15,16],experiment:15,exponenti:15,extend:[0,10],extern:0,extra:[6,10,15],extra_word:10,extract:17,facilit:2,fail:12,fairseq:0,fals:[8,10,11,12,13,14,15,16],familiar:7,fast:[10,19],faster:[10,15],feat_0:16,feat_1:16,feat_dim_expon:10,feat_merg:[10,15],feat_merge_s:15,feat_padding_idx:10,feat_vec_expon:[10,15],feat_vec_s:[10,15],feat_vocab_s:10,feats0:16,feats1:16,featur:[2,5,8,10,13,16,19],fed:2,feed:[3,10,15],feedforward:[2,15],feedforwardattentionbridgelay:5,feel:0,few:0,ffectiv:19,ffn:[10,15],figur:10,file:[0,3,11,13,15,16,17],filenam:15,filter:[4,5,17],filterfeat:[13,15,16],filternonzeronumer:[13,15,16],filterrepetit:[13,15,16],filterterminalpunct:[13,15,16],filtertoolong:[3,4,13,15,16],filterwordratio:[13,15,16],find:0,firefox:0,first:[0,3,10,12,15],five:2,fix:[0,5,12,15],flag:8,flake8:0,floattensor:[8,10,12],flow:2,fly:4,fnn:10,focu:[0,2],folder:0,follow:[0,2,3,4,16,18],foo:0,forbidden:12,forbidden_token:12,forc:[12,16],format:[0,11,13,15,16,17],former:10,forward:[3,8,10,15],fotran:3,found:17,foundat:2,fp16:[15,16],fp32:[4,8,15,16],frac:2,fraction:[13,15,16],framework:[5,15],free:[0,11],freez:[10,15],freeze_word_vec:10,freeze_word_vecs_dec:15,freeze_word_vecs_enc:15,french:1,frequenc:[13,15,16],from:[2,3,8,10,12,15,16,17],from_opt:[8,10],frozenset:12,full:[0,3,10,11,13,15,16,17],full_context_align:[10,15],fulli:[3,10],further:[13,15],fusedadam:15,gag:[10,19],gao:19,gap:19,garg:15,gate:15,gather:8,gating_output:10,gehr:19,gelu:15,gener:[0,2,3,4,5,8,10,12,16,19],generator_funct:15,german:4,get:[5,6,19],ggnn:15,git:[7,18],github:[7,10,15,18],give:[3,15,16],given:[2,3,11],global:10,global_attent:15,global_attention_funct:15,global_scor:12,globalattent:10,glove:15,gnmt:12,gnmtglobalscor:12,going:12,gold:12,gold_scor:12,gold_sent:12,gomez:19,gone:15,good:[0,15],googl:[0,12,16,19],gpu:[3,4,6,11,12,15,16],gpu_backend:15,gpu_rank:[4,15],gpu_verbose_level:[8,15],gpuid:15,grad:8,gradient:[8,15],graham:19,gram:12,grangier:19,graph:15,gre:6,greater:12,greedy_search:12,greedysearch:12,group:[10,15,16],groupwis:3,grow:12,gru:[10,15],gtx1080:16,guid:[7,10,18],guidelin:5,guillaum:7,h_j:10,h_s:10,h_t:10,had:16,haddow:19,hand:3,handl:[0,8],happen:12,has:[2,3,12,13,15,16],has_cov_pen:12,has_len_pen:12,has_tgt:12,have:[0,3,4,10,12,15,16],head:[2,4,10,15],head_count:10,help:[0,2,16],helsinki:[7,18],here:[2,12,17],hidden:[1,8,10,15],hidden_ab_s:15,hidden_dim:2,hidden_s:10,hieu:19,high:3,higher:[12,15,16],highest:16,hold:12,hook:10,hop:[1,2],host:6,how:[0,10],howev:[0,8,10],html:[0,15],http:[2,6,7,10,15,16,17,18,19],huge:15,human:[3,19],hyp_:4,hyperbol:2,hyphen:3,hypothesi:4,iclr:19,identifi:16,idl:3,ids:3,ignor:[4,10,13,15,16],ignore_when_block:[12,16],illia:19,ilya:19,imag:8,impact:15,implement:[2,8,10,15],impli:[2,10],improv:[10,12,15,19],in_channel:10,in_config:3,includ:[0,3,10,13,15,16],incorpor:15,increas:3,independ:1,index:[6,10,15],indic:[2,8,10,12,13,15,16],individu:3,inf:12,infer:12,inferfeat:5,info:[15,16],inform:[1,2,3,15,16],ingredi:12,init:[10,15],init_scal:10,init_st:[8,10],initi:[5,8,10,11,12],initial_accumulator_valu:15,inner:10,inp:12,inp_seq_len:12,inproceed:7,input:[2,5,8,10,11,12,13,15,16,17,19],input_fe:15,input_feed:10,input_format:4,input_from_dec:10,input_len:10,input_s:10,input_sentence_s:17,inputfeedrnndecod:10,inputs_len:10,inputt:12,insert:[13,15,16],insert_ratio:[13,15,16],instal:[0,4,5],instanc:[8,10,12],instanti:8,instead:[0,3,6,10,13,15,16],instruct:15,int8:16,integ:12,integr:0,interact:6,interfac:[8,10],intermedi:[1,2],intermediate_output:[2,5],intern:11,interv:15,introduc:[2,3],introduct:3,intuit:5,invalid:[13,15,16],involv:2,is_finish:12,is_on_top:10,isn:12,item:10,iter:8,its:[0,3,10],itself:3,jakob:19,jean:7,jinsong:19,job:6,joiner:[13,15,16],jointli:[10,19],jona:19,jone:19,journal:0,json:14,kaiser:19,keep:[11,12,15],keep_checkpoint:[4,15],keep_stat:15,keep_topk:12,keep_topp:12,kei:10,kera:15,kernel_s:[10,15],key_len:10,kim:7,klau:19,klein:7,krikun:19,kwarg:10,kyunghyun:19,label:15,label_smooth:[4,15],lambda:[13,15,16],lambda_align:15,lambda_coverag:15,lang:3,lang_a:3,lang_b:3,lang_pair:[3,16],languag:[1,2,5,13,15,17],language_pair:17,lapata:19,last:[3,10,15,16],latent_arrai:1,lattent_arrai:1,latter:10,layer:[1,2,10,15,16],layer_cach:10,layer_norm_modul:10,layer_type_to_cl:2,layernorm:15,layerstack:3,lead:12,learn:[2,8,10,15,19],learning_r:[4,8,15],learning_rate_decai:15,learning_rate_decay_fn:8,least:0,leav:[3,15],left:2,lei:19,len:[8,10,12],length:[3,5,8,10,12,13,15,16,17],length_averag:12,length_non:12,length_pen:12,length_penalti:[12,16],length_wu:12,less:3,let:[3,4],level:[13,15],lib:6,librari:15,like:[0,12,16],limit:16,lin:[1,2,15],linattentionbridgelay:5,line:[0,4,13,15,16],linear:[1,2],linear_warmup:15,linguist:[10,19],link:[0,2,6],list:[0,3,8,10,11,12,13,15,16],list_model:11,liter:1,literatur:15,liu:19,ll17:[10,19],llion:19,load:[6,8,10,11,15],load_model:11,load_pretrained_vector:10,loader:5,local:[0,3],localhost:15,log:[5,8,12],log_fil:[15,16],log_file_level:[15,16],log_prob:12,log_tensorboard:8,logger:12,login:6,logit:[12,16],logsumexp:12,longer:[1,16],longest:12,longtensor:[8,10,12],look:[0,7,10,16],loop:8,loss:[5,15],loss_scal:15,losscomputebas:8,love:0,lower:[1,3,15],lpm15:[10,19],lsl:[12,19],lstm:[10,15],lua:11,lukasz:19,luong:[10,15,19],lustrep1:6,lustrep2:6,lza17:[10,19],macherei:19,machin:[7,10,12,19],made:3,magic:12,mai:[3,8,11,12,13,15],main:[0,7,8,13,15,16],maintain:12,make:[0,6,8,13,15,16],make_shard_st:8,mammoth:[5,6,7],man:19,manag:8,mani:[8,12,15],manipul:8,manual:[11,12],many2on:1,map:[3,8,10],margin:10,marian:15,mark:15,marker:11,mask:[10,13,15,16],mask_length:[13,15,16],mask_or_step:10,mask_ratio:[13,15,16],massiv:[3,7],master:[10,15],master_ip:15,master_port:15,match:11,mathbb:2,mathbf:2,mathemat:2,matric:2,matrix:[2,10,15],matrixtre:10,max:[8,10,12,17],max_generator_batch:[4,15],max_grad_norm:[4,8,15],max_len:10,max_length:[12,16],max_relative_posit:[10,15],max_sent_length:16,max_sentence_length:17,max_siz:8,maxim:19,maximum:[13,15,16],maybe_convert_align:11,maybe_detoken:11,maybe_detokenize_with_align:11,maybe_postprocess:11,maybe_preprocess:11,maybe_token:11,maybe_update_forbidden_token:12,maybe_update_target_prefix:12,mean:[3,10,11,15,16],meanencod:10,mechan:[2,3,10],mem:6,memori:[5,10,11,15],memory_bank:[10,12],memory_length:10,merg:[10,15],meta:3,metadata:8,method:[8,10,15],metric:16,mi250:6,michael:19,mike:19,min_length:[12,16],minh:19,minimum:[13,15,16],mirella:19,mirror:15,mix:8,mkdir:[6,17],mlp:[10,15],mode:[3,13,15,16],model:[2,3,5,10,12,13],model_dim:10,model_dtyp:[4,8,15],model_id:11,model_kwarg:11,model_prefix:17,model_root:11,model_sav:8,model_step:4,model_task:15,model_typ:15,modelsaverbas:8,modif:8,modifi:[0,12],modul:[0,1,2,5,6,8,15,16],modular:7,mohammad:19,monolingu:3,more:[0,1,3,12,13,15,16],most:[12,16],mostli:8,move:[11,15],moving_averag:[8,15],much:15,multi:[0,1,2,10],multiheadedattent:[2,10],multilingu:[3,7],multipl:[0,2,3,8,10,15,16],multipli:2,multplic:0,must:[3,10,11,15],mymodul:6,n_batch:8,n_best:[11,12,16],n_bucket:15,n_correct:8,n_edge_typ:15,n_node:15,n_sampl:[4,13,15],n_seg:11,n_src_word:8,n_step:15,n_word:8,name:[0,3,5,12,13,15,17],namespac:11,napoleon:0,nccl:15,necessari:[0,4,6,8,12,15,16],necessit:3,need:[0,3,4,8,10,15,19],neg:[11,15],network:[10,19],neubig:19,neural:[7,10,12,19],neuro:19,never:12,news_commentari:4,next:[3,8,12,16],nfeat:10,ngram:[12,16],nightmar:3,niki:19,nlp:[7,18],nmt:[8,12,15,16],nmtmodel:[8,10],noam:[4,15,19],noamwd:15,node:[3,6,8,15],node_rank:15,nois:3,non:[10,12,15],none:[1,8,10,11,12,13,15,16],nonetyp:[10,12],nonzero_threshold:[13,15,16],norm:[10,15],norm_method:8,normal:[2,4,8,15],normalz:8,norouzi:19,note:[0,3,4,6,12],noth:[0,8],notset:[15,16],ntask:6,nucleu:16,num_lay:10,num_step:8,num_thread:13,number:[2,3,8,10,12,13,15,16],numer:[13,15,16],nvidia:15,obj:[0,8],object:[0,8,11,12,13,15,17],oder:3,off:15,ofi:6,often:[13,15,16],on_timemout:11,on_timeout:11,onc:[12,15],one2mani:1,one:[0,2,3,8,10,13,15,16],onli:[3,8,12,13,15,16],onmt:[0,8,10,11,12,15,17],onmt_build_vocab:4,onmt_token:[13,15,16],onmt_transl:4,onmttok:5,open:7,opennmt:[0,3,6,7,8,14],oper:[2,10],operatornam:2,opt:[4,8,10,11,15,16],opt_level:15,optim:[4,5],option:[0,3,4,6,8,10,11,12,13,15,16,17],opu:5,opus100:[17,18],ord:19,order:[3,15],org:[2,6,7,15,16,19],origin:[2,15,17],oriol:19,other:[2,6,8,12,13,15,16,17,19],other_lang:17,otherwis:[3,10,15,16],our:[6,12],our_stat:8,out:[2,3,7,8,18],out_channel:10,out_config:3,out_fil:12,outcom:2,output:[1,2,3,4,8,10,11,12,13,15,16],output_model:16,output_s:10,over:[0,1,3,4,8,10,12,15,16,17],overal:[1,2],overrid:[10,12,13,15],overridden:10,overview:5,overwrit:[6,13,15],own:[8,16],ownership:8,p17:7,p18:15,packag:[6,11],pad:[8,10,12],pad_idx:10,pair:[3,8,11,15,16,17],paper:[0,2,10,15],parallel:[10,12,13,15],parallel_path:12,parallelcorpu:12,param:8,param_init:[4,15],param_init_glorot:[4,15],paramet:[1,4,8,10,11,12,13,15,16],parameter:10,parenthes:0,parmar:19,pars:[10,11],parse_opt:11,part:[1,2,12],partial:1,particular:[0,3,10],partit:6,pass:[2,3,8,10,11,15],past:[0,15],path:[3,6,10,11,12,13,15,16],path_src:4,path_tgt:4,patienc:8,pattern:[3,13,15,16],pdf:15,pen:12,penalti:[5,12,13,15],penaltybuild:12,peopl:6,per:[0,3,13,15,16],perceiv:[2,15],perceiverattentionbridgelay:[1,5],percentag:[13,15,16],perfom:15,perform:[2,10,15],permut:[13,15,16],permute_sent_ratio:[13,15,16],perplex:8,peter:19,pfs:6,pham:19,phrase_t:[12,16],piec:4,pip3:[6,7,18],pip:[0,6],pipelin:[13,15,16],pleas:[0,7],plu:15,point:19,pointer:[10,19],poisson:[13,15,16],poisson_lambda:[13,15,16],polosukhin:19,polyak_decai:10,pool:[1,10,15],port:[14,15],portal:7,pos_ffn_activation_fn:[10,15],posit:[10,15],position_encod:[10,15],position_ffn:10,positionalencod:10,positionwisefeedforward:[10,15],possibl:[3,8,10,11,12,13,15,16],postprocess:11,postprocess_opt:11,potenti:12,pouta:17,ppl:8,pproach:19,pre:[8,11,12],pre_word_vecs_dec:15,pre_word_vecs_enc:15,preced:3,precis:8,pred:16,pred_scor:12,pred_sent:12,predict:[8,12,16],prefer:0,prefix:[3,8,13,15,16],prefix_seq_len:12,preliminari:4,preload:11,preload_model:11,prepar:[5,12],prepare_wmt_data:4,preprint:19,preprocess:11,preprocess_opt:11,presenc:3,presum:12,pretrain:[10,15],prevent:[12,16],previou:[2,3,10,12],previous:2,primari:3,prime:2,print:[8,15,16],prior:4,prior_token:[13,15,16],prob:12,proba:16,probabl:[10,12,13,15,16],probil:10,problem:12,proc:[7,19],procedur:3,process:[2,8,11,13,15],processu:11,produc:[1,2,12,13,15,16],product:2,projappl:6,project:[0,1,2,6,7,10],project_2005099:6,project_462000125:6,propag:8,proper:11,properli:6,properti:[8,10],proport:[3,13,15,16],provid:[7,16],prune:5,pty:6,pull_request_chk:0,punct_threshold:[13,15,16],punctuat:[0,13,15,16],push:1,put:12,pwd:17,pyonmttok:[13,15,16],python3:[3,6],python:[0,3,6,15],pythonpath:6,pythonuserbas:6,pytorch:[0,6,10],qin:19,quantiz:16,queri:10,query_len:10,question:5,queue:[13,15],queue_siz:[4,15],quickstart:[5,7],quoc:19,quot:0,rais:[13,15],random:[5,13,15],random_ratio:[13,15,16],random_sampling_temp:[12,16],random_sampling_topk:[12,16],random_sampling_topp:[12,16],randomli:12,rang:16,rank:[12,15],ranslat:19,rare:12,rate:[5,8],rather:0,ratio:[12,13,15,16],raw:[10,12,16],rccl:6,reach:12,read:[0,3,11,17],readabl:[0,3],reader:5,readm:15,rebuild:11,rebuild_seg_packag:11,receiv:3,recent:15,recip:10,recommend:15,recommonmark:0,rectifi:2,recurr:10,redund:3,ref:0,refer:[0,2,5],regardless:3,regist:10,regular:[13,15,16],rel:[10,15],relat:[4,13,15,16],relationship:2,relev:[10,12],relu:[2,10,15],rememb:0,remov:[3,13,15,16],renorm:15,reorder:12,rep_max_len:[13,15,16],rep_min_len:[13,15,16],rep_threshold:[13,15,16],repeat:[12,13,15,16],repetit:16,replac:[1,12,13,15,16],replace_length:[13,15,16],replace_unk:[12,16],replic:10,report:[7,8,15,16],report_align:[12,16],report_everi:[4,15],report_manag:8,report_scor:12,report_stats_from_paramet:[8,15],report_tim:[12,16],reportmgrbas:8,repres:[2,8],represent:[1,2,10,15,19],reproduc:5,requir:[0,8,10,15],research:7,reset:8,reset_optim:15,resett:15,residu:10,resourc:3,respect:[2,3],respons:8,rest:14,restrict:[13,15,16],result:[2,11,15],return_attent:12,return_hidden:10,reus:[1,10,15],reuse_copy_attn:[10,15],revers:[13,15,16],reversible_token:[13,15,16],rico:19,right:[0,2],rmsnorm:15,rnn:[8,10,15,19],rnn_dropout:10,rnn_size:[4,15],rnn_type:[10,15],rnndecoderbas:10,rnnencod:10,roblem:19,rocm5:6,rocm:6,root:[2,3],rotat:[13,15,16],rotate_ratio:[13,15,16],roundrobin:15,row:3,rsqrt:15,rst:0,run:[0,3,4,8,10,11,15,16],rush:7,sacrebleu:[4,6,7,18],sai:3,samantao:6,same:[0,3,4,10,11,15],sampl:[5,12,13,15,17],sample_with_temperatur:12,sampling_temp:12,saniti:16,save:[5,8,13,15,16,17],save_all_gpu:15,save_checkpoint_step:[4,8,15],save_config:[13,15,16],save_data:[4,13,15],save_model:[4,15],saver:8,scale:[10,12,15],schedul:[8,15],schuster:19,score:[5,10,11,13,15,16],scorer:12,scratch:6,script:[0,4,5,6],search:[0,3,5,12],second:[2,3,10,11],secur:[13,15],see:[3,10,11,12,13,15,19],seed:[4,12,13,15,16],seem:1,seemingli:15,seen:2,segment:[3,11,13,15,16],select:[10,12,15],select_index:12,self:[1,2,10,11,12,15],self_attn_typ:[10,15],send:[0,15],senellart:7,sennrich:19,sensibl:0,sent:[8,15,16],sent_numb:12,sentenc:[1,12,13,15,16,17],sentencepiec:[3,4,6,7,13,15,16,18],separ:[3,10],seper:11,seq2seq:[12,15],seq:12,seq_len:[2,10,12],seqlength:10,sequenc:[1,2,3,8,10,11,12,13,15,16,19],serial:10,serv:2,server:[5,15,17],servermodel:11,servermodelerror:11,session:6,set:[2,3,4,6,8,10,11,12,13,15,16],setup:[4,10],sever:[3,10,12],sgd:15,sh16:[10,19],shape:[0,1,10,12],shard:[8,15,16],shard_siz:[8,16],share:[1,6,13,15,16],share_decoder_embed:[4,15],share_embed:[4,15],share_vocab:[13,15],shazeer:19,shortest:12,shot:3,should:[3,4,10,12,15],shuf:17,shuffle_input_sent:17,side:[3,8,11,13,15,16],side_a:3,side_b:3,sign:[13,15,16],silent:[4,10,13,15],similar:[2,3,10,15],simpl:[2,8,15],simpleattentionbridgelay:5,simpli:10,simulatan:10,sin:15,sinc:10,singl:[0,11,15],single_pass:15,sinusoid:10,site:6,size:[3,8,10,12,13,15,16,17],skip:[3,13,15],skip_embed:10,skip_empty_level:[4,13,15],slen:10,slm17:[10,19],slow:[13,16],slurm:[3,6],smaller:[13,15,16],smooth:[13,15,16],softmax:[1,2,10,15,16],some:[0,1,3,8,16],someth:0,sometim:0,sort:[11,17],sorted_pair:3,sourc:[0,1,3,5,6,7,8,10,11,12,13,15],sp_path:17,space:[0,2,15],spacer:[13,15,16],span:[13,15,16],spars:10,sparseadam:15,sparsemax:[10,15],sparsesoftmax:1,specif:[1,2,3,7,12,13,15,18],specifi:[2,10,13,15,16],sphinx:0,sphinx_rtd_them:0,sphinxcontrib:0,spill:0,spm_decod:4,spm_encod:[4,17],spm_train:17,sqrt:2,squar:[2,3],src:[3,4,8,10,11,12,13,15,16,17],src_embed:15,src_feat:16,src_feats_vocab:[13,15],src_file_path:12,src_ggnn_size:15,src_group:3,src_lang:[3,16],src_languag:3,src_len:[8,10],src_length:12,src_map:[10,12],src_onmttok_kwarg:[13,15,16],src_raw:12,src_seq_length:[4,13,15,16],src_seq_length_trunc:15,src_subword_alpha:[4,13,15,16],src_subword_model:[4,13,15,16],src_subword_nbest:[4,13,15,16],src_subword_typ:[13,15,16],src_subword_vocab:[13,15,16],src_vocab:[4,12,13,15],src_vocab_s:15,src_vocab_threshold:[13,15,16],src_word_vec_s:15,src_words_min_frequ:15,sru:[5,15],srun:6,stabl:2,stack:[1,10,15,16],stage:2,stand:0,standard:[10,15,16],start:[3,5,6,8,11,15,17],start_decay_step:15,stat:[8,15],stat_list:8,state:[8,10,12,15],state_dict:15,state_dim:15,statist:[8,15],stdout:8,stdrnndecod:10,step:[2,3,5,8,10,12,15,16],stepwis:10,stepwise_penalti:[12,16],still:0,stop:[13,15,16],store:15,str:[0,8,10,11,12],strategi:[5,8,15],stride:10,string:[8,10,13,15,16],structur:[1,2,5,19],structured_attent:10,style:[0,10,15],styleguid:0,subclass:[8,10,12],subcompon:3,subdirectori:6,subsequ:2,subset:17,substitut:3,substr:[13,15,16],subword:[3,5],suggest:15,sum:[8,10,12,15],sum_:10,sume:8,summar:19,summari:[0,12,16],superclass:0,supervis:[3,10,15],support:[0,3,10,15],suppos:17,sure:[6,12],sutskev:19,switchout:[5,19],switchout_temperatur:[13,15,16],symmetr:3,system:[12,15,19],tab:[13,15],tabl:[10,16],take:[2,3,7,10,13,15,16],taken:10,tangent:2,tanh:[2,10],tao:19,taolei87:10,tar:17,target:[3,5,8,10,11,12,13,15],target_prefix:12,task:[3,4,5,8,12],task_distribution_strategi:15,task_queue_manag:8,tatoeba:[3,5],tau:[13,15,16],technic:7,temperatur:[3,12,13,15,16],templat:3,tend:1,tensor:[0,8,10,12],tensorboard:[8,15],tensorboard_log_dir:15,tensorflow:15,term:[2,10],termin:[13,15,16],test:[0,4,6,10],testset:4,text:[8,10,12,15,16,19],tgt:[3,4,8,10,11,13,15,16],tgt_dict:10,tgt_embed:15,tgt_file_path:12,tgt_group:3,tgt_lang:[3,16],tgt_languag:3,tgt_len:[8,10],tgt_onmttok_kwarg:[13,15,16],tgt_pad_mask:10,tgt_prefix:[12,16],tgt_sent:12,tgt_seq_length:[4,13,15,16],tgt_seq_length_trunc:15,tgt_subword_alpha:[4,13,15,16],tgt_subword_model:[4,13,15,16],tgt_subword_nbest:[4,13,15,16],tgt_subword_typ:[13,15,16],tgt_subword_vocab:[13,15,16],tgt_vocab:[4,8,13,15],tgt_vocab_s:15,tgt_vocab_threshold:[13,15,16],tgt_word_vec_s:15,tgt_words_min_frequ:15,than:[0,1,12,15,17],thang:19,thant:12,thei:[2,10,12],them:[3,10],theorem:10,thi:[0,2,3,4,6,7,8,10,12,13,15,16],thin:8,thing:[0,3],thoroughli:10,thread:13,three:[2,10],threshold:[13,15,16],through:[2,3,8],thu:8,tic:0,tick:0,time:[2,3,6,8,12,13,15,16],timeout:11,timer:11,titl:7,tlen:10,to_cpu:11,to_gpu:11,todo:[6,10,17],tok:11,token:[4,8,10,11,12,13,15,16],token_drop:5,token_mask:5,tokendrop:[13,15,16],tokendrop_temperatur:[13,15,16],tokenizer_mark:11,tokenizer_opt:11,tokenmask:[13,15,16],tokenmask_temperatur:[13,15,16],too:12,tool:5,toolkit:7,top:[2,10,12,16],topk_id:12,topk_scor:12,torch:[0,6,8,10,15],torchtext:8,total:[3,8,15],trail:0,train:[3,5,6,7,8,10,19],train_extremely_large_corpu:17,train_from:15,train_it:8,train_loss:8,train_loss_md:8,train_step:[4,8,15],trainabl:[1,8],trainer:5,training_step:8,transform:[1,2,4,5,8,19],transformer_ff:[4,15],transformer_lm:15,transformerattentionbridgelay:5,transformerdecod:10,transformerdecoderbas:10,transformerencod:10,transformerencoderlay:2,translat:[1,3,5,7,8,10,11,14,19],translate_batch:12,translation_serv:11,translationbuild:12,translationserv:11,transpos:1,travi:0,tree:10,trg:3,triang:3,trick:[5,10],trivial:10,trunc_siz:8,truncat:[8,15],truncated_decod:15,trust:17,ttention:19,turn:[10,15],tutori:[5,18],two:[2,3,10],txt:[0,16,17],type:[0,2,3,5,8,10,11,12,13,16],typic:[8,15],u_a:10,under:[3,15,16],undergo:2,undergon:2,underli:12,uniform:15,unigram:[13,15,16],union:0,unit:[2,10],unittest:0,unk:[12,16],unknown:12,unless:3,unload:11,unload_model:11,unmodifi:12,unnecessari:[0,1,3],unnorm:10,unset:3,unsqueez:1,until:[12,16],unwieldli:3,updat:[6,8,11,12,15],update_dropout:10,update_finish:12,update_learning_r:15,update_n_src_word:8,update_vocab:15,upgrad:6,upper:3,url:[6,7,19],url_root:14,usag:[5,13,14,15,16],use:[0,2,3,4,6,8,10,11,12,13,15,16,17],use_bridg:10,use_relu:10,use_tanh:10,used:[1,2,3,4,8,10,11,12,13,15,16],useful:8,user:[6,8,10,11],uses:[0,3,10,12,15],using:[0,2,3,7,10,11,12,13,15,16],uszkoreit:19,util:[2,8],v11:4,v_a:10,valid:[4,8,13,15,16],valid_batch_s:[4,15],valid_it:8,valid_loss:8,valid_loss_md:8,valid_step:[4,8,15],valu:[2,3,8,10,11,12,13,15,16],variabl:[3,6,12],variat:0,vaswani:19,vaswanispujgkp17:0,vector:[10,15],venv:6,verbos:[12,15,16],veri:[0,16],version:[10,11,12],via:[1,10,15,19],view:1,vinyal:19,virtual:6,visit:0,visual:15,vocab:[4,5,8,10,12],vocab_path:[13,15],vocab_s:[12,15,17],vocab_sample_queue_s:13,vocab_size_multipl:15,vocabulari:[3,8,10,13,15,16,17],vsp:[10,19],w_a:10,wai:[3,12],wait:3,wang:19,want:[3,16],warmup:15,warmup_step:[4,15],warn:[13,15,16],weight:[2,3,4,10,15,16],weight_decai:15,weighted_sampl:15,weightnormconv2d:10,well:[0,15],wget:17,what:[3,5,8,11],when:[0,3,7,10,12,13,15,16,17],where:[1,2,4,6,10,12,13,15,16],wherea:[12,15],whether:[8,10,11,12,13,15,16],which:[1,3,10,12,15],whl:6,whole:[4,12],whose:16,why:2,wiki:15,wikipedia:15,window:[13,15,16],wise:2,with_align:8,within:[2,10,11],without:[0,10,15],wmt14_en_d:4,wmt:4,wmtend:4,wojciech:19,wolfgang:19,word2vec:15,word:[2,10,12,13,15,16],word_align:12,word_lut:10,word_padding_idx:10,word_ratio_threshold:[13,15,16],word_vec_s:[4,10,15],word_vocab_s:10,work:[0,3,12,15],workflow:7,world_siz:[4,15],would:[3,12,15],wpdn18:[13,15,16,19],wrap:11,wrapper:8,writabl:3,write:[3,8],writer:8,written:4,wsc:[12,19],www:15,xavier_uniform:15,xent:8,xinyi:19,xiong:19,xzvf:17,yaml:[4,13,15,16],yang:19,yann:19,yarat:19,year:7,yet:[10,12],yml:0,yoav:19,yonghui:19,yoon:7,yoshua:19,you:[0,3,4,6,10,15,16,19],your:[0,3,6,16,17],your_venv_nam:6,your_vevn_nam:6,yourself:7,yuan:19,yuntian:7,zaremba:19,zero:[3,8,10,12,13,15,16],zero_grad:8,zhang:19,zhifeng:19,zihang:19,zxs18:[10,19]},titles:["Contributors","Questions","Attention Bridge","Config-config tool","Translation","Contents","Installation","Overview","Framework","Data Loaders","Modules","Server","Translation","Build Vocab","Server","Train","Translate","Prepare Data","Quickstart","References"],titleterms:{"class":12,The:3,actual:3,adapt:[3,15],adapter_config:3,ae_path:3,ae_transform:3,align:15,allocate_devic:3,altern:3,architectur:10,argument:14,attent:[2,10,15],autoencod:3,bank:1,bart:[13,15,16],beam:16,behind:1,between:1,bridg:[2,15],build:[4,13],challeng:17,citat:7,cluster_languag:3,command:3,common:[13,15,16],complete_language_pair:3,config:3,config_al:3,config_config:3,configur:[13,15,16],content:5,contributor:0,conv2conv:10,copi:10,core:[10,11],corpora:3,corpora_schedul:3,data:[4,9,13,15,16,17,18],dataset:9,dec_sharing_group:3,decod:[1,10,12,15,16],differ:1,direct:17,distanc:3,distance_matrix:3,docstr:0,download:[4,17],dynam:15,effici:16,embed:15,enc_sharing_group:3,encod:[1,10,15],encoder_output:1,evalu:4,featur:15,feedforwardattentionbridgelay:2,filter:[13,15,16],fix:1,framework:8,gener:[1,15],get:17,group:3,guidelin:0,inferfeat:[13,15,16],initi:15,input:3,instal:[6,7,18],intermediate_output:1,intuit:1,kei:3,languag:[3,16],length:1,level:3,linattentionbridgelay:2,line:3,loader:9,log:[15,16],loss:8,lumi:6,mahti:6,mammoth:18,manual:3,matrix:3,memori:1,model:[1,4,8,11,15,16,17],modul:10,n_gpus_per_nod:3,n_group:3,n_node:3,name:14,need:1,onmttok:[13,15,16],optim:[8,15],opu:17,other:3,overrid:3,overview:7,paramet:3,pars:17,path:17,penalti:16,perceiverattentionbridgelay:2,prepar:[4,17,18],prune:15,puhti:6,question:1,quickstart:18,random:16,rate:15,reader:9,refer:19,relev:17,remove_temporary_kei:3,reproduc:[13,15,16],run:6,sampl:16,save:1,score:12,search:16,sentencepiec:17,separ:1,server:[11,14],set:17,set_transform:3,share:3,sharing_group:3,shot:17,simpleattentionbridgelay:2,sourc:16,specifi:3,src_path:3,sru:10,stage:3,step:[4,17,18],strategi:12,structur:10,subword:[4,13,15,16],supervis:17,switchout:[13,15,16],target:16,task:15,tatoeba:17,test:17,tgt_path:3,than:3,token_drop:[13,15,16],token_mask:[13,15,16],tool:3,top:3,train:[4,15,17],trainer:8,transform:[3,10,13,15,16],transformerattentionbridgelay:2,translat:[4,12,16,17],translation_config:3,translation_config_dir:3,trick:16,type:15,usag:3,use_introduce_at_training_step:3,use_weight:3,valid:17,variabl:17,vocab:[13,15,17],vocabulari:4,what:1,why:1,yaml:3,zero:17,zero_shot:3}})
\ No newline at end of file