From 14b9fed291bdfaa38313646374051449714cf1b4 Mon Sep 17 00:00:00 2001
From: Mickus Timothee
Date: Mon, 25 Sep 2023 17:03:46 +0300
Subject: [PATCH] normalize or die

---
 mammoth/bin/average_models.py | 12 +-
 mammoth/bin/build_vocab.py | 4 +-
 mammoth/bin/release_model.py | 14 +-
 mammoth/bin/server.py | 4 +-
 mammoth/bin/train.py | 146 +++++++--------
 mammoth/bin/translate.py | 54 +++---
 mammoth/distributed/communication.py | 26 +--
 mammoth/distributed/contexts.py | 18 +-
 mammoth/distributed/tasks.py | 60 +++----
 mammoth/inputters/dataloader.py | 4 +-
 mammoth/inputters/dataset.py | 6 +-
 mammoth/model_builder.py | 118 ++++++-------
 mammoth/models/model_saver.py | 8 +-
 mammoth/modules/attention_bridge.py | 74 ++++----
 mammoth/modules/decoder.py | 2 +-
 mammoth/modules/decoder_ensemble.py | 8 +-
 mammoth/modules/embeddings.py | 46 ++---
 mammoth/modules/encoder.py | 2 +-
 mammoth/modules/layer_stack_decoder.py | 38 ++--
 mammoth/modules/layer_stack_encoder.py | 26 +--
 mammoth/modules/mean_encoder.py | 4 +-
 mammoth/modules/transformer_decoder.py | 32 ++--
 mammoth/modules/transformer_encoder.py | 20 +--
 mammoth/opts.py | 10 +-
 mammoth/tests/test_data_prepare.py | 28 +--
 mammoth/tests/test_models.py | 54 +++---
 .../tests/test_task_distribution_strategy.py | 16 +-
 mammoth/tests/test_task_queue_manager.py | 54 +++---
 mammoth/tests/test_transform.py | 72 ++++----
 mammoth/tests/test_translation_server.py | 40 ++---
 mammoth/train_single.py | 61 +++----
 mammoth/trainer.py | 42 ++---
 mammoth/transforms/misc.py | 2 +-
 mammoth/translate/beam_search.py | 4 +-
 mammoth/translate/translation_server.py | 120 ++++++-------
 mammoth/translate/translator.py | 78 ++++----
 mammoth/utils/earlystopping.py | 6 +-
 mammoth/utils/loss.py | 44 ++---
 mammoth/utils/misc.py | 4 +-
 mammoth/utils/module_splitter.py | 2 +-
 mammoth/utils/optimizers.py | 106 +++++------
 mammoth/utils/parse.py | 166 +++++++++---------
 mammoth/utils/report_manager.py | 12 +-
 mammoth/utils/rnn_factory.py | 17 --
 test_communication/test.py | 136 +++++++-------
 tools/embeddings_to_torch.py | 32 ++--
 tools/extract_embeddings.py | 20 +--
 tools/extract_vocabulary.py | 18 +-
 48 files changed, 929 insertions(+), 941 deletions(-)
 delete mode 100644 mammoth/utils/rnn_factory.py

diff --git a/mammoth/bin/average_models.py b/mammoth/bin/average_models.py
index d9c09875..417b2c6c 100755
--- a/mammoth/bin/average_models.py
+++ b/mammoth/bin/average_models.py
@@ -5,7 +5,7 @@
 
 def average_models(model_files, fp32=False):
     vocab = None
-    opt = None
+    opts = None
     avg_model = None
     avg_generator = None
 
@@ -21,7 +21,7 @@
             generator_weights[k] = v.float()
 
         if i == 0:
-            vocab, opt = m['vocab'], m['opt']
+            vocab, opts = m['vocab'], m['opts']
             avg_model = model_weights
             avg_generator = generator_weights
         else:
@@ -31,7 +31,7 @@
             for (k, v) in avg_generator.items():
                 avg_generator[k].mul_(i).add_(generator_weights[k]).div_(i + 1)
 
-    final = {"vocab": vocab, "opt": opt, "optim": None, "generator": avg_generator, "model": avg_model}
+    final = {"vocab": vocab, "opts": opts, "optim": None, "generator": avg_generator, "model": avg_model}
 
     return final
 
@@ -40,10 +40,10 @@ def main():
     parser.add_argument("-models", "-m", nargs="+", required=True, help="List of models")
     parser.add_argument("-output", "-o", required=True, help="Output file")
     parser.add_argument("-fp32", "-f", action="store_true", help="Cast params to float32")
-    opt = parser.parse_args()
+    opts = parser.parse_args()
 
-    final =
average_models(opt.models, opt.fp32) - torch.save(final, opt.output) + final = average_models(opts.models, opts.fp32) + torch.save(final, opts.output) if __name__ == "__main__": diff --git a/mammoth/bin/build_vocab.py b/mammoth/bin/build_vocab.py index 0be0ecda..37997616 100644 --- a/mammoth/bin/build_vocab.py +++ b/mammoth/bin/build_vocab.py @@ -32,8 +32,8 @@ def build_vocab_main(opts): src_counters_by_lang = defaultdict(Counter) tgt_counters_by_lang = defaultdict(Counter) - for corpus_id in opts.data: - lang_pair = opts.data[corpus_id]['src_tgt'] + for corpus_id in opts.tasks: + lang_pair = opts.tasks[corpus_id]['src_tgt'] src_lang, tgt_lang = lang_pair.split('-') task = TaskSpecs( node_rank=None, diff --git a/mammoth/bin/release_model.py b/mammoth/bin/release_model.py index 7adcf93c..354341da 100755 --- a/mammoth/bin/release_model.py +++ b/mammoth/bin/release_model.py @@ -17,19 +17,19 @@ def main(): default=None, help="Quantization type for CT2 model.", ) - opt = parser.parse_args() + opts = parser.parse_args() - model = torch.load(opt.model, map_location=torch.device("cpu")) - if opt.format == "pytorch": + model = torch.load(opts.model, map_location=torch.device("cpu")) + if opts.format == "pytorch": model["optim"] = None - torch.save(model, opt.output) - elif opt.format == "ctranslate2": + torch.save(model, opts.output) + elif opts.format == "ctranslate2": import ctranslate2 if not hasattr(ctranslate2, "__version__"): raise RuntimeError("onmt_release_model script requires ctranslate2 >= 2.0.0") - converter = ctranslate2.converters.OpenNMTPyConverter(opt.model) - converter.convert(opt.output, force=True, quantization=opt.quantization) + converter = ctranslate2.converters.OpenNMTPyConverter(opts.model) + converter.convert(opts.output, force=True, quantization=opts.quantization) if __name__ == "__main__": diff --git a/mammoth/bin/server.py b/mammoth/bin/server.py index a2e52e99..7dae4712 100755 --- a/mammoth/bin/server.py +++ b/mammoth/bin/server.py @@ -50,9 +50,9 @@ def clone_model(model_id): timeout = data['timeout'] del data['timeout'] - opt = data.get('opt', None) + opts = data.get('opts', None) try: - model_id, load_time = translation_server.clone_model(model_id, opt, timeout) + model_id, load_time = translation_server.clone_model(model_id, opts, timeout) except ServerModelError as e: out['status'] = STATUS_ERROR out['error'] = str(e) diff --git a/mammoth/bin/train.py b/mammoth/bin/train.py index 8779edcf..4c1d9072 100644 --- a/mammoth/bin/train.py +++ b/mammoth/bin/train.py @@ -32,60 +32,60 @@ torch.multiprocessing.set_sharing_strategy('file_system') -# def prepare_fields_transforms(opt): +# def prepare_fields_transforms(opts): # """Prepare or dump fields & transforms before training.""" -# transforms_cls = get_transforms_cls(opt._all_transform) -# specials = get_specials(opt, transforms_cls) +# transforms_cls = get_transforms_cls(opts._all_transform) +# specials = get_specials(opts, transforms_cls) # -# fields = build_dynamic_fields(opt, src_specials=specials['src'], tgt_specials=specials['tgt']) +# fields = build_dynamic_fields(opts, src_specials=specials['src'], tgt_specials=specials['tgt']) # # # maybe prepare pretrained embeddings, if any -# prepare_pretrained_embeddings(opt, fields) +# prepare_pretrained_embeddings(opts, fields) # -# if opt.dump_fields: -# save_fields(fields, opt.save_data, overwrite=opt.overwrite) -# if opt.dump_transforms or opt.n_sample != 0: -# transforms = make_transforms(opt, transforms_cls, fields) -# if opt.dump_transforms: -# 
save_transforms(transforms, opt.save_data, overwrite=opt.overwrite) -# if opt.n_sample != 0: +# if opts.dump_fields: +# save_fields(fields, opts.save_data, overwrite=opts.overwrite) +# if opts.dump_transforms or opts.n_sample != 0: +# transforms = make_transforms(opts, transforms_cls, fields) +# if opts.dump_transforms: +# save_transforms(transforms, opts.save_data, overwrite=opts.overwrite) +# if opts.n_sample != 0: # logger.warning( -# f"`-n_sample` != 0: Training will not be started. Stop after saving {opt.n_sample} samples/corpus." +# f"`-n_sample` != 0: Training will not be started. Stop after saving {opts.n_sample} samples/corpus." # ) -# save_transformed_sample(opt, transforms, n_sample=opt.n_sample) +# save_transformed_sample(opts, transforms, n_sample=opts.n_sample) # logger.info("Sample saved, please check it before restart training.") # sys.exit() # return fields, transforms_cls # TODO: reimplement save_transformed_sample -def _init_train(opt): +def _init_train(opts): """Common initilization stuff for all training process.""" - ArgumentParser.validate_prepare_opts(opt) + ArgumentParser.validate_prepare_opts(opts) - if opt.train_from: + if opts.train_from: # Load checkpoint if we resume from a previous training. - checkpoint = load_checkpoint(ckpt_path=opt.train_from) - # fields = load_fields(opt.save_data, checkpoint) - transforms_cls = get_transforms_cls(opt._all_transform) + checkpoint = load_checkpoint(ckpt_path=opts.train_from) + # fields = load_fields(opts.save_data, checkpoint) + transforms_cls = get_transforms_cls(opts._all_transform) if ( - hasattr(checkpoint["opt"], '_all_transform') - and len(opt._all_transform.symmetric_difference(checkpoint["opt"]._all_transform)) != 0 + hasattr(checkpoint["opts"], '_all_transform') + and len(opts._all_transform.symmetric_difference(checkpoint["opts"]._all_transform)) != 0 ): _msg = "configured transforms is different from checkpoint:" - new_transf = opt._all_transform.difference(checkpoint["opt"]._all_transform) - old_transf = checkpoint["opt"]._all_transform.difference(opt._all_transform) + new_transf = opts._all_transform.difference(checkpoint["opts"]._all_transform) + old_transf = checkpoint["opts"]._all_transform.difference(opts._all_transform) if len(new_transf) != 0: _msg += f" +{new_transf}" if len(old_transf) != 0: _msg += f" -{old_transf}." 
logger.warning(_msg) - if opt.update_vocab: + if opts.update_vocab: logger.info("Updating checkpoint vocabulary with new vocabulary") - # fields, transforms_cls = prepare_fields_transforms(opt) + # fields, transforms_cls = prepare_fields_transforms(opts) else: checkpoint = None - # fields, transforms_cls = prepare_fields_transforms(opt) + # fields, transforms_cls = prepare_fields_transforms(opts) # Report src and tgt vocab sizes # for side in ['src', 'tgt']: @@ -100,24 +100,24 @@ def _init_train(opt): return checkpoint, None, transforms_cls -# def init_train_prepare_fields_transforms(opt, vocab_path, side): +# def init_train_prepare_fields_transforms(opts, vocab_path, side): # """Prepare or dump fields & transforms before training.""" # -# fields = None # build_dynamic_fields_langspec(opt, vocab_path, side) -# transforms_cls = get_transforms_cls(opt._all_transform) -# # TODO: maybe prepare pretrained embeddings, if any, with `prepare_pretrained_embeddings(opt, fields)` +# fields = None # build_dynamic_fields_langspec(opts, vocab_path, side) +# transforms_cls = get_transforms_cls(opts._all_transform) +# # TODO: maybe prepare pretrained embeddings, if any, with `prepare_pretrained_embeddings(opts, fields)` # -# # if opt.dump_fields: -# # save_fields(fields, opt.save_data, overwrite=opt.overwrite) -# if opt.dump_transforms or opt.n_sample != 0: -# transforms = make_transforms(opt, transforms_cls, fields) -# if opt.dump_transforms: -# save_transforms(transforms, opt.save_data, overwrite=opt.overwrite) -# if opt.n_sample != 0: +# # if opts.dump_fields: +# # save_fields(fields, opts.save_data, overwrite=opts.overwrite) +# if opts.dump_transforms or opts.n_sample != 0: +# transforms = make_transforms(opts, transforms_cls, fields) +# if opts.dump_transforms: +# save_transforms(transforms, opts.save_data, overwrite=opts.overwrite) +# if opts.n_sample != 0: # logger.warning( -# f"`-n_sample` != 0: Training will not be started. Stop after saving {opt.n_sample} samples/corpus." +# f"`-n_sample` != 0: Training will not be started. Stop after saving {opts.n_sample} samples/corpus." 
# ) -# save_transformed_sample(opt, transforms, n_sample=opt.n_sample) +# save_transformed_sample(opts, transforms, n_sample=opts.n_sample) # logger.info("Sample saved, please check it before restart training.") # sys.exit() # @@ -127,7 +127,7 @@ def _init_train(opt): # return fields -def validate_slurm_node_opts(current_env, world_context, opt): +def validate_slurm_node_opts(current_env, world_context, opts): """If you are using slurm, confirm that opts match slurm environment variables""" slurm_n_nodes = int(current_env['SLURM_NNODES']) if slurm_n_nodes != world_context.n_nodes: @@ -136,35 +136,35 @@ def validate_slurm_node_opts(current_env, world_context, opt): f'but set n_nodes to {world_context.n_nodes} in the conf' ) slurm_node_id = int(current_env['SLURM_NODEID']) - if slurm_node_id != opt.node_rank: + if slurm_node_id != opts.node_rank: raise ValueError( f'Looks like you are running on slurm node {slurm_node_id}, ' - f'but set node_rank to {opt.node_rank} on the command line' + f'but set node_rank to {opts.node_rank} on the command line' ) -def train(opt): - init_logger(opt.log_file) - ArgumentParser.validate_train_opts(opt) - ArgumentParser.update_model_opts(opt) - ArgumentParser.validate_model_opts(opt) - ArgumentParser.validate_prepare_opts(opt) - set_random_seed(opt.seed, False) +def train(opts): + init_logger(opts.log_file) + ArgumentParser.validate_train_opts(opts) + ArgumentParser.update_model_opts(opts) + ArgumentParser.validate_model_opts(opts) + ArgumentParser.validate_prepare_opts(opts) + set_random_seed(opts.seed, False) # set PyTorch distributed related environment variables current_env = os.environ - current_env["WORLD_SIZE"] = str(opt.world_size) - world_context = WorldContext.from_opt(opt) + current_env["WORLD_SIZE"] = str(opts.world_size) + world_context = WorldContext.from_opts(opts) if 'SLURM_NNODES' in current_env: - validate_slurm_node_opts(current_env, world_context, opt) + validate_slurm_node_opts(current_env, world_context, opts) logger.info(f'Training on {world_context}') - opt.data_task = ModelTask.SEQ2SEQ + opts.data_task = ModelTask.SEQ2SEQ - transforms_cls = get_transforms_cls(opt._all_transform) + transforms_cls = get_transforms_cls(opts._all_transform) if transforms_cls: logger.info(f'All transforms: {transforms_cls}') - src_specials, tgt_specials = zip(*(cls.get_specials(opt) for cls in transforms_cls.values())) + src_specials, tgt_specials = zip(*(cls.get_specials(opts) for cls in transforms_cls.values())) all_specials = set(DEFAULT_SPECIALS) for special_group in src_specials + tgt_specials: all_specials = all_specials | special_group @@ -177,12 +177,12 @@ def train(opt): vocabs_dict = OrderedDict() # For creating fields, we use a task_queue_manager that doesn't filter by node and gpu - global_task_queue_manager = TaskQueueManager.from_opt(opt, world_context) + global_task_queue_manager = TaskQueueManager.from_opts(opts, world_context) - vocab_size = {'src': opt.src_vocab_size or None, 'tgt': opt.tgt_vocab_size or None} + vocab_size = {'src': opts.src_vocab_size or None, 'tgt': opts.tgt_vocab_size or None} for side in ('src', 'tgt'): for lang in global_task_queue_manager.get_langs(side): - vocab_path = opt.__getattribute__(f'{side}_vocab')[lang] + vocab_path = opts.__getattribute__(f'{side}_vocab')[lang] # FIXME: for now, all specials are passed to all vocabs, this could be finer-grained vocabs_dict[(side, lang)] = get_vocab(vocab_path, lang, vocab_size[side], specials=all_specials) # for key, val in fields_dict: @@ -193,14 +193,14 @@ def 
train(opt): logger.debug(f"[{os.getpid()}] Initializing process group with: {current_env}") if world_context.context == DeviceContextEnum.MULTI_GPU: - current_env["MASTER_ADDR"] = opt.master_ip - current_env["MASTER_PORT"] = str(opt.master_port) - node_rank = opt.node_rank + current_env["MASTER_ADDR"] = opts.master_ip + current_env["MASTER_PORT"] = str(opts.master_port) + node_rank = opts.node_rank queues = [] semaphores = [] mp = torch.multiprocessing.get_context('spawn') - logger.info("world_size = {}, queue_size = {}".format(opt.world_size, opt.queue_size)) + logger.info("world_size = {}, queue_size = {}".format(opts.world_size, opts.queue_size)) # Create a thread to listen for errors in the child processes. error_queue = mp.SimpleQueue() error_handler = ErrorHandler(error_queue) @@ -217,21 +217,21 @@ def train(opt): task_queue_manager = global_task_queue_manager.global_to_local( node_rank=node_rank, local_rank=local_rank, - opt=opt + opts=opts ) # store rank in env (FIXME: is this obsolete?) current_env["RANK"] = str(device_context.global_rank) current_env["LOCAL_RANK"] = str(device_context.local_rank) - q = mp.Queue(opt.queue_size) - semaphore = mp.Semaphore(opt.queue_size) + q = mp.Queue(opts.queue_size) + semaphore = mp.Semaphore(opts.queue_size) queues.append(q) semaphores.append(semaphore) procs.append( mp.Process( target=consumer, - args=(train_process, opt, device_context, error_queue, q, semaphore, task_queue_manager), + args=(train_process, opts, device_context, error_queue, q, semaphore, task_queue_manager), daemon=True, ) ) @@ -244,12 +244,12 @@ def train(opt): task_queue_manager=task_queue_manager, transforms_cls=transforms_cls, vocabs_dict=vocabs_dict, - opts=opt, + opts=opts, is_train=True, ) producer = mp.Process( - target=batch_producer, args=(train_iter, q, semaphore, opt, local_rank), daemon=True + target=batch_producer, args=(train_iter, q, semaphore, opts, local_rank), daemon=True ) producers.append(producer) producers[local_rank].start() @@ -272,9 +272,9 @@ def train(opt): task_queue_manager = global_task_queue_manager.global_to_local( node_rank=0, local_rank=0, - opt=opt + opts=opts ) - train_process(opt, device_context=device_context, task_queue_manager=task_queue_manager) + train_process(opts, device_context=device_context, task_queue_manager=task_queue_manager) def _get_parser(): @@ -286,8 +286,8 @@ def _get_parser(): def main(): parser = _get_parser() - opt, unknown = parser.parse_known_args() - train(opt) + opts, unknown = parser.parse_known_args() + train(opts) if __name__ == "__main__": diff --git a/mammoth/bin/translate.py b/mammoth/bin/translate.py index 380e92b0..8d0f7569 100644 --- a/mammoth/bin/translate.py +++ b/mammoth/bin/translate.py @@ -11,20 +11,20 @@ from mammoth.utils.parse import ArgumentParser -def translate(opt): - ArgumentParser.validate_translate_opts(opt) - ArgumentParser._get_all_transform_translate(opt) - ArgumentParser._validate_transforms_opts(opt) - ArgumentParser.validate_translate_opts_dynamic(opt) - logger = init_logger(opt.log_file) +def translate(opts): + ArgumentParser.validate_translate_opts(opts) + ArgumentParser._get_all_transform_translate(opts) + ArgumentParser._validate_transforms_opts(opts) + ArgumentParser.validate_translate_opts_dynamic(opts) + logger = init_logger(opts.log_file) encoder_adapter_ids = set() - for layer_stack_idx, stack in enumerate(opt.stack['encoder']): + for layer_stack_idx, stack in enumerate(opts.stack['encoder']): if 'adapters' in stack: for group_id, sub_id in stack['adapters']: 
encoder_adapter_ids.add((layer_stack_idx, group_id, sub_id)) decoder_adapter_ids = set() - for layer_stack_idx, stack in enumerate(opt.stack['decoder']): + for layer_stack_idx, stack in enumerate(opts.stack['decoder']): if 'adapters' in stack: for group_id, sub_id in stack['adapters']: decoder_adapter_ids.add((layer_stack_idx, group_id, sub_id)) @@ -36,10 +36,10 @@ def translate(opt): task = TaskSpecs( node_rank=None, local_rank=None, - src_lang=opt.src_lang, - tgt_lang=opt.tgt_lang, - encoder_id=[stack['id'] for stack in opt.stack['encoder']], - decoder_id=[stack['id'] for stack in opt.stack['decoder']], + src_lang=opts.src_lang, + tgt_lang=opts.tgt_lang, + encoder_id=[stack['id'] for stack in opts.stack['encoder']], + decoder_id=[stack['id'] for stack in opts.stack['decoder']], corpus_id='trans', weight=1, corpus_opt=dict(), @@ -49,23 +49,23 @@ def translate(opt): decoder_adapter_ids=decoder_adapter_ids, ) - translator = build_translator(opt, task, logger=logger, report_score=True) + translator = build_translator(opts, task, logger=logger, report_score=True) - # data_reader = InferenceDataReader(opt.src, opt.tgt, opt.src_feats) - src_shards = split_corpus(opt.src, opt.shard_size) - tgt_shards = split_corpus(opt.tgt, opt.shard_size) + # data_reader = InferenceDataReader(opts.src, opts.tgt, opts.src_feats) + src_shards = split_corpus(opts.src, opts.shard_size) + tgt_shards = split_corpus(opts.tgt, opts.shard_size) features_shards = [] features_names = [] - for feat_name, feat_path in opt.src_feats.items(): - features_shards.append(split_corpus(feat_path, opt.shard_size)) + for feat_name, feat_path in opts.src_feats.items(): + features_shards.append(split_corpus(feat_path, opts.shard_size)) features_names.append(feat_name) shard_pairs = zip(src_shards, tgt_shards, *features_shards) # Build transforms - transforms_cls = get_transforms_cls(opt._all_transform) - transforms = make_transforms(opt, transforms_cls, translator.vocabs, task=task) + transforms_cls = get_transforms_cls(opts._all_transform) + transforms = make_transforms(opts, transforms_cls, translator.vocabs, task=task) data_transform = [ - transforms[name] for name in opt.transforms if name in transforms + transforms[name] for name in opts.transforms if name in transforms ] transform = TransformPipe.build_from(data_transform) @@ -76,10 +76,10 @@ def translate(opt): transform=transform, # src_feats=feats_shard, # TODO: put me back in tgt=tgt_shard, - batch_size=opt.batch_size, - batch_type=opt.batch_type, - attn_debug=opt.attn_debug, - align_debug=opt.align_debug + batch_size=opts.batch_size, + batch_type=opts.batch_type, + attn_debug=opts.attn_debug, + align_debug=opts.align_debug ) @@ -95,8 +95,8 @@ def _get_parser(): def main(): parser = _get_parser() - opt = parser.parse_args() - translate(opt) + opts = parser.parse_args() + translate(opts) if __name__ == "__main__": diff --git a/mammoth/distributed/communication.py b/mammoth/distributed/communication.py index 27dcb4a2..687da4b9 100644 --- a/mammoth/distributed/communication.py +++ b/mammoth/distributed/communication.py @@ -11,12 +11,12 @@ from mammoth.utils.misc import set_random_seed -def multi_init(opt, global_rank): - dist_init_method = 'tcp://{master_ip}:{master_port}'.format(master_ip=opt.master_ip, master_port=opt.master_port) +def multi_init(opts, global_rank): + dist_init_method = 'tcp://{master_ip}:{master_port}'.format(master_ip=opts.master_ip, master_port=opts.master_port) - dist_world_size = opt.world_size + dist_world_size = opts.world_size 
torch.distributed.init_process_group( - backend=opt.gpu_backend, + backend=opts.gpu_backend, init_method=dist_init_method, rank=global_rank, world_size=dist_world_size, @@ -238,11 +238,11 @@ def signal_handler(self, signalnum, stackframe): raise Exception(msg) -def batch_producer(generator_to_serve, queue, semaphore, opt, device_id): +def batch_producer(generator_to_serve, queue, semaphore, opts, device_id): """Produce batches to `queues` from `generator_to_serve`.""" - log_level = "INFO" if opt.verbose or device_id == 0 else "WARNING" - init_logger(opt.log_file, log_level=log_level) - set_random_seed(opt.seed, False) + log_level = "INFO" if opts.verbose or device_id == 0 else "WARNING" + init_logger(opts.log_file, log_level=log_level) + set_random_seed(opts.seed, False) logger.info("BATCH PRODUCER") logger.info(generator_to_serve) @@ -254,7 +254,7 @@ def batch_producer(generator_to_serve, queue, semaphore, opt, device_id): queue.put((batch, metadata, communication_batch_id)) -def consumer(process_fn, opt, device_context, error_queue, batch_queue, semaphore, task_queue_manager): +def consumer(process_fn, opts, device_context, error_queue, batch_queue, semaphore, task_queue_manager): """Run `process_fn` on `device_id` with data from `batch_queue`.""" try: logger.info( @@ -262,11 +262,11 @@ def consumer(process_fn, opt, device_context, error_queue, batch_queue, semaphor f'node_rank {device_context.node_rank} ' f'local_rank {device_context.local_rank}' ) - logger.info(f'opt.gpu_ranks {opt.gpu_ranks}') - multi_init(opt, device_context.global_rank) + logger.info(f'opts.gpu_ranks {opts.gpu_ranks}') + multi_init(opts, device_context.global_rank) # error_queue not passed (is this intentional?) process_fn( - opt, + opts, device_context=device_context, batch_queue=batch_queue, semaphore=semaphore, @@ -279,4 +279,4 @@ def consumer(process_fn, opt, device_context, error_queue, batch_queue, semaphor # propagate exception to parent process, keeping original traceback import traceback - error_queue.put((opt.gpu_ranks[device_context.node_rank], traceback.format_exc())) + error_queue.put((opts.gpu_ranks[device_context.node_rank], traceback.format_exc())) diff --git a/mammoth/distributed/contexts.py b/mammoth/distributed/contexts.py index b13699c1..8a8e4241 100644 --- a/mammoth/distributed/contexts.py +++ b/mammoth/distributed/contexts.py @@ -48,32 +48,32 @@ def global_to_local(self, node_rank, local_rank): ) @classmethod - def from_opt(cls, opt): - gpus_per_node = len(opt.gpu_ranks) - world_size = int(opt.world_size) if gpus_per_node > 0 else 0 + def from_opts(cls, opts): + gpus_per_node = len(opts.gpu_ranks) + world_size = int(opts.world_size) if gpus_per_node > 0 else 0 multinode = gpus_per_node != world_size if world_size <= 0: # setting a non-positive world size means use CPU device_context_enum = DeviceContextEnum.CPU - if opt.n_nodes != 1: + if opts.n_nodes != 1: raise ValueError('CPU training is only possible on a single node') elif world_size == 1: # world size 1 uses GPU, but is not distributed device_context_enum = DeviceContextEnum.SINGLE_GPU - if opt.n_nodes != 1: + if opts.n_nodes != 1: raise ValueError( f'Invalid single-gpu node configuration: ' - f'n_nodes {opt.n_nodes} gpus_per_node {gpus_per_node} world_size {world_size}' + f'n_nodes {opts.n_nodes} gpus_per_node {gpus_per_node} world_size {world_size}' ) else: # world size > 1 - if multinode and opt.n_nodes == 1: + if multinode and opts.n_nodes == 1: raise ValueError( f'Invalid multi-node configuration: ' - f'n_nodes {opt.n_nodes} 
gpus_per_node {gpus_per_node} world_size {world_size}' + f'n_nodes {opts.n_nodes} gpus_per_node {gpus_per_node} world_size {world_size}' ) device_context_enum = DeviceContextEnum.MULTI_GPU - world_context = WorldContext(context=device_context_enum, n_nodes=opt.n_nodes, gpus_per_node=gpus_per_node) + world_context = WorldContext(context=device_context_enum, n_nodes=opts.n_nodes, gpus_per_node=gpus_per_node) return world_context diff --git a/mammoth/distributed/tasks.py b/mammoth/distributed/tasks.py index 82074630..b015fe37 100644 --- a/mammoth/distributed/tasks.py +++ b/mammoth/distributed/tasks.py @@ -25,7 +25,7 @@ def __init__(self, my_corpus_ids: List[str], **kwargs): @classmethod @abstractmethod - def from_opt(cls, my_corpus_ids: List[str], opt: dict): + def from_opts(cls, my_corpus_ids: List[str], opts: dict): pass @abstractmethod @@ -64,10 +64,10 @@ def __init__( raise ValueError('Invalid curriculum: no corpus is ready to start in the first step') @classmethod - def from_opt(cls, my_corpus_ids: List[str], opt: dict): - my_weights = [opt.data[corpus_id]['weight'] for corpus_id in my_corpus_ids] + def from_opts(cls, my_corpus_ids: List[str], opts: dict): + my_weights = [opts.tasks[corpus_id]['weight'] for corpus_id in my_corpus_ids] my_introduce_at_training_step = [ - opt.data[corpus_id]['introduce_at_training_step'] for corpus_id in my_corpus_ids + opts.tasks[corpus_id]['introduce_at_training_step'] for corpus_id in my_corpus_ids ] return cls(my_corpus_ids, my_weights, my_introduce_at_training_step) @@ -101,7 +101,7 @@ def __init__(self, my_corpus_ids: List[str]): self.infinite_corpus_ids = cycle(my_corpus_ids) @classmethod - def from_opt(cls, my_corpus_ids: List[str], opt: dict): + def from_opts(cls, my_corpus_ids: List[str], opts: dict): return cls(my_corpus_ids) def sample_corpus_ids( @@ -156,10 +156,10 @@ def get_serializable_metadata(self): ) -def get_adapter_ids(opt, corpus_opt, side): - if 'adapters' not in opt or 'adapters' not in corpus_opt: +def get_adapter_ids(opts, corpus_opt, side): + if 'adapters' not in opts or 'adapters' not in corpus_opt: return [] - global_adapters_opt = opt.adapters.get(side, None) + global_adapters_opt = opts.adapters.get(side, None) corpus_adapter_opt = corpus_opt['adapters'].get(side, None) if not global_adapters_opt or not corpus_adapter_opt: return [] @@ -229,17 +229,17 @@ def local_rank(self): return self.device_context.local_rank @classmethod - def from_opt(cls, opt: Namespace, world_context: WorldContext): - n_tasks = len(opt.data) + def from_opts(cls, opts: Namespace, world_context: WorldContext): + n_tasks = len(opts.tasks) # Sorting the keys, to ensure that tasks have a consistent order across devices. # This in turn ensures the order in which components are created from those tasks. 
- corpus_ids = sorted(opt.data.keys()) + corpus_ids = sorted(opts.tasks.keys()) if world_context.is_distributed(): - if any(task.get('node_gpu', None) is not None for task in opt.data.values()): + if any(task.get('node_gpu', None) is not None for task in opts.tasks.values()): node_gpu = [ - tuple(int(y) for y in opt.data[corpus_id]['node_gpu'].split(':', 1)) + tuple(int(y) for y in opts.tasks[corpus_id]['node_gpu'].split(':', 1)) for corpus_id in corpus_ids] else: # When --node_gpu is not set, assume an assigment that fills gpus in rank order @@ -248,24 +248,24 @@ def from_opt(cls, opt: Namespace, world_context: WorldContext): node_gpu = [(0, 0)] * n_tasks enc_sharing_group = [ - opt.data[corpus_id].get('enc_sharing_group', None) for corpus_id in corpus_ids + opts.tasks[corpus_id].get('enc_sharing_group', None) for corpus_id in corpus_ids ] dec_sharing_group = [ - opt.data[corpus_id].get('dec_sharing_group', None) for corpus_id in corpus_ids + opts.tasks[corpus_id].get('dec_sharing_group', None) for corpus_id in corpus_ids ] if any(x is not None for x in enc_sharing_group): - assert all(len(enc_ids) == len(opt.enc_layers) for enc_ids in enc_sharing_group) + assert all(len(enc_ids) == len(opts.enc_layers) for enc_ids in enc_sharing_group) else: # if no encoder sharing groups are defined, # it is assumed that there is only one encoder stack and it is language specific - if not len(opt.enc_layers) == 1: + if not len(opts.enc_layers) == 1: raise Exception('With more than one encoder stack, you must explictly define enc_sharing_group') if any(x is not None for x in dec_sharing_group): - assert all(len(dec_ids) == len(opt.dec_layers) for dec_ids in dec_sharing_group) + assert all(len(dec_ids) == len(opts.dec_layers) for dec_ids in dec_sharing_group) else: # if no decoder sharing groups are defined, # it is assumed that there is only one decoder stack and it is language specific - if not len(opt.dec_layers) == 1: + if not len(opts.dec_layers) == 1: raise Exception('With more than one decoder stack, you must explictly define dec_sharing_group') tasks = [] @@ -277,14 +277,14 @@ def from_opt(cls, opt: Namespace, world_context: WorldContext): node_gpu, corpus_ids ): - corpus_opt = opt.data[corpus_id] + corpus_opt = opts.tasks[corpus_id] src_lang, tgt_lang = corpus_opt['src_tgt'].split('-', 1) encoder_id = corpus_opt.get('enc_sharing_group', [src_lang]) decoder_id = corpus_opt.get('dec_sharing_group', [tgt_lang]) weight = corpus_opt.get('weight', 1.0) if 'adapters' in corpus_opt: - encoder_adapter_ids = get_adapter_ids(opt, corpus_opt, 'encoder') - decoder_adapter_ids = get_adapter_ids(opt, corpus_opt, 'decoder') + encoder_adapter_ids = get_adapter_ids(opts, corpus_opt, 'encoder') + decoder_adapter_ids = get_adapter_ids(opts, corpus_opt, 'decoder') uses_adapters = True else: encoder_adapter_ids = None @@ -308,14 +308,14 @@ def from_opt(cls, opt: Namespace, world_context: WorldContext): return cls( tasks, world_context=world_context, - tasks_per_communication_batch=opt.accum_count, + tasks_per_communication_batch=opts.accum_count, uses_adapters=uses_adapters, ) - def global_to_local(self, node_rank, local_rank, opt): + def global_to_local(self, node_rank, local_rank, opts): assert node_rank is not None assert local_rank is not None - task_distribution_strategy = self._get_strategy(node_rank=node_rank, local_rank=local_rank, opt=opt) + task_distribution_strategy = self._get_strategy(node_rank=node_rank, local_rank=local_rank, opts=opts) device_context = self.world_context.global_to_local(node_rank, 
local_rank) return self.__class__( self.tasks, @@ -328,15 +328,15 @@ def global_to_local(self, node_rank, local_rank, opt): uses_adapters=self.uses_adapters, ) - def _get_strategy(self, node_rank, local_rank, opt): + def _get_strategy(self, node_rank, local_rank, opts): assert node_rank is not None assert local_rank is not None # Global TQM does not have a task distribution strategy, but the local ones do my_corpus_ids = [task.corpus_id for task in self._tasks_on_device(node_rank, local_rank)] try: - strategy = TASK_DISTRIBUTION_STRATEGIES[opt.task_distribution_strategy].from_opt( + strategy = TASK_DISTRIBUTION_STRATEGIES[opts.task_distribution_strategy].from_opts( my_corpus_ids=my_corpus_ids, - opt=opt, + opts=opts, ) return strategy except Exception as e: @@ -558,11 +558,11 @@ def get_fields(self, side: str, fields_dict): raise RuntimeError # FIXME: merge with below - def get_vocabularies(self, opt: Namespace, side: str): + def get_vocabularies(self, opts: Namespace, side: str): result = [] for task in self.get_tasks(): lang = self.src_lang if side == 'src' else self.tgt_lang - vocab_path = opt.__getattribute__(f'{side}_vocab')[lang] + vocab_path = opts.__getattribute__(f'{side}_vocab')[lang] result.append((lang, vocab_path)) return result diff --git a/mammoth/inputters/dataloader.py b/mammoth/inputters/dataloader.py index 69af73af..78cff698 100644 --- a/mammoth/inputters/dataloader.py +++ b/mammoth/inputters/dataloader.py @@ -242,7 +242,7 @@ def from_opts(cls, task_queue_manager, transforms_cls, vocabs_dict, opts, is_tra return cls( task_queue_manager, opts, - opts.data, + opts.tasks, transforms_cls, vocabs_dict, is_train, @@ -275,7 +275,7 @@ def _init_datasets(self): # Case 2: we are validation (hence self.is_train := False), we need an iterator # if and only the task defines validation data, i.e. 
if the key `path_valid_src` # is defined - if self.is_train or self.opts.data[task.corpus_id].get('path_valid_src', None) is not None: + if self.is_train or self.opts.tasks[task.corpus_id].get('path_valid_src', None) is not None: corpus = get_corpus( self.opts, task, src_vocab, tgt_vocab, is_train=self.is_train ).to(device) diff --git a/mammoth/inputters/dataset.py b/mammoth/inputters/dataset.py index d6ec0544..6bb2d1a9 100644 --- a/mammoth/inputters/dataset.py +++ b/mammoth/inputters/dataset.py @@ -177,7 +177,7 @@ def get_corpus(opts, task, src_vocab: Vocab, tgt_vocab: Vocab, is_train: bool = # get transform classes to infer special tokens # FIXME ensure TQM properly initializes transform with global if necessary vocabs = {'src': src_vocab, 'tgt': tgt_vocab} - corpus_opts = opts.data[task.corpus_id] + corpus_opts = opts.tasks[task.corpus_id] transforms_to_apply = corpus_opts.get('transforms', None) transforms_to_apply = transforms_to_apply or opts.get('transforms', None) transforms_to_apply = transforms_to_apply or [] @@ -245,8 +245,8 @@ def build_vocab_counts(opts, corpus_id, transforms, n_sample=3): corpora = { corpus_id: read_examples_from_files( - opts.data[corpus_id]["path_src"], - opts.data[corpus_id]["path_tgt"], + opts.tasks[corpus_id]["path_src"], + opts.tasks[corpus_id]["path_tgt"], # FIXME this is likely not working transforms_fn=TransformPipe(transforms).apply if transforms else lambda x: x, ) diff --git a/mammoth/model_builder.py b/mammoth/model_builder.py index 68634b9f..d8bd4589 100644 --- a/mammoth/model_builder.py +++ b/mammoth/model_builder.py @@ -29,23 +29,23 @@ from mammoth.modules.attention_bridge import AttentionBridge -def build_embeddings(opt, vocab, for_encoder=True): +def build_embeddings(opts, vocab, for_encoder=True): """ Args: - opt: the option in current environment. + opts: the option in current environment. vocab: stoi-ish object. for_encoder(bool): build Embeddings for encoder or decoder? """ - emb_dim = opt.src_word_vec_size if for_encoder else opt.tgt_word_vec_size + emb_dim = opts.src_word_vec_size if for_encoder else opts.tgt_word_vec_size word_padding_idx = vocab.stoi[DefaultTokens.PAD] - opt.word_padding_idx = word_padding_idx + opts.word_padding_idx = word_padding_idx - freeze_word_vecs = opt.freeze_word_vecs_enc if for_encoder else opt.freeze_word_vecs_dec + freeze_word_vecs = opts.freeze_word_vecs_enc if for_encoder else opts.freeze_word_vecs_dec emb = Embeddings( word_vec_size=emb_dim, - position_encoding=opt.position_encoding, - dropout=opt.dropout[0] if type(opt.dropout) is list else opt.dropout, + position_encoding=opts.position_encoding, + dropout=opts.dropout[0] if type(opts.dropout) is list else opts.dropout, word_padding_idx=word_padding_idx, word_vocab_size=len(vocab), freeze_word_vecs=freeze_word_vecs, @@ -53,47 +53,47 @@ def build_embeddings(opt, vocab, for_encoder=True): return emb -def build_encoder(opt, embeddings, task_queue_manager): +def build_encoder(opts, embeddings, task_queue_manager): """ Various encoder dispatcher function. Args: - opt: the option in current environment. + opts: the option in current environment. embeddings (Embeddings): vocab embeddings for this encoder. 
""" - assert opt.encoder_type == 'transformer', 'Only Transformer is supported' - return LayerStackEncoder.from_opt(opt, embeddings, task_queue_manager) + assert opts.encoder_type == 'transformer', 'Only Transformer is supported' + return LayerStackEncoder.from_opts(opts, embeddings, task_queue_manager) -def build_decoder(opt, embeddings, task_queue_manager): +def build_decoder(opts, embeddings, task_queue_manager): """ Various decoder dispatcher function. Args: - opt: the option in current environment. + opts: the option in current environment. embeddings (Embeddings): vocab embeddings for this decoder. """ - assert opt.decoder_type == 'transformer', 'Only Transformer is supported' - return LayerStackDecoder.from_opt(opt, embeddings, task_queue_manager) + assert opts.decoder_type == 'transformer', 'Only Transformer is supported' + return LayerStackDecoder.from_opts(opts, embeddings, task_queue_manager) -def load_test_multitask_model(opt, model_path=None): +def load_test_multitask_model(opts, model_path=None): """If a checkpoint ending with ".pt" returns a full model otherwise it builds a bilingual model""" if model_path is None: - model_path = opt.models[0] + model_path = opts.models[0] - opt.lang_pair = opt.lang_pair if opt.lang_pair else f'{opt.src_lang}-{opt.tgt_lang}' + opts.lang_pair = opts.lang_pair if opts.lang_pair else f'{opts.src_lang}-{opts.tgt_lang}' if model_path.endswith('.pt'): - return load_test_model(opt, model_path) + return load_test_model(opts, model_path) else: checkpoint_modules = [ - (f'encoder.embeddings.embeddings_{opt.src_lang}.', f'src_embeddings_{opt.src_lang}'), - (f'decoder.embeddings.embeddings_{opt.tgt_lang}.', f'tgt_embeddings_{opt.tgt_lang}'), - (f'generator.generator_{opt.tgt_lang}.', f'generator_{opt.tgt_lang}'), + (f'encoder.embeddings.embeddings_{opts.src_lang}.', f'src_embeddings_{opts.src_lang}'), + (f'decoder.embeddings.embeddings_{opts.tgt_lang}.', f'tgt_embeddings_{opts.tgt_lang}'), + (f'generator.generator_{opts.tgt_lang}.', f'generator_{opts.tgt_lang}'), ('attention_bridge.', 'attention_bridge'), ] - for layer_stack_idx, layer_stack_opt in enumerate(opt.stack['encoder']): + for layer_stack_idx, layer_stack_opt in enumerate(opts.stack['encoder']): layer_stack_key = layer_stack_opt['id'] checkpoint_modules.append( ( @@ -108,7 +108,7 @@ def load_test_multitask_model(opt, model_path=None): f'encoder_adapter_{layer_stack_idx}_{layer_stack_key}_{adapter_group}_{sub_id}' ) ) - for layer_stack_idx, layer_stack_opt in enumerate(opt.stack['decoder']): + for layer_stack_idx, layer_stack_opt in enumerate(opts.stack['decoder']): layer_stack_key = layer_stack_opt['id'] checkpoint_modules.append( ( @@ -129,8 +129,8 @@ def load_test_multitask_model(opt, model_path=None): (prefix, f'{model_path}_{key}.pt') for (prefix, key) in checkpoint_modules ] - opt.model_frame = model_path + '_frame.pt' - frame = torch.load(opt.model_frame, map_location=lambda storage, loc: storage) + opts.model_frame = model_path + '_frame.pt' + frame = torch.load(opts.model_frame, map_location=lambda storage, loc: storage) checkpoint_state_dicts = { prefix: torch.load(path, map_location=lambda storage, loc: storage) @@ -140,19 +140,19 @@ def load_test_multitask_model(opt, model_path=None): combined_state_dict = _combine_ordered_dicts(checkpoint_state_dicts) vocabs_dict = { - 'src': frame["vocab"].get(('src', opt.src_lang)), - 'tgt': frame["vocab"].get(('tgt', opt.tgt_lang)), + 'src': frame["vocab"].get(('src', opts.src_lang)), + 'tgt': frame["vocab"].get(('tgt', opts.tgt_lang)), } # 
FIXME # fields["indices"] = Field(use_vocab=False, dtype=torch.long, sequential=False) - model_opt = ArgumentParser.ckpt_model_opts(frame['opt']) + model_opt = ArgumentParser.ckpt_model_opts(frame['opts']) # Avoid functionality on inference model_opt.update_vocab = False model = create_bilingual_model( - src_lang=opt.src_lang, - tgt_lang=opt.tgt_lang, - opt_stack=opt.stack, + src_lang=opts.src_lang, + tgt_lang=opts.tgt_lang, + opt_stack=opts.stack, model_opt=model_opt, vocabs_dict=vocabs_dict ) @@ -166,7 +166,7 @@ def load_test_multitask_model(opt, model_path=None): if key not in combined_state_dict: print(f'Key missing {key}') model.load_state_dict(combined_state_dict) - device = torch.device("cuda" if use_gpu(opt) else "cpu") + device = torch.device("cuda" if use_gpu(opts) else "cpu") model.to(device) model.eval() @@ -174,16 +174,16 @@ def load_test_multitask_model(opt, model_path=None): return vocabs_dict, model, model_opt -def load_test_model(opt, model_path=None): +def load_test_model(opts, model_path=None): if model_path is None: - model_path = opt.models[0] + model_path = opts.models[0] - if len(opt.models) > 1: - model_path_enc = opt.models[0] + if len(opts.models) > 1: + model_path_enc = opts.models[0] checkpoint = torch.load(model_path_enc, map_location=lambda storage, loc: storage) model = checkpoint['whole_model'] - model_path_dec = opt.models[1] + model_path_dec = opts.models[1] model_dec = torch.load(model_path_dec, map_location=lambda storage, loc: storage)['whole_model'] model.decoder = model_dec.decoder model.generator = model_dec.generator @@ -191,17 +191,17 @@ def load_test_model(opt, model_path=None): checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage) model = checkpoint['whole_model'] - model_opt = ArgumentParser.ckpt_model_opts(checkpoint['opt']) + model_opt = ArgumentParser.ckpt_model_opts(checkpoint['opts']) ArgumentParser.update_model_opts(model_opt) ArgumentParser.validate_model_opts(model_opt) vocabs = checkpoint['vocab'] print("VOCABS") print(vocabs) - if opt.gpu != -1: + if opts.gpu != -1: device = torch.device("cuda") model.to(device) - lang_pair = opt.lang_pair + lang_pair = opts.lang_pair src_lang, tgt_lang = lang_pair.split("-") # FIXME vocabs_dict = {} @@ -213,10 +213,10 @@ def load_test_model(opt, model_path=None): # Avoid functionality on inference model_opt.update_vocab = False - if opt.fp32: + if opts.fp32: model.float() - elif opt.int8: - if opt.gpu >= 0: + elif opts.int8: + if opts.gpu >= 0: raise ValueError("Dynamic 8-bit quantization is not supported on GPU") torch.quantization.quantize_dynamic(model, inplace=True) model.eval() @@ -242,7 +242,7 @@ def create_bilingual_model( generator = build_generator(model_opt, len(vocabs_dict['tgt']), tgt_emb) generators_md.add_module(f'generator_{tgt_lang}', generator) - attention_bridge = AttentionBridge.from_opt(model_opt) + attention_bridge = AttentionBridge.from_opts(model_opt) nmt_model = mammoth.models.NMTModel( encoder=encoder, @@ -315,7 +315,7 @@ def build_task_specific_model( decoder = build_only_dec(model_opt, pluggable_tgt_emb, task_queue_manager) # TODO: implement hierarchical approach to layer sharing - attention_bridge = AttentionBridge.from_opt(model_opt) + attention_bridge = AttentionBridge.from_opts(model_opt) if model_opt.param_init != 0.0: for p in attention_bridge.parameters(): @@ -500,11 +500,11 @@ def build_base_model_langspec( return model, generators_md -def uses_adapters(opt): - return 'adapters' in opt and opt.adapters +def uses_adapters(opts): + return 
'adapters' in opts and opts.adapters -def create_all_adapters(model, opt, task_queue_manager): +def create_all_adapters(model, opts, task_queue_manager): my_enc_adapter_ids = set() my_dec_adapter_ids = set() adapter_to_encoder_ids = defaultdict(set) @@ -520,7 +520,7 @@ def create_all_adapters(model, opt, task_queue_manager): adapter_to_decoder_ids[adapter_id].add(tuple(task.decoder_id)) _create_adapters( model, - opt, + opts, my_enc_adapter_ids, adapter_to_encoder_ids, my_dec_adapter_ids, @@ -528,7 +528,7 @@ def create_all_adapters(model, opt, task_queue_manager): ) -def create_bilingual_adapters(model, opt, src_lang, tgt_lang, opt_stack): +def create_bilingual_adapters(model, opts, src_lang, tgt_lang, opt_stack): my_enc_adapter_ids = [] my_dec_adapter_ids = [] adapter_to_encoder_ids = {} @@ -551,7 +551,7 @@ def create_bilingual_adapters(model, opt, src_lang, tgt_lang, opt_stack): _create_adapters( model, - opt, + opts, my_enc_adapter_ids, adapter_to_encoder_ids, my_dec_adapter_ids, @@ -561,7 +561,7 @@ def create_bilingual_adapters(model, opt, src_lang, tgt_lang, opt_stack): def _create_adapters( model, - opt, + opts, my_enc_adapter_ids, adapter_to_encoder_ids, my_dec_adapter_ids, @@ -569,14 +569,14 @@ def _create_adapters( ): my_enc_adapter_ids = [tuple(item) for item in my_enc_adapter_ids] my_dec_adapter_ids = [tuple(item) for item in my_dec_adapter_ids] - for adapter_group, adapter_opts in opt.adapters['encoder'].items(): + for adapter_group, adapter_opts in opts.adapters['encoder'].items(): layer_stack_index = adapter_opts['layer_stack_index'] for sub_id in adapter_opts['ids']: adapter_id_long = (layer_stack_index, adapter_group, sub_id) if adapter_id_long not in my_enc_adapter_ids: continue adapter = Adapter(adapter_group, sub_id) - input_dim = opt.rnn_size + input_dim = opts.rnn_size hidden_dim = adapter_opts['hidden_size'] # all stacks to which this adapter should be added @@ -597,14 +597,14 @@ def _create_adapters( layer_stack_index=layer_stack_index, module_ids=adapted_stacks, ) - for adapter_group, adapter_opts in opt.adapters['decoder'].items(): + for adapter_group, adapter_opts in opts.adapters['decoder'].items(): layer_stack_index = adapter_opts['layer_stack_index'] for sub_id in adapter_opts['ids']: adapter_id_long = (layer_stack_index, adapter_group, sub_id) if adapter_id_long not in my_dec_adapter_ids: continue adapter = Adapter(adapter_group, sub_id) - input_dim = opt.rnn_size + input_dim = opts.rnn_size hidden_dim = adapter_opts['hidden_size'] adapted_stacks = set( @@ -626,12 +626,12 @@ def _create_adapters( ) -def build_model(model_opt, opt, vocabs_dict, task_queue_manager, checkpoint): +def build_model(model_opt, opts, vocabs_dict, task_queue_manager, checkpoint): logger.info('Building model...') model, generators_md = build_base_model_langspec( model_opt=model_opt, vocabs_dict=vocabs_dict, - gpu=use_gpu(opt), + gpu=use_gpu(opts), task_queue_manager=task_queue_manager, checkpoint=checkpoint, ) diff --git a/mammoth/models/model_saver.py b/mammoth/models/model_saver.py index 3fbc41b9..bffc44c7 100644 --- a/mammoth/models/model_saver.py +++ b/mammoth/models/model_saver.py @@ -8,13 +8,13 @@ from mammoth.utils.module_splitter import explode_model -def build_model_saver(model_opt, opt, model, vocabs_dict, optim, device_context): +def build_model_saver(model_opt, opts, model, vocabs_dict, optim, device_context): # _check_save_model_path - save_model_path = os.path.abspath(opt.save_model) + save_model_path = os.path.abspath(opts.save_model) 
os.makedirs(os.path.dirname(save_model_path), exist_ok=True) model_saver = ModelSaver( - opt.save_model, model, model_opt, vocabs_dict, optim, opt.keep_checkpoint, device_context, opt.save_all_gpus + opts.save_model, model, model_opt, vocabs_dict, optim, opts.keep_checkpoint, device_context, opts.save_all_gpus ) return model_saver @@ -129,7 +129,7 @@ def _save(self, step, model, device_context): "model": model_state_dict, # 'generator': generator_state_dict, "vocab": self.vocabs_dict, - "opt": self.model_opt, + "opts": self.model_opt, "optim": {k: v.state_dict() for k, v in self.optim._optimizer.optimizers.items()}, "whole_model": self.model, } diff --git a/mammoth/modules/attention_bridge.py b/mammoth/modules/attention_bridge.py index b19af21a..3933db0f 100644 --- a/mammoth/modules/attention_bridge.py +++ b/mammoth/modules/attention_bridge.py @@ -73,15 +73,15 @@ def __init__( self.self_ff_norm = AttentionBridgeNorm(latent_size, norm_type) @classmethod - def from_opt(cls, opt): + def from_opts(cls, opts): return cls( - opt.rnn_size, - opt.hidden_ab_size, - opt.ab_fixed_length, - opt.heads, - opt.attention_dropout[0], - opt.max_relative_positions, - opt.ab_layer_norm, + opts.rnn_size, + opts.hidden_ab_size, + opts.ab_fixed_length, + opts.heads, + opts.attention_dropout[0], + opts.max_relative_positions, + opts.ab_layer_norm, ) @property @@ -154,15 +154,15 @@ def __init__( self.norm = AttentionBridgeNorm(d, ab_layer_norm) @classmethod - def from_opt(cls, opt): + def from_opts(cls, opts): """Alternate constructor.""" return cls( - opt.rnn_size, - opt.ab_fixed_length, - opt.hidden_ab_size, - opt.model_type, - opt.rnn_size, - opt.ab_layer_norm, + opts.rnn_size, + opts.ab_fixed_length, + opts.hidden_ab_size, + opts.model_type, + opts.rnn_size, + opts.ab_layer_norm, ) def forward(self, intermediate_output, encoder_output, mask=None): @@ -244,12 +244,12 @@ def forward(self, intermediate_output, encoder_output, mask=None): return attention_weights, output @classmethod - def from_opt(cls, opt): + def from_opts(cls, opts): return cls( - opt.rnn_size, - opt.hidden_ab_size, - opt.ab_fixed_length, - opt.ab_layer_norm, + opts.rnn_size, + opts.hidden_ab_size, + opts.ab_fixed_length, + opts.ab_layer_norm, ) @@ -276,15 +276,15 @@ def forward(self, intermediate_output, encoder_output, mask=None): return None, outp @classmethod - def from_opt(cls, opt): + def from_opts(cls, opts): return cls( - opt.rnn_size, - opt.heads, - opt.hidden_ab_size, # d_ff + opts.rnn_size, + opts.heads, + opts.hidden_ab_size, # d_ff # TODO: that list indexing things seems suspicious to me... - opt.dropout[0], - opt.attention_dropout[0], - max_relative_positions=opt.max_relative_positions, + opts.dropout[0], + opts.attention_dropout[0], + max_relative_positions=opts.max_relative_positions, ) @@ -313,11 +313,11 @@ def forward(self, intermediate_output, encoder_output, mask=None): return None, self.module(intermediate_output) @classmethod - def from_opt(cls, opt): + def from_opts(cls, opts): return cls( - opt.rnn_size, - opt.hidden_ab_size, - opt.ab_layer_norm, + opts.rnn_size, + opts.hidden_ab_size, + opts.ab_layer_norm, ) @@ -333,7 +333,7 @@ def __init__(self, layers): self.is_fixed_length = any(x.is_fixed_length for x in layers) @classmethod - def from_opt(cls, opt): + def from_opts(cls, opts): """Alternate constructor.""" # convert opts specifications to architectures layer_type_to_cls = { @@ -344,16 +344,16 @@ def from_opt(cls, opt): 'feedforward': FeedForwardAttentionBridgeLayer, } - # preconstruct layers using .from_opt(...) 
- layers = [layer_type_to_cls[layer_type].from_opt(opt) for layer_type in opt.ab_layers] + # preconstruct layers using .from_opts(...) + layers = [layer_type_to_cls[layer_type].from_opts(opts) for layer_type in opts.ab_layers] # FIXME: locking-in edge case behavior - if any(layer == 'perceiver' for layer in opt.ab_layers): - first_perceiver_index = next(idx for idx, layer in enumerate(opt.ab_layers) if layer == 'perceiver') + if any(layer == 'perceiver' for layer in opts.ab_layers): + first_perceiver_index = next(idx for idx, layer in enumerate(opts.ab_layers) if layer == 'perceiver') if first_perceiver_index != 0: assert any(layer.is_fixed_length for layer in layers[:first_perceiver_index]), \ 'Unsupported bridge configuration: at least one layer must be fixed-size before perceiver' - if not all(layer == 'perceiver' for layer in opt.ab_layers): + if not all(layer == 'perceiver' for layer in opts.ab_layers): warnings.warn('Architecture-mixing not fully supported with perceiver.') # FIXME: deleting unused params manually for perceiver_layer in layers[1:]: diff --git a/mammoth/modules/decoder.py b/mammoth/modules/decoder.py index 6619e00e..e0e707f5 100644 --- a/mammoth/modules/decoder.py +++ b/mammoth/modules/decoder.py @@ -13,7 +13,7 @@ def __init__(self, attentional=True): self.attentional = attentional @classmethod - def from_opt(cls, opt, embeddings): + def from_opts(cls, opts, embeddings): """Alternate constructor. Subclasses should override this method. diff --git a/mammoth/modules/decoder_ensemble.py b/mammoth/modules/decoder_ensemble.py index 84d4b141..06e1fbdb 100644 --- a/mammoth/modules/decoder_ensemble.py +++ b/mammoth/modules/decoder_ensemble.py @@ -120,13 +120,13 @@ def __init__(self, models, raw_probs=False): self.models = nn.ModuleList(models) -def load_test_model(opt): +def load_test_model(opts): """Read in multiple models for ensemble.""" shared_vocabs = None shared_model_opt = None models = [] - for model_path in opt.models: - vocabs, model, model_opt = mammoth.model_builder.load_test_multitask_model(opt, model_path=model_path) + for model_path in opts.models: + vocabs, model, model_opt = mammoth.model_builder.load_test_multitask_model(opts, model_path=model_path) if shared_vocabs is None: shared_vocabs = vocabs else: @@ -138,5 +138,5 @@ def load_test_model(opt): models.append(model) if shared_model_opt is None: shared_model_opt = model_opt - ensemble_model = EnsembleModel(models, opt.avg_raw_probs) + ensemble_model = EnsembleModel(models, opts.avg_raw_probs) return shared_vocabs, ensemble_model, shared_model_opt diff --git a/mammoth/modules/embeddings.py b/mammoth/modules/embeddings.py index 35f61c57..dbd16ab1 100644 --- a/mammoth/modules/embeddings.py +++ b/mammoth/modules/embeddings.py @@ -338,12 +338,12 @@ def convert_to_torch_tensor(word_to_float_list_dict, vocab): return tensor # FIXME: seems it got nuked during the great refactoring of data -# def prepare_pretrained_embeddings(opt, fields): -# if all([opt.both_embeddings is None, opt.src_embeddings is None, opt.tgt_embeddings is None]): +# def prepare_pretrained_embeddings(opts, fields): +# if all([opts.both_embeddings is None, opts.src_embeddings is None, opts.tgt_embeddings is None]): # return # # assert ( -# opt.save_data +# opts.save_data # ), "-save_data is required when using \ # pretrained embeddings." 
# @@ -356,42 +356,42 @@ def convert_to_torch_tensor(word_to_float_list_dict, vocab): # vocs.append(vocab) # enc_vocab, dec_vocab = vocs # -# skip_lines = 1 if opt.embeddings_type == "word2vec" else 0 -# if opt.both_embeddings is not None: +# skip_lines = 1 if opts.embeddings_type == "word2vec" else 0 +# if opts.both_embeddings is not None: # set_of_src_and_tgt_vocab = set(enc_vocab.stoi.keys()) | set(dec_vocab.stoi.keys()) -# logger.info("Reading encoder and decoder embeddings from {}".format(opt.both_embeddings)) -# src_vectors, total_vec_count = read_embeddings(opt.both_embeddings, skip_lines, set_of_src_and_tgt_vocab) +# logger.info("Reading encoder and decoder embeddings from {}".format(opts.both_embeddings)) +# src_vectors, total_vec_count = read_embeddings(opts.both_embeddings, skip_lines, set_of_src_and_tgt_vocab) # tgt_vectors = src_vectors # logger.info("\tFound {} total vectors in file".format(total_vec_count)) # else: -# if opt.src_embeddings is not None: -# logger.info("Reading encoder embeddings from {}".format(opt.src_embeddings)) -# src_vectors, total_vec_count = read_embeddings(opt.src_embeddings, skip_lines, filter_set=enc_vocab.stoi) +# if opts.src_embeddings is not None: +# logger.info("Reading encoder embeddings from {}".format(opts.src_embeddings)) +# src_vectors, total_vec_count = read_embeddings(opts.src_embeddings, skip_lines, filter_set=enc_vocab.stoi) # logger.info("\tFound {} total vectors in file.".format(total_vec_count)) # else: # src_vectors = None -# if opt.tgt_embeddings is not None: -# logger.info("Reading decoder embeddings from {}".format(opt.tgt_embeddings)) -# tgt_vectors, total_vec_count = read_embeddings(opt.tgt_embeddings, skip_lines, filter_set=dec_vocab.stoi) +# if opts.tgt_embeddings is not None: +# logger.info("Reading decoder embeddings from {}".format(opts.tgt_embeddings)) +# tgt_vectors, total_vec_count = read_embeddings(opts.tgt_embeddings, skip_lines, filter_set=dec_vocab.stoi) # logger.info("\tFound {} total vectors in file".format(total_vec_count)) # else: # tgt_vectors = None # logger.info("After filtering to vectors in vocab:") -# if opt.src_embeddings is not None or opt.both_embeddings is not None: +# if opts.src_embeddings is not None or opts.both_embeddings is not None: # logger.info("\t* enc: %d match, %d missing, (%.2f%%)" % calc_vocab_load_stats(enc_vocab, src_vectors)) -# if opt.tgt_embeddings is not None or opt.both_embeddings is not None: +# if opts.tgt_embeddings is not None or opts.both_embeddings is not None: # logger.info("\t* dec: %d match, %d missing, (%.2f%%)" % calc_vocab_load_stats(dec_vocab, tgt_vectors)) # # # Write to file -# enc_output_file = opt.save_data + ".enc_embeddings.pt" -# dec_output_file = opt.save_data + ".dec_embeddings.pt" -# if opt.src_embeddings is not None or opt.both_embeddings is not None: +# enc_output_file = opts.save_data + ".enc_embeddings.pt" +# dec_output_file = opts.save_data + ".dec_embeddings.pt" +# if opts.src_embeddings is not None or opts.both_embeddings is not None: # logger.info("\nSaving encoder embeddings as:\n\t* enc: %s" % enc_output_file) # torch.save(convert_to_torch_tensor(src_vectors, enc_vocab), enc_output_file) -# # set the opt in place -# opt.pre_word_vecs_enc = enc_output_file -# if opt.tgt_embeddings is not None or opt.both_embeddings is not None: +# # set the opts in place +# opts.pre_word_vecs_enc = enc_output_file +# if opts.tgt_embeddings is not None or opts.both_embeddings is not None: # logger.info("\nSaving decoder embeddings as:\n\t* dec: %s" % dec_output_file) # 
torch.save(convert_to_torch_tensor(tgt_vectors, dec_vocab), dec_output_file) -# # set the opt in place -# opt.pre_word_vecs_dec = dec_output_file +# # set the opts in place +# opts.pre_word_vecs_dec = dec_output_file diff --git a/mammoth/modules/encoder.py b/mammoth/modules/encoder.py index 8d2f0ce7..9e55792b 100644 --- a/mammoth/modules/encoder.py +++ b/mammoth/modules/encoder.py @@ -31,7 +31,7 @@ class EncoderBase(nn.Module): """ @classmethod - def from_opt(cls, opt, embeddings=None): + def from_opts(cls, opts, embeddings=None): raise NotImplementedError def _check_args(self, src, lengths=None, hidden=None): diff --git a/mammoth/modules/layer_stack_decoder.py b/mammoth/modules/layer_stack_decoder.py index 299b46fe..ed824c3f 100644 --- a/mammoth/modules/layer_stack_decoder.py +++ b/mammoth/modules/layer_stack_decoder.py @@ -17,11 +17,11 @@ def __init__(self, embeddings, decoders): self._active: List[str] = [] @classmethod - def from_opt(cls, opt, embeddings, task_queue_manager, is_on_top=False): + def from_opts(cls, opts, embeddings, task_queue_manager, is_on_top=False): """Alternate constructor for use during training.""" decoders = nn.ModuleList() - for layer_stack_index, n_layers in enumerate(opt.dec_layers): - is_on_top = layer_stack_index == len(opt.dec_layers) - 1 + for layer_stack_index, n_layers in enumerate(opts.dec_layers): + is_on_top = layer_stack_index == len(opts.dec_layers) - 1 stacks = nn.ModuleDict() for module_id in task_queue_manager.get_decoders(layer_stack_index): if module_id in stacks: @@ -29,26 +29,26 @@ def from_opt(cls, opt, embeddings, task_queue_manager, is_on_top=False): continue stacks[module_id] = AdaptedTransformerDecoder( n_layers, - opt.rnn_size, - opt.heads, - opt.transformer_ff, - opt.copy_attn, - opt.self_attn_type, - opt.dropout[0] if type(opt.dropout) is list else opt.dropout, + opts.rnn_size, + opts.heads, + opts.transformer_ff, + opts.copy_attn, + opts.self_attn_type, + opts.dropout[0] if type(opts.dropout) is list else opts.dropout, ( - opt.attention_dropout[0] - if type(opt.attention_dropout) is list - else opt.attention_dropout + opts.attention_dropout[0] + if type(opts.attention_dropout) is list + else opts.attention_dropout ), None, # embeddings, - opt.max_relative_positions, - opt.aan_useffn, - opt.full_context_alignment, - opt.alignment_layer, - alignment_heads=opt.alignment_heads, - pos_ffn_activation_fn=opt.pos_ffn_activation_fn, + opts.max_relative_positions, + opts.aan_useffn, + opts.full_context_alignment, + opts.alignment_layer, + alignment_heads=opts.alignment_heads, + pos_ffn_activation_fn=opts.pos_ffn_activation_fn, layer_norm_module=( - nn.LayerNorm(opt.rnn_size, eps=1e-6) if is_on_top + nn.LayerNorm(opts.rnn_size, eps=1e-6) if is_on_top else nn.Identity() ), ) diff --git a/mammoth/modules/layer_stack_encoder.py b/mammoth/modules/layer_stack_encoder.py index 574c67f7..b5a692d8 100644 --- a/mammoth/modules/layer_stack_encoder.py +++ b/mammoth/modules/layer_stack_encoder.py @@ -17,32 +17,32 @@ def __init__(self, embeddings, encoders): self._active: List[str] = [] @classmethod - def from_opt(cls, opt, embeddings, task_queue_manager): + def from_opts(cls, opts, embeddings, task_queue_manager): """Alternate constructor for use during training.""" encoders = nn.ModuleList() - for layer_stack_index, n_layers in enumerate(opt.enc_layers): + for layer_stack_index, n_layers in enumerate(opts.enc_layers): stacks = nn.ModuleDict() - is_on_top = layer_stack_index == len(opt.enc_layers) - 1 + is_on_top = layer_stack_index == len(opts.enc_layers) 
- 1 for module_id in task_queue_manager.get_encoders(layer_stack_index): if module_id in stacks: # several tasks using the same layer stack continue stacks[module_id] = AdaptedTransformerEncoder( n_layers, - opt.rnn_size, - opt.heads, - opt.transformer_ff, - opt.dropout[0] if type(opt.dropout) is list else opt.dropout, + opts.rnn_size, + opts.heads, + opts.transformer_ff, + opts.dropout[0] if type(opts.dropout) is list else opts.dropout, ( - opt.attention_dropout[0] - if type(opt.attention_dropout) is list - else opt.attention_dropout + opts.attention_dropout[0] + if type(opts.attention_dropout) is list + else opts.attention_dropout ), None, # embeddings, - opt.max_relative_positions, - pos_ffn_activation_fn=opt.pos_ffn_activation_fn, + opts.max_relative_positions, + pos_ffn_activation_fn=opts.pos_ffn_activation_fn, layer_norm_module=( - nn.LayerNorm(opt.rnn_size, eps=1e-6) if is_on_top + nn.LayerNorm(opts.rnn_size, eps=1e-6) if is_on_top else nn.Identity() ) ) diff --git a/mammoth/modules/mean_encoder.py b/mammoth/modules/mean_encoder.py index 3e94a4c5..943a099e 100644 --- a/mammoth/modules/mean_encoder.py +++ b/mammoth/modules/mean_encoder.py @@ -18,9 +18,9 @@ def __init__(self, num_layers, embeddings): self.embeddings = embeddings @classmethod - def from_opt(cls, opt, embeddings): + def from_opts(cls, opts, embeddings): """Alternate constructor.""" - return cls(opt.enc_layers, embeddings) + return cls(opts.enc_layers, embeddings) def forward(self, src, lengths=None): """See :func:`EncoderBase.forward()`""" diff --git a/mammoth/modules/transformer_decoder.py b/mammoth/modules/transformer_decoder.py index daf3ac98..b78330c4 100644 --- a/mammoth/modules/transformer_decoder.py +++ b/mammoth/modules/transformer_decoder.py @@ -283,26 +283,26 @@ def __init__(self, d_model, copy_attn, embeddings, alignment_layer, layer_norm_m self.alignment_layer = alignment_layer @classmethod - def from_opt(cls, opt, embeddings, is_on_top=False): + def from_opts(cls, opts, embeddings, is_on_top=False): """Alternate constructor.""" return cls( - opt.dec_layers, - opt.rnn_size, - opt.heads, - opt.transformer_ff, - opt.copy_attn, - opt.self_attn_type, - opt.dropout[0] if type(opt.dropout) is list else opt.dropout, - opt.attention_dropout[0] if type(opt.attention_dropout) is list else opt.attention_dropout, + opts.dec_layers, + opts.rnn_size, + opts.heads, + opts.transformer_ff, + opts.copy_attn, + opts.self_attn_type, + opts.dropout[0] if type(opts.dropout) is list else opts.dropout, + opts.attention_dropout[0] if type(opts.attention_dropout) is list else opts.attention_dropout, embeddings, - opt.max_relative_positions, - opt.aan_useffn, - opt.full_context_alignment, - opt.alignment_layer, - alignment_heads=opt.alignment_heads, - pos_ffn_activation_fn=opt.pos_ffn_activation_fn, + opts.max_relative_positions, + opts.aan_useffn, + opts.full_context_alignment, + opts.alignment_layer, + alignment_heads=opts.alignment_heads, + pos_ffn_activation_fn=opts.pos_ffn_activation_fn, layer_norm_module=( - nn.LayerNorm(opt.rnn_size, eps=1e-6) if is_on_top + nn.LayerNorm(opts.rnn_size, eps=1e-6) if is_on_top else nn.Identity() ), ) diff --git a/mammoth/modules/transformer_encoder.py b/mammoth/modules/transformer_encoder.py index 1cd83b12..db09609a 100644 --- a/mammoth/modules/transformer_encoder.py +++ b/mammoth/modules/transformer_encoder.py @@ -133,20 +133,20 @@ def __init__( self.layer_norm = layer_norm_module @classmethod - def from_opt(cls, opt, embeddings, is_on_top=False): + def from_opts(cls, opts, embeddings, 
is_on_top=False): """Alternate constructor.""" return cls( - opt.enc_layers, - opt.rnn_size, - opt.heads, - opt.transformer_ff, - opt.dropout[0] if type(opt.dropout) is list else opt.dropout, - opt.attention_dropout[0] if type(opt.attention_dropout) is list else opt.attention_dropout, + opts.enc_layers, + opts.rnn_size, + opts.heads, + opts.transformer_ff, + opts.dropout[0] if type(opts.dropout) is list else opts.dropout, + opts.attention_dropout[0] if type(opts.attention_dropout) is list else opts.attention_dropout, embeddings, - opt.max_relative_positions, - pos_ffn_activation_fn=opt.pos_ffn_activation_fn, + opts.max_relative_positions, + pos_ffn_activation_fn=opts.pos_ffn_activation_fn, layer_norm_module=( - nn.LayerNorm(opt.rnn_size, eps=1e-6) if is_on_top + nn.LayerNorm(opts.rnn_size, eps=1e-6) if is_on_top else nn.Identity() ) ) diff --git a/mammoth/opts.py b/mammoth/opts.py index 27547c09..24aac75f 100644 --- a/mammoth/opts.py +++ b/mammoth/opts.py @@ -93,10 +93,10 @@ def _add_reproducibility_opts(parser): def _add_dynamic_corpus_opts(parser, build_vocab_only=False): """Options related to training corpus, type: a list of dictionary.""" - group = parser.add_argument_group('Data') + group = parser.add_argument_group('Data/Tasks') group.add( - "-data", - "--data", + "-tasks", + "--tasks", required=True, help="List of datasets and their specifications. See examples/*.yaml for further details.", ) @@ -603,7 +603,7 @@ def model_opts(parser): type=str, default="O1", choices=["O0", "O1", "O2", "O3"], - help="For FP16 training, the opt_level to use. See https://nvidia.github.io/apex/amp.html#opt-levels.", + help="For FP16 training, the opt_level to use. See https://nvidia.github.io/apex/amp.html#opt-levels.", ) # attention bridge options @@ -1207,7 +1207,7 @@ def translate_opts(parser, dynamic=False): help="Divide src and tgt (if applicable) into " "smaller multiple src and tgt files, then " "build shards, each shard will have " - "opt.shard_size samples except last shard. " + "opts.shard_size samples except last shard. " "shard_size=0 means no segmentation " "shard_size>0 means segment dataset into multiple shards, " "each shard has shard_size samples", diff --git a/mammoth/tests/test_data_prepare.py b/mammoth/tests/test_data_prepare.py index cf4a1241..d1780982 100644 --- a/mammoth/tests/test_data_prepare.py +++ b/mammoth/tests/test_data_prepare.py @@ -27,11 +27,11 @@ # '-tgt_vocab', 'data/vocab-train.tgt' # ] # -# opt = parser.parse_known_args(default_opts)[0] +# opts = parser.parse_known_args(default_opts)[0] # # Inject some dummy training options that may needed when build fields -# opt.copy_attn = False -# ArgumentParser.validate_prepare_opts(opt) -# return opt +# opts.copy_attn = False +# ArgumentParser.validate_prepare_opts(opts) +# return opts # # # default_opts = get_default_opts() @@ -40,15 +40,15 @@ # class TestData(unittest.TestCase): # def __init__(self, *args, **kwargs): # super(TestData, self).__init__(*args, **kwargs) -# self.opt = default_opts +# self.opts = default_opts # -# def dataset_build(self, opt): +# def dataset_build(self, opts): # try: -# prepare_fields_transforms(opt) +# prepare_fields_transforms(opts) # except SystemExit as err: # print(err) # except IOError as err: -# if opt.skip_empty_level != 'error': +# if opts.skip_empty_level != 'error': # raise err # else: # print(f"Catched IOError: {err}") @@ -56,10 +56,10 @@ # # Remove the generated *pt files.
# for pt in glob.glob(SAVE_DATA_PREFIX + '*.pt'): # os.remove(pt) -# if self.opt.save_data: +# if self.opts.save_data: # # Remove the generated data samples # sample_path = os.path.join( -# os.path.dirname(self.opt.save_data), +# os.path.dirname(self.opts.save_data), # CorpusName.SAMPLE) # if os.path.exists(sample_path): # for f in glob.glob(sample_path + '/*'): @@ -78,12 +78,12 @@ # # def test_method(self): # if param_setting: -# opt = copy.deepcopy(self.opt) +# opts = copy.deepcopy(self.opts) # for param, setting in param_setting: -# setattr(opt, param, setting) +# setattr(opts, param, setting) # else: -# opt = self.opt -# getattr(self, methodname)(opt) +# opts = self.opts +# getattr(self, methodname)(opts) # if param_setting: # name = 'test_' + methodname + "_" + "_".join( # str(param_setting).split()) diff --git a/mammoth/tests/test_models.py b/mammoth/tests/test_models.py index 6c1803d7..a879cce4 100644 --- a/mammoth/tests/test_models.py +++ b/mammoth/tests/test_models.py @@ -14,13 +14,13 @@ mammoth.opts._add_train_general_opts(parser) # -data option is required, but not used in this test, so dummy. -opt = parser.parse_known_args(['-data', 'dummy', '-node_rank', '0'])[0] +opts = parser.parse_known_args(['-tasks', 'dummy', '-node_rank', '0'])[0] class TestModel(unittest.TestCase): def __init__(self, *args, **kwargs): super(TestModel, self).__init__(*args, **kwargs) - self.opt = opt + self.opts = opts def get_field(self): return Vocab(None, items=[], tag='dummy', specials=list(DEFAULT_SPECIALS)) @@ -32,77 +32,77 @@ def get_batch(self, source_l=3, bsize=1): test_length = torch.ones(bsize).fill_(source_l).long() return test_src, test_tgt, test_length - def embeddings_forward(self, opt, source_l=3, bsize=1): + def embeddings_forward(self, opts, source_l=3, bsize=1): ''' Tests if the embeddings works as expected args: - opt: set of options + opts: set of options source_l: Length of generated input sentence bsize: Batchsize of generated input ''' word_field = self.get_field() - emb = build_embeddings(opt, word_field) + emb = build_embeddings(opts, word_field) test_src, _, __ = self.get_batch(source_l=source_l, bsize=bsize) - if opt.decoder_type == 'transformer': + if opts.decoder_type == 'transformer': input = torch.cat([test_src, test_src], 0) res = emb(input) - compare_to = torch.zeros(source_l * 2, bsize, opt.src_word_vec_size) + compare_to = torch.zeros(source_l * 2, bsize, opts.src_word_vec_size) else: res = emb(test_src) - compare_to = torch.zeros(source_l, bsize, opt.src_word_vec_size) + compare_to = torch.zeros(source_l, bsize, opts.src_word_vec_size) self.assertEqual(res.size(), compare_to.size()) - def encoder_forward(self, opt, source_l=3, bsize=1): + def encoder_forward(self, opts, source_l=3, bsize=1): ''' Tests if the encoder works as expected args: - opt: set of options + opts: set of options source_l: Length of generated input sentence bsize: Batchsize of generated input ''' word_field = self.get_field() - embeddings = build_embeddings(opt, word_field) - enc = build_encoder(opt, embeddings) + embeddings = build_embeddings(opts, word_field) + enc = build_encoder(opts, embeddings) test_src, test_tgt, test_length = self.get_batch(source_l=source_l, bsize=bsize) hidden_t, outputs, test_length = enc(test_src, test_length) # Initialize vectors to compare size with - test_hid = torch.zeros(self.opt.enc_layers, bsize, opt.rnn_size) - test_out = torch.zeros(source_l, bsize, opt.rnn_size) + test_hid = torch.zeros(self.opts.enc_layers, bsize, opts.rnn_size) + test_out = 
torch.zeros(source_l, bsize, opts.rnn_size) # Ensure correct sizes and types self.assertEqual(test_hid.size(), hidden_t[0].size(), hidden_t[1].size()) self.assertEqual(test_out.size(), outputs.size()) self.assertEqual(type(outputs), torch.Tensor) - def nmtmodel_forward(self, opt, source_l=3, bsize=1): + def nmtmodel_forward(self, opts, source_l=3, bsize=1): """ - Creates a nmtmodel with a custom opt function. + Creates a nmtmodel with a custom opts function. Forwards a testbatch and checks output size. Args: - opt: Namespace with options + opts: Namespace with options source_l: length of input sequence bsize: batchsize """ word_field = self.get_field() - embeddings = build_embeddings(opt, word_field) - enc = build_encoder(opt, embeddings) + embeddings = build_embeddings(opts, word_field) + enc = build_encoder(opts, embeddings) - embeddings = build_embeddings(opt, word_field, for_encoder=False) - dec = build_decoder(opt, embeddings) + embeddings = build_embeddings(opts, word_field, for_encoder=False) + dec = build_decoder(opts, embeddings) model = mammoth.models.model.NMTModel(enc, dec) test_src, test_tgt, test_length = self.get_batch(source_l=source_l, bsize=bsize) outputs, attn = model(test_src, test_tgt, test_length) - outputsize = torch.zeros(source_l - 1, bsize, opt.rnn_size) + outputsize = torch.zeros(source_l - 1, bsize, opts.rnn_size) # Make sure that output has the correct size and type self.assertEqual(outputs.size(), outputsize.size()) self.assertEqual(type(outputs), torch.Tensor) @@ -118,12 +118,12 @@ def _add_test(param_setting, methodname): """ def test_method(self): - opt = copy.deepcopy(self.opt) + opts = copy.deepcopy(self.opts) if param_setting: for param, setting in param_setting: - setattr(opt, param, setting) - ArgumentParser.update_model_opts(opt) - getattr(self, methodname)(opt) + setattr(opts, param, setting) + ArgumentParser.update_model_opts(opts) + getattr(self, methodname)(opts) if param_setting: name = 'test_' + methodname + "_" + "_".join(str(param_setting).split()) @@ -136,7 +136,7 @@ def test_method(self): ''' TEST PARAMETERS ''' -opt.brnn = False +opts.brnn = False # FIXME: Most tests disabled: FoTraNMT only supports Transformer test_embeddings = [ diff --git a/mammoth/tests/test_task_distribution_strategy.py b/mammoth/tests/test_task_distribution_strategy.py index 07b4b3e4..612d9140 100644 --- a/mammoth/tests/test_task_distribution_strategy.py +++ b/mammoth/tests/test_task_distribution_strategy.py @@ -5,7 +5,7 @@ def test_weights_all_zero(): - opt = Namespace(data={ + opts = Namespace(data={ 'a': { 'weight': 0, 'introduce_at_training_step': 0, @@ -20,12 +20,12 @@ def test_weights_all_zero(): }, }) with pytest.raises(ValueError) as exc_info: - WeightedSamplingTaskDistributionStrategy.from_opt(['a', 'b'], opt) + WeightedSamplingTaskDistributionStrategy.from_opts(['a', 'b'], opts) assert 'Can not set "weight" of all corpora on a device to zero' in str(exc_info.value) def test_weights_all_postponed(): - opt = Namespace(data={ + opts = Namespace(data={ 'a': { 'weight': 1, 'introduce_at_training_step': 1, @@ -40,12 +40,12 @@ def test_weights_all_postponed(): }, }) with pytest.raises(ValueError) as exc_info: - WeightedSamplingTaskDistributionStrategy.from_opt(['a', 'b'], opt) + WeightedSamplingTaskDistributionStrategy.from_opts(['a', 'b'], opts) assert 'Can not set "introduce_at_training_step" of all corpora on a device to nonzero' in str(exc_info.value) def test_invalid_curriculum(): - opt = Namespace(data={ + opts = Namespace(data={ # 'a' disabled by weight 'a': 
{ 'weight': 0, @@ -62,12 +62,12 @@ def test_invalid_curriculum(): }, }) with pytest.raises(ValueError) as exc_info: - WeightedSamplingTaskDistributionStrategy.from_opt(['a', 'b'], opt) + WeightedSamplingTaskDistributionStrategy.from_opts(['a', 'b'], opts) assert 'Invalid curriculum' in str(exc_info.value) def test_sampling_task_distribution_strategy(): - opt = Namespace(data={ + opts = Namespace(data={ # 'a' disabled by weight 'a': { 'weight': 0, @@ -89,7 +89,7 @@ def test_sampling_task_distribution_strategy(): 'introduce_at_training_step': 0, }, }) - strategy = WeightedSamplingTaskDistributionStrategy.from_opt(['a', 'b', 'c'], opt) + strategy = WeightedSamplingTaskDistributionStrategy.from_opts(['a', 'b', 'c'], opts) all_samples = [] n_samples = 10 n_batches = 1000 diff --git a/mammoth/tests/test_task_queue_manager.py b/mammoth/tests/test_task_queue_manager.py index e38e44a2..17155080 100644 --- a/mammoth/tests/test_task_queue_manager.py +++ b/mammoth/tests/test_task_queue_manager.py @@ -20,9 +20,9 @@ def test_init_minimal(): 'train_c-d': {'path_src': 'dummy', 'path_tgt': 'dummy', 'src_tgt': 'c-d'}, } } - opt = Namespace(**opt_dict) - world_context = WorldContext.from_opt(opt) - task_queue_manager = TaskQueueManager.from_opt(opt, world_context) + opts = Namespace(**opt_dict) + world_context = WorldContext.from_opts(opts) + task_queue_manager = TaskQueueManager.from_opts(opts, world_context) assert world_context.is_gpu() assert world_context.is_distributed() assert len(task_queue_manager.tasks) == 2 @@ -95,15 +95,15 @@ def create_basic_task_queue_manager(): }, } } - opt = Namespace(**opt_dict) - world_context = WorldContext.from_opt(opt) - task_queue_manager = TaskQueueManager.from_opt(opt, world_context) - return task_queue_manager, opt + opts = Namespace(**opt_dict) + world_context = WorldContext.from_opts(opts) + task_queue_manager = TaskQueueManager.from_opts(opts, world_context) + return task_queue_manager, opts def test_init_basic(): - global_task_queue_manager, opt = create_basic_task_queue_manager() - task_queue_manager = global_task_queue_manager.global_to_local(node_rank=0, local_rank=1, opt=opt) + global_task_queue_manager, opts = create_basic_task_queue_manager() + task_queue_manager = global_task_queue_manager.global_to_local(node_rank=0, local_rank=1, opts=opts) world_context = task_queue_manager.world_context assert world_context.is_gpu() assert world_context.is_distributed() @@ -129,7 +129,7 @@ def __call__(self, sorted_global_ranks): self.group_idx += 1 return result - global_task_queue_manager, opt = create_basic_task_queue_manager() + global_task_queue_manager, opts = create_basic_task_queue_manager() all_groups = global_task_queue_manager.create_all_distributed_groups(new_group_func=MockGroup()) assert all_groups == { 'src_emb': OrderedDict({ @@ -157,8 +157,8 @@ def __call__(self, sorted_global_ranks): self.group_idx += 1 return result - global_task_queue_manager, opt = create_basic_task_queue_manager() - task_queue_manager = global_task_queue_manager.global_to_local(node_rank=0, local_rank=1, opt=opt) + global_task_queue_manager, opts = create_basic_task_queue_manager() + task_queue_manager = global_task_queue_manager.global_to_local(node_rank=0, local_rank=1, opts=opts) my_groups = task_queue_manager.get_distributed_groups(new_group_func=MockGroup()) assert my_groups == { 'encoder': OrderedDict({ @@ -196,10 +196,10 @@ def test_cpu_distributed_groups(): }, } } - opt = Namespace(**opt_dict) - world_context = WorldContext.from_opt(opt) - global_task_queue_manager = 
TaskQueueManager.from_opt(opt, world_context) - task_queue_manager = global_task_queue_manager.global_to_local(node_rank=0, local_rank=0, opt=opt) + opts = Namespace(**opt_dict) + world_context = WorldContext.from_opts(opts) + global_task_queue_manager = TaskQueueManager.from_opts(opts, world_context) + task_queue_manager = global_task_queue_manager.global_to_local(node_rank=0, local_rank=0, opts=opts) new_group_func = MagicMock().new_group_func my_groups = task_queue_manager.get_distributed_groups(new_group_func=new_group_func) # No groups should be created when running on CPU @@ -248,10 +248,10 @@ def test_distributed_groups_no_encoder_group(): }, } } - opt = Namespace(**opt_dict) - world_context = WorldContext.from_opt(opt) - global_task_queue_manager = TaskQueueManager.from_opt(opt, world_context) - task_queue_manager = global_task_queue_manager.global_to_local(node_rank=0, local_rank=0, opt=opt) + opts = Namespace(**opt_dict) + world_context = WorldContext.from_opts(opts) + global_task_queue_manager = TaskQueueManager.from_opts(opts, world_context) + task_queue_manager = global_task_queue_manager.global_to_local(node_rank=0, local_rank=0, opts=opts) new_group_func = MagicMock().new_group_func my_groups = task_queue_manager.get_distributed_groups(new_group_func=new_group_func) # No groups should be created: @@ -269,20 +269,20 @@ def test_distributed_groups_no_encoder_group(): # (side, lang): f'{side} {lang}' for (side, lang) in # [('src', 'a'), ('src', 'c'), ('src', 'e'), ('tgt', 'b'), ('tgt', 'd')] # } -# global_task_queue_manager, opt = create_basic_task_queue_manager() -# task_queue_manager = global_task_queue_manager.global_to_local(node_rank=0, local_rank=0, opt=opt) +# global_task_queue_manager, opts = create_basic_task_queue_manager() +# task_queue_manager = global_task_queue_manager.global_to_local(node_rank=0, local_rank=0, opts=opts) # fields = task_queue_manager.get_fields('src', mock_fields) # assert fields == [('src', 'a', None, 'src a')] # fields = task_queue_manager.get_fields('tgt', mock_fields) # assert fields == [('tgt', 'b', None, 'tgt b')] # -# task_queue_manager = global_task_queue_manager.global_to_local(node_rank=0, local_rank=1, opt=opt) +# task_queue_manager = global_task_queue_manager.global_to_local(node_rank=0, local_rank=1, opts=opts) # fields = task_queue_manager.get_fields('src', mock_fields) # assert fields == [('src', 'c', None, 'src c'), ('src', 'a', 'x', 'src a')] # fields = task_queue_manager.get_fields('tgt', mock_fields) # assert fields == [('tgt', 'd', None, 'tgt d')] # -# task_queue_manager = global_task_queue_manager.global_to_local(node_rank=1, local_rank=0, opt=opt) +# task_queue_manager = global_task_queue_manager.global_to_local(node_rank=1, local_rank=0, opts=opts) # fields = task_queue_manager.get_fields('src', mock_fields) # assert fields == [('src', 'e', None, 'src e')] # fields = task_queue_manager.get_fields('tgt', mock_fields) @@ -290,8 +290,8 @@ def test_distributed_groups_no_encoder_group(): def test_basic_getters(): - global_task_queue_manager, opt = create_basic_task_queue_manager() - task_queue_manager = global_task_queue_manager.global_to_local(node_rank=0, local_rank=0, opt=opt) + global_task_queue_manager, opts = create_basic_task_queue_manager() + task_queue_manager = global_task_queue_manager.global_to_local(node_rank=0, local_rank=0, opts=opts) encoders = list(task_queue_manager.get_encoders(0)) assert encoders == ['x'] decoders = list(task_queue_manager.get_decoders(0)) @@ -303,7 +303,7 @@ def test_basic_getters(): 
generators = list(task_queue_manager.get_generators()) assert generators == ['b'] - task_queue_manager = global_task_queue_manager.global_to_local(node_rank=0, local_rank=1, opt=opt) + task_queue_manager = global_task_queue_manager.global_to_local(node_rank=0, local_rank=1, opts=opts) encoders = list(task_queue_manager.get_encoders(0)) assert encoders == ['xx', 'x'] decoders = list(task_queue_manager.get_decoders(0)) diff --git a/mammoth/tests/test_transform.py b/mammoth/tests/test_transform.py index 326d74a1..e53ec3e5 100644 --- a/mammoth/tests/test_transform.py +++ b/mammoth/tests/test_transform.py @@ -31,13 +31,13 @@ def test_transform_register(self): def test_vocab_required_transform(self): transforms_cls = get_transforms_cls(["denoising", "switchout"]) - opt = Namespace(seed=-1, switchout_temperature=1.0) + opts = Namespace(seed=-1, switchout_temperature=1.0) # transforms that require vocab will not create if not provide vocab - transforms = make_transforms(opt, transforms_cls, vocabs=None, task=None) + transforms = make_transforms(opts, transforms_cls, vocabs=None, task=None) self.assertEqual(len(transforms), 0) with self.assertRaises(ValueError): - transforms_cls["switchout"](opt).warm_up(vocabs=None) - transforms_cls["denoising"](opt).warm_up(vocabs=None) + transforms_cls["switchout"](opts).warm_up(vocabs=None) + transforms_cls["denoising"](opts).warm_up(vocabs=None) def test_transform_specials(self): transforms_cls = get_transforms_cls(["prefix"]) @@ -52,8 +52,8 @@ def test_transform_specials(self): tgt_prefix: "⦅_pf_tgt⦆" """ ) - opt = Namespace(data=corpora) - specials = get_specials(opt, transforms_cls) + opts = Namespace(tasks=corpora) + specials = get_specials(opts, transforms_cls) specials_expected = {"src": {"⦅_pf_src⦆"}, "tgt": {"⦅_pf_tgt⦆"}} self.assertEqual(specials, specials_expected) @@ -71,13 +71,13 @@ def test_transform_pipe(self): tgt_prefix: "⦅_pf_tgt⦆" """ ) - opt = Namespace(data=corpora, seed=-1) - prefix_transform = prefix_cls(opt) + opts = Namespace(tasks=corpora, seed=-1) + prefix_transform = prefix_cls(opts) prefix_transform.warm_up() # 2. Init second transform in the pipe filter_cls = get_transforms_cls(["filtertoolong"])["filtertoolong"] - opt = Namespace(src_seq_length=4, tgt_seq_length=4) - filter_transform = filter_cls(opt) + opts = Namespace(src_seq_length=4, tgt_seq_length=4) + filter_transform = filter_cls(opts) # 3. 
Sequential combine them into a transform pipe transform_pipe = TransformPipe.build_from([prefix_transform, filter_transform]) ex = { @@ -110,8 +110,8 @@ def test_prefix(self): tgt_prefix: "⦅_pf_tgt⦆" """ ) - opt = Namespace(data=corpora, seed=-1) - prefix_transform = prefix_cls(opt) + opts = Namespace(tasks=corpora, seed=-1) + prefix_transform = prefix_cls(opts) prefix_transform.warm_up() self.assertIn("trainset", prefix_transform.prefix_dict) @@ -128,8 +128,8 @@ def test_prefix(self): def test_filter_too_long(self): filter_cls = get_transforms_cls(["filtertoolong"])["filtertoolong"] - opt = Namespace(src_seq_length=100, tgt_seq_length=100) - filter_transform = filter_cls(opt) + opts = Namespace(src_seq_length=100, tgt_seq_length=100) + filter_transform = filter_cls(opts) # filter_transform.warm_up() ex_in = { "src": ["Hello", "world", "."], @@ -162,9 +162,9 @@ def setUpClass(cls): def test_bpe(self): bpe_cls = get_transforms_cls(["bpe"])["bpe"] - opt = Namespace(**self.base_opts) - bpe_cls._validate_options(opt) - bpe_transform = bpe_cls(opt) + opts = Namespace(**self.base_opts) + bpe_cls._validate_options(opts) + bpe_transform = bpe_cls(opts) bpe_transform.warm_up() ex = { "src": ["Hello", "world", "."], @@ -212,9 +212,9 @@ def test_sentencepiece(self): base_opt = copy.copy(self.base_opts) base_opt["src_subword_model"] = "data/sample.sp.model" base_opt["tgt_subword_model"] = "data/sample.sp.model" - opt = Namespace(**base_opt) - sp_cls._validate_options(opt) - sp_transform = sp_cls(opt) + opts = Namespace(**base_opt) + sp_cls._validate_options(opts) + sp_transform = sp_cls(opts) sp_transform.warm_up() ex = { "src": ["Hello", "world", "."], @@ -245,9 +245,9 @@ def test_pyonmttok_bpe(self): onmt_args = "{'mode': 'space', 'joiner_annotate': True}" base_opt["src_onmttok_kwargs"] = onmt_args base_opt["tgt_onmttok_kwargs"] = onmt_args - opt = Namespace(**base_opt) - onmttok_cls._validate_options(opt) - onmttok_transform = onmttok_cls(opt) + opts = Namespace(**base_opt) + onmttok_cls._validate_options(opts) + onmttok_transform = onmttok_cls(opts) onmttok_transform.warm_up() ex = { "src": ["Hello", "world", "."], @@ -270,9 +270,9 @@ def test_pyonmttok_sp(self): onmt_args = "{'mode': 'none', 'spacer_annotate': True}" base_opt["src_onmttok_kwargs"] = onmt_args base_opt["tgt_onmttok_kwargs"] = onmt_args - opt = Namespace(**base_opt) - onmttok_cls._validate_options(opt) - onmttok_transform = onmttok_cls(opt) + opts = Namespace(**base_opt) + onmttok_cls._validate_options(opts) + onmttok_transform = onmttok_cls(opts) onmttok_transform.warm_up() ex = { "src": ["Hello", "world", "."], @@ -289,8 +289,8 @@ def test_pyonmttok_sp(self): class TestSamplingTransform(unittest.TestCase): def test_tokendrop(self): tokendrop_cls = get_transforms_cls(["tokendrop"])["tokendrop"] - opt = Namespace(seed=3434, tokendrop_temperature=0.1) - tokendrop_transform = tokendrop_cls(opt) + opts = Namespace(seed=3434, tokendrop_temperature=0.1) + tokendrop_transform = tokendrop_cls(opts) tokendrop_transform.warm_up() ex = { "src": ["Hello", ",", "world", "."], @@ -305,8 +305,8 @@ def test_tokendrop(self): def test_tokenmask(self): tokenmask_cls = get_transforms_cls(["tokenmask"])["tokenmask"] - opt = Namespace(seed=3434, tokenmask_temperature=0.1) - tokenmask_transform = tokenmask_cls(opt) + opts = Namespace(seed=3434, tokenmask_temperature=0.1) + tokenmask_transform = tokenmask_cls(opts) tokenmask_transform.warm_up() ex = { "src": ["Hello", ",", "world", "."], @@ -321,8 +321,8 @@ def test_tokenmask(self): def 
test_switchout(self): switchout_cls = get_transforms_cls(["switchout"])["switchout"] - opt = Namespace(seed=3434, switchout_temperature=0.1) - switchout_transform = switchout_cls(opt) + opts = Namespace(seed=3434, switchout_temperature=0.1) + switchout_transform = switchout_cls(opts) with self.assertRaises(ValueError): # require vocabs to warm_up switchout_transform.warm_up(vocabs=None) @@ -518,16 +518,16 @@ def test_span_infilling(self): def test_vocab_required_transform(self): transforms_cls = get_transforms_cls(["denoising"]) - opt = Namespace(random_ratio=1, denoising_objective='mass') + opts = Namespace(random_ratio=1, denoising_objective='mass') with self.assertRaises(ValueError): - make_transforms(opt, transforms_cls, vocabs=None, task=None) + make_transforms(opts, transforms_cls, vocabs=None, task=None) class TestFeaturesTransform(unittest.TestCase): def test_inferfeats(self): inferfeats_cls = get_transforms_cls(["inferfeats"])["inferfeats"] - opt = Namespace(reversible_tokenization="joiner", prior_tokenization=False) - inferfeats_transform = inferfeats_cls(opt) + opts = Namespace(reversible_tokenization="joiner", prior_tokenization=False) + inferfeats_transform = inferfeats_cls(opts) ex_in = { "src": [ diff --git a/mammoth/tests/test_translation_server.py b/mammoth/tests/test_translation_server.py index 5dac46f9..c639f3b5 100644 --- a/mammoth/tests/test_translation_server.py +++ b/mammoth/tests/test_translation_server.py @@ -16,9 +16,9 @@ class TestServerModel(unittest.TestCase): @unittest.skip('Broken in FoTraNMT') # FIXME def test_deferred_loading_model_and_unload(self): model_id = 0 - opt = {"models": ["test_model.pt"]} + opts = {"models": ["test_model.pt"]} model_root = TEST_DIR - sm = ServerModel(opt, model_id, model_root=model_root, load=False) + sm = ServerModel(opts, model_id, model_root=model_root, load=False) self.assertFalse(sm.loaded) sm.load() self.assertTrue(sm.loaded) @@ -29,9 +29,9 @@ def test_deferred_loading_model_and_unload(self): @unittest.skip('Broken in FoTraNMT') # FIXME def test_load_model_on_init_and_unload(self): model_id = 0 - opt = {"models": ["test_model.pt"]} + opts = {"models": ["test_model.pt"]} model_root = TEST_DIR - sm = ServerModel(opt, model_id, model_root=model_root, load=True) + sm = ServerModel(opts, model_id, model_root=model_root, load=True) self.assertTrue(sm.loaded) self.assertIsInstance(sm.translator, Translator) sm.unload() @@ -40,18 +40,18 @@ def test_load_model_on_init_and_unload(self): @unittest.skip('Broken in FoTraNMT') # FIXME def test_tokenizing_with_no_tokenizer_fails(self): model_id = 0 - opt = {"models": ["test_model.pt"]} + opts = {"models": ["test_model.pt"]} model_root = TEST_DIR - sm = ServerModel(opt, model_id, model_root=model_root, load=True) + sm = ServerModel(opts, model_id, model_root=model_root, load=True) with self.assertRaises(ValueError): sm.tokenize("hello world") @unittest.skip('Broken in FoTraNMT') # FIXME def test_detokenizing_with_no_tokenizer_fails(self): model_id = 0 - opt = {"models": ["test_model.pt"]} + opts = {"models": ["test_model.pt"]} model_root = TEST_DIR - sm = ServerModel(opt, model_id, model_root=model_root, load=True) + sm = ServerModel(opts, model_id, model_root=model_root, load=True) with self.assertRaises(ValueError): sm.detokenize("hello world") @@ -60,9 +60,9 @@ def test_detokenizing_with_no_tokenizer_fails(self): def test_moving_to_gpu_and_back(self): torch.cuda.set_device(torch.device("cuda", 0)) model_id = 0 - opt = {"models": ["test_model.pt"]} + opts = {"models": ["test_model.pt"]} 
model_root = TEST_DIR - sm = ServerModel(opt, model_id, model_root=model_root, load=True) + sm = ServerModel(opts, model_id, model_root=model_root, load=True) for p in sm.translator.model.parameters(): self.assertEqual(p.device.type, "cpu") sm.to_gpu() @@ -76,9 +76,9 @@ def test_moving_to_gpu_and_back(self): def test_initialize_on_gpu_and_move_back(self): torch.cuda.set_device(torch.device("cuda", 0)) model_id = 0 - opt = {"models": ["test_model.pt"], "gpu": 0} + opts = {"models": ["test_model.pt"], "gpu": 0} model_root = TEST_DIR - sm = ServerModel(opt, model_id, model_root=model_root, load=True) + sm = ServerModel(opts, model_id, model_root=model_root, load=True) for p in sm.translator.model.parameters(): self.assertEqual(p.device.type, "cuda") self.assertEqual(p.device.index, 0) @@ -95,9 +95,9 @@ def test_initialize_on_gpu_and_move_back(self): def test_initialize_on_nonzero_gpu_and_back(self): torch.cuda.set_device(torch.device("cuda", 1)) model_id = 0 - opt = {"models": ["test_model.pt"], "gpu": 1} + opts = {"models": ["test_model.pt"], "gpu": 1} model_root = TEST_DIR - sm = ServerModel(opt, model_id, model_root=model_root, load=True) + sm = ServerModel(opts, model_id, model_root=model_root, load=True) for p in sm.translator.model.parameters(): self.assertEqual(p.device.type, "cuda") self.assertEqual(p.device.index, 1) @@ -112,9 +112,9 @@ def test_initialize_on_nonzero_gpu_and_back(self): @unittest.skip('Broken in FoTraNMT') # FIXME def test_run(self): model_id = 0 - opt = {"models": ["test_model.pt"]} + opts = {"models": ["test_model.pt"]} model_root = TEST_DIR - sm = ServerModel(opt, model_id, model_root=model_root, load=True) + sm = ServerModel(opts, model_id, model_root=model_root, load=True) inp = [{"src": "hello how are you today"}, {"src": "good morning to you ."}] results, scores, n_best, time, aligns = sm.run(inp) self.assertIsInstance(results, list) @@ -160,7 +160,7 @@ def write(self, cfg): "timeout": -1, "on_timeout": "to_cpu", "load": false, - "opt": { + "opts": { "beam_size": 5 } } @@ -188,7 +188,7 @@ def test_start_without_initial_loading(self): "timeout": -1, "on_timeout": "to_cpu", "load": true, - "opt": { + "opts": { "beam_size": 5 } } @@ -217,7 +217,7 @@ def test_start_with_initial_loading(self): "timeout": -1, "on_timeout": "to_cpu", "load": true, - "opt": { + "opts": { "beam_size": 5 } }, @@ -227,7 +227,7 @@ def test_start_with_initial_loading(self): "timeout": -1, "on_timeout": "to_cpu", "load": false, - "opt": { + "opts": { "beam_size": 5 } } diff --git a/mammoth/train_single.py b/mammoth/train_single.py index d4e4a14b..b32c1fa6 100644 --- a/mammoth/train_single.py +++ b/mammoth/train_single.py @@ -16,40 +16,41 @@ from mammoth.transforms import get_transforms_cls -def configure_process(opt, device_id): +def configure_process(opts, device_id): logger.info("logger set device {} ".format(device_id)) if device_id >= 0: torch.cuda.set_device(device_id) - set_random_seed(opt.seed, device_id >= 0) + set_random_seed(opts.seed, device_id >= 0) -def _get_model_opts(opt, checkpoint=None): +def _get_model_opts(opts, checkpoint=None): """Get `model_opt` to build model, may load from `checkpoint` if any.""" if checkpoint is not None: - model_opt = ArgumentParser.ckpt_model_opts(checkpoint["opt"]) + model_opt = ArgumentParser.ckpt_model_opts(checkpoint["opts"]) ArgumentParser.update_model_opts(model_opt) ArgumentParser.validate_model_opts(model_opt) - if opt.tensorboard_log_dir == model_opt.tensorboard_log_dir and hasattr(model_opt, 'tensorboard_log_dir_dated'): + if 
opts.tensorboard_log_dir == model_opt.tensorboard_log_dir and \ + hasattr(model_opt, 'tensorboard_log_dir_dated'): # ensure tensorboard output is written in the directory # of previous checkpoints - opt.tensorboard_log_dir_dated = model_opt.tensorboard_log_dir_dated + opts.tensorboard_log_dir_dated = model_opt.tensorboard_log_dir_dated # Override checkpoint's update_embeddings as it defaults to false - model_opt.update_vocab = opt.update_vocab + model_opt.update_vocab = opts.update_vocab else: - model_opt = opt + model_opt = opts return model_opt -def _build_valid_iter(opt, vocabs_dict, transforms_cls, task_queue_manager): +def _build_valid_iter(opts, vocabs_dict, transforms_cls, task_queue_manager): """Build iterator used for validation.""" - if not any(opt.data[corpus_id].get('path_valid_src', False) for corpus_id in opt.data.keys()): + if not any(opts.tasks[corpus_id].get('path_valid_src', False) for corpus_id in opts.tasks.keys()): return None logger.info("creating validation iterator") valid_iter = DynamicDatasetIter.from_opts( task_queue_manager=task_queue_manager, transforms_cls=transforms_cls, vocabs_dict=vocabs_dict, - opts=opt, + opts=opts, is_train=False, ) return valid_iter @@ -107,7 +108,7 @@ def init_distributed(model, task_queue_manager): def main( - opt, + opts, vocabs_dict, device_context, error_queue=None, @@ -116,26 +117,26 @@ def main( task_queue_manager=None, ): """Start training on `device_id`.""" - # NOTE: It's important that ``opt`` has been validated and updated + # NOTE: It's important that ``opts`` has been validated and updated # at this point. # N.B: task_queue_manager is already local - init_logger(opt.log_file, gpu_id=device_context.id) + init_logger(opts.log_file, gpu_id=device_context.id) if device_context.is_distributed(): sleep_s = device_context.local_rank * 3 logger.warning(f'sleeping {sleep_s}s to alleviate ROCm deadlock') time.sleep(sleep_s) - configure_process(opt, device_context.local_rank) + configure_process(opts, device_context.local_rank) gpu_rank_t = torch.distributed.get_rank() logger.info("RANK GPU FROM TORCH %s", str(gpu_rank_t)) - transforms_cls = get_transforms_cls(opt._all_transform) + transforms_cls = get_transforms_cls(opts._all_transform) checkpoint = None - model_opt = _get_model_opts(opt, checkpoint=checkpoint) + model_opt = _get_model_opts(opts, checkpoint=checkpoint) # Build model. - model, generators_md = build_model(model_opt, opt, vocabs_dict, task_queue_manager, checkpoint) + model, generators_md = build_model(model_opt, opts, vocabs_dict, task_queue_manager, checkpoint) logger.info("{} - Init model".format(device_context.id)) if device_context.is_distributed(): @@ -149,19 +150,19 @@ def main( # Build optimizer. 
logger.info("{} - Build optimizer".format(device_context.id)) - optim = Optimizer.from_opt( + optim = Optimizer.from_opts( model, - opt, + opts, task_queue_manager=task_queue_manager, checkpoint=checkpoint, ) # Build model saver - model_saver = build_model_saver(model_opt, opt, model, vocabs_dict, optim, device_context) + model_saver = build_model_saver(model_opt, opts, model, vocabs_dict, optim, device_context) logger.info("{} - Build trainer".format(device_context.id)) trainer = build_trainer( - opt, + opts, device_context, model, vocabs_dict, @@ -177,7 +178,7 @@ def main( task_queue_manager=task_queue_manager, transforms_cls=transforms_cls, vocabs_dict=vocabs_dict, - opts=opt, + opts=opts, is_train=True, ) # TODO: check that IterOnDevice is unnecessary here; corpora should be already on device @@ -198,15 +199,15 @@ def _train_iter(): train_iter = _train_iter() # train_iter = iter_on_device(train_iter, device_context) logger.info("Device {} - Valid iter".format(device_context.id)) - valid_iter = _build_valid_iter(opt, vocabs_dict, transforms_cls, task_queue_manager) + valid_iter = _build_valid_iter(opts, vocabs_dict, transforms_cls, task_queue_manager) - if len(opt.gpu_ranks): + if len(opts.gpu_ranks): if device_context.is_master(): - logger.info('Starting training on GPU: %s' % opt.gpu_ranks) + logger.info('Starting training on GPU: %s' % opts.gpu_ranks) else: logger.info('Starting training on CPU, could be very slow') - train_steps = opt.train_steps - if opt.single_pass and train_steps > 0: + train_steps = opts.train_steps + if opts.single_pass and train_steps > 0: if device_context.is_master(): logger.warning("Option single_pass is enabled, ignoring train_steps.") train_steps = 0 @@ -214,9 +215,9 @@ def _train_iter(): trainer.train( train_iter, train_steps, - save_checkpoint_steps=opt.save_checkpoint_steps, + save_checkpoint_steps=opts.save_checkpoint_steps, valid_iter=valid_iter, - valid_steps=opt.valid_steps, + valid_steps=opts.valid_steps, device_context=device_context, ) diff --git a/mammoth/trainer.py b/mammoth/trainer.py index c8f99a89..4f6527e9 100644 --- a/mammoth/trainer.py +++ b/mammoth/trainer.py @@ -30,7 +30,7 @@ def iter_on_device(iterator, device_context): def build_trainer( - opt, + opts, device_context, model, vocabs_dict, @@ -40,10 +40,10 @@ def build_trainer( generators_md=None, ): """ - Simplify `Trainer` creation based on user `opt`s* + Simplify `Trainer` creation based on user `opts`s* Args: - opt (:obj:`Namespace`): user options (usually from argument parsing) + opts (:obj:`Namespace`): user options (usually from argument parsing) model (:obj:`mammoth.models.NMTModel`): the model to train vocabs_dict (dict): dict of vocabs optim (:obj:`mammoth.utils.Optimizer`): optimizer used during training @@ -61,31 +61,31 @@ def build_trainer( generator = generators_md[f'generator_{lang}'] train_loss_md.add_module( f'trainloss{lang}', - mammoth.utils.loss.build_loss_compute(model, tgt_vocab, opt, train=True, generator=generator), + mammoth.utils.loss.build_loss_compute(model, tgt_vocab, opts, train=True, generator=generator), ) valid_loss_md.add_module( f'valloss{lang}', - mammoth.utils.loss.build_loss_compute(model, tgt_vocab, opt, train=False, generator=generator), + mammoth.utils.loss.build_loss_compute(model, tgt_vocab, opts, train=False, generator=generator), ) - trunc_size = opt.truncated_decoder # Badly named... 
- shard_size = opt.max_generator_batches if opt.model_dtype == 'fp32' else 0 - norm_method = opt.normalization - accum_count = opt.accum_count - accum_steps = opt.accum_steps - average_decay = opt.average_decay - average_every = opt.average_every - dropout = opt.dropout - dropout_steps = opt.dropout_steps - gpu_verbose_level = opt.gpu_verbose_level + trunc_size = opts.truncated_decoder # Badly named... + shard_size = opts.max_generator_batches if opts.model_dtype == 'fp32' else 0 + norm_method = opts.normalization + accum_count = opts.accum_count + accum_steps = opts.accum_steps + average_decay = opts.average_decay + average_every = opts.average_every + dropout = opts.dropout + dropout_steps = opts.dropout_steps + gpu_verbose_level = opts.gpu_verbose_level earlystopper = ( - mammoth.utils.EarlyStopping(opt.early_stopping, scorers=mammoth.utils.scorers_from_opts(opt)) - if opt.early_stopping > 0 + mammoth.utils.EarlyStopping(opts.early_stopping, scorers=mammoth.utils.scorers_from_opts(opts)) + if opts.early_stopping > 0 else None ) - report_manager = mammoth.utils.build_report_manager(opt, device_context.node_rank, device_context.local_rank) + report_manager = mammoth.utils.build_report_manager(opts, device_context.node_rank, device_context.local_rank) trainer = mammoth.Trainer( model, train_loss_md, @@ -99,16 +99,16 @@ def build_trainer( device_context=device_context, gpu_verbose_level=gpu_verbose_level, report_manager=report_manager, - with_align=True if opt.lambda_align > 0 else False, + with_align=True if opts.lambda_align > 0 else False, model_saver=model_saver, average_decay=average_decay, average_every=average_every, - model_dtype=opt.model_dtype, + model_dtype=opts.model_dtype, earlystopper=earlystopper, dropout=dropout, dropout_steps=dropout_steps, task_queue_manager=task_queue_manager, - report_stats_from_parameters=opt.report_stats_from_parameters, + report_stats_from_parameters=opts.report_stats_from_parameters, ) return trainer diff --git a/mammoth/transforms/misc.py b/mammoth/transforms/misc.py index 8d591c7b..b8c1e8b1 100644 --- a/mammoth/transforms/misc.py +++ b/mammoth/transforms/misc.py @@ -72,7 +72,7 @@ def _get_prefix(corpus): def get_prefix_dict(cls, opts): """Get all needed prefix correspond to corpus in `opts`.""" prefix_dict = {} - for c_name, corpus in opts.data.items(): + for c_name, corpus in opts.tasks.items(): prefix = cls._get_prefix(corpus) if prefix is not None: logger.info(f"Get prefix for {c_name}: {prefix}") diff --git a/mammoth/translate/beam_search.py b/mammoth/translate/beam_search.py index 0a2257f6..c5741367 100644 --- a/mammoth/translate/beam_search.py +++ b/mammoth/translate/beam_search.py @@ -410,8 +410,8 @@ class GNMTGlobalScorer(object): """ @classmethod - def from_opt(cls, opt): - return cls(opt.alpha, opt.beta, opt.length_penalty, opt.coverage_penalty) + def from_opts(cls, opts): + return cls(opts.alpha, opts.beta, opts.length_penalty, opts.coverage_penalty) def __init__(self, alpha, beta, length_penalty, coverage_penalty): self._validate(alpha, beta, length_penalty, coverage_penalty) diff --git a/mammoth/translate/translation_server.py b/mammoth/translate/translation_server.py index 4750a3bd..be2e4043 100644 --- a/mammoth/translate/translation_server.py +++ b/mammoth/translate/translation_server.py @@ -95,7 +95,7 @@ def __init__(self, model_path, ct2_translator_args, ct2_translate_batch_args, ta self.translator.unload_model(to_cpu=True) @staticmethod - def convert_onmt_to_ct2_opts(ct2_translator_args, ct2_translate_batch_args, opt): + def 
convert_onmt_to_ct2_opts(ct2_translator_args, ct2_translate_batch_args, opts): def setdefault_if_exists_must_match(obj, name, value): if name in obj: assert value == obj[name], ( @@ -115,18 +115,18 @@ def setdefault_if_exists_must_match(obj, name, value): ct2_translator_args.setdefault(name, value) onmt_for_translator = { - "device": "cuda" if opt.cuda else "cpu", - "device_index": opt.gpu if opt.cuda else 0, + "device": "cuda" if opts.cuda else "cpu", + "device_index": opts.gpu if opts.cuda else 0, } for name, value in onmt_for_translator.items(): setdefault_if_exists_must_match(ct2_translator_args, name, value) onmt_for_translate_batch_enforce = { - "beam_size": opt.beam_size, - "max_batch_size": opt.batch_size, - "num_hypotheses": opt.n_best, - "max_decoding_length": opt.max_length, - "min_decoding_length": opt.min_length, + "beam_size": opts.beam_size, + "max_batch_size": opts.batch_size, + "num_hypotheses": opts.n_best, + "max_decoding_length": opts.max_length, + "min_decoding_length": opts.min_length, } for name, value in onmt_for_translate_batch_enforce.items(): setdefault_if_exists_must_match(ct2_translate_batch_args, name, value) @@ -191,32 +191,32 @@ def start(self, config_file): } kwargs = {k: v for (k, v) in kwargs.items() if v is not None} model_id = conf.get("id", None) - opt = conf["opt"] - opt["models"] = conf["models"] - self.preload_model(opt, model_id=model_id, **kwargs) + opts = conf["opts"] + opts["models"] = conf["models"] + self.preload_model(opts, model_id=model_id, **kwargs) - def clone_model(self, model_id, opt, timeout=-1): + def clone_model(self, model_id, opts, timeout=-1): """Clone a model `model_id`. - Different options may be passed. If `opt` is None, it will use the + Different options may be passed. If `opts` is None, it will use the same set of options """ if model_id in self.models: - if opt is None: - opt = self.models[model_id].user_opt - opt["models"] = self.models[model_id].opt.models - return self.load_model(opt, timeout) + if opts is None: + opts = self.models[model_id].user_opt + opts["models"] = self.models[model_id].opts.models + return self.load_model(opts, timeout) else: raise ServerModelError("No such model '%s'" % str(model_id)) - def load_model(self, opt, model_id=None, **model_kwargs): + def load_model(self, opts, model_id=None, **model_kwargs): """Load a model given a set of options""" - model_id = self.preload_model(opt, model_id=model_id, **model_kwargs) + model_id = self.preload_model(opts, model_id=model_id, **model_kwargs) load_time = self.models[model_id].load_time return model_id, load_time - def preload_model(self, opt, model_id=None, **model_kwargs): + def preload_model(self, opts, model_id=None, **model_kwargs): """Preloading the model: updating internal datastructure It will effectively load the model if `load` is set @@ -230,7 +230,7 @@ def preload_model(self, opt, model_id=None, **model_kwargs): model_id += 1 self.next_id = model_id + 1 print("Pre-loading model %d" % model_id) - model = ServerModel(opt, model_id, **model_kwargs) + model = ServerModel(opts, model_id, **model_kwargs) self.models[model_id] = model return model_id @@ -274,7 +274,7 @@ class ServerModel(object): """Wrap a model with server functionality. 
Args: - opt (dict): Options for the Translator + opts (dict): Options for the Translator model_id (int): Model ID preprocess_opt (list): Options for preprocess processus or None tokenizer_opt (dict): Options for the tokenizer or None @@ -292,7 +292,7 @@ class ServerModel(object): def __init__( self, - opt, + opts, model_id, preprocess_opt=None, tokenizer_opt=None, @@ -307,7 +307,7 @@ def __init__( ct2_translate_batch_args=None, ): self.model_root = model_root - self.opt = self.parse_opt(opt) + self.opts = self.parse_opt(opts) self.custom_opt = custom_opt self.model_id = model_id @@ -322,20 +322,20 @@ def __init__( self.ct2_translate_batch_args = ct2_translate_batch_args self.unload_timer = None - self.user_opt = opt + self.user_opt = opts self.tokenizers = None - if len(self.opt.log_file) > 0: - log_file = os.path.join(model_root, self.opt.log_file) + if len(self.opts.log_file) > 0: + log_file = os.path.join(model_root, self.opts.log_file) else: log_file = None - self.logger = init_logger(log_file=log_file, log_file_level=self.opt.log_file_level, rotate=True) + self.logger = init_logger(log_file=log_file, log_file_level=self.opts.log_file_level, rotate=True) self.loading_lock = threading.Event() self.loading_lock.set() self.running_lock = threading.Semaphore(value=1) - set_random_seed(self.opt.seed, self.opt.cuda) + set_random_seed(self.opts.seed, self.opts.cuda) if self.preprocess_opt is not None: self.logger.info("Loading preprocessor") @@ -370,14 +370,14 @@ def __init__( self.load(preload=True) self.stop_unload_timer() - def parse_opt(self, opt): + def parse_opt(self, opts): """Parse the option set passed by the user using `mammoth.opts` Args: - opt (dict): Options passed by the user + opts (dict): Options passed by the user Returns: - opt (argparse.Namespace): full set of options for the Translator + opts (argparse.Namespace): full set of options for the Translator """ prec_argv = sys.argv @@ -385,13 +385,13 @@ def parse_opt(self, opt): parser = ArgumentParser() mammoth.opts.translate_opts(parser) - models = opt['models'] + models = opts['models'] if not isinstance(models, (list, tuple)): models = [models] - opt['models'] = [os.path.join(self.model_root, model) for model in models] - opt['src'] = "dummy_src" + opts['models'] = [os.path.join(self.model_root, model) for model in models] + opts['src'] = "dummy_src" - for (k, v) in opt.items(): + for (k, v) in opts.items(): if k == 'models': sys.argv += ['-model'] sys.argv += [str(model) for model in v] @@ -400,12 +400,12 @@ def parse_opt(self, opt): else: sys.argv += ['-%s' % k, str(v)] - opt = parser.parse_args() - ArgumentParser.validate_translate_opts(opt) - opt.cuda = opt.gpu > -1 + opts = parser.parse_args() + ArgumentParser.validate_translate_opts(opts) + opts.cuda = opts.gpu > -1 sys.argv = prec_argv - return opt + return opts @property def loaded(self): @@ -421,18 +421,18 @@ def load(self, preload=False): try: if self.ct2_model is not None: CTranslate2Translator.convert_onmt_to_ct2_opts( - self.ct2_translator_args, self.ct2_translate_batch_args, self.opt + self.ct2_translator_args, self.ct2_translate_batch_args, self.opts ) self.translator = CTranslate2Translator( self.ct2_model, ct2_translator_args=self.ct2_translator_args, ct2_translate_batch_args=self.ct2_translate_batch_args, - target_prefix=self.opt.tgt_prefix, + target_prefix=self.opts.tgt_prefix, preload=preload, ) else: self.translator = build_translator( - self.opt, report_score=False, out_file=codecs.open(os.devnull, "w", "utf-8") + self.opts, report_score=False, 
out_file=codecs.open(os.devnull, "w", "utf-8") ) except RuntimeError as e: raise ServerModelError("Runtime Error: %s" % str(e)) @@ -470,7 +470,7 @@ def run(self, inputs): if not self.loaded: self.load() timer.tick(name="load") - elif self.opt.cuda: + elif self.opts.cuda: self.to_gpu() timer.tick(name="to_gpu") @@ -517,14 +517,14 @@ def run(self, inputs): scores, predictions = self.translator.translate( texts_to_translate, tgt=texts_ref, - batch_size=len(texts_to_translate) if self.opt.batch_size == 0 else self.opt.batch_size, + batch_size=len(texts_to_translate) if self.opts.batch_size == 0 else self.opts.batch_size, ) except (RuntimeError, Exception) as e: err = "Error: %s" % str(e) self.logger.error(err) self.logger.error("repr(text_to_translate): " + repr(texts_to_translate)) self.logger.error("model: #%s" % self.model_id) - self.logger.error("model opt: " + str(self.opt.__dict__)) + self.logger.error("model opts: " + str(self.opts.__dict__)) self.logger.error(traceback.format_exc()) raise ServerModelError(err) @@ -541,7 +541,7 @@ def run(self, inputs): def flatten_list(_list): return sum(_list, []) - tiled_texts = [t for t in texts_to_translate for _ in range(self.opt.n_best)] + tiled_texts = [t for t in texts_to_translate for _ in range(self.opts.n_best)] results = flatten_list(predictions) def maybe_item(x): @@ -556,24 +556,24 @@ def maybe_item(x): # build back results with empty texts for i in empty_indices: - j = i * self.opt.n_best - results = results[:j] + [""] * self.opt.n_best + results[j:] - aligns = aligns[:j] + [None] * self.opt.n_best + aligns[j:] - scores = scores[:j] + [0] * self.opt.n_best + scores[j:] + j = i * self.opts.n_best + results = results[:j] + [""] * self.opts.n_best + results[j:] + aligns = aligns[:j] + [None] * self.opts.n_best + aligns[j:] + scores = scores[:j] + [0] * self.opts.n_best + scores[j:] rebuilt_segs, scores, aligns = self.rebuild_seg_packages( - all_preprocessed, results, scores, aligns, self.opt.n_best + all_preprocessed, results, scores, aligns, self.opts.n_best ) results = [self.maybe_postprocess(seg) for seg in rebuilt_segs] - head_spaces = [h for h in head_spaces for i in range(self.opt.n_best)] - tail_spaces = [h for h in tail_spaces for i in range(self.opt.n_best)] + head_spaces = [h for h in head_spaces for i in range(self.opts.n_best)] + tail_spaces = [h for h in tail_spaces for i in range(self.opts.n_best)] results = ["".join(items) for items in zip(head_spaces, results, tail_spaces)] self.logger.info("Translation Results: %d", len(results)) - return results, scores, self.opt.n_best, timer.times, aligns + return results, scores, self.opts.n_best, timer.times, aligns def rebuild_seg_packages(self, all_preprocessed, results, scores, aligns, n_best): """ @@ -618,7 +618,7 @@ def do_timeout(self): def unload(self): self.logger.info("Unloading model %d" % self.model_id) del self.translator - if self.opt.cuda: + if self.opts.cuda: torch.cuda.empty_cache() self.stop_unload_timer() self.unload_timer = None @@ -639,7 +639,7 @@ def to_dict(self): hide_opt = ["models", "src"] d = { "model_id": self.model_id, - "opt": {k: self.user_opt[k] for k in self.user_opt.keys() if k not in hide_opt}, + "opts": {k: self.user_opt[k] for k in self.user_opt.keys() if k not in hide_opt}, "models": self.user_opt["models"], "loaded": self.loaded, "timeout": self.timeout, @@ -655,7 +655,7 @@ def to_cpu(self): self.translator.to_cpu() else: self.translator.model.cpu() - if self.opt.cuda: + if self.opts.cuda: torch.cuda.empty_cache() def to_gpu(self): @@ -663,7 +663,7 
@@ def to_gpu(self): if type(self.translator) == CTranslate2Translator: self.translator.to_gpu() else: - torch.cuda.set_device(self.opt.gpu) + torch.cuda.set_device(self.opts.gpu) self.translator.model.cuda() def maybe_preprocess(self, sequence): @@ -785,7 +785,7 @@ def maybe_detokenize_with_align(self, sequence, src, side='tgt'): sorted or None if no alignment in output. """ align = None - if self.opt.report_align: + if self.opts.report_align: # output contain alignment sequence, align = sequence.split(DefaultTokens.ALIGNMENT_SEPARATOR) if align != '': diff --git a/mammoth/translate/translator.py b/mammoth/translate/translator.py index 0b427f1c..353a718b 100644 --- a/mammoth/translate/translator.py +++ b/mammoth/translate/translator.py @@ -20,46 +20,46 @@ from mammoth.inputters.dataloader import build_dataloader -def build_translator(opt, task, report_score=True, logger=None, out_file=None): +def build_translator(opts, task, report_score=True, logger=None, out_file=None): if out_file is None: - outdir = os.path.dirname(opt.output) + outdir = os.path.dirname(opts.output) if outdir and not os.path.isdir(outdir): # FIXME use warnings instead logger.info('WARNING: output file directory does not exist... creating it.') - os.makedirs(os.path.dirname(opt.output), exist_ok=True) - out_file = codecs.open(opt.output, "w+", "utf-8") + os.makedirs(os.path.dirname(opts.output), exist_ok=True) + out_file = codecs.open(opts.output, "w+", "utf-8") load_test_model = ( - mammoth.modules.decoder_ensemble.load_test_model if len(opt.models) > 3 + mammoth.modules.decoder_ensemble.load_test_model if len(opts.models) > 3 else mammoth.model_builder.load_test_multitask_model ) if logger: logger.info(str(task)) - vocabs, model, model_opt = load_test_model(opt) + vocabs, model, model_opt = load_test_model(opts) - scorer = mammoth.translate.GNMTGlobalScorer.from_opt(opt) + scorer = mammoth.translate.GNMTGlobalScorer.from_opts(opts) if model_opt.model_task == ModelTask.LANGUAGE_MODEL: - translator = GeneratorLM.from_opt( + translator = GeneratorLM.from_opts( model, vocabs, - opt, + opts, model_opt, global_scorer=scorer, out_file=out_file, - report_align=opt.report_align, + report_align=opts.report_align, report_score=report_score, logger=logger, ) else: - translator = Translator.from_opt( + translator = Translator.from_opts( model, vocabs, - opt, + opts, model_opt, global_scorer=scorer, out_file=out_file, - report_align=opt.report_align, + report_align=opts.report_align, report_score=report_score, logger=logger, task=task, @@ -236,11 +236,11 @@ def __init__( set_random_seed(seed, self._use_cuda) @classmethod - def from_opt( + def from_opts( cls, model, vocabs, - opt, + opts, model_opt, global_scorer=None, out_file=None, @@ -255,7 +255,7 @@ def from_opt( model (mammoth.modules.NMTModel): See :func:`__init__()`. vocabs (dict[str, mammoth.inputters.Vocab]): See :func:`__init__()`. - opt (argparse.Namespace): Command line options + opts (argparse.Namespace): Command line options model_opt (argparse.Namespace): Command line options saved with the model checkpoint. 
global_scorer (mammoth.translate.GNMTGlobalScorer): See @@ -273,35 +273,35 @@ def from_opt( return cls( model, vocabs, - opt.src, - tgt_file_path=opt.tgt, - gpu=opt.gpu, - n_best=opt.n_best, - min_length=opt.min_length, - max_length=opt.max_length, - ratio=opt.ratio, - beam_size=opt.beam_size, - random_sampling_topk=opt.random_sampling_topk, - random_sampling_topp=opt.random_sampling_topp, - random_sampling_temp=opt.random_sampling_temp, - stepwise_penalty=opt.stepwise_penalty, - dump_beam=opt.dump_beam, - block_ngram_repeat=opt.block_ngram_repeat, - ignore_when_blocking=set(opt.ignore_when_blocking), - replace_unk=opt.replace_unk, - ban_unk_token=opt.ban_unk_token, - tgt_prefix=opt.tgt_prefix, - phrase_table=opt.phrase_table, - data_type=opt.data_type, - verbose=opt.verbose, - report_time=opt.report_time, + opts.src, + tgt_file_path=opts.tgt, + gpu=opts.gpu, + n_best=opts.n_best, + min_length=opts.min_length, + max_length=opts.max_length, + ratio=opts.ratio, + beam_size=opts.beam_size, + random_sampling_topk=opts.random_sampling_topk, + random_sampling_topp=opts.random_sampling_topp, + random_sampling_temp=opts.random_sampling_temp, + stepwise_penalty=opts.stepwise_penalty, + dump_beam=opts.dump_beam, + block_ngram_repeat=opts.block_ngram_repeat, + ignore_when_blocking=set(opts.ignore_when_blocking), + replace_unk=opts.replace_unk, + ban_unk_token=opts.ban_unk_token, + tgt_prefix=opts.tgt_prefix, + phrase_table=opts.phrase_table, + data_type=opts.data_type, + verbose=opts.verbose, + report_time=opts.report_time, copy_attn=model_opt.copy_attn, global_scorer=global_scorer, out_file=out_file, report_align=report_align, report_score=report_score, logger=logger, - seed=opt.seed, + seed=opts.seed, task=task, ) diff --git a/mammoth/utils/earlystopping.py b/mammoth/utils/earlystopping.py index 23d8c29d..6d20c60f 100644 --- a/mammoth/utils/earlystopping.py +++ b/mammoth/utils/earlystopping.py @@ -63,12 +63,12 @@ def _caller(self, stats): SCORER_BUILDER = {"ppl": PPLScorer, "accuracy": AccuracyScorer} -def scorers_from_opts(opt): - if opt.early_stopping_criteria is None: +def scorers_from_opts(opts): + if opts.early_stopping_criteria is None: return DEFAULT_SCORERS else: scorers = [] - for criterion in set(opt.early_stopping_criteria): + for criterion in set(opts.early_stopping_criteria): assert criterion in SCORER_BUILDER.keys(), "Criterion {} not found".format(criterion) scorers.append(SCORER_BUILDER[criterion]()) return scorers diff --git a/mammoth/utils/loss.py b/mammoth/utils/loss.py index 751ca814..5061325d 100644 --- a/mammoth/utils/loss.py +++ b/mammoth/utils/loss.py @@ -10,7 +10,7 @@ from mammoth.constants import ModelTask, DefaultTokens -def build_loss_compute(model, tgt_vocab, opt, train=True, generator=None): +def build_loss_compute(model, tgt_vocab, opts, train=True, generator=None): """ Returns a LossCompute subclass which wraps around an nn.Module subclass (such as nn.NLLLoss) which defines the loss criterion. The LossCompute @@ -19,20 +19,20 @@ def build_loss_compute(model, tgt_vocab, opt, train=True, generator=None): Currently, the NMTLossCompute class handles all loss computation except for when using a copy mechanism. 
""" - device = torch.device("cuda" if mammoth.utils.misc.use_gpu(opt) else "cpu") + device = torch.device("cuda" if mammoth.utils.misc.use_gpu(opts) else "cpu") padding_idx = tgt_vocab.stoi[DefaultTokens.PAD] unk_idx = tgt_vocab.stoi[DefaultTokens.UNK] - if opt.lambda_coverage != 0: - assert opt.coverage_attn, "--coverage_attn needs to be set in order to use --lambda_coverage != 0" + if opts.lambda_coverage != 0: + assert opts.coverage_attn, "--coverage_attn needs to be set in order to use --lambda_coverage != 0" - if opt.copy_attn: + if opts.copy_attn: criterion = mammoth.modules.CopyGeneratorLoss( - len(tgt_vocab), opt.copy_attn_force, unk_index=unk_idx, ignore_index=padding_idx + len(tgt_vocab), opts.copy_attn_force, unk_index=unk_idx, ignore_index=padding_idx ) - elif opt.label_smoothing > 0 and train: - criterion = LabelSmoothingLoss(opt.label_smoothing, len(tgt_vocab), ignore_index=padding_idx) + elif opts.label_smoothing > 0 and train: + criterion = LabelSmoothingLoss(opts.label_smoothing, len(tgt_vocab), ignore_index=padding_idx) else: criterion = nn.NLLLoss(ignore_index=padding_idx, reduction='sum') @@ -44,35 +44,35 @@ def build_loss_compute(model, tgt_vocab, opt, train=True, generator=None): loss_gen = ( generator[0] if use_raw_logits else generator ) # loss_gen = model.generator[0] if use_raw_logits else model.generator - if opt.copy_attn: - if opt.model_task == ModelTask.SEQ2SEQ: + if opts.copy_attn: + if opts.model_task == ModelTask.SEQ2SEQ: compute = mammoth.modules.CopyGeneratorLossCompute( - criterion, loss_gen, tgt_vocab, opt.copy_loss_by_seqlength, lambda_coverage=opt.lambda_coverage + criterion, loss_gen, tgt_vocab, opts.copy_loss_by_seqlength, lambda_coverage=opts.lambda_coverage ) - elif opt.model_task == ModelTask.LANGUAGE_MODEL: + elif opts.model_task == ModelTask.LANGUAGE_MODEL: compute = mammoth.modules.CopyGeneratorLMLossCompute( - criterion, loss_gen, tgt_vocab, opt.copy_loss_by_seqlength, lambda_coverage=opt.lambda_coverage + criterion, loss_gen, tgt_vocab, opts.copy_loss_by_seqlength, lambda_coverage=opts.lambda_coverage ) else: - raise ValueError(f"No copy generator loss defined for task {opt.model_task}") + raise ValueError(f"No copy generator loss defined for task {opts.model_task}") else: - if opt.model_task == ModelTask.SEQ2SEQ: + if opts.model_task == ModelTask.SEQ2SEQ: compute = NMTLossCompute( criterion, loss_gen, - lambda_coverage=opt.lambda_coverage, - lambda_align=opt.lambda_align, + lambda_coverage=opts.lambda_coverage, + lambda_align=opts.lambda_align, ) - elif opt.model_task == ModelTask.LANGUAGE_MODEL: - assert opt.lambda_align == 0.0, "lamdba_align not supported in LM loss" + elif opts.model_task == ModelTask.LANGUAGE_MODEL: + assert opts.lambda_align == 0.0, "lamdba_align not supported in LM loss" compute = LMLossCompute( criterion, loss_gen, - lambda_coverage=opt.lambda_coverage, - lambda_align=opt.lambda_align, + lambda_coverage=opts.lambda_coverage, + lambda_align=opts.lambda_align, ) else: - raise ValueError(f"No compute loss defined for task {opt.model_task}") + raise ValueError(f"No compute loss defined for task {opts.model_task}") compute.to(device) return compute diff --git a/mammoth/utils/misc.py b/mammoth/utils/misc.py index 36cd5b82..280932f6 100644 --- a/mammoth/utils/misc.py +++ b/mammoth/utils/misc.py @@ -79,11 +79,11 @@ def tile(x, count, dim=0): return x -def use_gpu(opt): +def use_gpu(opts): """ Creates a boolean if gpu used """ - return (hasattr(opt, 'gpu_ranks') and len(opt.gpu_ranks) > 0) or (hasattr(opt, 'gpu') and opt.gpu 
> -1) + return (hasattr(opts, 'gpu_ranks') and len(opts.gpu_ranks) > 0) or (hasattr(opts, 'gpu') and opts.gpu > -1) def set_random_seed(seed, is_cuda): diff --git a/mammoth/utils/module_splitter.py b/mammoth/utils/module_splitter.py index 738037be..6e190a3a 100644 --- a/mammoth/utils/module_splitter.py +++ b/mammoth/utils/module_splitter.py @@ -62,7 +62,7 @@ def explode_model(full_ab_model): # stuff necessary to build bilingual models combining modules model_frame = { "vocab": full_ab_model["vocab"], - "opt": full_ab_model["opt"], + "opts": full_ab_model["opts"], "optim": full_ab_model["optim"], } diff --git a/mammoth/utils/optimizers.py b/mammoth/utils/optimizers.py index d50b2a54..692fdcb4 100644 --- a/mammoth/utils/optimizers.py +++ b/mammoth/utils/optimizers.py @@ -60,7 +60,7 @@ def attention_bridge_optimizer(model, task_queue_manager, base_optimizer): return optimizer -def build_torch_optimizer(model, opt, task_queue_manager): +def build_torch_optimizer(model, opts, task_queue_manager): """Builds the PyTorch optimizer. We use the default parameters for Adam that are suggested by @@ -76,87 +76,91 @@ def build_torch_optimizer(model, opt, task_queue_manager): Args: model: The model to optimize. - opt. The dictionary of options. + opts. The dictionary of options. Returns: A ``torch.optim.Optimizer`` instance. """ params = [p for p in model.parameters() if p.requires_grad] - betas = [opt.adam_beta1, opt.adam_beta2] - if opt.optim == 'sgd': - optimizer = optim.SGD(params, lr=opt.learning_rate) - elif opt.optim == 'adagrad': - optimizer = optim.Adagrad(params, lr=opt.learning_rate, initial_accumulator_value=opt.adagrad_accumulator_init) - elif opt.optim == 'adadelta': - optimizer = optim.Adadelta(params, lr=opt.learning_rate) - elif opt.optim == 'adafactor': + betas = [opts.adam_beta1, opts.adam_beta2] + if opts.optim == 'sgd': + optimizer = optim.SGD(params, lr=opts.learning_rate) + elif opts.optim == 'adagrad': + optimizer = optim.Adagrad( + params, + lr=opts.learning_rate, + initial_accumulator_value=opts.adagrad_accumulator_init, + ) + elif opts.optim == 'adadelta': + optimizer = optim.Adadelta(params, lr=opts.learning_rate) + elif opts.optim == 'adafactor': optimizer = attention_bridge_optimizer( model, task_queue_manager, - lambda params: AdaFactorFairSeq(params, weight_decay=opt.weight_decay), + lambda params: AdaFactorFairSeq(params, weight_decay=opts.weight_decay), ) - elif opt.optim == 'adam': + elif opts.optim == 'adam': optimizer = attention_bridge_optimizer( model, task_queue_manager, lambda params: optim.Adam( - params, lr=opt.learning_rate, betas=betas, eps=1e-9, weight_decay=opt.weight_decay + params, lr=opts.learning_rate, betas=betas, eps=1e-9, weight_decay=opts.weight_decay ) ) - elif opt.optim == 'adamw': + elif opts.optim == 'adamw': optimizer = attention_bridge_optimizer( model, task_queue_manager, lambda params: optim.AdamW( - params, lr=opt.learning_rate, betas=betas, eps=1e-9, weight_decay=opt.weight_decay + params, lr=opts.learning_rate, betas=betas, eps=1e-9, weight_decay=opts.weight_decay ) ) - elif opt.optim == 'fusedadam': + elif opts.optim == 'fusedadam': # we use here a FusedAdam() copy of an old Apex repo - optimizer = FusedAdam(params, lr=opt.learning_rate, betas=betas) - if opt.model_dtype == 'fp16': + optimizer = FusedAdam(params, lr=opts.learning_rate, betas=betas) + if opts.model_dtype == 'fp16': import apex # In this case use the old FusedAdam with FP16_optimizer wrapper - static_loss_scale = opt.loss_scale - dynamic_loss_scale = opt.loss_scale == 0 + 
static_loss_scale = opts.loss_scale + dynamic_loss_scale = opts.loss_scale == 0 optimizer = apex.contrib.optimizers.FP16_Optimizer( optimizer, static_loss_scale=static_loss_scale, dynamic_loss_scale=dynamic_loss_scale ) else: - raise ValueError('Invalid optimizer type: ' + opt.optim) + raise ValueError('Invalid optimizer type: ' + opts.optim) return optimizer -def make_learning_rate_decay_fn(opt): +def make_learning_rate_decay_fn(opts): """Returns the learning decay function from options.""" - if opt.decay_method == 'noam': - return functools.partial(noam_decay, warmup_steps=opt.warmup_steps, model_size=opt.rnn_size) - elif opt.decay_method == 'noamwd': + if opts.decay_method == 'noam': + return functools.partial(noam_decay, warmup_steps=opts.warmup_steps, model_size=opts.rnn_size) + elif opts.decay_method == 'noamwd': return functools.partial( noamwd_decay, - warmup_steps=opt.warmup_steps, - model_size=opt.rnn_size, - rate=opt.learning_rate_decay, - decay_steps=opt.decay_steps, - start_step=opt.start_decay_steps, + warmup_steps=opts.warmup_steps, + model_size=opts.rnn_size, + rate=opts.learning_rate_decay, + decay_steps=opts.decay_steps, + start_step=opts.start_decay_steps, ) - elif opt.decay_method == 'rsqrt': - return functools.partial(rsqrt_decay, warmup_steps=opt.warmup_steps) - elif opt.decay_method == 'linear_warmup': + elif opts.decay_method == 'rsqrt': + return functools.partial(rsqrt_decay, warmup_steps=opts.warmup_steps) + elif opts.decay_method == 'linear_warmup': return functools.partial( linear_warmup_decay, - warmup_steps=opt.warmup_steps, - rate=opt.learning_rate, - train_steps=opt.train_steps, + warmup_steps=opts.warmup_steps, + rate=opts.learning_rate, + train_steps=opts.train_steps, ) - elif opt.start_decay_steps is not None: + elif opts.start_decay_steps is not None: return functools.partial( exponential_decay, - rate=opt.learning_rate_decay, - decay_steps=opt.decay_steps, - start_step=opt.start_decay_steps, + rate=opts.learning_rate_decay, + decay_steps=opts.decay_steps, + start_step=opts.start_decay_steps, ) @@ -275,24 +279,24 @@ def __init__(self, optimizer, learning_rate, learning_rate_decay_fn=None, max_gr self._scaler = None @classmethod - def from_opt(cls, model, opt, task_queue_manager, checkpoint=None): + def from_opts(cls, model, opts, task_queue_manager, checkpoint=None): """Builds the optimizer from options. Args: cls: The ``Optimizer`` class to instantiate. model: The model to optimize. - opt: The dict of user options. + opts: The dict of user options. checkpoint: An optional checkpoint to load states from. Returns: An ``Optimizer`` instance. """ - optim_opt = opt + optim_opt = opts optim_state_dict = None - if opt.train_from and checkpoint is not None: + if opts.train_from and checkpoint is not None: optim = checkpoint['optim'] - ckpt_opt = checkpoint['opt'] + ckpt_opt = checkpoint['opts'] ckpt_state_dict = {} if isinstance(optim, Optimizer): # Backward compatibility. ckpt_state_dict['training_step'] = optim._step + 1 @@ -301,19 +305,19 @@ def from_opt(cls, model, opt, task_queue_manager, checkpoint=None): else: ckpt_state_dict = optim - if opt.reset_optim == 'none': + if opts.reset_optim == 'none': # Load everything from the checkpoint. optim_opt = ckpt_opt optim_state_dict = ckpt_state_dict - elif opt.reset_optim == 'all': + elif opts.reset_optim == 'all': # Build everything from scratch. pass - elif opt.reset_optim == 'states': + elif opts.reset_optim == 'states': # Reset optimizer, keep options. 
optim_opt = ckpt_opt optim_state_dict = ckpt_state_dict del optim_state_dict['optimizer'] - elif opt.reset_optim == 'keep_states': + elif opts.reset_optim == 'keep_states': # Reset options, keep optimizer. optim_state_dict = ckpt_state_dict @@ -324,8 +328,8 @@ def from_opt(cls, model, opt, task_queue_manager, checkpoint=None): max_grad_norm=optim_opt.max_grad_norm, ) - if opt.model_dtype == "fp16": - if opt.optim == "fusedadam": + if opts.model_dtype == "fp16": + if opts.optim == "fusedadam": optimizer._fp16 = "legacy" else: optimizer._fp16 = "amp" diff --git a/mammoth/utils/parse.py b/mammoth/utils/parse.py index 249d1697..7782000b 100644 --- a/mammoth/utils/parse.py +++ b/mammoth/utils/parse.py @@ -23,21 +23,21 @@ def _validate_file(file_path, info): raise IOError(f"Please check path of your {info} file! {file_path}") @classmethod - def _validate_adapters(cls, opt): + def _validate_adapters(cls, opts): """Parse corpora specified in data field of YAML file.""" - if not opt.adapters: + if not opts.adapters: return - adapter_opts = yaml.safe_load(opt.adapters) + adapter_opts = yaml.safe_load(opts.adapters) # TODO: validate adapter opts - opt.adapters = adapter_opts + opts.adapters = adapter_opts @classmethod - def _validate_data(cls, opt): + def _validate_data(cls, opts): """Parse tasks/language-pairs/corpora specified in data field of YAML file.""" - default_transforms = opt.transforms + default_transforms = opts.transforms if len(default_transforms) != 0: logger.info(f"Default transforms: {default_transforms}.") - corpora = yaml.safe_load(opt.data) + corpora = yaml.safe_load(opts.tasks) logger.info("Parsing corpora") n_without_node_gpu = 0 for cname, corpus in corpora.items(): @@ -47,7 +47,7 @@ def _validate_data(cls, opt): if _transforms is None: logger.info(f"Missing transforms field for {cname} data, set to default: {default_transforms}.") corpus['transforms'] = default_transforms - opt.data_task = ModelTask.SEQ2SEQ + opts.data_task = ModelTask.SEQ2SEQ """ # Check path path_src = corpus.get('path_src', None) @@ -57,13 +57,13 @@ def _validate_data(cls, opt): 'tgt path is also required for non language' ' modeling tasks.') else: - opt.data_task = ModelTask.SEQ2SEQ + opts.data_task = ModelTask.SEQ2SEQ if path_tgt is None: logger.warning( "path_tgt is None, it should be set unless the task" " is language modeling" ) - opt.data_task = ModelTask.LANGUAGE_MODEL + opts.data_task = ModelTask.LANGUAGE_MODEL # tgt is src for LM task corpus["path_tgt"] = path_src corpora[cname] = corpus @@ -73,7 +73,7 @@ def _validate_data(cls, opt): """ path_align = corpus.get('path_align', None) if path_align is None: - if hasattr(opt, 'lambda_align') and opt.lambda_align > 0.0: + if hasattr(opts, 'lambda_align') and opts.lambda_align > 0.0: raise ValueError(f'Corpus {cname} alignment file path are required when lambda_align > 0.0') corpus['path_align'] = None else: @@ -140,111 +140,111 @@ def _validate_data(cls, opt): assert n_without_node_gpu == 0 or n_without_node_gpu == len(corpora) logger.info(f"Parsed {len(corpora)} corpora from -data.") - opt.data = corpora + opts.tasks = corpora - src_vocab = yaml.safe_load(opt.src_vocab) + src_vocab = yaml.safe_load(opts.src_vocab) logger.info(f"Parsed {len(src_vocab)} vocabs from -src_vocab.") - opt.src_vocab = src_vocab + opts.src_vocab = src_vocab - tgt_vocab = yaml.safe_load(opt.tgt_vocab) + tgt_vocab = yaml.safe_load(opts.tgt_vocab) logger.info(f"Parsed {len(tgt_vocab)} vocabs from -tgt_vocab.") - opt.tgt_vocab = tgt_vocab + opts.tgt_vocab = tgt_vocab @classmethod 
- def _validate_transforms_opts(cls, opt): + def _validate_transforms_opts(cls, opts): """Check options used by transforms.""" for name, transform_cls in AVAILABLE_TRANSFORMS.items(): - if name in opt._all_transform: - transform_cls._validate_options(opt) + if name in opts._all_transform: + transform_cls._validate_options(opts) @classmethod - def _get_all_transform(cls, opt): + def _get_all_transform(cls, opts): """Should only called after `_validate_data`.""" - all_transforms = set(opt.transforms) - for cname, corpus in opt.data.items(): + all_transforms = set(opts.transforms) + for cname, corpus in opts.tasks.items(): _transforms = set(corpus['transforms']) if len(_transforms) != 0: all_transforms.update(_transforms) - if hasattr(opt, 'lambda_align') and opt.lambda_align > 0.0: + if hasattr(opts, 'lambda_align') and opts.lambda_align > 0.0: if not all_transforms.isdisjoint({'sentencepiece', 'bpe', 'onmt_tokenize'}): raise ValueError('lambda_align is not compatible with on-the-fly tokenization.') if not all_transforms.isdisjoint({'tokendrop', 'prefix', 'denoising'}): raise ValueError('lambda_align is not compatible yet with potential token deletion/addition.') - opt._all_transform = all_transforms + opts._all_transform = all_transforms @classmethod - def _get_all_transform_translate(cls, opt): - opt._all_transform = opt.transforms + def _get_all_transform_translate(cls, opts): + opts._all_transform = opts.transforms @classmethod - def _validate_fields_opts(cls, opt, build_vocab_only=False): + def _validate_fields_opts(cls, opts, build_vocab_only=False): """Check options relate to vocab and fields.""" - for cname, corpus in opt.data.items(): + for cname, corpus in opts.tasks.items(): if cname != CorpusName.VALID and corpus["src_feats"] is not None: - assert opt.src_feats_vocab, "-src_feats_vocab is required if using source features." - if isinstance(opt.src_feats_vocab, str): - opt.src_feats_vocab = yaml.safe_load(opt.src_feats_vocab) + assert opts.src_feats_vocab, "-src_feats_vocab is required if using source features." + if isinstance(opts.src_feats_vocab, str): + opts.src_feats_vocab = yaml.safe_load(opts.src_feats_vocab) for feature in corpus["src_feats"].keys(): - assert feature in opt.src_feats_vocab, f"No vocab file set for feature {feature}" + assert feature in opts.src_feats_vocab, f"No vocab file set for feature {feature}" if build_vocab_only: - if not opt.share_vocab: - assert opt.tgt_vocab, "-tgt_vocab is required if not -share_vocab." + if not opts.share_vocab: + assert opts.tgt_vocab, "-tgt_vocab is required if not -share_vocab." return # validation when train: - for key, vocab in opt.src_vocab.items(): + for key, vocab in opts.src_vocab.items(): cls._validate_file(vocab, info=f'src vocab ({key})') - if not opt.share_vocab: - for key, vocab in opt.tgt_vocab.items(): + if not opts.share_vocab: + for key, vocab in opts.tgt_vocab.items(): cls._validate_file(vocab, info=f'tgt vocab ({key})') - # if opt.dump_fields or opt.dump_transforms: - if opt.dump_transforms: + # if opts.dump_fields or opts.dump_transforms: + if opts.dump_transforms: assert ( - opt.save_data + opts.save_data ), "-save_data should be set if set -dump_transforms." # Check embeddings stuff - if opt.both_embeddings is not None: + if opts.both_embeddings is not None: assert ( - opt.src_embeddings is None and opt.tgt_embeddings is None + opts.src_embeddings is None and opts.tgt_embeddings is None ), "You don't need -src_embeddings or -tgt_embeddings \ if -both_embeddings is set." 
- if any([opt.both_embeddings is not None, opt.src_embeddings is not None, opt.tgt_embeddings is not None]): - assert opt.embeddings_type is not None, "You need to specify an -embedding_type!" + if any([opts.both_embeddings is not None, opts.src_embeddings is not None, opts.tgt_embeddings is not None]): + assert opts.embeddings_type is not None, "You need to specify an -embedding_type!" assert ( - opt.save_data + opts.save_data ), "-save_data should be set if use pretrained embeddings." @classmethod - def _validate_language_model_compatibilities_opts(cls, opt): - if opt.model_task != ModelTask.LANGUAGE_MODEL: + def _validate_language_model_compatibilities_opts(cls, opts): + if opts.model_task != ModelTask.LANGUAGE_MODEL: return logger.info("encoder is not used for LM task") - assert opt.share_vocab and (opt.tgt_vocab is None), "vocab must be shared for LM task" + assert opts.share_vocab and (opts.tgt_vocab is None), "vocab must be shared for LM task" - assert opt.decoder_type == "transformer", "Only transformer decoder is supported for LM task" + assert opts.decoder_type == "transformer", "Only transformer decoder is supported for LM task" @classmethod - def validate_prepare_opts(cls, opt, build_vocab_only=False): + def validate_prepare_opts(cls, opts, build_vocab_only=False): """Validate all options relate to prepare (data/transform/vocab).""" - if opt.n_sample != 0: + if opts.n_sample != 0: assert ( - opt.save_data + opts.save_data ), "-save_data should be set if \ want save samples." - cls._validate_data(opt) - cls._get_all_transform(opt) - cls._validate_transforms_opts(opt) - cls._validate_fields_opts(opt, build_vocab_only=build_vocab_only) + cls._validate_data(opts) + cls._get_all_transform(opts) + cls._validate_transforms_opts(opts) + cls._validate_fields_opts(opts, build_vocab_only=build_vocab_only) @classmethod - def validate_model_opts(cls, opt): - cls._validate_language_model_compatibilities_opts(opt) + def validate_model_opts(cls, opts): + cls._validate_language_model_compatibilities_opts(opts) class ArgumentParser(cfargparse.ArgumentParser, DataOptsCheckerMixin): @@ -319,53 +319,53 @@ def validate_model_opts(cls, model_opt): @classmethod def ckpt_model_opts(cls, ckpt_opt): - # Load default opt values, then overwrite with the opts in + # Load default opts values, then overwrite with the opts in # the checkpoint. That way, if there are new options added, # the defaults are used. 
- opt = cls.defaults(opts.model_opts) - opt.__dict__.update(ckpt_opt.__dict__) - return opt + the_opts = cls.defaults(opts.model_opts) + the_opts.__dict__.update(ckpt_opt.__dict__) + return the_opts @classmethod - def validate_train_opts(cls, opt): - if opt.epochs: + def validate_train_opts(cls, opts): + if opts.epochs: raise AssertionError("-epochs is deprecated please use -train_steps.") - if opt.truncated_decoder > 0 and max(opt.accum_count) > 1: + if opts.truncated_decoder > 0 and max(opts.accum_count) > 1: raise AssertionError("BPTT is not compatible with -accum > 1") - if opt.gpuid: + if opts.gpuid: raise AssertionError("gpuid is deprecated see world_size and gpu_ranks") - if torch.cuda.is_available() and not opt.gpu_ranks: + if torch.cuda.is_available() and not opts.gpu_ranks: logger.warn("You have a CUDA device, should run with -gpu_ranks") - if opt.world_size < len(opt.gpu_ranks): + if opts.world_size < len(opts.gpu_ranks): raise AssertionError("parameter counts of -gpu_ranks must be less or equal than -world_size.") - if len(opt.gpu_ranks) > 0 and opt.world_size == len(opt.gpu_ranks) and min(opt.gpu_ranks) > 0: + if len(opts.gpu_ranks) > 0 and opts.world_size == len(opts.gpu_ranks) and min(opts.gpu_ranks) > 0: raise AssertionError( "-gpu_ranks should have master(=0) rank unless -world_size is greater than len(gpu_ranks)." ) - assert len(opt.dropout) == len(opt.dropout_steps), "Number of dropout values must match accum_steps values" + assert len(opts.dropout) == len(opts.dropout_steps), "Number of dropout values must match accum_steps values" - assert len(opt.attention_dropout) == len( - opt.dropout_steps + assert len(opts.attention_dropout) == len( + opts.dropout_steps ), "Number of attention_dropout values must match accum_steps values" - assert len(opt.accum_count) == len( - opt.accum_steps + assert len(opts.accum_count) == len( + opts.accum_steps ), 'Number of accum_count values must match number of accum_steps' - if opt.update_vocab: - assert opt.train_from, "-update_vocab needs -train_from option" - assert opt.reset_optim in ['states', 'all'], '-update_vocab needs -reset_optim "states" or "all"' + if opts.update_vocab: + assert opts.train_from, "-update_vocab needs -train_from option" + assert opts.reset_optim in ['states', 'all'], '-update_vocab needs -reset_optim "states" or "all"' @classmethod - def validate_translate_opts(cls, opt): - opt.src_feats = eval(opt.src_feats) if opt.src_feats else {} + def validate_translate_opts(cls, opts): + opts.src_feats = eval(opts.src_feats) if opts.src_feats else {} @classmethod - def validate_translate_opts_dynamic(cls, opt): + def validate_translate_opts_dynamic(cls, opts): # It comes from training - # TODO: needs to be added as inference opt - opt.share_vocab = False + # TODO: needs to be added as inference opts + opts.share_vocab = False - opt.stack = yaml.safe_load(opt.stack) + opts.stack = yaml.safe_load(opts.stack) diff --git a/mammoth/utils/report_manager.py b/mammoth/utils/report_manager.py index f52b1cc4..822938d0 100644 --- a/mammoth/utils/report_manager.py +++ b/mammoth/utils/report_manager.py @@ -7,23 +7,23 @@ from mammoth.utils.logging import logger -def build_report_manager(opt, node_rank, local_rank): +def build_report_manager(opts, node_rank, local_rank): # Vanilla mammoth has here an additional gpu_rank <= 0 # which would cause only the first GPU of each node to log. # This change allows all GPUs to log. 
# Because tensorboard does not allow multiple processes writing into the same directory, # each device is treated as a separate run. - if opt.tensorboard: + if opts.tensorboard: from torch.utils.tensorboard import SummaryWriter - if not hasattr(opt, 'tensorboard_log_dir_dated'): - opt.tensorboard_log_dir_dated = opt.tensorboard_log_dir + datetime.now().strftime("/%b-%d_%H-%M-%S") + if not hasattr(opts, 'tensorboard_log_dir_dated'): + opts.tensorboard_log_dir_dated = opts.tensorboard_log_dir + datetime.now().strftime("/%b-%d_%H-%M-%S") - writer = SummaryWriter(f'{opt.tensorboard_log_dir_dated}-rank{node_rank}:{local_rank}', comment="Unmt") + writer = SummaryWriter(f'{opts.tensorboard_log_dir_dated}-rank{node_rank}:{local_rank}', comment="Unmt") else: writer = None - report_mgr = ReportMgr(opt.report_every, start_time=-1, tensorboard_writer=writer) + report_mgr = ReportMgr(opts.report_every, start_time=-1, tensorboard_writer=writer) return report_mgr diff --git a/mammoth/utils/rnn_factory.py b/mammoth/utils/rnn_factory.py deleted file mode 100644 index d42212ba..00000000 --- a/mammoth/utils/rnn_factory.py +++ /dev/null @@ -1,17 +0,0 @@ -""" - RNN tools -""" -import torch.nn as nn -import mammoth.models - - -def rnn_factory(rnn_type, **kwargs): - """rnn factory, Use pytorch version when available.""" - no_pack_padded_seq = False - if rnn_type == "SRU": - # SRU doesn't support PackedSequence. - no_pack_padded_seq = True - rnn = mammoth.models.sru.SRU(**kwargs) - else: - rnn = getattr(nn, rnn_type)(**kwargs) - return rnn, no_pack_padded_seq diff --git a/test_communication/test.py b/test_communication/test.py index 922c1bd0..932a1aed 100644 --- a/test_communication/test.py +++ b/test_communication/test.py @@ -35,13 +35,13 @@ def tearDown(self) -> None: child_process.kill() @staticmethod - def _get_model_components(opt) -> List[str]: + def _get_model_components(opts) -> List[str]: # N.B: These components are only valid for very vanilla language-specific xcoder with fully shared AB models - components_enc = [f"encoder_0_{src_lang}" for src_lang in ast.literal_eval(opt.src_vocab).keys()] - components_dec = [f"encoder_0_{tgt_lang}" for tgt_lang in ast.literal_eval(opt.tgt_vocab).keys()] - components_gen = [f"generator_{tgt_lang}" for tgt_lang in ast.literal_eval(opt.tgt_vocab).keys()] - components_src_emb = [f"src_embeddings_{src_lang}" for src_lang in ast.literal_eval(opt.src_vocab).keys()] - components_tgt_emb = [f"tgt_embeddings_{tgt_lang}" for tgt_lang in ast.literal_eval(opt.tgt_vocab).keys()] + components_enc = [f"encoder_0_{src_lang}" for src_lang in ast.literal_eval(opts.src_vocab).keys()] + components_dec = [f"encoder_0_{tgt_lang}" for tgt_lang in ast.literal_eval(opts.tgt_vocab).keys()] + components_gen = [f"generator_{tgt_lang}" for tgt_lang in ast.literal_eval(opts.tgt_vocab).keys()] + components_src_emb = [f"src_embeddings_{src_lang}" for src_lang in ast.literal_eval(opts.src_vocab).keys()] + components_tgt_emb = [f"tgt_embeddings_{tgt_lang}" for tgt_lang in ast.literal_eval(opts.tgt_vocab).keys()] return [ "frame", "attention_bridge", @@ -55,7 +55,7 @@ def _get_model_components(opt) -> List[str]: @timeout_decorator.timeout(60) def test_training_1gpu_4pairs(self): out_model_prefix = "wmt_1gpu_4pairs" - opt, _ = self.parser.parse_known_args( + opts, _ = self.parser.parse_known_args( [ "-config", "config/wmt_4pairs.yml", @@ -72,14 +72,14 @@ def test_training_1gpu_4pairs(self): "0:0", ] ) - components = self._get_model_components(opt) + components = self._get_model_components(opts) 
out_files = ["models/{}_step_4_{}.pt".format(out_model_prefix, cmp) for cmp in components] for out_file in out_files: if os.path.exists(out_file): logger.info("Removing file {}".format(out_file)) os.remove(out_file) logger.info("Launch training") - train(opt) + train(opts) for cmp in components: self.assertNotIn("{}_step_2_{}.pt".format(out_model_prefix, cmp), os.listdir("models")) self.assertIn("{}_step_4_{}.pt".format(out_model_prefix, cmp), os.listdir("models")) @@ -91,7 +91,7 @@ def test_training_1gpu_4pairs(self): @timeout_decorator.timeout(60) def test_training_1gpu_4pairs_ab_lin(self): out_model_prefix = "wmt_1gpu_4pairs_lin" - opt, _ = self.parser.parse_known_args( + opts, _ = self.parser.parse_known_args( [ "-config", "config/wmt_4pairs.yml", @@ -114,14 +114,14 @@ def test_training_1gpu_4pairs_ab_lin(self): "0:0", ] ) - components = self._get_model_components(opt) + components = self._get_model_components(opts) out_files = ["models/{}_step_4_{}.pt".format(out_model_prefix, cmp) for cmp in components] for out_file in out_files: if os.path.exists(out_file): logger.info("Removing file {}".format(out_file)) os.remove(out_file) logger.info("Launch training") - train(opt) + train(opts) for cmp in components: self.assertNotIn("{}_step_2_{}.pt".format(out_model_prefix, cmp), os.listdir("models")) self.assertIn("{}_step_4_{}.pt".format(out_model_prefix, cmp), os.listdir("models")) @@ -133,7 +133,7 @@ def test_training_1gpu_4pairs_ab_lin(self): @timeout_decorator.timeout(60) def test_training_1gpu_4pairs_ab_ff(self): out_model_prefix = "wmt_1gpu_4pairs_ff" - opt, _ = self.parser.parse_known_args( + opts, _ = self.parser.parse_known_args( [ "-config", "config/wmt_4pairs.yml", @@ -154,14 +154,14 @@ def test_training_1gpu_4pairs_ab_ff(self): "0:0", ] ) - components = self._get_model_components(opt) + components = self._get_model_components(opts) out_files = ["models/{}_step_4_{}.pt".format(out_model_prefix, cmp) for cmp in components] for out_file in out_files: if os.path.exists(out_file): logger.info("Removing file {}".format(out_file)) os.remove(out_file) logger.info("Launch training") - train(opt) + train(opts) for cmp in components: self.assertNotIn("{}_step_2_{}.pt".format(out_model_prefix, cmp), os.listdir("models")) self.assertIn("{}_step_4_{}.pt".format(out_model_prefix, cmp), os.listdir("models")) @@ -173,7 +173,7 @@ def test_training_1gpu_4pairs_ab_ff(self): @timeout_decorator.timeout(60) def test_training_1gpu_4pairs_ab_tf(self): out_model_prefix = "wmt_1gpu_4pairs_tf" - opt, _ = self.parser.parse_known_args( + opts, _ = self.parser.parse_known_args( [ "-config", "config/wmt_4pairs.yml", @@ -194,14 +194,14 @@ def test_training_1gpu_4pairs_ab_tf(self): "0:0", ] ) - components = self._get_model_components(opt) + components = self._get_model_components(opts) out_files = ["models/{}_step_4_{}.pt".format(out_model_prefix, cmp) for cmp in components] for out_file in out_files: if os.path.exists(out_file): logger.info("Removing file {}".format(out_file)) os.remove(out_file) logger.info("Launch training") - train(opt) + train(opts) for cmp in components: self.assertNotIn("{}_step_2_{}.pt".format(out_model_prefix, cmp), os.listdir("models")) self.assertIn("{}_step_4_{}.pt".format(out_model_prefix, cmp), os.listdir("models")) @@ -213,7 +213,7 @@ def test_training_1gpu_4pairs_ab_tf(self): @timeout_decorator.timeout(60) def test_training_1gpu_4pairs_ab_simple(self): out_model_prefix = "wmt_1gpu_4pairs_simple" - opt, _ = self.parser.parse_known_args( + opts, _ = self.parser.parse_known_args( [ 
"-config", "config/wmt_4pairs.yml", @@ -234,14 +234,14 @@ def test_training_1gpu_4pairs_ab_simple(self): "0:0", ] ) - components = self._get_model_components(opt) + components = self._get_model_components(opts) out_files = ["models/{}_step_4_{}.pt".format(out_model_prefix, cmp) for cmp in components] for out_file in out_files: if os.path.exists(out_file): logger.info("Removing file {}".format(out_file)) os.remove(out_file) logger.info("Launch training") - train(opt) + train(opts) for cmp in components: self.assertNotIn("{}_step_2_{}.pt".format(out_model_prefix, cmp), os.listdir("models")) self.assertIn("{}_step_4_{}.pt".format(out_model_prefix, cmp), os.listdir("models")) @@ -253,7 +253,7 @@ def test_training_1gpu_4pairs_ab_simple(self): @timeout_decorator.timeout(60) def test_training_1gpu_4pairs_ab_perceiver(self): out_model_prefix = "wmt_1gpu_4pairs_perceiver" - opt, _ = self.parser.parse_known_args( + opts, _ = self.parser.parse_known_args( [ "-config", "config/wmt_4pairs.yml", @@ -274,14 +274,14 @@ def test_training_1gpu_4pairs_ab_perceiver(self): "0:0", ] ) - components = self._get_model_components(opt) + components = self._get_model_components(opts) out_files = ["models/{}_step_4_{}.pt".format(out_model_prefix, cmp) for cmp in components] for out_file in out_files: if os.path.exists(out_file): logger.info("Removing file {}".format(out_file)) os.remove(out_file) logger.info("Launch training") - train(opt) + train(opts) for cmp in components: self.assertNotIn("{}_step_2_{}.pt".format(out_model_prefix, cmp), os.listdir("models")) self.assertIn("{}_step_4_{}.pt".format(out_model_prefix, cmp), os.listdir("models")) @@ -293,7 +293,7 @@ def test_training_1gpu_4pairs_ab_perceiver(self): @timeout_decorator.timeout(60) def test_training_2gpus_4pairs(self): out_model_prefix = "wmt_2gpus_4pairs" - opt, _ = self.parser.parse_known_args( + opts, _ = self.parser.parse_known_args( [ "-config", "config/wmt_4pairs.yml", @@ -311,14 +311,14 @@ def test_training_2gpus_4pairs(self): "0:1", ] ) - components = self._get_model_components(opt) + components = self._get_model_components(opts) out_files = ["models/{}_step_4_{}.pt".format(out_model_prefix, cmp) for cmp in components] for out_file in out_files: if os.path.exists(out_file): logger.info("Removing file {}".format(out_file)) os.remove(out_file) logger.info("Launch training") - train(opt) + train(opts) for cmp in components: self.assertNotIn("{}_step_2_{}.pt".format(out_model_prefix, cmp), os.listdir("models")) self.assertIn("{}_step_4_{}.pt".format(out_model_prefix, cmp), os.listdir("models")) @@ -330,7 +330,7 @@ def test_training_2gpus_4pairs(self): @timeout_decorator.timeout(60) def test_training_2gpus_4pairs_ab_lin(self): out_model_prefix = "wmt_2gpus_4pairs_lin" - opt, _ = self.parser.parse_known_args( + opts, _ = self.parser.parse_known_args( [ "-config", "config/wmt_4pairs.yml", @@ -354,14 +354,14 @@ def test_training_2gpus_4pairs_ab_lin(self): "0:1", ] ) - components = self._get_model_components(opt) + components = self._get_model_components(opts) out_files = ["models/{}_step_4_{}.pt".format(out_model_prefix, cmp) for cmp in components] for out_file in out_files: if os.path.exists(out_file): logger.info("Removing file {}".format(out_file)) os.remove(out_file) logger.info("Launch training") - train(opt) + train(opts) for cmp in components: self.assertNotIn("{}_step_2_{}.pt".format(out_model_prefix, cmp), os.listdir("models")) self.assertIn("{}_step_4_{}.pt".format(out_model_prefix, cmp), os.listdir("models")) @@ -373,7 +373,7 @@ def 
test_training_2gpus_4pairs_ab_lin(self): @timeout_decorator.timeout(60) def test_training_2gpus_4pairs_ab_ff(self): out_model_prefix = "wmt_2gpus_4pairs_ff" - opt, _ = self.parser.parse_known_args( + opts, _ = self.parser.parse_known_args( [ "-config", "config/wmt_4pairs.yml", @@ -395,14 +395,14 @@ def test_training_2gpus_4pairs_ab_ff(self): "0:1", ] ) - components = self._get_model_components(opt) + components = self._get_model_components(opts) out_files = ["models/{}_step_4_{}.pt".format(out_model_prefix, cmp) for cmp in components] for out_file in out_files: if os.path.exists(out_file): logger.info("Removing file {}".format(out_file)) os.remove(out_file) logger.info("Launch training") - train(opt) + train(opts) for cmp in components: self.assertNotIn("{}_step_2_{}.pt".format(out_model_prefix, cmp), os.listdir("models")) self.assertIn("{}_step_4_{}.pt".format(out_model_prefix, cmp), os.listdir("models")) @@ -414,7 +414,7 @@ def test_training_2gpus_4pairs_ab_ff(self): @timeout_decorator.timeout(60) def test_training_2gpus_4pairs_ab_tf(self): out_model_prefix = "wmt_2gpus_4pairs_tf" - opt, _ = self.parser.parse_known_args( + opts, _ = self.parser.parse_known_args( [ "-config", "config/wmt_4pairs.yml", @@ -436,14 +436,14 @@ def test_training_2gpus_4pairs_ab_tf(self): "0:1", ] ) - components = self._get_model_components(opt) + components = self._get_model_components(opts) out_files = ["models/{}_step_4_{}.pt".format(out_model_prefix, cmp) for cmp in components] for out_file in out_files: if os.path.exists(out_file): logger.info("Removing file {}".format(out_file)) os.remove(out_file) logger.info("Launch training") - train(opt) + train(opts) for cmp in components: self.assertNotIn("{}_step_2_{}.pt".format(out_model_prefix, cmp), os.listdir("models")) self.assertIn("{}_step_4_{}.pt".format(out_model_prefix, cmp), os.listdir("models")) @@ -455,7 +455,7 @@ def test_training_2gpus_4pairs_ab_tf(self): @timeout_decorator.timeout(60) def test_training_2gpus_4pairs_ab_simple(self): out_model_prefix = "wmt_2gpus_4pairs_simple" - opt, _ = self.parser.parse_known_args( + opts, _ = self.parser.parse_known_args( [ "-config", "config/wmt_4pairs.yml", @@ -479,14 +479,14 @@ def test_training_2gpus_4pairs_ab_simple(self): "0:1", ] ) - components = self._get_model_components(opt) + components = self._get_model_components(opts) out_files = ["models/{}_step_4_{}.pt".format(out_model_prefix, cmp) for cmp in components] for out_file in out_files: if os.path.exists(out_file): logger.info("Removing file {}".format(out_file)) os.remove(out_file) logger.info("Launch training") - train(opt) + train(opts) for cmp in components: self.assertNotIn("{}_step_2_{}.pt".format(out_model_prefix, cmp), os.listdir("models")) self.assertIn("{}_step_4_{}.pt".format(out_model_prefix, cmp), os.listdir("models")) @@ -498,7 +498,7 @@ def test_training_2gpus_4pairs_ab_simple(self): @timeout_decorator.timeout(60) def test_training_2gpus_4pairs_ab_perceiver(self): out_model_prefix = "wmt_2gpus_4pairs_perceiver" - opt, _ = self.parser.parse_known_args( + opts, _ = self.parser.parse_known_args( [ "-config", "config/wmt_4pairs.yml", @@ -520,14 +520,14 @@ def test_training_2gpus_4pairs_ab_perceiver(self): "0:1", ] ) - components = self._get_model_components(opt) + components = self._get_model_components(opts) out_files = ["models/{}_step_4_{}.pt".format(out_model_prefix, cmp) for cmp in components] for out_file in out_files: if os.path.exists(out_file): logger.info("Removing file {}".format(out_file)) os.remove(out_file) logger.info("Launch 
training") - train(opt) + train(opts) for cmp in components: self.assertNotIn("{}_step_2_{}.pt".format(out_model_prefix, cmp), os.listdir("models")) self.assertIn("{}_step_4_{}.pt".format(out_model_prefix, cmp), os.listdir("models")) @@ -539,7 +539,7 @@ def test_training_2gpus_4pairs_ab_perceiver(self): @timeout_decorator.timeout(60) def test_training_2gpus_4pairs_crossed(self): out_model_prefix = "wmt_2gpus_4pairs_crossed" - opt, _ = self.parser.parse_known_args( + opts, _ = self.parser.parse_known_args( [ "-config", "config/wmt_4pairs.yml", @@ -557,14 +557,14 @@ def test_training_2gpus_4pairs_crossed(self): "0:0", ] ) - components = self._get_model_components(opt) + components = self._get_model_components(opts) out_files = ["models/{}_step_4_{}.pt".format(out_model_prefix, cmp) for cmp in components] for out_file in out_files: if os.path.exists(out_file): logger.info("Removing file {}".format(out_file)) os.remove(out_file) logger.info("Launch training") - train(opt) + train(opts) for cmp in components: self.assertNotIn("{}_step_2_{}.pt".format(out_model_prefix, cmp), os.listdir("models")) self.assertIn("{}_step_4_{}.pt".format(out_model_prefix, cmp), os.listdir("models")) @@ -576,7 +576,7 @@ def test_training_2gpus_4pairs_crossed(self): @timeout_decorator.timeout(60) def test_training_4gpus_4pairs(self): out_model_prefix = "wmt_4gpus_4pairs" - opt, _ = self.parser.parse_known_args( + opts, _ = self.parser.parse_known_args( [ "-config", "config/wmt_4pairs.yml", @@ -596,14 +596,14 @@ def test_training_4gpus_4pairs(self): "0:3", ] ) - components = self._get_model_components(opt) + components = self._get_model_components(opts) out_files = ["models/{}_step_4_{}.pt".format(out_model_prefix, cmp) for cmp in components] for out_file in out_files: if os.path.exists(out_file): logger.info("Removing file {}".format(out_file)) os.remove(out_file) logger.info("Launch training") - train(opt) + train(opts) for cmp in components: self.assertNotIn("{}_step_2_{}.pt".format(out_model_prefix, cmp), os.listdir("models")) self.assertIn("{}_step_4_{}.pt".format(out_model_prefix, cmp), os.listdir("models")) @@ -615,7 +615,7 @@ def test_training_4gpus_4pairs(self): @timeout_decorator.timeout(120) def test_training_3gpus_12pairs(self): out_model_prefix = "wmt_3gpus_12pairs" - opt, _ = self.parser.parse_known_args( + opts, _ = self.parser.parse_known_args( [ "-config", "config/wmt_12pairs.yml", @@ -642,14 +642,14 @@ def test_training_3gpus_12pairs(self): "0:2", ] ) - components = self._get_model_components(opt) + components = self._get_model_components(opts) out_files = ["models/{}_step_4_{}.pt".format(out_model_prefix, cmp) for cmp in components] for out_file in out_files: if os.path.exists(out_file): logger.info("Removing file {}".format(out_file)) os.remove(out_file) logger.info("Launch training") - train(opt) + train(opts) for cmp in components: self.assertNotIn("{}_step_2_{}.pt".format(out_model_prefix, cmp), os.listdir("models")) self.assertIn("{}_step_4_{}.pt".format(out_model_prefix, cmp), os.listdir("models")) @@ -661,7 +661,7 @@ def test_training_3gpus_12pairs(self): @timeout_decorator.timeout(120) def test_training_3gpus_21pairs(self): out_model_prefix = "wmt_3gpus_21pairs" - opt, _ = self.parser.parse_known_args( + opts, _ = self.parser.parse_known_args( [ "-config", "config/wmt_21pairs.yml", @@ -697,14 +697,14 @@ def test_training_3gpus_21pairs(self): "0:2", ] ) - components = self._get_model_components(opt) + components = self._get_model_components(opts) out_files = 
["models/{}_step_4_{}.pt".format(out_model_prefix, cmp) for cmp in components] for out_file in out_files: if os.path.exists(out_file): logger.info("Removing file {}".format(out_file)) os.remove(out_file) logger.info("Launch training") - train(opt) + train(opts) for cmp in components: self.assertNotIn("{}_step_2_{}.pt".format(out_model_prefix, cmp), os.listdir("models")) self.assertIn("{}_step_4_{}.pt".format(out_model_prefix, cmp), os.listdir("models")) @@ -716,7 +716,7 @@ def test_training_3gpus_21pairs(self): @timeout_decorator.timeout(120) def test_training_4gpus_12pairs(self): out_model_prefix = "wmt_4gpus_12pairs" - opt, _ = self.parser.parse_known_args( + opts, _ = self.parser.parse_known_args( [ "-config", "config/wmt_12pairs.yml", @@ -744,14 +744,14 @@ def test_training_4gpus_12pairs(self): "0:3", ] ) - components = self._get_model_components(opt) + components = self._get_model_components(opts) out_files = ["models/{}_step_4_{}.pt".format(out_model_prefix, cmp) for cmp in components] for out_file in out_files: if os.path.exists(out_file): logger.info("Removing file {}".format(out_file)) os.remove(out_file) logger.info("Launch training") - train(opt) + train(opts) for cmp in components: self.assertNotIn("{}_step_2_{}.pt".format(out_model_prefix, cmp), os.listdir("models")) self.assertIn("{}_step_4_{}.pt".format(out_model_prefix, cmp), os.listdir("models")) @@ -763,7 +763,7 @@ def test_training_4gpus_12pairs(self): @timeout_decorator.timeout(120) def test_training_4gpus_24pairs(self): out_model_prefix = "wmt_4gpus_24pairs" - opt, _ = self.parser.parse_known_args( + opts, _ = self.parser.parse_known_args( [ "-config", "config/wmt_24pairs.yml", @@ -803,14 +803,14 @@ def test_training_4gpus_24pairs(self): "0:3", ] ) - components = self._get_model_components(opt) + components = self._get_model_components(opts) out_files = ["models/{}_step_4_{}.pt".format(out_model_prefix, cmp) for cmp in components] for out_file in out_files: if os.path.exists(out_file): logger.info("Removing file {}".format(out_file)) os.remove(out_file) logger.info("Launch training") - train(opt) + train(opts) for cmp in components: self.assertNotIn("{}_step_2_{}.pt".format(out_model_prefix, cmp), os.listdir("models")) self.assertIn("{}_step_4_{}.pt".format(out_model_prefix, cmp), os.listdir("models")) @@ -822,7 +822,7 @@ def test_training_4gpus_24pairs(self): @timeout_decorator.timeout(120) def test_training_1gpu_tensorboard(self): out_model_prefix = "wmt_1gpu_tb" - opt, _ = self.parser.parse_known_args( + opts, _ = self.parser.parse_known_args( [ "-config", "config/wmt_4pairs.yml", @@ -843,14 +843,14 @@ def test_training_1gpu_tensorboard(self): "0:0", ] ) - components = self._get_model_components(opt) + components = self._get_model_components(opts) out_files = ["models/{}_step_4_{}.pt".format(out_model_prefix, cmp) for cmp in components] for out_file in out_files: if os.path.exists(out_file): logger.info("Removing file {}".format(out_file)) os.remove(out_file) logger.info("Launch training") - train(opt) + train(opts) for cmp in components: self.assertNotIn("{}_step_2_{}.pt".format(out_model_prefix, cmp), os.listdir("models")) self.assertIn("{}_step_4_{}.pt".format(out_model_prefix, cmp), os.listdir("models")) @@ -868,7 +868,7 @@ def test_training_1gpu_tensorboard(self): @timeout_decorator.timeout(120) def test_training_2gpus_tensorboard(self): out_model_prefix = "wmt_2gpus_tb" - opt, _ = self.parser.parse_known_args( + opts, _ = self.parser.parse_known_args( [ "-config", "config/wmt_4pairs.yml", @@ -890,14 +890,14 @@ 
def test_training_2gpus_tensorboard(self): "0:1", ] ) - components = self._get_model_components(opt) + components = self._get_model_components(opts) out_files = ["models/{}_step_4_{}.pt".format(out_model_prefix, cmp) for cmp in components] for out_file in out_files: if os.path.exists(out_file): logger.info("Removing file {}".format(out_file)) os.remove(out_file) logger.info("Launch training") - train(opt) + train(opts) for cmp in components: self.assertNotIn("{}_step_2_{}.pt".format(out_model_prefix, cmp), os.listdir("models")) self.assertIn("{}_step_4_{}.pt".format(out_model_prefix, cmp), os.listdir("models")) @@ -926,7 +926,7 @@ def test_training_2gpus_tensorboard(self): # def test_translate(self): # # TODO: train model instead of loading one the one used now, # # remove all absolute paths, add test data in the repo -# opt, _ = self.parser.parse_known_args( +# opts, _ = self.parser.parse_known_args( # [ # "-gpu", # "0", @@ -945,4 +945,4 @@ def test_training_2gpus_tensorboard(self): # "-use_attention_bridge", # ] # ) -# translate(opt) +# translate(opts) diff --git a/tools/embeddings_to_torch.py b/tools/embeddings_to_torch.py index 1643fa25..00f2d981 100755 --- a/tools/embeddings_to_torch.py +++ b/tools/embeddings_to_torch.py @@ -79,48 +79,48 @@ def main(): parser.add_argument('-verbose', action="store_true", default=False) parser.add_argument('-skip_lines', type=int, default=0, help="Skip first lines of the embedding file") parser.add_argument('-type', choices=["GloVe", "word2vec"], default="GloVe") - opt = parser.parse_args() + opts = parser.parse_args() - enc_vocab, dec_vocab = get_vocabs(opt.dict_file) + enc_vocab, dec_vocab = get_vocabs(opts.dict_file) # Read in embeddings - skip_lines = 1 if opt.type == "word2vec" else opt.skip_lines - if opt.emb_file_both is not None: - if opt.emb_file_enc is not None: + skip_lines = 1 if opts.type == "word2vec" else opts.skip_lines + if opts.emb_file_both is not None: + if opts.emb_file_enc is not None: raise ValueError("If --emb_file_both is passed in, you should not" "set --emb_file_enc.") - if opt.emb_file_dec is not None: + if opts.emb_file_dec is not None: raise ValueError("If --emb_file_both is passed in, you should not" "set --emb_file_dec.") set_of_src_and_tgt_vocab = set(enc_vocab.stoi.keys()) | set(dec_vocab.stoi.keys()) - logger.info("Reading encoder and decoder embeddings from {}".format(opt.emb_file_both)) - src_vectors, total_vec_count = read_embeddings(opt.emb_file_both, skip_lines, set_of_src_and_tgt_vocab) + logger.info("Reading encoder and decoder embeddings from {}".format(opts.emb_file_both)) + src_vectors, total_vec_count = read_embeddings(opts.emb_file_both, skip_lines, set_of_src_and_tgt_vocab) tgt_vectors = src_vectors logger.info("\tFound {} total vectors in file".format(total_vec_count)) else: - if opt.emb_file_enc is None: + if opts.emb_file_enc is None: raise ValueError( "If --emb_file_enc not provided. Please specify " "the file with encoder embeddings, or pass in " "--emb_file_both" ) - if opt.emb_file_dec is None: + if opts.emb_file_dec is None: raise ValueError( "If --emb_file_dec not provided. 
Please specify " "the file with encoder embeddings, or pass in " "--emb_file_both" ) - logger.info("Reading encoder embeddings from {}".format(opt.emb_file_enc)) - src_vectors, total_vec_count = read_embeddings(opt.emb_file_enc, skip_lines, filter_set=enc_vocab.stoi) + logger.info("Reading encoder embeddings from {}".format(opts.emb_file_enc)) + src_vectors, total_vec_count = read_embeddings(opts.emb_file_enc, skip_lines, filter_set=enc_vocab.stoi) logger.info("\tFound {} total vectors in file.".format(total_vec_count)) - logger.info("Reading decoder embeddings from {}".format(opt.emb_file_dec)) - tgt_vectors, total_vec_count = read_embeddings(opt.emb_file_dec, skip_lines, filter_set=dec_vocab.stoi) + logger.info("Reading decoder embeddings from {}".format(opts.emb_file_dec)) + tgt_vectors, total_vec_count = read_embeddings(opts.emb_file_dec, skip_lines, filter_set=dec_vocab.stoi) logger.info("\tFound {} total vectors in file".format(total_vec_count)) logger.info("After filtering to vectors in vocab:") logger.info("\t* enc: %d match, %d missing, (%.2f%%)" % calc_vocab_load_stats(enc_vocab, src_vectors)) logger.info("\t* dec: %d match, %d missing, (%.2f%%)" % calc_vocab_load_stats(dec_vocab, tgt_vectors)) # Write to file - enc_output_file = opt.output_file + ".enc.pt" - dec_output_file = opt.output_file + ".dec.pt" + enc_output_file = opts.output_file + ".enc.pt" + dec_output_file = opts.output_file + ".dec.pt" logger.info("\nSaving embedding as:\n\t* enc: %s\n\t* dec: %s" % (enc_output_file, dec_output_file)) torch.save(convert_to_torch_tensor(src_vectors, enc_vocab), enc_output_file) torch.save(convert_to_torch_tensor(tgt_vectors, dec_vocab), dec_output_file) diff --git a/tools/extract_embeddings.py b/tools/extract_embeddings.py index 2b30f42e..b439ee06 100644 --- a/tools/extract_embeddings.py +++ b/tools/extract_embeddings.py @@ -31,20 +31,20 @@ def main(): dummy_parser = argparse.ArgumentParser(description='train.py') mammoth.opts.model_opts(dummy_parser) dummy_opt = dummy_parser.parse_known_args([])[0] - opt = parser.parse_args() - opt.cuda = opt.gpu > -1 - if opt.cuda: - torch.cuda.set_device(opt.gpu) + opts = parser.parse_args() + opts.cuda = opts.gpu > -1 + if opts.cuda: + torch.cuda.set_device(opts.gpu) # Add in default model arguments, possibly added since training. 
- checkpoint = torch.load(opt.model, map_location=lambda storage, loc: storage) - model_opt = checkpoint['opt'] + checkpoint = torch.load(opts.model, map_location=lambda storage, loc: storage) + model_opt = checkpoint['opts'] fields = checkpoint['vocab'] src_dict = fields['src'].base_field.vocab # assumes src is text tgt_dict = fields['tgt'].base_field.vocab - model_opt = checkpoint['opt'] + model_opt = checkpoint['opts'] for arg in dummy_opt.__dict__: if arg not in model_opt: model_opt.__dict__[arg] = dummy_opt.__dict__[arg] @@ -53,7 +53,7 @@ def main(): ArgumentParser.update_model_opts(model_opt) ArgumentParser.validate_model_opts(model_opt) - model = mammoth.model_builder.build_base_model(model_opt, fields, use_gpu(opt), checkpoint) + model = mammoth.model_builder.build_base_model(model_opt, fields, use_gpu(opts), checkpoint) encoder = model.encoder # no encoder for LM task decoder = model.decoder @@ -61,10 +61,10 @@ def main(): decoder_embeddings = decoder.embeddings.word_lut.weight.data.tolist() logger.info("Writing source embeddings") - write_embeddings(opt.output_dir + "/src_embeddings.txt", src_dict, encoder_embeddings) + write_embeddings(opts.output_dir + "/src_embeddings.txt", src_dict, encoder_embeddings) logger.info("Writing target embeddings") - write_embeddings(opt.output_dir + "/tgt_embeddings.txt", tgt_dict, decoder_embeddings) + write_embeddings(opts.output_dir + "/tgt_embeddings.txt", tgt_dict, decoder_embeddings) logger.info('... done.') logger.info('Converting model...') diff --git a/tools/extract_vocabulary.py b/tools/extract_vocabulary.py index e003cc81..3c062e54 100644 --- a/tools/extract_vocabulary.py +++ b/tools/extract_vocabulary.py @@ -60,12 +60,12 @@ def main(): help="""Specifies 'src' or 'tgt' side for 'field' file_type.""", ) - opt = parser.parse_args() + opts = parser.parse_args() vocabulary = {} - if opt.file_type == 'text': + if opts.file_type == 'text': print("Reading input file...") - for batch in read_files_batch(opt.file): + for batch in read_files_batch(opts.file): for sentence in batch: for w in sentence: if w in vocabulary: @@ -74,19 +74,19 @@ def main(): vocabulary[w] = 1 print("Writing vocabulary file...") - with open(opt.out_file, "w") as f: + with open(opts.out_file, "w") as f: for w, count in sorted(vocabulary.items(), key=lambda x: x[1], reverse=True): f.write("{0}\n".format(w)) else: - if opt.side not in ['src', 'tgt']: + if opts.side not in ['src', 'tgt']: raise ValueError("If using -file_type='field', specifies 'src' or 'tgt' argument for -side.") import torch print("Reading input file...") - if not len(opt.file) == 1: + if not len(opts.file) == 1: raise ValueError("If using -file_type='field', only pass one argument for -file.") - vocabs = torch.load(opt.file[0]) - voc = dict(vocabs)[opt.side] + vocabs = torch.load(opts.file[0]) + voc = dict(vocabs)[opts.side] try: word_list = voc[0][1].base_field.vocab.itos @@ -94,7 +94,7 @@ def main(): word_list = voc[0][1].vocab.itos print("Writing vocabulary file...") - with open(opt.out_file, "wb") as f: + with open(opts.out_file, "wb") as f: for w in word_list: f.write(u"{0}\n".format(w).encode("utf-8"))
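
Note on checkpoint compatibility: checkpoints written before this commit store
their training options under the "opt" key, while the code after the rename
reads and writes "opts" (see average_models.py, module_splitter.py and
extract_embeddings.py above). The sketch below is illustrative only and not
part of this patch; the helper name is made up. It shows one way downstream
code could tolerate both key spellings when loading older checkpoints.

import torch

def load_checkpoint_opts(path):
    """Return (checkpoint, opts) regardless of which key the checkpoint uses."""
    # Older checkpoints: {"opt": ..., ...}; checkpoints after this patch: {"opts": ..., ...}.
    checkpoint = torch.load(path, map_location="cpu")
    opts = checkpoint.get("opts", checkpoint.get("opt"))
    return checkpoint, opts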