Merge pull request #252 from stanford-oval/wip/mehrad/cleanups
Major Cleanup #1
Mehrad0711 authored Mar 2, 2022
2 parents a5089ae + 91c3031 commit fb35bef
Showing 33 changed files with 1,094 additions and 3,746 deletions.
5 changes: 0 additions & 5 deletions .travis.yml
@@ -25,11 +25,6 @@ jobs:
       stage: test
       script:
         - bash ./tests/test_main_almond.sh
-    -
-      name: "Main tests for almond_multilingual task"
-      stage: test
-      script:
-        - bash ./tests/test_main_almond_multilingual.sh
     -
       name: "Paraphrasing tests"
       stage: test
4 changes: 2 additions & 2 deletions README.md
@@ -125,7 +125,7 @@ genienlp run-paraphrase --model_name_or_path <model_dir> --temperature 0.3 --rep
 Use the following command for training/ finetuning an NMT model:
 
 ```bash
-genienlp train --train_tasks almond_translate --data <data_directory> --train_languages <src_lang> --eval_languages <tgt_lang> --no_commit --train_iterations <iterations> --preserve_case --save <save_dir> --exist_ok --skip_cache --model TransformerSeq2Seq --pretrained_model <hf_model_name>
+genienlp train --train_tasks almond_translate --data <data_directory> --train_languages <src_lang> --eval_languages <tgt_lang> --no_commit --train_iterations <iterations> --preserve_case --save <save_dir> --exist_ok --model TransformerSeq2Seq --pretrained_model <hf_model_name>
 ```
 
 We currently support MarianMT, MBART, MT5, and M2M100 models.<br>
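As a purely illustrative instance of the updated train command above (placeholder data directory, an English-to-German MarianMT checkpoint, and an arbitrary iteration count; substitute your own values):

```bash
# Hypothetical example: fine-tune a MarianMT en->de checkpoint on a local dataset.
# All paths and values below are placeholders, not part of the commit.
genienlp train \
    --train_tasks almond_translate \
    --data ./translation_data/ \
    --train_languages en \
    --eval_languages de \
    --no_commit \
    --train_iterations 20000 \
    --preserve_case \
    --save ./models/marian-en-de/ \
    --exist_ok \
    --model TransformerSeq2Seq \
    --pretrained_model Helsinki-NLP/opus-mt-en-de
```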
@@ -134,7 +134,7 @@ To save a pretrained model in genienlp format without any finetuning, set train_
 To produce translations for an eval/ test set run the following command:
 
 ```bash
-genienlp predict --tasks almond_translate --data <data_directory> --pred_languages <src_lang> --pred_tgt_languages <tgt_lang> --path <path_to_saved_model> --eval_dir <eval_dir> --skip_cache --val_batch_size 4000 --evaluate <valid/test> --overwrite --silent
+genienlp predict --tasks almond_translate --data <data_directory> --pred_languages <src_lang> --pred_tgt_languages <tgt_lang> --path <path_to_saved_model> --eval_dir <eval_dir> --val_batch_size 4000 --evaluate <valid/test> --overwrite --silent
 ```
 
 If your dataset is a document or contains long examples, pass `--translate_example_split` to break the examples down into individual sentences before translation for better results. <br>
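A hypothetical invocation of the updated predict command on a long-form (document-style) validation set, with `--translate_example_split` added as suggested; paths and language codes are placeholders:

```bash
# Hypothetical example: translate a validation set of long examples en->de.
# Paths, languages, and the saved-model directory are placeholders, not part of the commit.
genienlp predict \
    --tasks almond_translate \
    --data ./translation_data/ \
    --pred_languages en \
    --pred_tgt_languages de \
    --path ./models/marian-en-de/ \
    --eval_dir ./eval_results/ \
    --val_batch_size 4000 \
    --evaluate valid \
    --translate_example_split \
    --overwrite \
    --silent
```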
10 changes: 0 additions & 10 deletions genienlp/arguments.py
@@ -67,7 +67,6 @@ def parse_argv(parser):
     parser.add_argument('--data', default='.data/', type=str, help='where to load data from.')
     parser.add_argument('--save', required=True, type=str, help='where to save results.')
     parser.add_argument('--embeddings', default='.embeddings/', type=str, help='where to save embeddings.')
-    parser.add_argument('--cache', default='.cache/', type=str, help='where to save cached files')
 
     parser.add_argument(
         '--train_languages',
@@ -409,10 +408,6 @@ def parse_argv(parser):
         help='Ignore all conditions and use fast version of huggingface tokenizer',
     )
 
-    parser.add_argument('--skip_cache', action='store_true', help='whether to use existing cached splits or generate new ones')
-    parser.add_argument(
-        '--cache_input_data', action='store_true', help='Cache examples from input data for faster subsequent trainings'
-    )
     parser.add_argument('--use_curriculum', action='store_true', help='Use curriculum learning')
     parser.add_argument(
         '--aux_dataset', default='', type=str, help='path to auxiliary dataset (ignored if curriculum is not used)'
@@ -539,11 +534,6 @@ def parse_argv(parser):
     # token classification task args
     parser.add_argument('--num_labels', type=int, help='num_labels for classification tasks')
     parser.add_argument('--crossner_domains', nargs='+', type=str, help='domains to use for CrossNER task')
-    parser.add_argument(
-        '--hf_test_overfit',
-        action='store_true',
-        help='Debugging flag for hf datasets where validation will be performed on train set',
-    )
 
     parser.add_argument(
         '--e2e_dialogue_evaluation',
6 changes: 3 additions & 3 deletions genienlp/kfserver.py
@@ -39,9 +39,9 @@
 
 
 class KFModelServer(kfserving.KFModel):
-    def __init__(self, name, args, numericalizer, model, device, confidence_estimators, estimator_filenames, ned_model):
+    def __init__(self, name, args, model, device, confidence_estimators, estimator_filenames, ned_model):
         super().__init__(name)
-        self.server = Server(args, numericalizer, model, device, confidence_estimators, estimator_filenames, ned_model)
+        self.server = Server(args, model, device, confidence_estimators, estimator_filenames, ned_model)
 
     def load(self):
         log_model_size(logger, self.server.model, self.server.args.model)
@@ -57,7 +57,7 @@ def predict(self, request):
 def main(args):
     model, device, confidence_estimators, estimator_filenames, ned_model = init(args)
     model_server = KFModelServer(
-        args.inference_name, args, model.numericalizer, model, device, confidence_estimators, estimator_filenames, ned_model
+        args.inference_name, args, model, device, confidence_estimators, estimator_filenames, ned_model
     )
     model_server.load()
     kfserving.KFServer(workers=1).start([model_server])