Skip to content

Commit

Permalink
Remove no longer needed arguments
Browse files — browse the repository at this point in the history
  • Loading branch information
Mehrad0711 committed Feb 28, 2022
1 parent 1c301f2 commit 5ccaa1a
Show file tree
Hide file tree
Showing 7 changed files with 19 additions and 36 deletions.
5 changes: 0 additions & 5 deletions genienlp/arguments.py
Original file line number Diff line number Diff line change
Expand Up @@ -534,11 +534,6 @@ def parse_argv(parser):
# token classification task args
parser.add_argument('--num_labels', type=int, help='num_labels for classification tasks')
parser.add_argument('--crossner_domains', nargs='+', type=str, help='domains to use for CrossNER task')
parser.add_argument(
'--hf_test_overfit',
action='store_true',
help='Debugging flag for hf datasets where validation will be performed on train set',
)

parser.add_argument(
'--e2e_dialogue_evaluation',
Expand Down
3 changes: 0 additions & 3 deletions genienlp/predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,6 @@ def prepare_data(args):
if len(args.pred_src_languages) == 1 and len(args.tasks) > 1:
args.pred_src_languages *= len(args.tasks)
for i, task in enumerate(args.tasks):
task_languages = args.pred_src_languages[i]
logger.info(f'Loading {task}')
kwargs = {'train': None, 'validation': None, 'test': None}
if args.evaluate == 'train':
Expand All @@ -330,11 +329,9 @@ def prepare_data(args):
kwargs.update(
{
'subsample': args.subsample,
'all_dirs': task_languages,
'num_workers': args.num_workers,
'src_lang': src_lang,
'crossner_domains': args.crossner_domains,
'hf_test_overfit': args.hf_test_overfit,
}
)

Expand Down
6 changes: 0 additions & 6 deletions genienlp/run_bootleg.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,11 +179,6 @@ def parse_argv(parser):
# token classification task args
parser.add_argument('--num_labels', type=int, help='num_labels for classification tasks')
parser.add_argument('--crossner_domains', nargs='+', type=str, help='domains to use for CrossNER task')
parser.add_argument(
'--hf_test_overfit',
action='store_true',
help='Debugging flag for hf datasets where validation will be performed on train set',
)


def bootleg_dump_entities(args, logger):
Expand All @@ -192,7 +187,6 @@ def bootleg_dump_entities(args, logger):
bootleg_shared_kwargs = {
'subsample': args.subsample,
'num_workers': args.num_workers,
'all_dirs': args.train_src_languages,
'crossner_domains': args.crossner_domains,
}

Expand Down
20 changes: 10 additions & 10 deletions genienlp/tasks/generic_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,16 +185,16 @@ def return_splits(cls, path='.data', train='train', validation='dev', test='test
with open(test_path, "r") as fin:
test_data = fin.readlines()

# Uncomment for testing
if kwargs.pop("hf_test_overfit", False):
if validation:
validation_path = os.path.join(path, domain, 'train.txt')
with open(validation_path, "r") as fin:
validation_data = fin.readlines()
if test:
test_path = os.path.join(path, domain, 'train.txt')
with open(test_path, "r") as fin:
test_data = fin.readlines()
# Uncomment for debugging
# if True:
# if validation:
# validation_path = os.path.join(path, domain, 'train.txt')
# with open(validation_path, "r") as fin:
# validation_data = fin.readlines()
# if test:
# test_path = os.path.join(path, domain, 'train.txt')
# with open(test_path, "r") as fin:
# test_data = fin.readlines()

kwargs['domain'] = domain

Expand Down
17 changes: 9 additions & 8 deletions genienlp/tasks/hf_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,15 @@ def return_splits(cls, name, root='.data', train='train', validation='validation
test_data = load_dataset(name, split='test', cache_dir=root)
test_path = test_data.cache_files[0]['filename']

if kwargs.pop('hf_test_overfit', False):
# override validation/ test data with train data
if validation:
validation_data = load_dataset(name, split='train', cache_dir=root)
validation_path = validation_data.cache_files[0]['filename']
if test:
test_data = load_dataset(name, split='train', cache_dir=root)
test_path = test_data.cache_files[0]['filename']
# Uncomment for debugging
# if True:
# # override validation/ test data with train data
# if validation:
# validation_data = load_dataset(name, split='train', cache_dir=root)
# validation_path = validation_data.cache_files[0]['filename']
# if test:
# test_data = load_dataset(name, split='train', cache_dir=root)
# test_path = test_data.cache_files[0]['filename']

train_data = None if train is None else cls(train_data, **kwargs)
validation_data = None if validation is None else cls(validation_data, **kwargs)
Expand Down
3 changes: 0 additions & 3 deletions genienlp/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,6 @@ def prepare_data(args, logger):
kwargs = {'test': None, 'validation': None}
kwargs['train'] = args.train_set_name
kwargs.update(train_eval_shared_kwargs)
kwargs['all_dirs'] = args.train_src_languages
kwargs['crossner_domains'] = args.crossner_domains
if args.use_curriculum:
kwargs['curriculum'] = True
Expand Down Expand Up @@ -140,9 +139,7 @@ def prepare_data(args, logger):
if args.eval_set_name is not None:
kwargs['validation'] = args.eval_set_name
kwargs.update(train_eval_shared_kwargs)
kwargs['all_dirs'] = args.eval_src_languages
kwargs['crossner_domains'] = args.crossner_domains
kwargs['hf_test_overfit'] = args.hf_test_overfit

logger.info(f'Adding {task.name} to validation datasets')
splits, paths = task.get_splits(args.data, lower=args.lower, **kwargs)
Expand Down
1 change: 0 additions & 1 deletion genienlp/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -862,7 +862,6 @@ def load_config_json(args):
'no_separator',
'num_labels',
'crossner_domains',
'hf_test_overfit',
'override_valid_metrics',
'eval_src_languages',
'eval_tgt_languages',
Expand Down

0 comments on commit 5ccaa1a

Please sign in to comment.