diff --git a/prepro.py b/prepro.py index c08c7c9..1decc25 100644 --- a/prepro.py +++ b/prepro.py @@ -35,7 +35,7 @@ def prepro(hp): logging.info("# Preprocessing") # train _prepro = lambda x: [line.strip() for line in open(x, 'r').read().split("\n") \ - if not line.startswith("<")] + if not (line.startswith("<") or line.startswith(" <"))] prepro_train1, prepro_train2 = _prepro(train1), _prepro(train2) assert len(prepro_train1)==len(prepro_train2), "Check if train source and target files match." @@ -109,4 +109,4 @@ def _segment_and_write(sents, fname): parser = hparams.parser hp = parser.parse_args() prepro(hp) - logging.info("Done") \ No newline at end of file + logging.info("Done")