diff --git a/augur/merge.py b/augur/merge.py index a69125057..990eccf99 100644 --- a/augur/merge.py +++ b/augur/merge.py @@ -285,6 +285,8 @@ def merge_sequences(args): # Reversed because seqkit rmdup keeps the first entry but this command # should keep the last entry. # FIXME: don't use shell. just using it to get a sense of feasibility. + # FIXME: is seqkit overkill here? compare to ncov's drop_duplicate_sequences which is plain Python. + # https://github.com/nextstrain/ncov/blob/0769ac0429df8456ce70be2f74dc885d7b7fab12/scripts/sanitize_sequences.py#L127 cat_processes = (f"<({cat(filepath)})" for filepath in reversed(args.sequences)) shell_cmd = f"cat {' '.join(cat_processes)} | seqkit rmdup" print_debug(F"running shell command {shell_cmd!r}")