Skip to content

Commit

Permalink
Merge pull request #40 from PennChopMicrobiomeProgram/38-allow-non-default-path-to-ltp-data
Browse files Browse the repository at this point in the history

Put all db files in .unassigner/ by default
  • Loading branch information
kylebittinger authored Nov 29, 2024
2 parents 5909a47 + bd1b85b commit 2d8b445
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 33 deletions.
4 changes: 0 additions & 4 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@
# LTP refs
LTP_*.csv
LTP_*.fasta

# Vsearch databases
*.udb

Expand Down
10 changes: 0 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -115,16 +115,6 @@ Step 3: The last part of the software relies on building a database of the seque
Please see the output of `trimragged --help` for a list of the available
options.

### Count mismatches



### Percent ID ANI sample



Should there also be a command and section for prepare_strain_data?

## Contributing

We welcome ideas from our users about how to improve this
Expand Down
49 changes: 31 additions & 18 deletions unassigner/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,22 @@ def main(argv=None):
"--output_dir",
help=(
"Output directory (default: basename of query sequences FASTA "
"file, plus '_unassigned')"
"file, plus '_unassigned')."
),
)
p.add_argument(
"--type_strain_fasta",
default="unassigner_species.fasta",
help=(
"Type strain sequences FASTA file (default: %(default)s). "
"If the default file is not found, sequences are downloaded "
"and re-formatted automatically."
"DEPRECATED. FASTA file containing sequences of type strains. If not provided, "
"the default database is used. Note that this WILL NOT DOWNLOAD a new db."
),
)
p.add_argument(
"--db_dir",
default=os.path.expanduser("~/.unassigner/"),
help=(
"Directory containing the reference database. If not provided, "
"the default database is used."
),
)
p.add_argument(
Expand Down Expand Up @@ -85,30 +91,37 @@ def main(argv=None):

query_seqs = list(parse_fasta(args.query_fasta, trim_desc=True))

# Download type strain files if needed
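# 1. If --type_strain_fasta is set, warn that it is deprecated and use that file as-is (nothing is downloaded)
# 2. Otherwise, if unassigner_species.fasta exists in the current directory, use it
# 3. Otherwise create --db_dir if needed, download the data into it, and use the returned path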
if args.type_strain_fasta is not None:
logging.warning(
"The --type_strain_fasta argument is deprecated. Please use --db_dir instead."
)
ltp_fp = args.type_strain_fasta
elif os.path.exists("unassigner_species.fasta"):
ltp_fp = "unassigner_species.fasta"
else:
os.makedirs(args.db_dir, exist_ok=True)
ltp_fp = download_type_strain_data(output_dir=args.db_dir)

with open(ltp_fp) as f:
species_names = dict(parse_species_names(f))

if args.output_dir is None:
output_dir = os.path.splitext(args.query_fasta.name)[0] + "_unassigned"
else:
output_dir = args.output_dir

# Download type strain files if needed
type_strain_fp_is_default = args.type_strain_fasta == p.get_default(
"type_strain_fasta"
)
type_strain_fp_is_missing = not os.path.exists(args.type_strain_fasta)
if type_strain_fp_is_default and type_strain_fp_is_missing:
download_type_strain_data()

with open(args.type_strain_fasta) as f:
species_names = dict(parse_species_names(f))

writer = OutputWriter(output_dir, species_names)

alignment_query_fp = writer.output_fp("unassigner_query.fasta")
alignment_output_fp = writer.output_fp("unassigner_query_hits.txt")
if os.path.exists(alignment_output_fp):
a = FileAligner(args.type_strain_fasta, alignment_output_fp)
a = FileAligner(ltp_fp, alignment_output_fp)
else:
a = UnassignAligner(args.type_strain_fasta)
a = UnassignAligner(ltp_fp)
a.species_input_fp = alignment_query_fp
a.species_output_fp = alignment_output_fp
a.num_cpus = args.num_cpus
Expand Down
6 changes: 5 additions & 1 deletion unassigner/prepare_strain_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,11 @@ def main(argv=None):
action="store_true",
help=("Remove all downloaded and processed files."),
)
p.add_argument("--db-dir", help=("Filepath to download the files to."))
p.add_argument(
"--db-dir",
default="~/.unassigner/",
help=("Filepath to download the files to."),
)
args = p.parse_args(argv)

if args.db_dir:
Expand Down

0 comments on commit 2d8b445

Please sign in to comment.