From 0b9c37b0f0a92a2e3b76105ef8c33bdab6d1b068 Mon Sep 17 00:00:00 2001
From: Remi-Andre Olsen
Date: Fri, 12 Apr 2024 17:02:29 +0200
Subject: [PATCH] Added explore as a command

---
Review notes (free text after the separator; ignored by git-am and not part
of the commit message):
 * run(): the out_fastq/samplesheet path checks use `assert`, which is
   stripped under `python -O`; consider raising typer.BadParameter instead.
 * version_callback(): pkg_resources is deprecated upstream;
   importlib.metadata.version("bio-anglerfish") is the stdlib equivalent.
 * run(): --force_rc now defaults to None while still annotated as
   IndexOrientations, and its help text mentions 'original', which is not
   among the enum members visible in this patch (i7, i5, i7+i5) - confirm.
 * main(): the `if samplesheet:` branch repeats the check that
   deprecated_callback already performs for the same option.

 anglerfish/cli.py         | 180 +++++++++++++++++++++++++++++++++++---
 anglerfish/explore/cli.py | 100 ---------------------
 2 files changed, 170 insertions(+), 110 deletions(-)
 delete mode 100644 anglerfish/explore/cli.py

diff --git a/anglerfish/cli.py b/anglerfish/cli.py
index c653b41..6d8b16d 100644
--- a/anglerfish/cli.py
+++ b/anglerfish/cli.py
@@ -1,12 +1,15 @@
 import argparse
+import datetime as dt
 import os
-from datetime import datetime as dt
 from enum import Enum
 
+import pkg_resources
 import typer
 from typing_extensions import Annotated
+from typing import Optional
 
 from .anglerfish import run_demux
+from .explore.explore import run_explore
 
 app = typer.Typer(pretty_exceptions_show_locals=False)
 
@@ -15,7 +18,155 @@ class IndexOrientations(str, Enum):
     i7 = "i7"
     i5 = "i5"
     i7i5 = "i7+i5"
-    default = "default"
+
+
+def version_callback(value: bool):
+    if value:
+        print(f'anglerfish {pkg_resources.get_distribution("bio-anglerfish").version}')
+        raise typer.Exit()
+
+
+def deprecated_callback(value: bool):
+    if value:
+        raise typer.BadParameter(
+            "Please use the 'anglerfish run -s' command to run anglerfish with a samplesheet. Running only 'anglerfish -s' is not supported as of version 0.7.0"
+        )
+
+
+@app.callback()
+def main(
+    version: Annotated[
+        Optional[bool],
+        typer.Option(
+            "--version",
+            "-v",
+            help="Print version and quit",
+            is_eager=True,
+            callback=version_callback,
+        ),
+    ] = False,
+    samplesheet: Annotated[
+        Optional[str],
+        typer.Option(
+            "--samplesheet",
+            "-s",
+            hidden=True,
+            is_eager=True,
+            callback=deprecated_callback,
+        ),
+    ] = "",
+):
+    """
+    Anglerfish is a tool designed to demultiplex Illumina libraries sequenced on Oxford Nanopore flowcells.
+    The primary purpose for this would be to do QC, i.e. to check pool balancing, assess contamination, library insert sizes and so on.
+    """
+    if samplesheet:
+        raise typer.BadParameter(
+            "Please use the 'run' command to run anglerfish with a samplesheet. Running only 'anglerfish' is not supported as of version 0.7.0"
+        )
+
+
+@app.command()
+def explore(
+    fastq: Annotated[str, typer.Option("--fastq", "-f", help="Fastq file to align")],
+    outdir: Annotated[str, typer.Option("--outdir", "-o", help="Output directory")],
+    threads: Annotated[
+        int,
+        typer.Option(
+            "--threads",
+            "-t",
+            help="Number of threads specified to minimap2",
+        ),
+    ] = 4,
+    use_existing: Annotated[
+        bool,
+        typer.Option(
+            "--use-existing",
+            "-e",
+            help="Use existing alignments if found in the specified output directory.",
+        ),
+    ] = False,
+    good_hit_threshold: Annotated[
+        float,
+        typer.Option(
+            "--good_hit_threshold",
+            "-g",
+            help="Fraction of adaptor bases immediately before and immediately after index insert required to match perfectly for a hit to be considered a good hit",
+        ),
+    ] = 0.9,
+    insert_thres_low: Annotated[
+        int,
+        typer.Option(
+            "--insert_thres_low",
+            "-i",
+            help="Lower threshold for index(+UMI) insert length, with value included.",
+        ),
+    ] = 4,
+    insert_thres_high: Annotated[
+        int,
+        typer.Option(
+            "--insert_thres_high",
+            "-j",
+            help="Upper threshold for index(+UMI) insert length, with value included.",
+        ),
+    ] = 30,
+    minimap_b: Annotated[
+        int,
+        typer.Option(
+            "--minimap_b",
+            "-B",
+            help="Minimap2 -B parameter, mismatch penalty.",
+        ),
+    ] = 4,
+    min_hits_per_adaptor: Annotated[
+        int,
+        typer.Option(
+            "--min_hits_per_adaptor",
+            "-m",
+            help="Minimum number of good hits for an adaptor to be included in the analysis.",
+        ),
+    ] = 50,
+    umi_threshold: Annotated[
+        float,
+        typer.Option(
+            "--umi_threshold",
+            "-u",
+            help="Minimum number of bases in insert to perform entropy calculation.",
+        ),
+    ] = 11,
+    kmer_length: Annotated[
+        int,
+        typer.Option(
+            "--kmer_length",
+            "-k",
+            help="Kmer length for entropy calculation.",
+        ),
+    ] = 2,
+    version: Annotated[
+        Optional[bool],
+        typer.Option(
+            "--version",
+            "-v",
+            help="Print version and quit",
+            is_eager=True,
+            callback=version_callback,
+        ),
+    ] = False,
+):
+    """This is an advanced samplesheet-free version of anglerfish."""
+    run_explore(
+        fastq,
+        outdir,
+        threads,
+        use_existing,
+        good_hit_threshold,
+        insert_thres_low,
+        insert_thres_high,
+        minimap_b,
+        min_hits_per_adaptor,
+        umi_threshold,
+        kmer_length,
+    )
 
 
 @app.command()
@@ -70,15 +221,15 @@ def run(
             "-x",
             help="If lenient is set, this is the minimum factor of additional matches required to reverse complement the index",
         ),
-    ] = 2,
+    ] = 4.0,
     force_rc: Annotated[
         IndexOrientations,
         typer.Option(
             "--force_rc",
             "-p",
-            help="Force reverse complementing the I5 and/or I7 indices. This will disregard lenient mode.",
+            help="Force reverse complementing the I5 and/or I7 indices. If set to anything other than 'original' this will disregard lenient mode.",
         ),
-    ] = IndexOrientations.default,
+    ] = None,
     ont_barcodes: Annotated[
         bool,
         typer.Option(
@@ -89,10 +240,17 @@ def run(
     ] = False,
     debug: Annotated[bool, typer.Option("--debug", "-d", help="Debug mode")] = False,
     version: Annotated[
-        bool, typer.Option("--version", "-v", help="Print version and quit")
+        Optional[bool],
+        typer.Option(
+            "--version",
+            "-v",
+            help="Print version and quit",
+            is_eager=True,
+            callback=version_callback,
+        ),
     ] = False,
 ):
-    """Run anglerfish demux. Now with emojis 💩✨"""
+    """Run anglerfish. This is the main command for anglerfish"""
     args = argparse.Namespace(
         samplesheet=samplesheet,
         out_fastq=out_fastq,
@@ -108,10 +266,12 @@ def run(
         debug=debug,
         version=version,
     )
-    utcnow = dt.utcnow()
+    utcnow = dt.datetime.now(dt.timezone.utc)
     runname = utcnow.strftime(f"{args.run_name}_%Y_%m_%d_%H%M%S")
-    assert os.path.exists(args.out_fastq)
-    assert os.path.exists(args.samplesheet)
+    assert os.path.exists(args.out_fastq), f"Output folder '{args.out_fastq}' not found"
+    assert os.path.exists(
+        args.samplesheet
+    ), f"Samplesheet file '{args.samplesheet}' not found, please provide a valid path when using the --samplesheet option."
     args.out_fastq = os.path.join(os.path.abspath(args.out_fastq), runname)
     args.samplesheet = os.path.abspath(args.samplesheet)
     args.run_name = runname
diff --git a/anglerfish/explore/cli.py b/anglerfish/explore/cli.py
deleted file mode 100644
index 1cceab5..0000000
--- a/anglerfish/explore/cli.py
+++ /dev/null
@@ -1,100 +0,0 @@
-import click
-
-from anglerfish.explore.explore import run_explore
-
-
-@click.command()
-@click.option("-f", "--fastq", required=True, help="Fastq file to align")
-@click.option("-o", "--outdir", required=True, help="Output directory")
-@click.option(
-    "-t",
-    "--threads",
-    default=4,
-    type=int,
-    help="Number of threads specified to minimap2",
-)
-@click.option(
-    "-e",
-    "--use-existing",
-    is_flag=True,
-    help="Use existing alignments if found in the specified output directory.",
-)
-@click.option(
-    "-g",
-    "--good_hit_threshold",
-    default=0.9,
-    type=float,
-    help="Fraction of adaptor bases immediately before and immediately after index insert required to match perfectly for a hit to be considered a good hit (default=0.9).",
-)
-@click.option(
-    "-i",
-    "--insert_thres_low",
-    default=4,
-    type=int,
-    help="Lower threshold for index(+UMI) insert length, with value included (deafult=4).",
-)
-@click.option(
-    "-j",
-    "--insert_thres_high",
-    default=30,
-    type=int,
-    help="Upper threshold for index(+UMI) insert length, with value included (default=30).",
-)
-@click.option(
-    "-B",
-    "--minimap_b",
-    default=4,
-    type=int,
-    help="Minimap2 -B parameter, mismatch penalty (default=4).",
-)
-@click.option(
-    "-m",
-    "--min_hits_per_adaptor",
-    default=50,
-    type=int,
-    help="Minimum number of good hits for an adaptor to be included in the analysis (default=50).",
-)
-@click.option(
-    "-u",
-    "--umi_threshold",
-    default=11,
-    type=float,
-    help="Minimum number of bases in insert to perform entropy calculation (default=11).",
-)
-@click.option(
-    "-k",
-    "--kmer_length",
-    default=2,
-    type=int,
-    help="Length of k-mers to use for entropy calculation (default=2).",
-)
-def main(
-    fastq,
-    outdir,
-    threads,
-    use_existing,
-    good_hit_threshold,
-    insert_thres_low,
-    insert_thres_high,
-    minimap_b,
-    min_hits_per_adaptor,
-    umi_threshold,
-    kmer_length,
-):
-    run_explore(
-        fastq,
-        outdir,
-        threads,
-        use_existing,
-        good_hit_threshold,
-        insert_thres_low,
-        insert_thres_high,
-        minimap_b,
-        min_hits_per_adaptor,
-        umi_threshold,
-        kmer_length,
-    )
-
-
-if __name__ == "__main__":
-    main()