diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 24d41b55..76fa33c8 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -139,4 +139,4 @@ jobs:
           # For example: adding multiple test runs with different parameters
           # Remember that you can parallelise this by using strategy.matrix
           run: |
-            nextflow run ./sanger-ascc/${{ steps.branch-names.outputs.current_branch }}/main.nf -profile test,singularity --outdir ./results --include ALL --exclude busco_btk
+            nextflow run ./sanger-ascc/${{ steps.branch-names.outputs.current_branch }}/main.nf -profile test,singularity --outdir ./results --include ALL --exclude btk_busco
diff --git a/bin/ascc_merge_tables.py b/bin/ascc_merge_tables.py
index 278ce409..6045600e 100755
--- a/bin/ascc_merge_tables.py
+++ b/bin/ascc_merge_tables.py
@@ -5,7 +5,7 @@ Script for merging contaminant check results into one table
 Version: {VERSION}
 ---
-Written by Eerik Anuin
+Written by Eerik Aunin
 Re-Written by Damon-Lee Pointon (dp24/DLBPointon)
 """
@@ -35,7 +35,7 @@ def parse_args():
     parser.add_argument("-ud", "--uniprot_diamond", type=str, help="Uniprot Diamond file")
     parser.add_argument("-cv", "--contigviz", type=str, help="Contigviz file")
     parser.add_argument("-btk", "--blobtoolkit", type=str, help="Blobtoolkit file")
-    parser.add_argument("-bb", "--busco_btk", type=str, help="Busco Blobtoolkit file")
+    parser.add_argument("-bb", "--btk_busco", type=str, help="Busco Blobtoolkit file")
     parser.add_argument("-fg", "--fcs_gx", type=str, help="FCS_GX file")
     parser.add_argument("-n", "--sample_name", type=str, help="Name for the sample")
     parser.add_argument("-m", "--markerscan", type=str, help="MarkerScan file")
@@ -284,7 +284,7 @@ def main(args):
     paths_dict["cobiontid_markerscan"] = args.markerscan
     paths_dict["contigviz"] = args.contigviz
     paths_dict["blobtoolkit"] = args.blobtoolkit
-    paths_dict["btk_busco"] = args.busco_btk
+    paths_dict["btk_busco"] = args.btk_busco
     paths_dict["fcs_gx"] = args.fcs_gx
     required_files = ["gc_content"]
diff --git a/bin/autofilter.py b/bin/autofilter.py
index 93849f67..8c1dc4e4 100755
--- a/bin/autofilter.py
+++ b/bin/autofilter.py
@@ -42,9 +42,9 @@ def parse_args():
         help="Path to the assembly_autofiltered.fasta file",
         default="autofiltered.fasta",
     )
-    parser.add_argument(
-        "-c", "--fcs_gx_and_tiara_summary", type=str, help="Path to the fcs-gx_and_tiara_combined_summary.csv file"
-    )
+    # parser.add_argument(
+    #     "-c", "--fcs_gx_and_tiara_summary", type=str, help="Path to the fcs-gx_and_tiara_combined_summary.csv file"
+    # )
     parser.add_argument(
         "-r",
         "--rejected_seq",
@@ -56,6 +56,13 @@ def parse_args():
     parser.add_argument(
         "-n", "--ncbi_rankedlineage_path", type=str, help="Path to the rankedlineage.dmp of NCBI taxonomy"
     )
+    parser.add_argument(
+        "--tiara_action_mode",
+        type=str,
+        choices=["warn", "remove"],
+        default="warn",
+        help="Action when Tiara detects a putative contaminant that is not reported as a contaminant by FCS-GX. The choices are 'warn' (print a warning) or 'remove' (remove this sequence from the assembly). Default: warn",
+    )
     parser.add_argument("-v", "--version", action="version", version=VERSION)
     return parser.parse_args()
@@ -179,7 +186,7 @@ def main():
     tiara_results_path = args.tiara
     fcs_gx_summary_path = args.fcsgx_summary
     filtered_assembly_path = args.output_auto_filtered
-    combined_summary = args.fcs_gx_and_tiara_summary
+    # combined_summary = args.fcs_gx_and_tiara_summary
     excluded_seq_list_path = args.rejected_seq
     ncbi_rankedlist = args.ncbi_rankedlineage_path
@@ -187,7 +194,7 @@ def main():
     for i in [ncbi_rankedlist, tiara_results_path, fcs_gx_summary_path, assembly_path]:
         if not os.path.isfile(i):
-            sys.stderr.write(f"{i} WAS NOT AT THE EXPECTED LOCATION\n")
+            sys.stderr.write(f"{i} was not at the expected location\n")
             sys.exit(1)
     target_domain = get_domain_from_taxid(args.taxid, ncbi_rankedlist)
@@ -207,8 +214,12 @@ def main():
         tiara_action = tiara_action_dict[scaff]
         combined_action = fcs_gx_action
         if fcs_gx_action == "NA" and tiara_action == "EXCLUDE":
-            combined_action = "EXCLUDE"
-            combined_action_source = "Tiara"
+            if args.tiara_action_mode == "remove":
+                combined_action = "EXCLUDE"
+                combined_action_source = "Tiara"
+            elif args.tiara_action_mode == "warn":
+                combined_action = "WARN"
+                combined_action_source = "Tiara"
         if fcs_gx_action == "EXCLUDE" and tiara_action == "EXCLUDE":
             combined_action_source = "FCS-GX_and_Tiara"
         if combined_action == "EXCLUDE":
diff --git a/modules/local/ascc_merge_tables.nf b/modules/local/ascc_merge_tables.nf
index da7d59b3..2dea7aa2 100644
--- a/modules/local/ascc_merge_tables.nf
+++ b/modules/local/ascc_merge_tables.nf
@@ -2,10 +2,10 @@ process ASCC_MERGE_TABLES {
     tag "$meta.id"
     label 'process_low'
-    conda "conda-forge::python=3.9"
+    conda "conda-forge::python=3.9 conda-forge::pandas=1.5.2"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/python:3.9' :
-        'biocontainers/python:3.9' }"
+        'https://depot.galaxyproject.org/singularity/pandas:1.5.2' :
+        'quay.io/biocontainers/pandas:1.5.2' }"
     input:
     tuple val(meta), path(gc_content, stageAs: "GC.txt")
diff --git a/modules/local/merge_btk_datasets.nf b/modules/local/merge_btk_datasets.nf
index 707c33ab..7a818013 100644
--- a/modules/local/merge_btk_datasets.nf
+++ b/modules/local/merge_btk_datasets.nf
@@ -9,7 +9,7 @@ process MERGE_BTK_DATASETS {
     input:
     tuple val(meta), path(create_btk_datasets)
-    tuple val(meta2), path(busco_btk_datasets)
+    tuple val(meta2), path(btk_busco_datasets)
     output:
     tuple val(meta), path("merged_datasets"), emit: merged_datasets
@@ -29,7 +29,7 @@ process MERGE_BTK_DATASETS {
     merge_btk_datasets.py \\
         -m $create_btk_datasets \\
         -o ./merged_datasets \\
-        -b $busco_btk_datasets \\
+        -b $btk_busco_datasets \\
         $args
     cat <<-END_VERSIONS > versions.yml
diff --git a/modules/local/sanger_tol_btk.nf b/modules/local/sanger_tol_btk.nf
index fec146c7..f1dd1523 100644
--- a/modules/local/sanger_tol_btk.nf
+++ b/modules/local/sanger_tol_btk.nf
@@ -32,7 +32,7 @@ process SANGER_TOL_BTK {
     def profiles = task.ext.profiles ?: ""
     def get_version = task.ext.version_data ?: "UNKNOWN - SETTING NOT SET"
     def btk_config = btk_config_file ? "-c $btk_config_file" : ""
"-c $btk_config_file" : "" - def pipeline_version = task.ext.version ?: "main" + def pipeline_version = task.ext.version ?: "draft_assemblies" // YAML used to avoid the use of GCA accession number // https://github.com/sanger-tol/blobtoolkit/issues/77 @@ -42,8 +42,6 @@ process SANGER_TOL_BTK { // outdir should be an arg - // --accession draft \\ - // blastx and blastp use the same database hence the StageAs @@ -76,7 +74,7 @@ process SANGER_TOL_BTK { stub: def prefix = task.ext.prefix ?: "${meta.id}" - def pipeline_version = task.ext.version ?: "main" + def pipeline_version = task.ext.version ?: "draft_assemblies" """ mkdir -p ${prefix}_btk_out/blobtoolkit/$gca_accession diff --git a/workflows/ascc.nf b/workflows/ascc.nf index 2318b58c..e4a4ec4b 100644 --- a/workflows/ascc.nf +++ b/workflows/ascc.nf @@ -79,6 +79,8 @@ workflow ASCC { include_workflow_steps = params.include ? params.include.split(",") : "" exclude_workflow_steps = params.exclude ? params.exclude.split(",") : "" + btk_busco_run_mode = params.btk_busco_run_mode ? params.btk_busco_run_mode : "conditional" + full_list = ["kmers", "tiara", "coverage", "nt_blast", "nr_diamond", "uniprot_diamond", "kraken", "fcs-gx", "fcs-adaptor", "vecscreen", "btk_busco", "pacbio_barcodes", "organellar_blast", "autofilter_assembly", "ALL", ""] if (!full_list.containsAll(include_workflow_steps) && !full_list.containsAll(exclude_workflow_steps)) { @@ -290,7 +292,7 @@ workflow ASCC { // // SUBWORKFLOW: IDENTITY PACBIO BARCODES IN INPUT DATA // - if ( include_workflow_steps.contains('barcodes') || include_workflow_steps.contains('ALL') ) { + if ( include_workflow_steps.contains('pacbio_barcodes') || include_workflow_steps.contains('ALL') ) { PACBIO_BARCODE_CHECK ( YAML_INPUT.out.reference_tuple, YAML_INPUT.out.pacbio_tuple, @@ -315,7 +317,7 @@ workflow ASCC { // // SUBWORKFLOW: CALCULATE AVERAGE READ COVERAGE // - if ( include_workflow_steps.contains('coverage') || include_workflow_steps.contains('busco_btk') || include_workflow_steps.contains('ALL') ) { + if ( include_workflow_steps.contains('coverage') || include_workflow_steps.contains('btk_busco') || include_workflow_steps.contains('ALL') ) { RUN_READ_COVERAGE ( YAML_INPUT.out.reference_tuple, YAML_INPUT.out.assembly_path, @@ -372,12 +374,12 @@ workflow ASCC { modified_input, YAML_INPUT.out.diamond_nr_database_path ) - nt_full = NUCLEOT_DIAMOND.out.reformed.map{it[1]} - nt_hits = NUCLEOT_DIAMOND.out.hits_file.map{it[1]} + nr_full = NUCLEOT_DIAMOND.out.reformed.map{it[1]} + nr_hits = NUCLEOT_DIAMOND.out.hits_file.map{it[1]} ch_versions = ch_versions.mix(NUCLEOT_DIAMOND.out.versions) } else { - nt_hits = [] - nt_full = [] + nr_hits = [] + nr_full = [] } // @@ -411,7 +413,7 @@ workflow ASCC { ch_kraken1, ch_kraken2, ch_kraken3, - nt_full, + nr_full, un_full, YAML_INPUT.out.ncbi_taxonomy_path.first() ) @@ -449,8 +451,7 @@ workflow ASCC { // WE ARE USING THE PIPELINE HERE AS A MODULE THIS REQUIRES IT // TO BE USED AS A AN INTERACTIVE JOB ON WHAT EVER EXECUTOR YOU ARE USING. 
     // This will also eventually check for the above run_btk boolean from autofilter
-    if ( !exclude_workflow_steps.contains("busco_btk") && include_workflow_steps.contains('busco_btk') && include_workflow_steps.contains("autofilter") && btk_bool.run_btk == "ABNORMAL" || !exclude_workflow_steps.contains("busco_btk") && include_workflow_steps.contains('ALL') ) {
-
+    if ( !exclude_workflow_steps.contains("btk_busco") && include_workflow_steps.contains('btk_busco') && btk_busco_run_mode == "conditional" && include_workflow_steps.contains("autofilter_assembly") && btk_bool.run_btk == "ABNORMAL" || !exclude_workflow_steps.contains("btk_busco") && include_workflow_steps.contains('ALL') || btk_busco_run_mode == "mandatory" && !exclude_workflow_steps.contains('btk_busco') && include_workflow_steps.contains('btk_busco') ) {
         YAML_INPUT.out.reference_tuple
             .combine(ch_bam)
             .map{ meta, ref, bam ->
@@ -505,7 +506,7 @@
             ch_kraken3,     // FROM -- RUN_NT_KRAKEN.out.lineage.map{it[1]}
             ch_nt_blast,    // FROM -- EXTRACT_NT_BLAST.out.ch_blast_hits.map{it[1]}
             ch_kmers,       // FROM -- GET_KMERS_PROFILE.out.combined_csv
-            nt_hits,        // FROM -- NUCLEOT_DIAMOND.out.reformed.map{it[1]}
+            nr_hits,        // FROM -- NUCLEOT_DIAMOND.out.hits_file.map{it[1]}
             un_hits,        // FROM -- UNIPROT_DIAMOND.out.reformed.map{it[1]}
             [],             // <-- MARKER SCAN -- NOT IN PIPELINE YET
             [],             // <-- CONTIGVIZ -- NOT IN PIPELINE YET
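
The gating expression added to workflows/ascc.nf is easier to audit when unrolled. Since Groovy's && binds tighter than ||, the one-line condition above is three OR branches; below is a hypothetical Python predicate equivalent to it (the function name and the set-typed arguments are illustrative, not code from the pipeline):

def should_run_btk_busco(include, exclude, run_mode, run_btk):
    """True when the BUSCO-based BlobToolKit subworkflow should run."""
    if "btk_busco" in exclude:
        return False  # an explicit --exclude always wins; all three branches require it
    if "ALL" in include:
        return True   # branch 2: --include ALL runs everything
    if "btk_busco" not in include:
        return False
    if run_mode == "mandatory":
        return True   # branch 3: run regardless of the autofilter verdict
    # branch 1: the default "conditional" mode defers to the autofilter verdict
    return run_mode == "conditional" and "autofilter_assembly" in include and run_btk == "ABNORMAL"

assert should_run_btk_busco({"btk_busco", "autofilter_assembly"}, set(), "conditional", "ABNORMAL")
assert not should_run_btk_busco({"btk_busco", "autofilter_assembly"}, set(), "conditional", "NORMAL")
assert should_run_btk_busco({"btk_busco"}, set(), "mandatory", "NA")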
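
Similarly, the per-scaffold decision that the bin/autofilter.py hunks implement can be restated as a minimal, self-contained sketch. The function name and the initial "FCS-GX" source label are assumptions for illustration (neither appears in the excerpt above); the script itself derives these values from the FCS-GX summary and the Tiara output:

def combine_actions(fcs_gx_action, tiara_action, tiara_action_mode="warn"):
    """Return (combined_action, source) for one scaffold; a sketch of the diff's logic."""
    combined_action = fcs_gx_action
    combined_action_source = "FCS-GX"  # assumption: initialisation not shown in the excerpt
    if fcs_gx_action == "NA" and tiara_action == "EXCLUDE":
        # Tiara flags the sequence but FCS-GX does not: the new
        # --tiara_action_mode flag decides whether this is fatal.
        combined_action = "EXCLUDE" if tiara_action_mode == "remove" else "WARN"
        combined_action_source = "Tiara"
    if fcs_gx_action == "EXCLUDE" and tiara_action == "EXCLUDE":
        combined_action_source = "FCS-GX_and_Tiara"
    return combined_action, combined_action_source

assert combine_actions("NA", "EXCLUDE") == ("WARN", "Tiara")               # default mode only warns
assert combine_actions("NA", "EXCLUDE", "remove") == ("EXCLUDE", "Tiara")  # opt-in removal
assert combine_actions("EXCLUDE", "EXCLUDE") == ("EXCLUDE", "FCS-GX_and_Tiara")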