Skip to content

Commit

Permalink
Fixing reviewer comments
Browse files Browse the repository at this point in the history
  • Loading branch information
DLBPointon committed Jul 18, 2024
1 parent 7e6eae7 commit cf544bb
Show file tree
Hide file tree
Showing 6 changed files with 41 additions and 62 deletions.
16 changes: 9 additions & 7 deletions bin/autofilter.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
Script for filtering the assembly to
remove putative contaminants based on
FGCS-GX and Tiara results.
FCS-GX and Tiara results.
-------------------------------------
"""
Expand All @@ -32,17 +32,19 @@ def parse_args():
formatter_class=argparse.RawDescriptionHelpFormatter,
description=textwrap.dedent(DESCRIPTION),
)
parser.add_argument("fasta", type=str, help="Path to the fasta assembly file")
parser.add_argument("-t", "--tiara", type=str, help="Path to the tiara summary file")
parser.add_argument("-s", "--fcsgx_sum", type=str, help="Path to the fcs-gx_summary.csv file")
parser.add_argument("fasta", type=str, help="Path to the assembly FASTA file")
parser.add_argument("-t", "--tiara", type=str, help="Path to the Tiara summary file")
parser.add_argument("-s", "--fcsgx_summary", type=str, help="Path to the fcs-gx_summary.csv file")
parser.add_argument(
"-o",
"--output_auto_filtered",
type=str,
help="Path to the assembly_autofiltered.fasta file",
default="autofiltered.fasta",
)
parser.add_argument("-c", "--combined_sum", type=str, help="Path to the fcs-gx_and_tiara_combined_summary.csv file")
parser.add_argument(
"-c", "--fcs_gx_and_tiara_summary", type=str, help="Path to the fcs-gx_and_tiara_combined_summary.csv file"
)
parser.add_argument(
"-r",
"--rejected_seq",
Expand Down Expand Up @@ -175,9 +177,9 @@ def main():

assembly_path = args.fasta
tiara_results_path = args.tiara
fcs_gx_summary_path = args.fcsgx_sum
fcs_gx_summary_path = args.fcsgx_summary
filtered_assembly_path = args.output_auto_filtered
combined_summary = args.combined_sum
combined_summary = args.fcs_gx_and_tiara_summary
excluded_seq_list_path = args.rejected_seq
ncbi_rankedlist = args.ncbi_rankedlineage_path

Expand Down
14 changes: 10 additions & 4 deletions bin/generate_samplesheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
import argparse

"""
A simple script to generate csv file
A simple script to generate a csv file required for the sanger-tol/blobtoolkit pipeline-module.
Required inputs include the sample ID and the mapped BAM file generated from PacBio data and the input FASTA assembly
Written by Damon-Lee Pointon (dp24/DLBPointon)
"""
Expand All @@ -14,7 +16,11 @@
def parse_args():
parser = argparse.ArgumentParser(description="Generate a csv file for BTK")
parser.add_argument("sample_name", type=str, help="Name of sample")
parser.add_argument("pacbio_path", type=str, help="Path containing the pacbio files")
parser.add_argument(
"mapped_bam_file",
type=str,
help="Path containing the mapped BAM generated with PacBio data and the ASCC input assembly",
)
parser.add_argument("-v", "--version", action="version", version="1.0.0")
return parser.parse_args()

Expand All @@ -25,8 +31,8 @@ def main():
data_list = []

data_list.append("sample,datatype,datafile\n")
if args.pacbio_path.endswith(".bam"):
data_list.append(f"{args.sample_name},pacbio,{args.pacbio_path}\n")
if args.mapped_bam_file.endswith(".bam"):
data_list.append(f"{args.sample_name},pacbio,{args.mapped_bam_file}\n")
else:
sys.exit("I was expecting a mapped BAM file")

Expand Down
2 changes: 2 additions & 0 deletions bin/process_result_tables.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#!/usr/bin/env python3

"""
Script processing the cobiont check result tables to add a combined classification ('merged_classif') column that is based
on the output of multiple tools. Also generates a table for estimated coverages per 'merged_classif' column
Expand Down
31 changes: 0 additions & 31 deletions conf/test_full.config

This file was deleted.

12 changes: 6 additions & 6 deletions modules/local/sanger_tol_btk.nf
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@ process SANGER_TOL_BTK {
val gca_accession

output:
path("${meta.id}_btk_out/blobtoolkit/plots"), emit: btk_plots
path("${meta.id}_btk_out/blobtoolkit/draft"), emit: btk_dataset
path("${meta.id}_btk_out/blobtoolkit/draft/summary.json.gz"), emit: btk_summary
path("${meta.id}_btk_out/busco"), emit: btk_busco
path("${meta.id}_btk_out/multiqc"), emit: btk_multiqc
path("blobtoolkit_pipeline_info"), emit: btk_pipeline
path("${meta.id}_btk_out/blobtoolkit/plots"), emit: plots
path("${meta.id}_btk_out/blobtoolkit/draft"), emit: dataset
path("${meta.id}_btk_out/blobtoolkit/draft/summary.json.gz"), emit: summary_json
path("${meta.id}_btk_out/busco"), emit: busco_data
path("${meta.id}_btk_out/multiqc"), emit: multiqc_report
path("blobtoolkit_pipeline_info"), emit: pipeline_info

script:
def prefix = task.ext.prefix ?: "${meta.id}"
Expand Down
28 changes: 14 additions & 14 deletions workflows/ascc.nf
Original file line number Diff line number Diff line change
Expand Up @@ -391,22 +391,22 @@ workflow ASCC {
un_full = []
}

ch_got_genome = GENERATE_GENOME.out.dot_genome.map{it[1]}
ch_dot_genome = GENERATE_GENOME.out.dot_genome.map{it[1]}

CREATE_BTK_DATASET (
GENERATE_GENOME.out.reference_tuple.first(),
ch_got_genome,
GENERATE_GENOME.out.reference_tuple,
ch_dot_genome,
ch_kmers,
ch_tiara.first(),
ch_tiara,
ch_nt_blast,
ch_fcsgx.first(),
ch_bam.first(),
ch_coverage.first(),
ch_kraken1.first(),
ch_kraken2.first(),
ch_kraken3.first(),
nt_hits.first(),
un_hits.first(),
ch_fcsgx,
ch_bam,
ch_coverage,
ch_kraken1,
ch_kraken2,
ch_kraken3,
nt_hits,
un_hits,
YAML_INPUT.out.ncbi_taxonomy_path.first()
)
//ch_versions = ch_versions.mix(CREATE_BTK_DATASET.out.versions)
Expand Down Expand Up @@ -443,7 +443,7 @@ workflow ASCC {
// WE ARE USING THE PIPELINE HERE AS A MODULE THIS REQUIRES IT
// TO BE RUN AS AN INTERACTIVE JOB ON WHATEVER EXECUTOR YOU ARE USING.
// This will also eventually check for the above run_btk boolean from autofilter
if ( !exclude_workflow_steps.contains("btk") && include_workflow_steps.contains('busco_btk') && include_workflow_steps.contains("autofilter") && btk_bool.run_btk == "ABNORMAL" || !exclude_workflow_steps.contains("btk") && include_workflow_steps.contains('ALL') ) {
    if ( !exclude_workflow_steps.contains("busco_btk") && include_workflow_steps.contains('busco_btk') && include_workflow_steps.contains("autofilter") && btk_bool.run_btk == "ABNORMAL" || !exclude_workflow_steps.contains("busco_btk") && include_workflow_steps.contains('ALL') ) {

YAML_INPUT.out.reference_tuple
.combine(ch_bam)
Expand Down Expand Up @@ -477,7 +477,7 @@ workflow ASCC {

MERGE_BTK_DATASETS (
CREATE_BTK_DATASET.out.btk_datasets,
SANGER_TOL_BTK.out.btk_dataset
SANGER_TOL_BTK.out.dataset
)
//ch_versions = ch_versions.mix(MERGE_BTK_DATASETS.out.versions)

Expand Down

0 comments on commit cf544bb

Please sign in to comment.