From 7e6eae70ea9412dbffdbac0d2a674cc19bb67c05 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Fri, 12 Jul 2024 14:46:23 +0100 Subject: [PATCH] Updates, changes to flags and ci to allow for turning off btk --- .github/workflows/ci.yml | 2 +- assets/test.yaml | 4 +-- workflows/ascc.nf | 53 +++++++++++++--------------------------- 3 files changed, 20 insertions(+), 39 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5505fcc2..86bc8d40 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -139,4 +139,4 @@ jobs: # For example: adding multiple test runs with different parameters # Remember that you can parallelise this by using strategy.matrix run: | - nextflow run ./sanger-ascc/${{ steps.branch-names.outputs.current_branch }}/main.nf -profile test,singularity --outdir ./results --steps ALL + nextflow run ./sanger-ascc/${{ steps.branch-names.outputs.current_branch }}/main.nf -profile test,singularity --outdir ./results --include ALL --exclude btk diff --git a/assets/test.yaml b/assets/test.yaml index 3f6c1f4b..850c2c2e 100755 --- a/assets/test.yaml +++ b/assets/test.yaml @@ -14,8 +14,8 @@ dimensionality_reduction_methods: "pca,random_trees" # "pca,umap,t-sne,isomap,lle_standard,lle_hessian,lle_modified,mds,se,random_trees,kernel_pca,pca_svd,autoencoder_sigmoid,autoencoder_linear,autoencoder_selu,autoencoder_relu,nmf" nt_database: /lustre/scratch123/tol/teams/tola/users/ea10/pipeline_testing/20240704_blast_tiny_testdb/blastdb/ nt_database_prefix: tiny_plasmodium_blastdb.fa -nt_kraken_db_path: /lustre/scratch123/tol/teams/tola/users/ea10/ascc_databases/nt/nt -ncbi_accessionids_folder: /lustre/scratch123/tol/teams/tola/users/ea10/ascc_databases/ncbi_taxonomy/20230509_accession2taxid/ +nt_kraken_db_path: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/kraken2/kraken2/ +ncbi_accessionids_folder: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/20240709_tiny_accession2taxid/ ncbi_taxonomy_path: /lustre/scratch123/tol/resources/taxonomy/latest/new_taxdump ncbi_rankedlineage_path: /lustre/scratch123/tol/teams/tola/users/ea10/databases/taxdump/rankedlineage.dmp busco_lineages_folder: /lustre/scratch123/tol/resources/busco/data/v5/2021-08-27/lineages diff --git a/workflows/ascc.nf b/workflows/ascc.nf index c4a93e56..4f348079 100644 --- a/workflows/ascc.nf +++ b/workflows/ascc.nf @@ -76,7 +76,8 @@ workflow ASCC { ch_versions = Channel.empty() ch_out_merge = Channel.empty() - workflow_steps = params.steps.split(",") + include_workflow_steps = params.include ? params.include.split(",") : "" + exclude_workflow_steps = params.exclude ? params.exclude.split(",") : "" input_ch = Channel.fromPath(params.input, checkIfExists: true) @@ -132,7 +133,7 @@ workflow ASCC { // SUBWORKFLOW: COUNT KMERS, THEN REDUCE DIMENSIONS USING SELECTED METHODS // - if ( workflow_steps.contains('kmers') || workflow_steps.contains('ALL')) { + if ( include_workflow_steps.contains('kmers') || include_workflow_steps.contains('ALL')) { GENERATE_GENOME.out.reference_tuple .map { meta, file -> @@ -160,7 +161,7 @@ workflow ASCC { // // SUBWORKFLOW: EXTRACT RESULTS HITS FROM TIARA // - if ( workflow_steps.contains('tiara') || workflow_steps.contains('ALL')) { + if ( include_workflow_steps.contains('tiara') || include_workflow_steps.contains('ALL')) { EXTRACT_TIARA_HITS ( GENERATE_GENOME.out.reference_tuple ) @@ -173,7 +174,7 @@ workflow ASCC { // // SUBWORKFLOW: EXTRACT RESULTS HITS FROM NT-BLAST // - if ( workflow_steps.contains('nt_blast') || workflow_steps.contains('ALL') ) { + if ( include_workflow_steps.contains('nt_blast') || include_workflow_steps.contains('ALL') ) { // // NOTE: ch_nt_blast needs to be set in two places incase it // fails during the run @@ -193,7 +194,7 @@ workflow ASCC { ch_nt_blast = [] } - if ( workflow_steps.contains('mito') || workflow_steps.contains('ALL') ) { + if ( include_workflow_steps.contains('mito') || include_workflow_steps.contains('ALL') ) { // // LOGIC: CHECK WHETHER THERE IS A MITO AND BRANCH // @@ -219,7 +220,7 @@ workflow ASCC { ch_mito = [] } - if ( workflow_steps.contains('chloro') || workflow_steps.contains('ALL') ) { + if ( include_workflow_steps.contains('chloro') || include_workflow_steps.contains('ALL') ) { // // LOGIC: CHECK WHETHER THERE IS A PLASTID AND BRANCH @@ -248,7 +249,7 @@ workflow ASCC { // // SUBWORKFLOW: // - if ( workflow_steps.contains('fcs_adapt') || workflow_steps.contains('ALL') ) { + if ( include_workflow_steps.contains('fcs_adapt') || include_workflow_steps.contains('ALL') ) { RUN_FCSADAPTOR ( YAML_INPUT.out.reference_tuple ) @@ -266,7 +267,7 @@ workflow ASCC { // // SUBWORKFLOW: // - if ( workflow_steps.contains('fcsgx') || workflow_steps.contains('ALL') ) { + if ( include_workflow_steps.contains('fcsgx') || include_workflow_steps.contains('ALL') ) { RUN_FCSGX ( YAML_INPUT.out.reference_tuple, YAML_INPUT.out.fcs_gx_database_path, @@ -283,7 +284,7 @@ workflow ASCC { // // SUBWORKFLOW: IDENTITY PACBIO BARCODES IN INPUT DATA // - if ( workflow_steps.contains('barcodes') || workflow_steps.contains('ALL') ) { + if ( include_workflow_steps.contains('barcodes') || include_workflow_steps.contains('ALL') ) { PACBIO_BARCODE_CHECK ( YAML_INPUT.out.reference_tuple, YAML_INPUT.out.pacbio_tuple, @@ -308,7 +309,7 @@ workflow ASCC { // // SUBWORKFLOW: CALCULATE AVERAGE READ COVERAGE // - if ( workflow_steps.contains('coverage') || workflow_steps.contains('busco_btk') || workflow_steps.contains('ALL') ) { + if ( include_workflow_steps.contains('coverage') || include_workflow_steps.contains('busco_btk') || include_workflow_steps.contains('ALL') ) { RUN_READ_COVERAGE ( YAML_INPUT.out.reference_tuple, YAML_INPUT.out.assembly_path, @@ -326,7 +327,7 @@ workflow ASCC { // // SUBWORKFLOW: COLLECT SOFTWARE VERSIONS // - if ( workflow_steps.contains('vecscreen') || workflow_steps.contains('ALL') ) { + if ( include_workflow_steps.contains('vecscreen') || include_workflow_steps.contains('ALL') ) { RUN_VECSCREEN ( GENERATE_GENOME.out.reference_tuple, YAML_INPUT.out.vecscreen_database_path @@ -340,7 +341,7 @@ workflow ASCC { // // SUBWORKFLOW: Run the kraken classifier // - if ( workflow_steps.contains('kraken') || workflow_steps.contains('ALL') ) { + if ( include_workflow_steps.contains('kraken') || include_workflow_steps.contains('ALL') ) { RUN_NT_KRAKEN( GENERATE_GENOME.out.reference_tuple, YAML_INPUT.out.nt_kraken_db_path, @@ -360,7 +361,7 @@ workflow ASCC { // // SUBWORKFLOW: DIAMOND BLAST FOR INPUT ASSEMBLY // - if ( workflow_steps.contains('nt_diamond') || workflow_steps.contains('ALL') ) { + if ( include_workflow_steps.contains('nt_diamond') || include_workflow_steps.contains('ALL') ) { NUCLEOT_DIAMOND ( modified_input, YAML_INPUT.out.diamond_nr_database_path @@ -377,7 +378,7 @@ workflow ASCC { // SUBWORKFLOW: DIAMOND BLAST FOR INPUT ASSEMBLY // //qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore staxids sscinames sskingdoms sphylums salltitles - if ( workflow_steps.contains('uniprot_diamond') || workflow_steps.contains('ALL') ) { + if ( include_workflow_steps.contains('uniprot_diamond') || include_workflow_steps.contains('ALL') ) { UNIPROT_DIAMOND ( modified_input, YAML_INPUT.out.diamond_uniprot_database_path @@ -414,7 +415,7 @@ workflow ASCC { // // MODULE: AUTOFILTER ASSEMBLY BY TIARA AND FCSGX RESULTS // - if ( workflow_steps.contains('tiara') && workflow_steps.contains('fcsgx') && workflow_steps.contains("autofilter") || workflow_steps.contains('ALL') ) { + if ( include_workflow_steps.contains('tiara') && include_workflow_steps.contains('fcsgx') && include_workflow_steps.contains("autofilter") || include_workflow_steps.contains('ALL') ) { AUTOFILTER_AND_CHECK_ASSEMBLY ( YAML_INPUT.out.reference_tuple, EXTRACT_TIARA_HITS.out.ch_tiara, @@ -442,7 +443,7 @@ workflow ASCC { // WE ARE USING THE PIPELINE HERE AS A MODULE THIS REQUIRES IT // TO BE USED AS A AN INTERACTIVE JOB ON WHAT EVER EXECUTOR YOU ARE USING. // This will also eventually check for the above run_btk boolean from autofilter - if ( workflow_steps.contains('busco_btk') && workflow_steps.contains("autofilter") && btk_bool.run_btk == "ABNORMAL" || workflow_steps.contains('ALL') ) { + if ( !exclude_workflow_steps.contains("btk") && include_workflow_steps.contains('busco_btk') && include_workflow_steps.contains("autofilter") && btk_bool.run_btk == "ABNORMAL" || !exclude_workflow_steps.contains("btk") && include_workflow_steps.contains('ALL') ) { YAML_INPUT.out.reference_tuple .combine(ch_bam) @@ -483,26 +484,6 @@ workflow ASCC { } - // - // SUBWORKFLOW: MERGES DATA THAT IS NOT USED IN THE CREATION OF THE BTK_DATASETS FOLDER - // -/* ASCC_MERGE_TABLES ( - GC_CONTENT.out.txt, // FROM -- GC_COVERAGE.out.tsv - ch_coverage, // FROM -- RUN_COVERAGE.out.tsv.map{it[1]} - ch_tiara, // FROM -- TIARA_TIARA.out.classifications.map{it[1]} - [], // <-- BACTERIAL KRAKEN -- NOT IN PIPELINE YET - ch_kraken3, // FROM -- RUN_NT_KRAKEN.out.lineage.map{it[1]} - ch_nt_blast, // FROM -- EXTRACT_NT_BLAST.out.ch_blast_hits.map{it[1]} - ch_kmers, // FROM -- GET_KMERS_PROFILE.out.combined_csv - nt_hits, // FROM -- NUCLEOT_DIAMOND.out.reformed.map{it[1]} - un_hits, // FROM -- UNIPROT_DIAMOND.out.reformed.map{it[1]} - [], // <-- MARKER SCAN -- NOT IN PIPELINE YET - [], // <-- CONTIGVIZ -- NOT IN PIPELINE YET - CREATE_BTK_DATASET.out.create_summary.map{it[1]}, - [], // <-- BUSCO_BTK -- NOT IN PIPELINE YET - ch_fcsgx // FROM -- PARSE_FCSGX_RESULT.out.fcsgxresult.map{it[1]} - ) */ - // // SUBWORKFLOW: Collates version data from prior subworflows //