diff --git a/modules/local/bin_summary/environment.yml b/modules/local/bin_summary/environment.yml
new file mode 100644
index 0000000..d4d242d
--- /dev/null
+++ b/modules/local/bin_summary/environment.yml
@@ -0,0 +1,6 @@
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - conda-forge::r-base=4.4
+  - conda-forge::r-tidyverse=2.0.0
diff --git a/modules/local/bin_summary/main.nf b/modules/local/bin_summary/main.nf
new file mode 100644
index 0000000..af8351d
--- /dev/null
+++ b/modules/local/bin_summary/main.nf
@@ -0,0 +1,26 @@
+// Stages the stats, CheckM2 and taxonomy files for one meta and emits bin_summary.tsv.
+// NOTE(review): the script is still a placeholder that only writes "test".
+process BIN_SUMMARY {
+    label "process_low"
+
+    conda "${moduleDir}/environment.yml"
+    container "docker.io/rocker/tidyverse:4.4"
+
+    input:
+    tuple val(meta), path(stats), path(checkm2), path(taxonomy)
+
+    output:
+    tuple val(meta), path("bin_summary.tsv"), emit: summary
+    // Consumed by the workflow as BIN_SUMMARY.out.versions
+    path "versions.yml"                     , emit: versions
+
+    script:
+    """
+    echo test > bin_summary.tsv
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        r-base: \$(Rscript -e 'cat(as.character(getRversion()))')
+    END_VERSIONS
+    """
+}
diff --git a/subworkflows/local/bin_qc.nf b/subworkflows/local/bin_qc.nf
index cc4f569..7acdbb2 100644
--- a/subworkflows/local/bin_qc.nf
+++ b/subworkflows/local/bin_qc.nf
@@ -37,7 +37,7 @@ workflow BIN_QC {
     ch_versions = ch_versions.mix(SEQKIT_STATS.out.versions)
 
     emit:
-    checkm_tsv = ch_checkm2_tsv
-    stats      = SEQKIT_STATS.out.stats
-    versions   = ch_versions
+    checkm2_tsv = ch_checkm2_tsv
+    stats       = SEQKIT_STATS.out.stats
+    versions    = ch_versions
 }
diff --git a/subworkflows/local/binning.nf b/subworkflows/local/binning.nf
index 7437162..39bab3d 100644
--- a/subworkflows/local/binning.nf
+++ b/subworkflows/local/binning.nf
@@ -36,7 +36,7 @@ workflow BINNING {
     ch_versions = ch_versions.mix(GAWK_MAXBIN2_DEPTHS.out.versions)
 
     ch_maxbin2_input = assemblies
-        | combine(MAXBIN2_DEPTHS.out.output, by: 0)
+        | combine(GAWK_MAXBIN2_DEPTHS.out.output, by: 0)
         | map { meta, contigs, depths ->
             [meta, contigs, [], depths]
         }
diff --git a/workflows/longreadmag.nf b/workflows/longreadmag.nf
index 6034a6f..68cce29 100644
--- a/workflows/longreadmag.nf
+++ b/workflows/longreadmag.nf
@@ -14,7 +14,8 @@ include { BINNING } from '../subworkflows/local/binn
 include { BIN_QC                              } from '../subworkflows/local/bin_qc.nf'
 include { BIN_TAXONOMY                        } from '../subworkflows/local/bin_taxonomy'
 include { BIN_REFINEMENT                      } from '../subworkflows/local/bin_refinement'
-include { CONTIG2BIN2FASTA as BINS_TO_PROTEIN } from '../modules/local/contig2bin2fasta/main'
+include { BIN_SUMMARY                         } from '../modules/local/bin_summary'
+include { CONTIG2BIN2FASTA as BINS_TO_PROTEIN } from '../modules/local/contig2bin2fasta'
 include { PREPARE_DATA                        } from '../subworkflows/local/prepare_data'
 include { READ_MAPPING                        } from '../subworkflows/local/read_mapping'
 
@@ -113,14 +114,38 @@ workflow LONGREADMAG {
         if(params.enable_binqc) {
             BIN_QC(ch_bins, ch_aa_bins)
             ch_versions = ch_versions.mix(BIN_QC.out.versions)
-            ch_checkm2_tsv = BIN_QC.out.checkm_tsv
-        } else {
-            ch_checkm2_tsv = Channel.empty()
-        }
 
-        if(params.enable_taxonomy) {
-            BIN_TAXONOMY(ch_aa_bins, ch_checkm2_tsv)
-            ch_versions = ch_versions.mix(BIN_TAXONOMY.out.versions)
+            if(params.enable_taxonomy) {
+                BIN_TAXONOMY(ch_aa_bins, BIN_QC.out.checkm2_tsv)
+                ch_versions = ch_versions.mix(BIN_TAXONOMY.out.versions)
+
+                ch_taxonomy_tsv = BIN_TAXONOMY.out.gtdb_ncbi
+            } else {
+                ch_taxonomy_tsv = Channel.empty()
+            }
+
+            if(params.enable_summary) {
+                // NOTE(review): with enable_taxonomy off, ch_taxonomy_tsv is empty, so the
+                // keyed combine below emits nothing and BIN_SUMMARY never runs - confirm intent.
+                ch_stats_collated = BIN_QC.out.stats
+                    | map { meta, stats -> [ meta.subMap('id'), stats] }
+                    | groupTuple(by: 0)
+
+                ch_checkm2_collated = BIN_QC.out.checkm2_tsv
+                    | map { meta, stats -> [ meta.subMap('id'), stats] }
+                    | groupTuple(by: 0)
+
+                ch_taxonomy_collated = ch_taxonomy_tsv
+                    | map { meta, stats -> [ meta.subMap('id'), stats] }
+                    | groupTuple(by: 0)
+
+                ch_bin_summary_input = ch_stats_collated
+                    | combine(ch_checkm2_collated, by: 0)
+                    | combine(ch_taxonomy_collated, by: 0)
+
+                BIN_SUMMARY(ch_bin_summary_input)
+                ch_versions = ch_versions.mix(BIN_SUMMARY.out.versions)
+            }
         }
     }
 }