Skip to content

Commit

Permalink
Prepare bam file to each individual taxid
Browse files Browse the repository at this point in the history
  • Loading branch information
LilyAnderssonLee committed Oct 24, 2024
1 parent 37ddce2 commit 3ae2e29
Show file tree
Hide file tree
Showing 4 changed files with 172 additions and 2 deletions.
65 changes: 63 additions & 2 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -72,14 +72,14 @@ process {
]
}

withName: RM_EMPTY_CENTRIFUGE {
withName: RM_EMPTY_CENTRIFUGE {
publishDir = [
path: { "${params.outdir}/extracted_reads/centrifuge" },
enabled : false
]
}

withName: RM_EMPTY_DIAMOND {
withName: RM_EMPTY_DIAMOND {
publishDir = [
path: { "${params.outdir}/extracted_reads/diamond" },
enabled : false
Expand Down Expand Up @@ -162,6 +162,67 @@ process {
publishDir = [ enabled: false ]
}

// SAMTOOLS_IDXSTATS as run inside the TAXID_BAM_SHORTREAD subworkflow alias:
// publish only the *.idxstats files under <outdir>/pathogens/bowtie2/stats/.
withName: '.*TAXID_BAM_SHORTREAD:SAMTOOLS_IDXSTATS' {
publishDir = [
path: { "$params.outdir/pathogens/bowtie2/stats/" },
mode: params.publish_dir_mode,
pattern: '*.idxstats'
]
}

// SUBSET_BAM: the selector carries no subworkflow scope, so these settings apply
// to every SUBSET_BAM invocation.
withName: 'SUBSET_BAM' {
// Output name is "<sample id>_<taxid>", one file per sample/taxid pair.
ext.prefix = { "${meta.id}_${meta.taxid}"}
// NOTE(review): this only takes effect if the SUBSET_BAM script interpolates
// task.ext.args into its samtools command -- confirm against the module.
ext.args = '-bh'
// Publishing is disabled for this process; the path below is inert while
// `enabled` is false.
publishDir = [
path: { "$params.outdir/pathogens/taxid_bam/" },
enabled : false
]
}

// SAMTOOLS_SORT inside TAXID_BAM_SHORTREAD: name outputs
// "<sample id>_<taxid>_sorted" and publish the BAM files under
// <outdir>/pathogens/taxid_bam/.
// NOTE(review): the long-read SAMTOOLS_SORT selector uses the same prefix and
// publish directory -- confirm sample ids cannot collide between the two.
withName: '.*TAXID_BAM_SHORTREAD:SAMTOOLS_SORT' {
ext.prefix = { "${meta.id}_${meta.taxid}_sorted"}
publishDir = [
path: { "$params.outdir/pathogens/taxid_bam/" },
mode: params.publish_dir_mode,
pattern: '*.{bam}'
]
}

// SAMTOOLS_INDEX inside TAXID_BAM_SHORTREAD: publish the *.bai index files
// under <outdir>/pathogens/taxid_bam/.
withName: '.*TAXID_BAM_SHORTREAD:SAMTOOLS_INDEX' {
// NOTE(review): confirm the SAMTOOLS_INDEX module reads task.ext.prefix;
// if it does not, this setting has no effect.
ext.prefix = { "${meta.id}_${meta.taxid}_sorted"}
publishDir = [
path: { "$params.outdir/pathogens/taxid_bam/" },
mode: params.publish_dir_mode,
pattern: '*.{bai}'
]
}

// SAMTOOLS_IDXSTATS as run inside the TAXID_BAM_LONGREAD subworkflow alias:
// publish only the *.idxstats files under <outdir>/pathogens/minimap2/stats/.
withName: '.*TAXID_BAM_LONGREAD:SAMTOOLS_IDXSTATS' {
publishDir = [
path: { "$params.outdir/pathogens/minimap2/stats/" },
mode: params.publish_dir_mode,
pattern: '*.idxstats'
]
}

// SAMTOOLS_SORT inside TAXID_BAM_LONGREAD: name outputs
// "<sample id>_<taxid>_sorted" and publish the BAM files under
// <outdir>/pathogens/taxid_bam/.
// NOTE(review): the short-read SAMTOOLS_SORT selector uses the same prefix and
// publish directory -- confirm sample ids cannot collide between the two.
withName: '.*TAXID_BAM_LONGREAD:SAMTOOLS_SORT' {
ext.prefix = { "${meta.id}_${meta.taxid}_sorted"}
publishDir = [
path: { "$params.outdir/pathogens/taxid_bam/" },
mode: params.publish_dir_mode,
pattern: '*.{bam}'
]
}

// SAMTOOLS_INDEX inside TAXID_BAM_LONGREAD: publish the *.bai index files
// under <outdir>/pathogens/taxid_bam/.
withName: '.*TAXID_BAM_LONGREAD:SAMTOOLS_INDEX' {
// NOTE(review): confirm the SAMTOOLS_INDEX module reads task.ext.prefix;
// if it does not, this setting has no effect.
ext.prefix = { "${meta.id}_${meta.taxid}_sorted"}
publishDir = [
path: { "$params.outdir/pathogens/taxid_bam/" },
mode: params.publish_dir_mode,
pattern: '*.{bai}'
]
}

withName: MULTIQC {
ext.args = { { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } }
publishDir = [
Expand Down
34 changes: 34 additions & 0 deletions modules/local/subset_bam.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/*
 * SUBSET_BAM
 *
 * Runs `samtools view` on an indexed BAM, restricted to a list of reference
 * names (accessions), and writes the selected alignments to `<prefix>.bam`.
 *
 * Inputs:
 *   tuple val(meta), path(bam), path(bai) - sample metadata, the BAM file and
 *                                           its index (region queries with
 *                                           `samtools view` need the index)
 *   val taxid_accession                   - list of reference names to keep;
 *                                           joined with spaces and passed as
 *                                           region arguments
 * Outputs:
 *   bam      - tuple val(meta), path("*.bam")
 *   versions - versions.yml holding the samtools version
 *
 * Configuration:
 *   task.ext.args   - extra options for `samtools view` (default: none)
 *   task.ext.prefix - output basename (default: meta.id)
 */
process SUBSET_BAM {

    tag "$meta.id"
    label 'process_low'

    // Conda match specs pin a version with '=' ("channel::package=version").
    conda "bioconda::samtools=1.21"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0':
        'biocontainers/samtools:1.21--h50ea8bc_0' }"

    input:
    tuple val(meta), path(bam), path(bai)
    val taxid_accession

    output:
    tuple val(meta), path("*.bam"), emit: bam
    path "versions.yml"           , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Honour options supplied through the pipeline configuration (ext.args).
    def args       = task.ext.args ?: ''
    def prefix     = task.ext.prefix ?: "${meta.id}"
    def accessions = taxid_accession.join(" ")
    // Writing to a name identical to the staged input would clobber the input file.
    if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
    """
    samtools view $args -o ${prefix}.bam $bam $accessions

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        samtools: \$(samtools --version |& sed '1!d ; s/samtools //')
    END_VERSIONS
    """
}
64 changes: 64 additions & 0 deletions subworkflows/local/taxid_bam.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
include { SUBSET_BAM } from '../../modules/local/subset_bam'
include { SAMTOOLS_SORT } from '../../modules/nf-core/samtools/sort/main'
include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main'
include { SAMTOOLS_IDXSTATS } from '../../modules/nf-core/samtools/idxstats/main'


/*
 * TAXID_BAM
 *
 * For every taxid that has at least one reference with a non-"0" count in the
 * idxstats output, subset the input BAM to the references (accessions) of
 * that taxid, then sort and index the resulting per-taxid BAM.
 *
 * take:
 *   bam             - channel of [ meta, bam ]
 *   bai             - channel of [ meta, bai ]; paired with `bam` on meta
 *   accession2taxid - channel of tab-separated file(s); column 1 is used as
 *                     the accession (join key) and column 2 as the taxid
 * emit:
 *   accession     - idxstats rows (one list per reference), "*" row removed
 *   versions      - versions.yml files of all processes run here
 *   taxid_bam     - sorted per-taxid BAM files
 *   taxid_bam_bai - indexes of the sorted per-taxid BAM files
 */
workflow TAXID_BAM {
    take:
    bam
    bai
    accession2taxid

    main:
    ch_versions = Channel.empty()

    // Pair each BAM with its index: [ meta, bam, bai ]
    input_bam = bam.combine( bai, by: 0 )

    SAMTOOLS_IDXSTATS( input_bam )
    // `mix` returns a new channel; the result must be assigned back or the
    // version file is dropped from the emitted `versions` channel.
    ch_versions = ch_versions.mix( SAMTOOLS_IDXSTATS.out.versions.first() )

    // One row per reference sequence; the "*" row is filtered out.
    // NOTE(review): `meta` is dropped here, so rows from all samples end up in
    // one channel and every taxid is later combined with every input BAM.
    // Confirm this is intended when more than one sample is processed.
    ch_accession = SAMTOOLS_IDXSTATS.out.idxstats
        .map { it[1] }
        .splitCsv( header: false, sep: "\t" )
        .filter { it -> it[0] != "*" }

    // Load accession2taxid.map
    ch_accession2taxidmap = accession2taxid.splitCsv( header: false, sep: "\t" )

    // Join on the accession (first element), keep [ accession, taxid ] pairs
    // and collect all accessions belonging to the same taxid.
    // NOTE(review): it[3] is presumably the idxstats mapped-read count, which
    // holds only if the map rows have exactly two columns -- TODO confirm.
    ch_accession_taxid = ch_accession2taxidmap
        .join( ch_accession )
        .filter { it -> it[3] != "0" }
        .map { [ it[0], it[1] ] }
        .groupTuple( by: 1 )

    // Attach the taxid to a copy of the sample meta and split the items into
    // the two inputs expected by SUBSET_BAM.
    ch_samtools_view = ch_accession_taxid
        .combine( input_bam )
        .map { accession_list, taxid, meta, bam, bam_index ->
            def new_meta = meta.clone()
            new_meta.taxid = taxid
            return [ new_meta, bam, bam_index, accession_list ]
        }
        .multiMap {
            meta, bam, bam_index, accession_list ->
            bam: [ meta, bam, bam_index ]
            accession: accession_list.flatten()
        }

    SUBSET_BAM ( ch_samtools_view.bam, ch_samtools_view.accession )
    ch_versions = ch_versions.mix( SUBSET_BAM.out.versions.first() )

    SAMTOOLS_SORT ( SUBSET_BAM.out.bam, [[],[]] )
    ch_versions = ch_versions.mix( SAMTOOLS_SORT.out.versions.first() )

    SAMTOOLS_INDEX ( SAMTOOLS_SORT.out.bam )
    ch_versions = ch_versions.mix( SAMTOOLS_INDEX.out.versions.first() )

    emit:
    accession     = ch_accession
    versions      = ch_versions
    taxid_bam     = SAMTOOLS_SORT.out.bam
    taxid_bam_bai = SAMTOOLS_INDEX.out.bai

}
11 changes: 11 additions & 0 deletions workflows/metaval.nf
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@ include { BOWTIE2_BUILD as BOWTIE2_BUILD_PATHOGEN } from '../modules/nf-core/b
include { FASTQ_ALIGN_BOWTIE2 } from '../subworkflows/nf-core/fastq_align_bowtie2/main'
include { LONGREAD_SCREENPATHOGEN } from '../subworkflows/local/longread_screenpathogen'

// Subset BAM files per taxID (input for consensus calling)
include { TAXID_BAM as TAXID_BAM_SHORTREAD } from '../subworkflows/local/taxid_bam'
include { TAXID_BAM as TAXID_BAM_LONGREAD } from '../subworkflows/local/taxid_bam'

// Summary subworkflow
include { FASTQC } from '../modules/nf-core/fastqc/main'
include { MULTIQC } from '../modules/nf-core/multiqc/main'
Expand Down Expand Up @@ -171,6 +175,13 @@ workflow METAVAL {
// Map long reads to the pathogens genome
LONGREAD_SCREENPATHOGEN ( ch_input.long_reads, ch_reference )
ch_versions = ch_versions.mix( LONGREAD_SCREENPATHOGEN.out.versions )

// Subset bam file for each taxID
accession2taxid_map = Channel.fromPath ( params.accession2taxid, checkIfExists: true )
TAXID_BAM_SHORTREAD ( FASTQ_ALIGN_BOWTIE2.out.bam,FASTQ_ALIGN_BOWTIE2.out.bai,accession2taxid_map )
TAXID_BAM_LONGREAD( LONGREAD_SCREENPATHOGEN.out.bam,LONGREAD_SCREENPATHOGEN.out.bai,accession2taxid_map )

// Calling consensus
}

//
Expand Down

0 comments on commit 3ae2e29

Please sign in to comment.