Skip to content

Commit

Permalink
Merge pull request #7 from Zymo-Research/zliu-issue6
Browse files Browse the repository at this point in the history
add downsampling
  • Loading branch information
zxl124 authored Sep 1, 2023
2 parents 8ba46ca + 4dbf6fb commit 437a52f
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 2 deletions.
7 changes: 6 additions & 1 deletion main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ Channel.from( summary.collect{ [it.key, it.value] } )
* PROCESS DEFINITION
*/
include { check_design } from "./processes/check_design"
include { downsample } from "./processes/downsample" addParams(
downsample_num: params.downsample_num
)
include { miqscoreShotgun } from "./processes/miqscoreShotgun" addParams(
publish_dir: "${outdir}/miqscoreShotgun",
)
Expand All @@ -64,7 +67,9 @@ workflow {
parse_design(it)
}
.set { input }
miqscoreShotgun(input)
downsample(input)
miqscore_input = params.downsample_num ? downsample.out.reads : input
miqscoreShotgun(miqscore_input)
miqscoreShotgun.out.report.map { "${outdir}/miqscoreShotgun/" + it.getName() }
.collectFile(name: "${outdir}/download_data/file_locations.txt", newLine: true)
.set { output_locations }
Expand Down
3 changes: 2 additions & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ params {
// Workflow input
design = false

// Input Options
// Downsample
downsample_num = 2000000

// Max resources, expected to be overwritten if run on your own system
max_memory = 60.GB
Expand Down
6 changes: 6 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,12 @@
"type": "object",
"description": "Less common options for the pipeline, typically set in a config file",
"properties": {
"downsample_num": {
"type": "integer",
"description": "Number of reads (read pairs for paired-end data) to downsample to. If the input does not have more reads than this, downsampling will not happen. Set to 0 to turn this off.",
"minimum": 0,
"default": 2000000
},
"name": {
"type": "string",
"description": "Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic",
Expand Down
42 changes: 42 additions & 0 deletions processes/downsample.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// Downsample FASTQ files

/*
 * Randomly subsample the FASTQ file(s) of one sample down to
 * `params.downsample_num` reads using `seqtk sample`.
 *
 * Input : tuple (meta, reads). `meta.name` is used as the output prefix and
 *         `meta.single_end` selects whether one file (reads[0]) or two files
 *         (reads[0], reads[1]) are processed.
 * Output: tuple (meta, "<meta.name>_downsample_R{1,2}.fastq.gz"), emitted as `reads`.
 *
 * The process is skipped entirely when `params.downsample_num` is falsy (e.g. 0).
 * When the first read file does not contain more than `params.downsample_num`
 * records, no sampling is done and the outputs are symlinks to the inputs.
 */
process downsample {
    container 'quay.io/biocontainers/seqtk:1.4--he4a0461_1'

    input:
    tuple val(meta), path(reads)

    when:
    params.downsample_num

    output:
    tuple val(meta), path("${meta.name}_downsample_*.fastq.gz"), emit: reads

    script:
    // One label per read file to process: R1 only for single-end, R1 + R2 otherwise.
    def mates = meta.single_end ? ['R1'] : ['R1', 'R2']
    def sample_cmds = []
    def link_cmds = []
    for (int i = 0; i < mates.size(); i++) {
        def prefix = "${meta.name}_downsample_${mates[i]}"
        // The same seed (-s1000) is used for every mate so that R1 and R2 are
        // sampled identically and read pairing is preserved.
        sample_cmds << "seqtk sample -s1000 ${reads[i]} ${params.downsample_num} > ${prefix}.fastq"
        sample_cmds << "gzip ${prefix}.fastq"
        // Use a full `if` rather than `[ ... ] && ln -s ...`: with the short-circuit
        // form, an already-existing target makes the test return 1, and because it
        // is the last command of the script the whole task would exit non-zero.
        link_cmds << "if [ ! -f ${prefix}.fastq.gz ]; then ln -s ${reads[i]} ${prefix}.fastq.gz; fi"
    }
    """
    # FASTQ stores one record per 4 lines; count records in the first read file.
    readnum=\$((\$(zcat ${reads[0]} | wc -l) / 4))
    if ((\$readnum > ${params.downsample_num}))
    then
        ${sample_cmds.join('\n        ')}
    else
        ${link_cmds.join('\n        ')}
    fi
    """
}

0 comments on commit 437a52f

Please sign in to comment.