diff --git a/main.nf b/main.nf index 6ba27ae..41ea7f8 100644 --- a/main.nf +++ b/main.nf @@ -177,7 +177,7 @@ process cmsearch_orientations { """ python3 -c "from Bio import SeqIO;import gzip;SeqIO.write(SeqIO.parse(gzip.open('${R1}', 'rt'), 'fastq'), 'R1.fa', 'fasta')" - cmsearch -E 10.0 --cpu 32 --hmmonly --noali --tblout scores.txt ${model} R1.fa + cmsearch -E 10.0 --cpu ${params.nproc} --hmmonly --noali --tblout scores.txt ${model} R1.fa split_reads.py --counts counts.csv --cmsearch scores.txt ${sampleid} ${R1} ${R2} """ } @@ -198,7 +198,7 @@ process vsearch_orientations { """ python3 -c "from Bio import SeqIO;import gzip;SeqIO.write(SeqIO.parse(gzip.open('${R1}', 'rt'), 'fastq'), 'R1.fa', 'fasta')" - vsearch --usearch_global R1.fa --db ${library} --id 0.75 --query_cov 0.8 --strand both --threads 32 --top_hits_only --userfields query+qstrand --userout hits.tsv + vsearch --usearch_global R1.fa --db ${library} --id 0.75 --query_cov 0.8 --strand both --threads ${params.nproc} --top_hits_only --userfields query+qstrand --userout hits.tsv split_reads.py --counts counts.csv --vsearch hits.tsv ${sampleid} ${R1} ${R2} """ } @@ -271,7 +271,7 @@ process learn_errors { process dada_dereplicate { // NOTE: sequences in reverse orientation are reverse complemented to forward orientation for clustering - label 'med_cpu_mem' + label 'c5d_2xlarge' input: tuple val(sampleid), val(batch), val(orientation), path(R1), path(R2), path(model) @@ -291,16 +291,17 @@ process dada_dereplicate { """ dada2_dada.R ${R1} ${R2} \ + --counts counts.csv \ + --data dada.rds \ --errors ${model} \ - --sampleid ${sampleid} \ + --nthreads ${params.nproc} \ --orientation ${orientation} \ + --overlaps overlaps.csv \ --params ${dada_params} \ - --data dada.rds \ + --sampleid ${sampleid} \ --seqtab seqtab.csv \ --seqtab-r1 seqtab_r1.csv \ - --seqtab-r2 seqtab_r2.csv \ - --counts counts.csv \ - --overlaps overlaps.csv + --seqtab-r2 seqtab_r2.csv get_unmerged.R dada.rds \ --forward-seqs unmerged_F.fasta \ 
--reverse-seqs unmerged_R.fasta diff --git a/nextflow.config b/nextflow.config index 6f8b954..2ced88a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -9,6 +9,7 @@ params { // Docker image is hosted on github container = "ghcr.io/nhoffman/dada2-nf:2.0.3" min_reads = 1 + nproc = 4 work_dir = "work" } @@ -32,6 +33,7 @@ profiles { } params { output = 'output' + nproc = 32 } process { container = params.container @@ -43,8 +45,11 @@ profiles { // no more than 2 forks at a time. maxForks = 2 } + withLabel: med_cpu_mem { + maxForks = 2 + } executor{ - queueSize = 16 + queueSize = 32 } } @@ -56,6 +61,7 @@ profiles { } params { output = 'output' + nproc = 32 } process { container = params.container @@ -74,6 +80,7 @@ profiles { resume = true // null for no resume params { output = 'output' + nproc = 32 } process { container = params.container @@ -92,6 +99,10 @@ profiles { uw_batch { workDir = 's3://molmicro-data/nextflow-workdir/dada2-nf' + params { + output = 'output' + nproc = 32 + } process { scratch = "/docker_scratch" queue = 'molmicro-queue' @@ -130,6 +141,7 @@ profiles { } params { output = 'output' + nproc = 4 } process { container = params.container @@ -139,8 +151,14 @@ profiles { errorStrategy = 'retry' maxRetries = 3 } + withLabel: c5d_2xlarge { + // Meant to match the AWS Batch c5d.9xlarge which has 32 cpus. For + // local execution cpus must be configured at the application and + // no more than 4 forks at a time. + maxForks = 4 + } executor{ - queueSize = 4 + queueSize = 14 } } @@ -150,6 +168,7 @@ profiles { resume = true params { output = 'output' + nproc = 32 } process { executor = 'awsbatch'