diff --git a/README.md b/README.md index c99e7cda0..360549449 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -[![Build Status](https://travis-ci.com/mskcc/tempo.svg?token=4kBAQAEEc39zo9ACoThH&branch=master)](https://travis-ci.com/mskcc/tempo) +[![Build Status](https://travis-ci.com/mskcc/tempo.svg?token=DokCkCiDp43sqzeuXUHD&branch=master)](https://travis-ci.com/mskcc/tempo) # Tempo diff --git a/conf/resources_aws.config b/conf/resources_aws.config index 13f8879c4..2899ed444 100644 --- a/conf/resources_aws.config +++ b/conf/resources_aws.config @@ -27,15 +27,15 @@ } withName:MarkDuplicates { cpus = { 2 } - memory = { 8.GB } + memory = { 10.GB + task.attempt * 2 } } withName:CreateRecalibrationTable { cpus = { 8 } - memory = { 24.GB } + memory = { 24.GB + task.attempt * 2 } } withName:RecalibrateBam { cpus = { 8 } - memory = { 24.GB } + memory = { 24.GB + task.attempt * 2 } } withName:Alfred { cpus = { 1 } diff --git a/conf/resources_aws_genome.config b/conf/resources_aws_genome.config index ece6f2ab0..8117e93b9 100644 --- a/conf/resources_aws_genome.config +++ b/conf/resources_aws_genome.config @@ -2,7 +2,7 @@ * ------------------------------------------------- * AWS/genome resource config * ------------------------------------------------- - * Process resources for analyzing genomes on Juno + * Process resources for analyzing genomes on AWS * ------------------------------------------------- */ @@ -27,15 +27,15 @@ } withName:MarkDuplicates { cpus = { 2 } - memory = { 10.GB } + memory = { 10.GB + task.attempt * 2 } } withName:CreateRecalibrationTable { cpus = { 8 } - memory = { 24.GB } + memory = { 24.GB + task.attempt * 2 } } withName:RecalibrateBam { cpus = { 8 } - memory = { 24.GB } + memory = { 24.GB + task.attempt * 2 } } withName:Alfred { cpus = { 1 } diff --git a/conf/resources_juno.config b/conf/resources_juno.config index 02674abe2..83fc814cf 100644 --- a/conf/resources_juno.config +++ b/conf/resources_juno.config @@ -27,15 +27,15 @@ } 
withName:MarkDuplicates { cpus = { 2 } - memory = { 5.GB } + memory = { 5.GB + task.attempt } } withName:CreateRecalibrationTable { cpus = { 4 } - memory = { 6.GB } + memory = { 6.GB + task.attempt } } withName:RecalibrateBam { cpus = { 4 } - memory = { 6.GB } + memory = { 6.GB + task.attempt } } withName:Alfred { cpus = { 1 } diff --git a/conf/resources_juno_genome.config b/conf/resources_juno_genome.config index cd9ed8e15..56619f66a 100644 --- a/conf/resources_juno_genome.config +++ b/conf/resources_juno_genome.config @@ -27,16 +27,15 @@ } withName:MarkDuplicates { cpus = { 2 } - memory = { 5.GB } - time = { 32.h * task.attempt } + memory = { 5.GB + task.attempt } } withName:CreateRecalibrationTable { cpus = { 8 } - memory = { 3.GB } + memory = { 3.GB + task.attempt } } withName:RecalibrateBam { cpus = { 8 } - memory = { 3.GB } + memory = { 3.GB + task.attempt } } withName:Alfred { cpus = { 1 } diff --git a/docs/variant-annotation-and-filtering.md b/docs/variant-annotation-and-filtering.md index ecfce1759..3eb9d47e1 100644 --- a/docs/variant-annotation-and-filtering.md +++ b/docs/variant-annotation-and-filtering.md @@ -1,5 +1,11 @@ # Variant Annotation and Filtering +:::tip Note +Hard-coded filter thresholds are viewable and editable in the configuration files here: +* [Exomes](../conf/exome.config) +* [Genomes](../conf/genome.config) +::: + ::: warning Be aware * These components of the pipeline are subject to constant change. * Users should be aware of the pitfalls and challenges of filtering somatic variant calls, which are not further discussed here. 
diff --git a/pipeline.nf b/pipeline.nf index feb3eb38e..ca6528b2b 100644 --- a/pipeline.nf +++ b/pipeline.nf @@ -332,10 +332,7 @@ if (!params.bam_pairing) { process AlignReads { tag {idSample + "@" + fileID} // The tag directive allows you to associate each process executions with a custom label - publishDir "${params.outDir}/qc/fastp/${idSample}", mode: params.publishDirMode, pattern: "*.html" - if (publishAll) { - publishDir "${params.outDir}/qc/fastp/json", mode: params.publishDirMode, pattern: "*.json" - } + publishDir "${params.outDir}/qc/fastp/${idSample}", mode: params.publishDirMode, pattern: "*.{html,json}" input: set idSample, fileID, file(fastqFile1), sizeFastqFile1, file(fastqFile2), sizeFastqFile2, assay, targetFile, rgID from fastqFiles @@ -364,6 +361,9 @@ if (!params.bam_pairing) { task.time = task.exitStatus != 140 ? { 6.h } : { 72.h } } } + + // if it's the last time to try, use 72h as time limit no matter for what reason it failed before + task.time = task.attempt < 3 ? task.time : { 72.h } // mem --- total size of the FASTQ pairs in MB (max memory `samtools sort` can take advantage of) // memDivider --- If mem_per_core is true, use 1. Else, use task.cpus @@ -453,7 +453,7 @@ if (!params.bam_pairing) { file ("${idSample}.bam.metrics") into markDuplicatesReport script: - if (workflow.profile == "juno" && params.assayType == "exome") { + if (workflow.profile == "juno") { if(bam.size() > 120.GB) { task.time = { 72.h } } @@ -464,8 +464,14 @@ if (!params.bam_pairing) { task.time = task.exitStatus != 140 ? { 6.h } : { 72.h } } } + // if it's the last time to try, use 72h as time limit no matter for what reason it failed before + task.time = task.attempt < 3 ? task.time : { 72.h } + memMultiplier = params.mem_per_core ? 
task.cpus : 1 - maxMem = (memMultiplier * task.memory.toString().split(" ")[0].toInteger() - 3) + + // when the memory requested from the system increases on every retry, keep the Java Xmx steady in order to leave more memory for Java garbage collection + originalMem = task.attempt ==1 ? task.memory : originalMem + maxMem = (memMultiplier * originalMem.toString().split(" ")[0].toInteger() - 3) maxMem = maxMem < 4 ? 5 : maxMem javaOptions = "--java-options '-Xms4000m -Xmx" + maxMem + "g'" """ @@ -531,7 +537,10 @@ if (!params.bam_pairing) { } memMultiplier = params.mem_per_core ? task.cpus : 1 - javaOptions = "--java-options '-Xmx" + task.memory.toString().split(" ")[0].toInteger() * memMultiplier + "g'" + // when the memory requested from the system increases on every retry, keep the Java Xmx steady in order to leave more memory for Java garbage collection + originalMem = task.attempt ==1 ? task.memory : originalMem + javaOptions = "--java-options '-Xmx" + originalMem.toString().split(" ")[0].toInteger() * memMultiplier + "g'" + knownSites = knownIndels.collect{ "--known-sites ${it}" }.join(' ') """ gatk \ @@ -590,8 +599,11 @@ if (!params.bam_pairing) { task.memory = { 6.GB } task.time = { 72.h } } + memMultiplier = params.mem_per_core ? task.cpus : 1 - javaOptions = "--java-options '-Xmx" + task.memory.toString().split(" ")[0].toInteger() * memMultiplier + "g'" + // when the memory requested from the system increases on every retry, keep the Java Xmx steady in order to leave more memory for Java garbage collection + originalMem = task.attempt ==1 ? 
task.memory : originalMem + javaOptions = "--java-options '-Xmx" + originalMem.toString().split(" ")[0].toInteger() * memMultiplier + "g'" """ echo -e "${idSample}\t${bam.size()}" > file-size.txt gatk \ @@ -760,7 +772,7 @@ if (!params.bam_pairing) { file("${idSample}.alfred*tsv.gz.pdf") into bamsQcPdfs script: - if (workflow.profile == "juno" && params.assayType == "exome") { + if (workflow.profile == "juno") { if (bam.size() > 200.GB) { task.time = { 72.h } } @@ -1841,8 +1853,9 @@ process SomaticFacetsAnnotation { set idTumor, idNormal, target, file(purity_rdata), file(purity_cncf), file(hisens_cncf), facetsPath, file(maf) from facetsMafFileSomatic output: - set idTumor, idNormal, target, file("${outputPrefix}.facets.maf"), file("${outputPrefix}.armlevel.unfiltered.txt") into FacetsAnnotationOutputs + set idTumor, idNormal, target, file("${outputPrefix}.facets.zygosity.maf"), file("${outputPrefix}.armlevel.unfiltered.txt") into FacetsAnnotationOutputs set file("${outputPrefix}.armlevel.unfiltered.txt"), file("${outputPrefix}.genelevel.unfiltered.txt") into FacetsArmGeneOutputs + file("file-size.txt") into mafSize when: tools.containsAll(["facets", "mutect2", "manta", "strelka2"]) && runSomatic @@ -1872,6 +1885,7 @@ process SomaticFacetsAnnotation { sed -i -e s@${idTumor}@${outputPrefix}@g ${outputPrefix}.armlevel.unfiltered.txt Rscript --no-init-file /usr/bin/annotate-with-zygosity-somatic.R ${outputPrefix}.facets.maf ${outputPrefix}.facets.zygosity.maf + echo -e "${outputPrefix}\t`wc -l ${outputPrefix}.facets.zygosity.maf | cut -d ' ' -f1`" > file-size.txt """ } @@ -1905,7 +1919,6 @@ process RunNeoantigen { output: set idTumor, idNormal, target, file("${outputDir}/*") into neoantigenOut - file("file-size.txt") into mafSize file("${idTumor}__${idNormal}.all_neoantigen_predictions.txt") into NetMhcStatsOutput file("${outputDir}/*.maf") into NeoantigenMafOutput @@ -1930,7 +1943,6 @@ process RunNeoantigen { tmpDir = "${outputDir}-tmp" tmpDirFullPath = 
"\$PWD/${tmpDir}/" // must set full path to tmp directories for netMHC and netMHCpan to work; for some reason doesn't work with /scratch, so putting them in the process workspace """ - echo -e "${outputPrefix}\t`wc -l ${mafFile} | cut -d ' ' -f1`" > file-size.txt export TMPDIR=${tmpDirFullPath} mkdir -p ${tmpDir} chmod 777 ${tmpDir}