
Merge pull request #683 from mskcc/develop
Release 1.0.1
gongyixiao authored Nov 1, 2019
2 parents 0a84024 + d70bbd3 commit b2b05bf
Showing 7 changed files with 44 additions and 27 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -1,4 +1,4 @@
-[![Build Status](https://travis-ci.com/mskcc/tempo.svg?token=4kBAQAEEc39zo9ACoThH&branch=master)](https://travis-ci.com/mskcc/tempo)
+[![Build Status](https://travis-ci.com/mskcc/tempo.svg?token=DokCkCiDp43sqzeuXUHD&branch=master)](https://travis-ci.com/mskcc/tempo)

# Tempo

6 changes: 3 additions & 3 deletions conf/resources_aws.config
@@ -27,15 +27,15 @@
}
withName:MarkDuplicates {
cpus = { 2 }
-memory = { 8.GB }
+memory = { 10.GB + task.attempt * 2 }
}
withName:CreateRecalibrationTable {
cpus = { 8 }
-memory = { 24.GB }
+memory = { 24.GB + task.attempt * 2 }
}
withName:RecalibrateBam {
cpus = { 8 }
-memory = { 24.GB }
+memory = { 24.GB + task.attempt * 2 }
}
withName:Alfred {
cpus = { 1 }
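The memory edits in this and the following resource configs all use the same Nextflow idiom: resource directives written as closures are re-evaluated on every execution, so referencing `task.attempt` lets a retried task request more memory than the attempt that failed. A minimal sketch of the pattern, with illustrative values and a hypothetical retry policy (not Tempo's actual settings):

```groovy
process {
  withName:MarkDuplicates {
    cpus = { 2 }
    // closure is re-evaluated per attempt: 12 GB, 14 GB, 16 GB, ...
    memory = { 10.GB + 2.GB * task.attempt }
    // hypothetical policy: retry on scheduler kills (OOM / wall time), give up otherwise
    errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
    maxRetries = 3
  }
}
```

With a retry strategy like the one sketched above, a task killed for exceeding its limit is resubmitted with the larger request instead of failing the whole run.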
8 changes: 4 additions & 4 deletions conf/resources_aws_genome.config
@@ -2,7 +2,7 @@
* -------------------------------------------------
* AWS/genome resource config
* -------------------------------------------------
-* Process resources for analyzing genomes on Juno
+* Process resources for analyzing genomes on AWS
* -------------------------------------------------
*/

@@ -27,15 +27,15 @@
}
withName:MarkDuplicates {
cpus = { 2 }
-memory = { 10.GB }
+memory = { 10.GB + task.attempt * 2 }
}
withName:CreateRecalibrationTable {
cpus = { 8 }
-memory = { 24.GB }
+memory = { 24.GB + task.attempt * 2 }
}
withName:RecalibrateBam {
cpus = { 8 }
-memory = { 24.GB }
+memory = { 24.GB + task.attempt * 2 }
}
withName:Alfred {
cpus = { 1 }
6 changes: 3 additions & 3 deletions conf/resources_juno.config
@@ -27,15 +27,15 @@
}
withName:MarkDuplicates {
cpus = { 2 }
-memory = { 5.GB }
+memory = { 5.GB + task.attempt }
}
withName:CreateRecalibrationTable {
cpus = { 4 }
-memory = { 6.GB }
+memory = { 6.GB + task.attempt }
}
withName:RecalibrateBam {
cpus = { 4 }
-memory = { 6.GB }
+memory = { 6.GB + task.attempt }
}
withName:Alfred {
cpus = { 1 }
7 changes: 3 additions & 4 deletions conf/resources_juno_genome.config
@@ -27,16 +27,15 @@
}
withName:MarkDuplicates {
cpus = { 2 }
-memory = { 5.GB }
-time = { 32.h * task.attempt }
+memory = { 5.GB + task.attempt }
}
withName:CreateRecalibrationTable {
cpus = { 8 }
-memory = { 3.GB }
+memory = { 3.GB + task.attempt }
}
withName:RecalibrateBam {
cpus = { 8 }
-memory = { 3.GB }
+memory = { 3.GB + task.attempt }
}
withName:Alfred {
cpus = { 1 }
6 changes: 6 additions & 0 deletions docs/variant-annotation-and-filtering.md
@@ -1,5 +1,11 @@
# Variant Annotation and Filtering

+::: tip Note
+Hard-coded filter thresholds are viewable and editable in the configuration files here:
+* [Exomes](../conf/exome.config)
+* [Genomes](../conf/genome.config)
+:::
+
::: warning Be aware
* These components of the pipeline are subject to constant change.
* Users should be aware of the pitfalls and challenges of filtering somatic variant calls, which are not further discussed here.
36 changes: 24 additions & 12 deletions pipeline.nf
@@ -332,10 +332,7 @@ if (!params.bam_pairing) {
process AlignReads {
tag {idSample + "@" + fileID} // The tag directive allows you to associate each process execution with a custom label

publishDir "${params.outDir}/qc/fastp/${idSample}", mode: params.publishDirMode, pattern: "*.html"
if (publishAll) {
publishDir "${params.outDir}/qc/fastp/json", mode: params.publishDirMode, pattern: "*.json"
}
publishDir "${params.outDir}/qc/fastp/${idSample}", mode: params.publishDirMode, pattern: "*.{html,json}"

input:
set idSample, fileID, file(fastqFile1), sizeFastqFile1, file(fastqFile2), sizeFastqFile2, assay, targetFile, rgID from fastqFiles
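The consolidated `publishDir` above relies on a single brace glob: `*.{html,json}` matches both report types, so the per-sample fastp folder now receives the HTML and JSON output through one directive instead of the previous conditional pair. A self-contained sketch of how the pattern behaves (process name, channel, publish mode, and file names are made up for illustration):

```groovy
params.outDir = 'results'   // placeholder; Tempo passes this on the command line

process FastpReportDemo {
  // one directive, two file types: the brace glob matches either extension
  publishDir "${params.outDir}/qc/fastp/${idSample}", mode: 'copy', pattern: "*.{html,json}"

  input:
  val idSample from Channel.from('sampleA')

  output:
  file("*.{html,json}")

  """
  touch ${idSample}.fastp.html ${idSample}.fastp.json
  """
}
```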
@@ -364,6 +361,9 @@ if (!params.bam_pairing) {
task.time = task.exitStatus != 140 ? { 6.h } : { 72.h }
}
}

+// on the final attempt, use the 72h time limit regardless of why the task failed before
+task.time = task.attempt < 3 ? task.time : { 72.h }

// mem --- total size of the FASTQ pairs in MB (max memory `samtools sort` can take advantage of)
// memDivider --- If mem_per_core is true, use 1. Else, use task.cpus
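The added cap above makes the last attempt generous regardless of the earlier failure mode. Ignoring the input-size exceptions elided from this hunk, the decision boils down to something like the following plain-Groovy sketch (illustrative only, not pipeline code; exit status 140 is assumed here to be the scheduler's wall-time kill on Juno's LSF cluster):

```groovy
// sketch: which wall-time limit the next attempt should request
def pickTimeLimit(int attempt, Integer prevExitStatus) {
    if (attempt >= 3)          return '72.h'   // final attempt: long limit no matter what failed before
    if (prevExitStatus == 140) return '72.h'   // 140 assumed to mean the previous attempt ran out of wall time
    return '6.h'                               // default short limit
}

assert pickTimeLimit(1, null) == '6.h'
assert pickTimeLimit(2, 140)  == '72.h'
assert pickTimeLimit(3, 1)    == '72.h'
```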
@@ -453,7 +453,7 @@ if (!params.bam_pairing) {
file ("${idSample}.bam.metrics") into markDuplicatesReport

script:
if (workflow.profile == "juno" && params.assayType == "exome") {
if (workflow.profile == "juno") {
if(bam.size() > 120.GB) {
task.time = { 72.h }
}
@@ -464,8 +464,14 @@ if (!params.bam_pairing) {
task.time = task.exitStatus != 140 ? { 6.h } : { 72.h }
}
}
+// on the final attempt, use the 72h time limit regardless of why the task failed before
+task.time = task.attempt < 3 ? task.time : { 72.h }

memMultiplier = params.mem_per_core ? task.cpus : 1
-maxMem = (memMultiplier * task.memory.toString().split(" ")[0].toInteger() - 3)

+// the memory requested from the scheduler grows on each retry, but keep the Java -Xmx steady so the extra headroom goes to garbage collection
+originalMem = task.attempt == 1 ? task.memory : originalMem
+maxMem = (memMultiplier * originalMem.toString().split(" ")[0].toInteger() - 3)
maxMem = maxMem < 4 ? 5 : maxMem
javaOptions = "--java-options '-Xms4000m -Xmx" + maxMem + "g'"
"""
@@ -531,7 +537,10 @@ if (!params.bam_pairing) {
}

memMultiplier = params.mem_per_core ? task.cpus : 1
javaOptions = "--java-options '-Xmx" + task.memory.toString().split(" ")[0].toInteger() * memMultiplier + "g'"
+// the memory requested from the scheduler grows on each retry, but keep the Java -Xmx steady so the extra headroom goes to garbage collection
+originalMem = task.attempt == 1 ? task.memory : originalMem
+javaOptions = "--java-options '-Xmx" + originalMem.toString().split(" ")[0].toInteger() * memMultiplier + "g'"

knownSites = knownIndels.collect{ "--known-sites ${it}" }.join(' ')
"""
gatk \
@@ -590,8 +599,11 @@ if (!params.bam_pairing) {
task.memory = { 6.GB }
task.time = { 72.h }
}

memMultiplier = params.mem_per_core ? task.cpus : 1
javaOptions = "--java-options '-Xmx" + task.memory.toString().split(" ")[0].toInteger() * memMultiplier + "g'"
+// the memory requested from the scheduler grows on each retry, but keep the Java -Xmx steady so the extra headroom goes to garbage collection
+originalMem = task.attempt == 1 ? task.memory : originalMem
+javaOptions = "--java-options '-Xmx" + originalMem.toString().split(" ")[0].toInteger() * memMultiplier + "g'"
"""
echo -e "${idSample}\t${bam.size()}" > file-size.txt
gatk \
@@ -760,7 +772,7 @@ if (!params.bam_pairing) {
file("${idSample}.alfred*tsv.gz.pdf") into bamsQcPdfs

script:
if (workflow.profile == "juno" && params.assayType == "exome") {
if (workflow.profile == "juno") {
if (bam.size() > 200.GB) {
task.time = { 72.h }
}
@@ -1841,8 +1853,9 @@ process SomaticFacetsAnnotation {
set idTumor, idNormal, target, file(purity_rdata), file(purity_cncf), file(hisens_cncf), facetsPath, file(maf) from facetsMafFileSomatic

output:
set idTumor, idNormal, target, file("${outputPrefix}.facets.maf"), file("${outputPrefix}.armlevel.unfiltered.txt") into FacetsAnnotationOutputs
set idTumor, idNormal, target, file("${outputPrefix}.facets.zygosity.maf"), file("${outputPrefix}.armlevel.unfiltered.txt") into FacetsAnnotationOutputs
set file("${outputPrefix}.armlevel.unfiltered.txt"), file("${outputPrefix}.genelevel.unfiltered.txt") into FacetsArmGeneOutputs
file("file-size.txt") into mafSize

when: tools.containsAll(["facets", "mutect2", "manta", "strelka2"]) && runSomatic

@@ -1872,6 +1885,7 @@ process SomaticFacetsAnnotation {
sed -i -e s@${idTumor}@${outputPrefix}@g ${outputPrefix}.armlevel.unfiltered.txt
Rscript --no-init-file /usr/bin/annotate-with-zygosity-somatic.R ${outputPrefix}.facets.maf ${outputPrefix}.facets.zygosity.maf
echo -e "${outputPrefix}\t`wc -l ${outputPrefix}.facets.zygosity.maf | cut -d ' ' -f1`" > file-size.txt
"""
}
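`SomaticFacetsAnnotation` now also emits `file-size.txt`, a two-column record of the output prefix and the line count of the zygosity-annotated MAF, into the `mafSize` channel (this bookkeeping previously lived in `RunNeoantigen` and is removed from it in the next hunk). This excerpt does not show how the channel is consumed downstream; one plausible way to collect such per-sample records into a single table would be the following (purely an assumption, not Tempo's code):

```groovy
// hypothetical downstream aggregation of the per-sample MAF line counts
mafSize
    .collectFile(name: 'maf-line-counts.tsv', storeDir: "${params.outDir}/summary")
    .view { "MAF line counts written to ${it}" }
```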

@@ -1905,7 +1919,6 @@ process RunNeoantigen {

output:
set idTumor, idNormal, target, file("${outputDir}/*") into neoantigenOut
file("file-size.txt") into mafSize
file("${idTumor}__${idNormal}.all_neoantigen_predictions.txt") into NetMhcStatsOutput
file("${outputDir}/*.maf") into NeoantigenMafOutput

@@ -1930,7 +1943,6 @@ process RunNeoantigen {
tmpDir = "${outputDir}-tmp"
tmpDirFullPath = "\$PWD/${tmpDir}/" // must set full path to tmp directories for netMHC and netMHCpan to work; for some reason doesn't work with /scratch, so putting them in the process workspace
"""
echo -e "${outputPrefix}\t`wc -l ${mafFile} | cut -d ' ' -f1`" > file-size.txt
export TMPDIR=${tmpDirFullPath}
mkdir -p ${tmpDir}
chmod 777 ${tmpDir}
