
Merge pull request #683 from mskcc/develop
Release 1.0.1
gongyixiao authored Nov 1, 2019
2 parents 0a84024 + d70bbd3 commit b2b05bf
Showing 7 changed files with 44 additions and 27 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -1,4 +1,4 @@
-[![Build Status](https://travis-ci.com/mskcc/tempo.svg?token=4kBAQAEEc39zo9ACoThH&branch=master)](https://travis-ci.com/mskcc/tempo)
+[![Build Status](https://travis-ci.com/mskcc/tempo.svg?token=DokCkCiDp43sqzeuXUHD&branch=master)](https://travis-ci.com/mskcc/tempo)

# Tempo

6 changes: 3 additions & 3 deletions conf/resources_aws.config
@@ -27,15 +27,15 @@
}
withName:MarkDuplicates {
cpus = { 2 }
-memory = { 8.GB }
+memory = { 10.GB + task.attempt * 2 }
}
withName:CreateRecalibrationTable {
cpus = { 8 }
-memory = { 24.GB }
+memory = { 24.GB + task.attempt * 2 }
}
withName:RecalibrateBam {
cpus = { 8 }
-memory = { 24.GB }
+memory = { 24.GB + task.attempt * 2 }
}
withName:Alfred {
cpus = { 1 }
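The memory edits in this and the following resource configs all use the same Nextflow idiom: resource directives written as closures are re-evaluated on every execution, so referencing `task.attempt` lets a retried task request more memory than the attempt that failed. A minimal sketch of the pattern, with illustrative values and a hypothetical retry policy (not Tempo's actual settings):

```groovy
process {
  withName:MarkDuplicates {
    cpus = { 2 }
    // closure is re-evaluated per attempt: 12 GB, 14 GB, 16 GB, ...
    memory = { 10.GB + 2.GB * task.attempt }
    // hypothetical policy: retry on scheduler kills (OOM / wall time), give up otherwise
    errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
    maxRetries = 3
  }
}
```

With a retry strategy like the one sketched above, a task killed for exceeding its limit is resubmitted with the larger request instead of failing the whole run.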
8 changes: 4 additions & 4 deletions conf/resources_aws_genome.config
@@ -2,7 +2,7 @@
* -------------------------------------------------
* AWS/genome resource config
* -------------------------------------------------
-* Process resources for analyzing genomes on Juno
+* Process resources for analyzing genomes on AWS
* -------------------------------------------------
*/

@@ -27,15 +27,15 @@
}
withName:MarkDuplicates {
cpus = { 2 }
-memory = { 10.GB }
+memory = { 10.GB + task.attempt * 2 }
}
withName:CreateRecalibrationTable {
cpus = { 8 }
-memory = { 24.GB }
+memory = { 24.GB + task.attempt * 2 }
}
withName:RecalibrateBam {
cpus = { 8 }
-memory = { 24.GB }
+memory = { 24.GB + task.attempt * 2 }
}
withName:Alfred {
cpus = { 1 }
6 changes: 3 additions & 3 deletions conf/resources_juno.config
@@ -27,15 +27,15 @@
}
withName:MarkDuplicates {
cpus = { 2 }
-memory = { 5.GB }
+memory = { 5.GB + task.attempt }
}
withName:CreateRecalibrationTable {
cpus = { 4 }
-memory = { 6.GB }
+memory = { 6.GB + task.attempt }
}
withName:RecalibrateBam {
cpus = { 4 }
-memory = { 6.GB }
+memory = { 6.GB + task.attempt }
}
withName:Alfred {
cpus = { 1 }
7 changes: 3 additions & 4 deletions conf/resources_juno_genome.config
@@ -27,16 +27,15 @@
}
withName:MarkDuplicates {
cpus = { 2 }
-memory = { 5.GB }
-time = { 32.h * task.attempt }
+memory = { 5.GB + task.attempt }
}
withName:CreateRecalibrationTable {
cpus = { 8 }
-memory = { 3.GB }
+memory = { 3.GB + task.attempt }
}
withName:RecalibrateBam {
cpus = { 8 }
-memory = { 3.GB }
+memory = { 3.GB + task.attempt }
}
withName:Alfred {
cpus = { 1 }
6 changes: 6 additions & 0 deletions docs/variant-annotation-and-filtering.md
@@ -1,5 +1,11 @@
# Variant Annotation and Filtering

+::: tip Note
+Hard-coded filter thresholds are viewable and editable in the configuration files here:
+* [Exomes](../conf/exome.config)
+* [Genomes](../conf/genome.config)
+:::
+
::: warning Be aware
* These components of the pipeline are subject to constant change.
* Users should be aware of the pitfalls and challenges of filtering somatic variant calls, which are not further discussed here.
36 changes: 24 additions & 12 deletions pipeline.nf
@@ -332,10 +332,7 @@ if (!params.bam_pairing) {
process AlignReads {
tag {idSample + "@" + fileID} // The tag directive allows you to associate each process execution with a custom label

publishDir "${params.outDir}/qc/fastp/${idSample}", mode: params.publishDirMode, pattern: "*.html"
if (publishAll) {
publishDir "${params.outDir}/qc/fastp/json", mode: params.publishDirMode, pattern: "*.json"
}
publishDir "${params.outDir}/qc/fastp/${idSample}", mode: params.publishDirMode, pattern: "*.{html,json}"

input:
set idSample, fileID, file(fastqFile1), sizeFastqFile1, file(fastqFile2), sizeFastqFile2, assay, targetFile, rgID from fastqFiles
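The consolidated `publishDir` above relies on a single brace glob: `*.{html,json}` matches both report types, so the per-sample fastp folder now receives the HTML and JSON output through one directive instead of the previous conditional pair. A self-contained sketch of how the pattern behaves (process name, channel, publish mode, and file names are made up for illustration):

```groovy
params.outDir = 'results'   // placeholder; Tempo passes this on the command line

process FastpReportDemo {
  // one directive, two file types: the brace glob matches either extension
  publishDir "${params.outDir}/qc/fastp/${idSample}", mode: 'copy', pattern: "*.{html,json}"

  input:
  val idSample from Channel.from('sampleA')

  output:
  file("*.{html,json}")

  """
  touch ${idSample}.fastp.html ${idSample}.fastp.json
  """
}
```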
@@ -364,6 +361,9 @@ if (!params.bam_pairing) {
task.time = task.exitStatus != 140 ? { 6.h } : { 72.h }
}
}

+// on the final attempt, use the 72h time limit regardless of why the task failed before
+task.time = task.attempt < 3 ? task.time : { 72.h }

// mem --- total size of the FASTQ pairs in MB (max memory `samtools sort` can take advantage of)
// memDivider --- If mem_per_core is true, use 1. Else, use task.cpus
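The added cap above makes the last attempt generous regardless of the earlier failure mode. Ignoring the input-size exceptions elided from this hunk, the decision boils down to something like the following plain-Groovy sketch (illustrative only, not pipeline code; exit status 140 is assumed here to be the scheduler's wall-time kill on Juno's LSF cluster):

```groovy
// sketch: which wall-time limit the next attempt should request
def pickTimeLimit(int attempt, Integer prevExitStatus) {
    if (attempt >= 3)          return '72.h'   // final attempt: long limit no matter what failed before
    if (prevExitStatus == 140) return '72.h'   // 140 assumed to mean the previous attempt ran out of wall time
    return '6.h'                               // default short limit
}

assert pickTimeLimit(1, null) == '6.h'
assert pickTimeLimit(2, 140)  == '72.h'
assert pickTimeLimit(3, 1)    == '72.h'
```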
@@ -453,7 +453,7 @@ if (!params.bam_pairing) {
file ("${idSample}.bam.metrics") into markDuplicatesReport

script:
if (workflow.profile == "juno" && params.assayType == "exome") {
if (workflow.profile == "juno") {
if(bam.size() > 120.GB) {
task.time = { 72.h }
}
@@ -464,8 +464,14 @@ if (!params.bam_pairing) {
task.time = task.exitStatus != 140 ? { 6.h } : { 72.h }
}
}
+// on the final attempt, use the 72h time limit regardless of why the task failed before
+task.time = task.attempt < 3 ? task.time : { 72.h }

memMultiplier = params.mem_per_core ? task.cpus : 1
-maxMem = (memMultiplier * task.memory.toString().split(" ")[0].toInteger() - 3)

+// the memory requested from the scheduler grows on each retry, but keep the Java -Xmx steady so the extra headroom goes to garbage collection
+originalMem = task.attempt == 1 ? task.memory : originalMem
+maxMem = (memMultiplier * originalMem.toString().split(" ")[0].toInteger() - 3)
maxMem = maxMem < 4 ? 5 : maxMem
javaOptions = "--java-options '-Xms4000m -Xmx" + maxMem + "g'"
"""
@@ -531,7 +537,10 @@ if (!params.bam_pairing) {
}

memMultiplier = params.mem_per_core ? task.cpus : 1
javaOptions = "--java-options '-Xmx" + task.memory.toString().split(" ")[0].toInteger() * memMultiplier + "g'"
+// the memory requested from the scheduler grows on each retry, but keep the Java -Xmx steady so the extra headroom goes to garbage collection
+originalMem = task.attempt == 1 ? task.memory : originalMem
+javaOptions = "--java-options '-Xmx" + originalMem.toString().split(" ")[0].toInteger() * memMultiplier + "g'"

knownSites = knownIndels.collect{ "--known-sites ${it}" }.join(' ')
"""
gatk \
@@ -590,8 +599,11 @@ if (!params.bam_pairing) {
task.memory = { 6.GB }
task.time = { 72.h }
}

memMultiplier = params.mem_per_core ? task.cpus : 1
javaOptions = "--java-options '-Xmx" + task.memory.toString().split(" ")[0].toInteger() * memMultiplier + "g'"
+// the memory requested from the scheduler grows on each retry, but keep the Java -Xmx steady so the extra headroom goes to garbage collection
+originalMem = task.attempt == 1 ? task.memory : originalMem
+javaOptions = "--java-options '-Xmx" + originalMem.toString().split(" ")[0].toInteger() * memMultiplier + "g'"
"""
echo -e "${idSample}\t${bam.size()}" > file-size.txt
gatk \
@@ -760,7 +772,7 @@ if (!params.bam_pairing) {
file("${idSample}.alfred*tsv.gz.pdf") into bamsQcPdfs

script:
if (workflow.profile == "juno" && params.assayType == "exome") {
if (workflow.profile == "juno") {
if (bam.size() > 200.GB) {
task.time = { 72.h }
}
@@ -1841,8 +1853,9 @@ process SomaticFacetsAnnotation {
set idTumor, idNormal, target, file(purity_rdata), file(purity_cncf), file(hisens_cncf), facetsPath, file(maf) from facetsMafFileSomatic

output:
set idTumor, idNormal, target, file("${outputPrefix}.facets.maf"), file("${outputPrefix}.armlevel.unfiltered.txt") into FacetsAnnotationOutputs
set idTumor, idNormal, target, file("${outputPrefix}.facets.zygosity.maf"), file("${outputPrefix}.armlevel.unfiltered.txt") into FacetsAnnotationOutputs
set file("${outputPrefix}.armlevel.unfiltered.txt"), file("${outputPrefix}.genelevel.unfiltered.txt") into FacetsArmGeneOutputs
file("file-size.txt") into mafSize

when: tools.containsAll(["facets", "mutect2", "manta", "strelka2"]) && runSomatic

@@ -1872,6 +1885,7 @@ process SomaticFacetsAnnotation {
sed -i -e s@${idTumor}@${outputPrefix}@g ${outputPrefix}.armlevel.unfiltered.txt
Rscript --no-init-file /usr/bin/annotate-with-zygosity-somatic.R ${outputPrefix}.facets.maf ${outputPrefix}.facets.zygosity.maf
echo -e "${outputPrefix}\t`wc -l ${outputPrefix}.facets.zygosity.maf | cut -d ' ' -f1`" > file-size.txt
"""
}
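`SomaticFacetsAnnotation` now also emits `file-size.txt`, a two-column record of the output prefix and the line count of the zygosity-annotated MAF, into the `mafSize` channel (this bookkeeping previously lived in `RunNeoantigen` and is removed from it in the next hunk). This excerpt does not show how the channel is consumed downstream; one plausible way to collect such per-sample records into a single table would be the following (purely an assumption, not Tempo's code):

```groovy
// hypothetical downstream aggregation of the per-sample MAF line counts
mafSize
    .collectFile(name: 'maf-line-counts.tsv', storeDir: "${params.outDir}/summary")
    .view { "MAF line counts written to ${it}" }
```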

@@ -1905,7 +1919,6 @@ process RunNeoantigen {

output:
set idTumor, idNormal, target, file("${outputDir}/*") into neoantigenOut
file("file-size.txt") into mafSize
file("${idTumor}__${idNormal}.all_neoantigen_predictions.txt") into NetMhcStatsOutput
file("${outputDir}/*.maf") into NeoantigenMafOutput

@@ -1930,7 +1943,6 @@ process RunNeoantigen {
tmpDir = "${outputDir}-tmp"
tmpDirFullPath = "\$PWD/${tmpDir}/" // must set full path to tmp directories for netMHC and netMHCpan to work; for some reason doesn't work with /scratch, so putting them in the process workspace
"""
echo -e "${outputPrefix}\t`wc -l ${mafFile} | cut -d ' ' -f1`" > file-size.txt
export TMPDIR=${tmpDirFullPath}
mkdir -p ${tmpDir}
chmod 777 ${tmpDir}
