Merge branch 'release/1.23.0'

DataBiosphere · Aug 15, 2018 · 7a5954a · 7a5954a
2 parents d40a057 + e598097
commit 7a5954a
Show file tree

Hide file tree

Showing 10 changed files with 89 additions and 75 deletions.
diff --git a/CRAM-no-header-md5sum/CRAM_md5sum_checker_wrapper.wdl b/CRAM-no-header-md5sum/CRAM_md5sum_checker_wrapper.wdl
@@ -1,5 +1,5 @@
-import "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.22.0/CRAM-no-header-md5sum/md5sum/CRAM_md5sum.wdl" as f1
-import "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.22.0/CRAM-no-header-md5sum/checker/CRAM_md5sum_checker.wdl" as f2
+import "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.23.0/CRAM-no-header-md5sum/md5sum/CRAM_md5sum.wdl" as f1
+import "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.23.0/CRAM-no-header-md5sum/checker/CRAM_md5sum_checker.wdl" as f2
 
 workflow CRAMMd5sumChecker {
   File inputCRAMFile

diff --git a/aligner/u_of_michigan_aligner-checker/u_of_michigan_aligner_checker.wdl b/aligner/u_of_michigan_aligner-checker/u_of_michigan_aligner_checker.wdl
@@ -1,5 +1,5 @@
-import "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.22.0/aligner/u_of_michigan_aligner/u_of_michigan_aligner.wdl" as TopMed_aligner
-import "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.22.0/aligner/u_of_michigan_aligner-checker/u_of_michigan_aligner_checker_calculation.wdl" as checker
+import "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.23.0/aligner/u_of_michigan_aligner/u_of_michigan_aligner.wdl" as TopMed_aligner
+import "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.23.0/aligner/u_of_michigan_aligner-checker/u_of_michigan_aligner_checker_calculation.wdl" as checker
 
 
 

diff --git a/aligner/u_of_michigan_aligner/u_of_michigan_aligner.wdl b/aligner/u_of_michigan_aligner/u_of_michigan_aligner.wdl
@@ -30,6 +30,15 @@ workflow TopMedAligner {
   File dbSNP_vcf
   File dbSNP_vcf_index
 
+  Int? PreAlign_CPUs
+  Int PreAlign_CPUs_default = select_first([PreAlign_CPUs, 2])
+
+  Int? Align_CPUs
+  Int Align_CPUs_default = select_first([Align_CPUs, 32])
+
+  Int? PostAlign_CPUs
+  Int PostAlign_CPUs_default = select_first([PostAlign_CPUs, 2])
+
   # Optional input to increase all disk sizes in case of outlier sample with strange size behavior
   Int? increase_disk_size
 
@@ -66,7 +75,8 @@ workflow TopMedAligner {
       disk_size = ref_size + (bwa_disk_multiplier * cram_size) + (sort_sam_disk_multiplier * cram_size) + cram_size + additional_disk + fastq_gz_files_size,
       docker_image = docker_image,
       ref_fasta = ref_fasta,
-      ref_fasta_index = ref_fasta_index
+      ref_fasta_index = ref_fasta_index,
+      PreAlign_CPUs_default = PreAlign_CPUs_default
   }
 
   call Align {
@@ -84,7 +94,9 @@ workflow TopMedAligner {
       ref_amb = ref_amb,
       ref_sa = ref_sa,
       ref_fasta = ref_fasta,
-      ref_fasta_index = ref_fasta_index
+      ref_fasta_index = ref_fasta_index,
+      Align_CPUs_default = Align_CPUs_default
+
   }
 
   Float CRAMS_files_size = fastq_gz_to_CRAM_multiplier * cram_size
@@ -102,7 +114,8 @@ workflow TopMedAligner {
       ref_fasta_index = ref_fasta_index,
 
       dbSNP_vcf = dbSNP_vcf,
-      dbSNP_vcf_index = dbSNP_vcf_index
+      dbSNP_vcf_index = dbSNP_vcf_index,
+      PostAlign_CPUs_default = PostAlign_CPUs_default
   }
 
   output {
@@ -120,6 +133,8 @@ workflow TopMedAligner {
      File ref_fasta
      File ref_fasta_index
 
+     Int PreAlign_CPUs_default
+
      # Assign a basename to the intermediate files
      String pre_output_base = "pre_output_base"
 
@@ -153,7 +168,7 @@ workflow TopMedAligner {
     }
    runtime {
       memory: "10 GB"
-      cpu: "32"
+      cpu: sub(PreAlign_CPUs_default, "\\..*", "")
       disks: "local-disk " + sub(disk_size, "\\..*", "") + " HDD"
       zones: "us-central1-a us-central1-b us-east1-d us-central1-c us-central1-f us-east1-c"
       docker: docker_image
@@ -178,6 +193,8 @@ workflow TopMedAligner {
      File ref_fasta
      File ref_fasta_index
 
+     Int Align_CPUs_default
+
      # We have to use a trick to make Cromwell
     # skip substitution when using the bash ${<variable>} syntax
      # This is necessary to get the <var>=$(<command>) sub shell
@@ -231,7 +248,7 @@ workflow TopMedAligner {
     }
    runtime {
       memory: "10 GB"
-      cpu: "32"
+      cpu: sub(Align_CPUs_default, "\\..*", "")
       disks: "local-disk " + sub(disk_size, "\\..*", "") + " HDD"
       zones: "us-central1-a us-central1-b us-east1-d us-central1-c us-central1-f us-east1-c"
       docker: docker_image
@@ -250,6 +267,8 @@ task PostAlign {
 
      Array[File] input_cram_files
 
+     Int PostAlign_CPUs_default
+
      # We have to use a trick to make Cromwell
     # skip substitution when using the bash ${<variable>} syntax
      # This is necessary to get the <var>=$(<command>) sub shell 
@@ -307,7 +326,7 @@ task PostAlign {
     }
    runtime {
       memory: "10 GB"
-      cpu: "32"
+      cpu: sub(PostAlign_CPUs_default, "\\..*", "")
       disks: "local-disk " + sub(disk_size, "\\..*", "") + " HDD"
       zones: "us-central1-a us-central1-b us-east1-d us-central1-c us-central1-f us-east1-c"
       docker: docker_image

diff --git a/variant-caller/sbg-variant-caller-cwl/steps/topmed_freeze3_calling/topmed_freeze3_calling.cwl b/variant-caller/sbg-variant-caller-cwl/steps/topmed_freeze3_calling/topmed_freeze3_calling.cwl
@@ -1,18 +1,11 @@
 class: CommandLineTool
 cwlVersion: v1.0
 id: >-
-  vladimir_obucina/topmed-freeze-3a-variant-calling-pipeline/topmed_freeze3_calling/29
+  vladimir_obucina_topmed_freeze_3a_variant_calling_pipeline_topmed_freeze3_calling_29
 baseCommand: []
 inputs:
   - id: bam_cram_file
     type: File
-    inputBinding:
-      position: 1
-      shellQuote: false
-      valueFrom: |-
-        ${
-            return ''
-        }
     label: BAM/CRAM Files
     secondaryFiles:
       - |-
@@ -109,9 +102,7 @@ arguments:
           }
           var comm = "cat "
 
-          for (var i = 0; i < inputs.index_files.length; i++) {
-              comm += inputs.index_files[i].path + " ";
-          }
+          comm += inputs.index_files.path + " ";
           comm += "> trio_data.index && "
           /*
           for (var i = 0; i < inputs.bam_cram_files.length; i++) {

diff --git a/variant-caller/sbg-variant-caller-cwl/steps/verifybamid/verifybamid.cwl b/variant-caller/sbg-variant-caller-cwl/steps/verifybamid/verifybamid.cwl
@@ -1,16 +1,10 @@
 class: CommandLineTool
 cwlVersion: v1.0
-id: vladimir_obucina/topmed-freeze-3a-variant-calling-pipeline/verifybamid_cwl1/10
+id: vladimir_obucina_topmed_freeze_3a_variant_calling_pipeline_verifybamid_cwl1_10
 baseCommand: []
 inputs:
   - id: bam_cram_file
     type: File
-    inputBinding:
-      position: 1
-      valueFrom: |-
-        ${
-            return ''
-        }
     label: BAM/CRAM Files
     doc: Bam or Cram file for the sample
     secondaryFiles:
@@ -20,15 +14,6 @@ inputs:
         }
   - id: reference
     type: File
-    inputBinding:
-      position: 1
-      shellQuote: false
-      valueFrom: |-
-        ${
-
-            return ""
-
-        }
     label: Reference
     doc: Reference file
     secondaryFiles:
@@ -210,4 +195,3 @@ requirements:
         };
 $namespaces:
   sbg: 'https://sevenbridges.com'
-
diff --git a/variant-caller/sbg-variant-caller-cwl/topmed_freeze3_calling.json b/variant-caller/sbg-variant-caller-cwl/topmed_freeze3_calling.json
@@ -1,31 +1,31 @@
 {
-    "bam_cram_file": {
-      "class": "File",
-      "path": "gs://topmed_workflow_testing/topmed_variant_caller/input_files/NWD176325.recab.cram",
-      "secondaryFiles": [
-        {
-          "class": "File",
-          "path": "gs://topmed_workflow_testing/topmed_variant_caller/input_files/NWD176325.recab.cram.crai"
-        }
-      ]
-    },
-    "reference": {
-      "class": "File",
-      "path": "gs://topmed_workflow_testing/topmed_variant_caller/reference_files/hg38/hs38DH.fa",
-      "secondaryFiles" : [
-       {
-          "path" : "gs://topmed_workflow_testing/topmed_variant_caller/reference_files/hg38/hs38DH.fa.fai",
-          "class" : "File"
-       }
-     ]
-    },
-    "reference_genome": "hg38",
-    "chromosomes": ["20"],
-    "discover_unit": 20000000,
-    "genotype_unit": 1000000,
-    "num_of_jobs": 4,
-    "reference_file":{
-      "class": "File",
-      "path": "gs://topmed_workflow_testing/topmed_variant_caller/reference_files/hg38/hs38DH-db142-v1.tgz"
-    }
+  "bam_cram_file": {
+    "class": "File",
+    "path": "gs://topmed_workflow_testing/topmed_variant_caller/input_files/NWD176325.recab.cram",
+    "secondaryFiles": [
+      {
+        "class": "File",
+        "path": "gs://topmed_workflow_testing/topmed_variant_caller/input_files/NWD176325.recab.cram.crai"
+      }
+    ]
+  },
+  "reference": {
+    "class": "File",
+    "path": "gs://topmed_workflow_testing/topmed_variant_caller/reference_files/hg38/hs38DH.fa",
+    "secondaryFiles" : [
+     {
+        "path" : "gs://topmed_workflow_testing/topmed_variant_caller/reference_files/hg38/hs38DH.fa.fai",
+        "class" : "File"
+     }
+   ]
+  },
+  "reference_genome": "hg38",
+  "chromosomes": ["20"],
+  "discover_unit": 20000000,
+  "genotype_unit": 1000000,
+  "num_of_jobs": 4,
+  "reference_file":{
+    "class": "File",
+    "path": "gs://topmed_workflow_testing/topmed_variant_caller/reference_files/hg38/hs38DH-db142-v1.tgz"
+  }
 }
diff --git a/variant-caller/sbg-variant-caller-cwl/topmed_variant_calling_pipeline.cwl b/variant-caller/sbg-variant-caller-cwl/topmed_variant_calling_pipeline.cwl
@@ -78,7 +78,7 @@ outputs:
     outputSource:
       - topmed_freeze3_calling/vcf_output
     'sbg:fileTypes': GZ
-    type: 'File[]?'
+    type: File?
     'sbg:x': 421.19287109375
     'sbg:y': -622.8525390625
   - id: vcf_index_output

diff --git a/variant-caller/variant-caller-wdl-checker/topmed_freeze3_calling_checker.wdl b/variant-caller/variant-caller-wdl-checker/topmed_freeze3_calling_checker.wdl
@@ -1,5 +1,5 @@
-import "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.22.0/variant-caller/variant-caller-wdl/topmed_freeze3_calling.wdl" as TopMed_variantcaller
-import "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.22.0/variant-caller/variant-caller-wdl-checker/topmed-variantcaller-checker.wdl" as checker
+import "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.23.0/variant-caller/variant-caller-wdl/topmed_freeze3_calling.wdl" as TopMed_variantcaller
+import "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.23.0/variant-caller/variant-caller-wdl-checker/topmed-variantcaller-checker.wdl" as checker
 
 workflow checkerWorkflow {
   File inputTruthVCFFile

diff --git a/variant-caller/variant-caller-wdl/calculate_contamination.wdl b/variant-caller/variant-caller-wdl/calculate_contamination.wdl
@@ -10,6 +10,10 @@ workflow calulateDNAContamination {
   File ref_fasta
   File ref_fasta_index
 
+  Int? CalcContamination_CPUs
+  Int CalcContamination_CPUs_default = select_first([CalcContamination_CPUs, 2])
+
+
   # Optional input to increase all disk sizes in case of outlier sample with strange size behavior
   Int? increase_disk_size
 
@@ -35,6 +39,7 @@ workflow calulateDNAContamination {
 
       reference_genome = reference_genome,
 
+      CalcContamination_CPUs_default = CalcContamination_CPUs_default,
       disk_size = cram_size + reference_size +  + additional_disk,
       docker_image = docker_image
 
@@ -56,6 +61,7 @@ workflow calulateDNAContamination {
 
      String reference_genome
 
+     Int CalcContamination_CPUs_default
      Float disk_size
      String docker_image
 
@@ -107,7 +113,7 @@ workflow calulateDNAContamination {
     }
    runtime {
       memory: "10 GB"
-      cpu: "32"
+      cpu: sub(CalcContamination_CPUs_default, "\\..*", "")
       disks: "local-disk " + sub(disk_size, "\\..*", "") + " HDD"
       zones: "us-central1-a us-central1-b us-east1-d us-central1-c us-central1-f us-east1-c"
       docker: docker_image

diff --git a/variant-caller/variant-caller-wdl/topmed_freeze3_calling.wdl b/variant-caller/variant-caller-wdl/topmed_freeze3_calling.wdl
@@ -1,4 +1,4 @@
-import "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.22.0/variant-caller/variant-caller-wdl/calculate_contamination.wdl" as getDNAContamination
+import "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.23.0/variant-caller/variant-caller-wdl/calculate_contamination.wdl" as getDNAContamination
 
 ## This is the U of Michigan variant caller workflow WDL for the workflow code located here:
 ## https://github.com/statgen/topmed_freeze3_calling
@@ -17,6 +17,14 @@ workflow TopMedVariantCaller {
   Boolean? calculate_DNA_contamination
   Boolean calculate_contamination = select_first([calculate_DNA_contamination, true])
 
+  Int? SumCRAMs_CPUs
+  Int SumCRAMs_CPUs_default = select_first([SumCRAMs_CPUs, 2])
+
+  Int? CalcContamination_CPUs
+  Int CalcContamination_CPUs_default = select_first([CalcContamination_CPUs, 2])
+
+  Int? VariantCaller_CPUs
+  Int VariantCaller_CPUs_default = select_first([VariantCaller_CPUs, 32])
 
   Array[File] input_crai_files
   Array[File] input_cram_files
@@ -157,6 +165,7 @@ workflow TopMedVariantCaller {
       input_crams = input_cram_files,
       input_crais = input_crai_files,
       disk_size = reference_size + additional_disk,
+      SumCRAMs_CPUs_default = SumCRAMs_CPUs_default,
       docker_image = docker_image
   }
 
@@ -169,7 +178,9 @@ workflow TopMedVariantCaller {
               input_crai_file = cram_or_crai_file.right,
 
               ref_fasta = ref_hs38DH_fa,
-              ref_fasta_index = ref_hs38DH_fa_fai
+              ref_fasta_index = ref_hs38DH_fa_fai,
+
+              CalcContamination_CPUs = CalcContamination_CPUs_default 
         }
     }
 
@@ -187,6 +198,7 @@ workflow TopMedVariantCaller {
       input_crais = input_crai_files,
       input_crams = input_cram_files,
       disk_size = sumCRAMSizes.total_size + reference_size + additional_disk,
+      VariantCaller_CPUs_default = VariantCaller_CPUs_default,
       docker_image = docker_image,
 
       ref_1000G_omni2_5_b38_sites_PASS_vcf_gz = ref_1000G_omni2_5_b38_sites_PASS_vcf_gz,
@@ -259,6 +271,7 @@ workflow TopMedVariantCaller {
   task sumCRAMSizes {
     Array[File] input_crams
     Array[File] input_crais
+    Int SumCRAMs_CPUs_default
     Float disk_size
     String docker_image
 
@@ -290,7 +303,7 @@ workflow TopMedVariantCaller {
     }
     runtime {
       memory: "10 GB"
-      cpu: "16"
+      cpu: sub(SumCRAMs_CPUs_default, "\\..*", "")
       disks: "local-disk " + sub(disk_size, "\\..*", "") + " HDD"
       zones: "us-central1-a us-central1-b us-east1-d us-central1-c us-central1-f us-east1-c"
       docker: docker_image
@@ -320,6 +333,7 @@ workflow TopMedVariantCaller {
      Array[File] input_crams
 
      Float disk_size
+     Int VariantCaller_CPUs_default
      String docker_image
 
      File ref_1000G_omni2_5_b38_sites_PASS_vcf_gz
@@ -634,7 +648,7 @@ workflow TopMedVariantCaller {
     }
    runtime {
       memory: "10 GB"
-      cpu: "16"
+      cpu: sub(VariantCaller_CPUs_default, "\\..*", "")
       disks: "local-disk " + sub(disk_size, "\\..*", "") + " HDD"
       zones: "us-central1-a us-central1-b us-east1-d us-central1-c us-central1-f us-east1-c"
       docker: docker_image