Skip to content

Commit

Permalink
Merge branch 'release/1.23.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
Walt Shands committed Aug 15, 2018
2 parents d40a057 + e598097 commit 7a5954a
Show file tree
Hide file tree
Showing 10 changed files with 89 additions and 75 deletions.
4 changes: 2 additions & 2 deletions CRAM-no-header-md5sum/CRAM_md5sum_checker_wrapper.wdl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.22.0/CRAM-no-header-md5sum/md5sum/CRAM_md5sum.wdl" as f1
import "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.22.0/CRAM-no-header-md5sum/checker/CRAM_md5sum_checker.wdl" as f2
import "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.23.0/CRAM-no-header-md5sum/md5sum/CRAM_md5sum.wdl" as f1
import "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.23.0/CRAM-no-header-md5sum/checker/CRAM_md5sum_checker.wdl" as f2

workflow CRAMMd5sumChecker {
File inputCRAMFile
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.22.0/aligner/u_of_michigan_aligner/u_of_michigan_aligner.wdl" as TopMed_aligner
import "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.22.0/aligner/u_of_michigan_aligner-checker/u_of_michigan_aligner_checker_calculation.wdl" as checker
import "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.23.0/aligner/u_of_michigan_aligner/u_of_michigan_aligner.wdl" as TopMed_aligner
import "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.23.0/aligner/u_of_michigan_aligner-checker/u_of_michigan_aligner_checker_calculation.wdl" as checker



Expand Down
31 changes: 25 additions & 6 deletions aligner/u_of_michigan_aligner/u_of_michigan_aligner.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,15 @@ workflow TopMedAligner {
File dbSNP_vcf
File dbSNP_vcf_index

Int? PreAlign_CPUs
Int PreAlign_CPUs_default = select_first([PreAlign_CPUs, 2])

Int? Align_CPUs
Int Align_CPUs_default = select_first([Align_CPUs, 32])

Int? PostAlign_CPUs
Int PostAlign_CPUs_default = select_first([PostAlign_CPUs, 2])

# Optional input to increase all disk sizes in case of outlier sample with strange size behavior
Int? increase_disk_size

Expand Down Expand Up @@ -66,7 +75,8 @@ workflow TopMedAligner {
disk_size = ref_size + (bwa_disk_multiplier * cram_size) + (sort_sam_disk_multiplier * cram_size) + cram_size + additional_disk + fastq_gz_files_size,
docker_image = docker_image,
ref_fasta = ref_fasta,
ref_fasta_index = ref_fasta_index
ref_fasta_index = ref_fasta_index,
PreAlign_CPUs_default = PreAlign_CPUs_default
}

call Align {
Expand All @@ -84,7 +94,9 @@ workflow TopMedAligner {
ref_amb = ref_amb,
ref_sa = ref_sa,
ref_fasta = ref_fasta,
ref_fasta_index = ref_fasta_index
ref_fasta_index = ref_fasta_index,
Align_CPUs_default = Align_CPUs_default

}

Float CRAMS_files_size = fastq_gz_to_CRAM_multiplier * cram_size
Expand All @@ -102,7 +114,8 @@ workflow TopMedAligner {
ref_fasta_index = ref_fasta_index,

dbSNP_vcf = dbSNP_vcf,
dbSNP_vcf_index = dbSNP_vcf_index
dbSNP_vcf_index = dbSNP_vcf_index,
PostAlign_CPUs_default = PostAlign_CPUs_default
}

output {
Expand All @@ -120,6 +133,8 @@ workflow TopMedAligner {
File ref_fasta
File ref_fasta_index

Int PreAlign_CPUs_default

# Assign a basename to the intermediate files
String pre_output_base = "pre_output_base"

Expand Down Expand Up @@ -153,7 +168,7 @@ workflow TopMedAligner {
}
runtime {
memory: "10 GB"
cpu: "32"
cpu: sub(PreAlign_CPUs_default, "\\..*", "")
disks: "local-disk " + sub(disk_size, "\\..*", "") + " HDD"
zones: "us-central1-a us-central1-b us-east1-d us-central1-c us-central1-f us-east1-c"
docker: docker_image
Expand All @@ -178,6 +193,8 @@ workflow TopMedAligner {
File ref_fasta
File ref_fasta_index

Int Align_CPUs_default

# We have to use a trick to make Cromwell
# skip substitution when using the bash ${<variable>} syntax
# This is necessary to get the <var>=$(<command>) sub shell
Expand Down Expand Up @@ -231,7 +248,7 @@ workflow TopMedAligner {
}
runtime {
memory: "10 GB"
cpu: "32"
cpu: sub(Align_CPUs_default, "\\..*", "")
disks: "local-disk " + sub(disk_size, "\\..*", "") + " HDD"
zones: "us-central1-a us-central1-b us-east1-d us-central1-c us-central1-f us-east1-c"
docker: docker_image
Expand All @@ -250,6 +267,8 @@ task PostAlign {

Array[File] input_cram_files

Int PostAlign_CPUs_default

# We have to use a trick to make Cromwell
# skip substitution when using the bash ${<variable>} syntax
# This is necessary to get the <var>=$(<command>) sub shell
Expand Down Expand Up @@ -307,7 +326,7 @@ task PostAlign {
}
runtime {
memory: "10 GB"
cpu: "32"
cpu: sub(PostAlign_CPUs_default, "\\..*", "")
disks: "local-disk " + sub(disk_size, "\\..*", "") + " HDD"
zones: "us-central1-a us-central1-b us-east1-d us-central1-c us-central1-f us-east1-c"
docker: docker_image
Expand Down
Original file line number Diff line number Diff line change
@@ -1,18 +1,11 @@
class: CommandLineTool
cwlVersion: v1.0
id: >-
vladimir_obucina/topmed-freeze-3a-variant-calling-pipeline/topmed_freeze3_calling/29
vladimir_obucina_topmed_freeze_3a_variant_calling_pipeline_topmed_freeze3_calling_29
baseCommand: []
inputs:
- id: bam_cram_file
type: File
inputBinding:
position: 1
shellQuote: false
valueFrom: |-
${
return ''
}
label: BAM/CRAM Files
secondaryFiles:
- |-
Expand Down Expand Up @@ -109,9 +102,7 @@ arguments:
}
var comm = "cat "

for (var i = 0; i < inputs.index_files.length; i++) {
comm += inputs.index_files[i].path + " ";
}
comm += inputs.index_files.path + " ";
comm += "> trio_data.index && "
/*
for (var i = 0; i < inputs.bam_cram_files.length; i++) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,10 @@
class: CommandLineTool
cwlVersion: v1.0
id: vladimir_obucina/topmed-freeze-3a-variant-calling-pipeline/verifybamid_cwl1/10
id: vladimir_obucina_topmed_freeze_3a_variant_calling_pipeline_verifybamid_cwl1_10
baseCommand: []
inputs:
- id: bam_cram_file
type: File
inputBinding:
position: 1
valueFrom: |-
${
return ''
}
label: BAM/CRAM Files
doc: Bam or Cram file for the sample
secondaryFiles:
Expand All @@ -20,15 +14,6 @@ inputs:
}
- id: reference
type: File
inputBinding:
position: 1
shellQuote: false
valueFrom: |-
${

return ""

}
label: Reference
doc: Reference file
secondaryFiles:
Expand Down Expand Up @@ -210,4 +195,3 @@ requirements:
};
$namespaces:
sbg: 'https://sevenbridges.com'

58 changes: 29 additions & 29 deletions variant-caller/sbg-variant-caller-cwl/topmed_freeze3_calling.json
Original file line number Diff line number Diff line change
@@ -1,31 +1,31 @@
{
"bam_cram_file": {
"class": "File",
"path": "gs://topmed_workflow_testing/topmed_variant_caller/input_files/NWD176325.recab.cram",
"secondaryFiles": [
{
"class": "File",
"path": "gs://topmed_workflow_testing/topmed_variant_caller/input_files/NWD176325.recab.cram.crai"
}
]
},
"reference": {
"class": "File",
"path": "gs://topmed_workflow_testing/topmed_variant_caller/reference_files/hg38/hs38DH.fa",
"secondaryFiles" : [
{
"path" : "gs://topmed_workflow_testing/topmed_variant_caller/reference_files/hg38/hs38DH.fa.fai",
"class" : "File"
}
]
},
"reference_genome": "hg38",
"chromosomes": ["20"],
"discover_unit": 20000000,
"genotype_unit": 1000000,
"num_of_jobs": 4,
"reference_file":{
"class": "File",
"path": "gs://topmed_workflow_testing/topmed_variant_caller/reference_files/hg38/hs38DH-db142-v1.tgz"
}
"bam_cram_file": {
"class": "File",
"path": "gs://topmed_workflow_testing/topmed_variant_caller/input_files/NWD176325.recab.cram",
"secondaryFiles": [
{
"class": "File",
"path": "gs://topmed_workflow_testing/topmed_variant_caller/input_files/NWD176325.recab.cram.crai"
}
]
},
"reference": {
"class": "File",
"path": "gs://topmed_workflow_testing/topmed_variant_caller/reference_files/hg38/hs38DH.fa",
"secondaryFiles" : [
{
"path" : "gs://topmed_workflow_testing/topmed_variant_caller/reference_files/hg38/hs38DH.fa.fai",
"class" : "File"
}
]
},
"reference_genome": "hg38",
"chromosomes": ["20"],
"discover_unit": 20000000,
"genotype_unit": 1000000,
"num_of_jobs": 4,
"reference_file":{
"class": "File",
"path": "gs://topmed_workflow_testing/topmed_variant_caller/reference_files/hg38/hs38DH-db142-v1.tgz"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ outputs:
outputSource:
- topmed_freeze3_calling/vcf_output
'sbg:fileTypes': GZ
type: 'File[]?'
type: File?
'sbg:x': 421.19287109375
'sbg:y': -622.8525390625
- id: vcf_index_output
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.22.0/variant-caller/variant-caller-wdl/topmed_freeze3_calling.wdl" as TopMed_variantcaller
import "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.22.0/variant-caller/variant-caller-wdl-checker/topmed-variantcaller-checker.wdl" as checker
import "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.23.0/variant-caller/variant-caller-wdl/topmed_freeze3_calling.wdl" as TopMed_variantcaller
import "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.23.0/variant-caller/variant-caller-wdl-checker/topmed-variantcaller-checker.wdl" as checker

workflow checkerWorkflow {
File inputTruthVCFFile
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ workflow calulateDNAContamination {
File ref_fasta
File ref_fasta_index

Int? CalcContamination_CPUs
Int CalcContamination_CPUs_default = select_first([CalcContamination_CPUs, 2])


# Optional input to increase all disk sizes in case of outlier sample with strange size behavior
Int? increase_disk_size

Expand All @@ -35,6 +39,7 @@ workflow calulateDNAContamination {

reference_genome = reference_genome,

CalcContamination_CPUs_default = CalcContamination_CPUs_default,
disk_size = cram_size + reference_size + + additional_disk,
docker_image = docker_image

Expand All @@ -56,6 +61,7 @@ workflow calulateDNAContamination {

String reference_genome

Int CalcContamination_CPUs_default
Float disk_size
String docker_image

Expand Down Expand Up @@ -107,7 +113,7 @@ workflow calulateDNAContamination {
}
runtime {
memory: "10 GB"
cpu: "32"
cpu: sub(CalcContamination_CPUs_default, "\\..*", "")
disks: "local-disk " + sub(disk_size, "\\..*", "") + " HDD"
zones: "us-central1-a us-central1-b us-east1-d us-central1-c us-central1-f us-east1-c"
docker: docker_image
Expand Down
22 changes: 18 additions & 4 deletions variant-caller/variant-caller-wdl/topmed_freeze3_calling.wdl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.22.0/variant-caller/variant-caller-wdl/calculate_contamination.wdl" as getDNAContamination
import "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.23.0/variant-caller/variant-caller-wdl/calculate_contamination.wdl" as getDNAContamination

## This is the U of Michigan variant caller workflow WDL for the workflow code located here:
## https://github.com/statgen/topmed_freeze3_calling
Expand All @@ -17,6 +17,14 @@ workflow TopMedVariantCaller {
Boolean? calculate_DNA_contamination
Boolean calculate_contamination = select_first([calculate_DNA_contamination, true])

Int? SumCRAMs_CPUs
Int SumCRAMs_CPUs_default = select_first([SumCRAMs_CPUs, 2])

Int? CalcContamination_CPUs
Int CalcContamination_CPUs_default = select_first([CalcContamination_CPUs, 2])

Int? VariantCaller_CPUs
Int VariantCaller_CPUs_default = select_first([VariantCaller_CPUs, 32])

Array[File] input_crai_files
Array[File] input_cram_files
Expand Down Expand Up @@ -157,6 +165,7 @@ workflow TopMedVariantCaller {
input_crams = input_cram_files,
input_crais = input_crai_files,
disk_size = reference_size + additional_disk,
SumCRAMs_CPUs_default = SumCRAMs_CPUs_default,
docker_image = docker_image
}

Expand All @@ -169,7 +178,9 @@ workflow TopMedVariantCaller {
input_crai_file = cram_or_crai_file.right,

ref_fasta = ref_hs38DH_fa,
ref_fasta_index = ref_hs38DH_fa_fai
ref_fasta_index = ref_hs38DH_fa_fai,

CalcContamination_CPUs = CalcContamination_CPUs_default
}
}

Expand All @@ -187,6 +198,7 @@ workflow TopMedVariantCaller {
input_crais = input_crai_files,
input_crams = input_cram_files,
disk_size = sumCRAMSizes.total_size + reference_size + additional_disk,
VariantCaller_CPUs_default = VariantCaller_CPUs_default,
docker_image = docker_image,

ref_1000G_omni2_5_b38_sites_PASS_vcf_gz = ref_1000G_omni2_5_b38_sites_PASS_vcf_gz,
Expand Down Expand Up @@ -259,6 +271,7 @@ workflow TopMedVariantCaller {
task sumCRAMSizes {
Array[File] input_crams
Array[File] input_crais
Int SumCRAMs_CPUs_default
Float disk_size
String docker_image

Expand Down Expand Up @@ -290,7 +303,7 @@ workflow TopMedVariantCaller {
}
runtime {
memory: "10 GB"
cpu: "16"
cpu: sub(SumCRAMs_CPUs_default, "\\..*", "")
disks: "local-disk " + sub(disk_size, "\\..*", "") + " HDD"
zones: "us-central1-a us-central1-b us-east1-d us-central1-c us-central1-f us-east1-c"
docker: docker_image
Expand Down Expand Up @@ -320,6 +333,7 @@ workflow TopMedVariantCaller {
Array[File] input_crams
Float disk_size
Int VariantCaller_CPUs_default
String docker_image
File ref_1000G_omni2_5_b38_sites_PASS_vcf_gz
Expand Down Expand Up @@ -634,7 +648,7 @@ workflow TopMedVariantCaller {
}
runtime {
memory: "10 GB"
cpu: "16"
cpu: sub(VariantCaller_CPUs_default, "\\..*", "")
disks: "local-disk " + sub(disk_size, "\\..*", "") + " HDD"
zones: "us-central1-a us-central1-b us-east1-d us-central1-c us-central1-f us-east1-c"
docker: docker_image
Expand Down

0 comments on commit 7a5954a

Please sign in to comment.