diff --git a/CRAM-no-header-md5sum/CRAM_md5sum_checker_wrapper.wdl b/CRAM-no-header-md5sum/CRAM_md5sum_checker_wrapper.wdl index 337d972..1f2f7da 100644 --- a/CRAM-no-header-md5sum/CRAM_md5sum_checker_wrapper.wdl +++ b/CRAM-no-header-md5sum/CRAM_md5sum_checker_wrapper.wdl @@ -1,5 +1,5 @@ -import "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.22.0/CRAM-no-header-md5sum/md5sum/CRAM_md5sum.wdl" as f1 -import "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.22.0/CRAM-no-header-md5sum/checker/CRAM_md5sum_checker.wdl" as f2 +import "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.23.0/CRAM-no-header-md5sum/md5sum/CRAM_md5sum.wdl" as f1 +import "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.23.0/CRAM-no-header-md5sum/checker/CRAM_md5sum_checker.wdl" as f2 workflow CRAMMd5sumChecker { File inputCRAMFile diff --git a/aligner/u_of_michigan_aligner-checker/u_of_michigan_aligner_checker.wdl b/aligner/u_of_michigan_aligner-checker/u_of_michigan_aligner_checker.wdl index 23264fe..d045ee7 100644 --- a/aligner/u_of_michigan_aligner-checker/u_of_michigan_aligner_checker.wdl +++ b/aligner/u_of_michigan_aligner-checker/u_of_michigan_aligner_checker.wdl @@ -1,5 +1,5 @@ -import "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.22.0/aligner/u_of_michigan_aligner/u_of_michigan_aligner.wdl" as TopMed_aligner -import "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.22.0/aligner/u_of_michigan_aligner-checker/u_of_michigan_aligner_checker_calculation.wdl" as checker +import "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.23.0/aligner/u_of_michigan_aligner/u_of_michigan_aligner.wdl" as TopMed_aligner +import "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.23.0/aligner/u_of_michigan_aligner-checker/u_of_michigan_aligner_checker_calculation.wdl" as checker diff --git a/aligner/u_of_michigan_aligner/u_of_michigan_aligner.wdl b/aligner/u_of_michigan_aligner/u_of_michigan_aligner.wdl index e4af698..805e333 100644 --- a/aligner/u_of_michigan_aligner/u_of_michigan_aligner.wdl +++ b/aligner/u_of_michigan_aligner/u_of_michigan_aligner.wdl @@ -30,6 +30,15 @@ workflow TopMedAligner { File dbSNP_vcf File dbSNP_vcf_index + Int? PreAlign_CPUs + Int PreAlign_CPUs_default = select_first([PreAlign_CPUs, 2]) + + Int? Align_CPUs + Int Align_CPUs_default = select_first([Align_CPUs, 32]) + + Int? PostAlign_CPUs + Int PostAlign_CPUs_default = select_first([PostAlign_CPUs, 2]) + # Optional input to increase all disk sizes in case of outlier sample with strange size behavior Int? increase_disk_size @@ -66,7 +75,8 @@ workflow TopMedAligner { disk_size = ref_size + (bwa_disk_multiplier * cram_size) + (sort_sam_disk_multiplier * cram_size) + cram_size + additional_disk + fastq_gz_files_size, docker_image = docker_image, ref_fasta = ref_fasta, - ref_fasta_index = ref_fasta_index + ref_fasta_index = ref_fasta_index, + PreAlign_CPUs_default = PreAlign_CPUs_default } call Align { @@ -84,7 +94,9 @@ workflow TopMedAligner { ref_amb = ref_amb, ref_sa = ref_sa, ref_fasta = ref_fasta, - ref_fasta_index = ref_fasta_index + ref_fasta_index = ref_fasta_index, + Align_CPUs_default = Align_CPUs_default + } Float CRAMS_files_size = fastq_gz_to_CRAM_multiplier * cram_size @@ -102,7 +114,8 @@ workflow TopMedAligner { ref_fasta_index = ref_fasta_index, dbSNP_vcf = dbSNP_vcf, - dbSNP_vcf_index = dbSNP_vcf_index + dbSNP_vcf_index = dbSNP_vcf_index, + PostAlign_CPUs_default = PostAlign_CPUs_default } output { @@ -120,6 +133,8 @@ workflow TopMedAligner { File ref_fasta File ref_fasta_index + Int PreAlign_CPUs_default + # Assign a basename to the intermediate files String pre_output_base = "pre_output_base" @@ -153,7 +168,7 @@ workflow TopMedAligner { } runtime { memory: "10 GB" - cpu: "32" + cpu: sub(PreAlign_CPUs_default, "\\..*", "") disks: "local-disk " + sub(disk_size, "\\..*", "") + " HDD" zones: "us-central1-a us-central1-b us-east1-d us-central1-c us-central1-f us-east1-c" docker: docker_image @@ -178,6 +193,8 @@ workflow TopMedAligner { File ref_fasta File ref_fasta_index + Int Align_CPUs_default + # We have to use a trick to make Cromwell # skip substitution when using the bash ${=$() sub shell @@ -231,7 +248,7 @@ workflow TopMedAligner { } runtime { memory: "10 GB" - cpu: "32" + cpu: sub(Align_CPUs_default, "\\..*", "") disks: "local-disk " + sub(disk_size, "\\..*", "") + " HDD" zones: "us-central1-a us-central1-b us-east1-d us-central1-c us-central1-f us-east1-c" docker: docker_image @@ -250,6 +267,8 @@ task PostAlign { Array[File] input_cram_files + Int PostAlign_CPUs_default + # We have to use a trick to make Cromwell # skip substitution when using the bash ${=$() sub shell @@ -307,7 +326,7 @@ task PostAlign { } runtime { memory: "10 GB" - cpu: "32" + cpu: sub(PostAlign_CPUs_default, "\\..*", "") disks: "local-disk " + sub(disk_size, "\\..*", "") + " HDD" zones: "us-central1-a us-central1-b us-east1-d us-central1-c us-central1-f us-east1-c" docker: docker_image diff --git a/variant-caller/sbg-variant-caller-cwl/steps/topmed_freeze3_calling/topmed_freeze3_calling.cwl b/variant-caller/sbg-variant-caller-cwl/steps/topmed_freeze3_calling/topmed_freeze3_calling.cwl index 7943d89..f92fe8e 100644 --- a/variant-caller/sbg-variant-caller-cwl/steps/topmed_freeze3_calling/topmed_freeze3_calling.cwl +++ b/variant-caller/sbg-variant-caller-cwl/steps/topmed_freeze3_calling/topmed_freeze3_calling.cwl @@ -1,18 +1,11 @@ class: CommandLineTool cwlVersion: v1.0 id: >- - vladimir_obucina/topmed-freeze-3a-variant-calling-pipeline/topmed_freeze3_calling/29 + vladimir_obucina_topmed_freeze_3a_variant_calling_pipeline_topmed_freeze3_calling_29 baseCommand: [] inputs: - id: bam_cram_file type: File - inputBinding: - position: 1 - shellQuote: false - valueFrom: |- - ${ - return '' - } label: BAM/CRAM Files secondaryFiles: - |- @@ -109,9 +102,7 @@ arguments: } var comm = "cat " - for (var i = 0; i < inputs.index_files.length; i++) { - comm += inputs.index_files[i].path + " "; - } + comm += inputs.index_files.path + " "; comm += "> trio_data.index && " /* for (var i = 0; i < inputs.bam_cram_files.length; i++) { diff --git a/variant-caller/sbg-variant-caller-cwl/steps/verifybamid/verifybamid.cwl b/variant-caller/sbg-variant-caller-cwl/steps/verifybamid/verifybamid.cwl index 989cde5..6287d5e 100755 --- a/variant-caller/sbg-variant-caller-cwl/steps/verifybamid/verifybamid.cwl +++ b/variant-caller/sbg-variant-caller-cwl/steps/verifybamid/verifybamid.cwl @@ -1,16 +1,10 @@ class: CommandLineTool cwlVersion: v1.0 -id: vladimir_obucina/topmed-freeze-3a-variant-calling-pipeline/verifybamid_cwl1/10 +id: vladimir_obucina_topmed_freeze_3a_variant_calling_pipeline_verifybamid_cwl1_10 baseCommand: [] inputs: - id: bam_cram_file type: File - inputBinding: - position: 1 - valueFrom: |- - ${ - return '' - } label: BAM/CRAM Files doc: Bam or Cram file for the sample secondaryFiles: @@ -20,15 +14,6 @@ inputs: } - id: reference type: File - inputBinding: - position: 1 - shellQuote: false - valueFrom: |- - ${ - - return "" - - } label: Reference doc: Reference file secondaryFiles: @@ -210,4 +195,3 @@ requirements: }; $namespaces: sbg: 'https://sevenbridges.com' - diff --git a/variant-caller/sbg-variant-caller-cwl/topmed_freeze3_calling.json b/variant-caller/sbg-variant-caller-cwl/topmed_freeze3_calling.json index b91012f..c70dbbd 100755 --- a/variant-caller/sbg-variant-caller-cwl/topmed_freeze3_calling.json +++ b/variant-caller/sbg-variant-caller-cwl/topmed_freeze3_calling.json @@ -1,31 +1,31 @@ { - "bam_cram_file": { - "class": "File", - "path": "gs://topmed_workflow_testing/topmed_variant_caller/input_files/NWD176325.recab.cram", - "secondaryFiles": [ - { - "class": "File", - "path": "gs://topmed_workflow_testing/topmed_variant_caller/input_files/NWD176325.recab.cram.crai" - } - ] - }, - "reference": { - "class": "File", - "path": "gs://topmed_workflow_testing/topmed_variant_caller/reference_files/hg38/hs38DH.fa", - "secondaryFiles" : [ - { - "path" : "gs://topmed_workflow_testing/topmed_variant_caller/reference_files/hg38/hs38DH.fa.fai", - "class" : "File" - } - ] - }, - "reference_genome": "hg38", - "chromosomes": ["20"], - "discover_unit": 20000000, - "genotype_unit": 1000000, - "num_of_jobs": 4, - "reference_file":{ - "class": "File", - "path": "gs://topmed_workflow_testing/topmed_variant_caller/reference_files/hg38/hs38DH-db142-v1.tgz" - } + "bam_cram_file": { + "class": "File", + "path": "gs://topmed_workflow_testing/topmed_variant_caller/input_files/NWD176325.recab.cram", + "secondaryFiles": [ + { + "class": "File", + "path": "gs://topmed_workflow_testing/topmed_variant_caller/input_files/NWD176325.recab.cram.crai" + } + ] + }, + "reference": { + "class": "File", + "path": "gs://topmed_workflow_testing/topmed_variant_caller/reference_files/hg38/hs38DH.fa", + "secondaryFiles" : [ + { + "path" : "gs://topmed_workflow_testing/topmed_variant_caller/reference_files/hg38/hs38DH.fa.fai", + "class" : "File" + } + ] + }, + "reference_genome": "hg38", + "chromosomes": ["20"], + "discover_unit": 20000000, + "genotype_unit": 1000000, + "num_of_jobs": 4, + "reference_file":{ + "class": "File", + "path": "gs://topmed_workflow_testing/topmed_variant_caller/reference_files/hg38/hs38DH-db142-v1.tgz" + } } diff --git a/variant-caller/sbg-variant-caller-cwl/topmed_variant_calling_pipeline.cwl b/variant-caller/sbg-variant-caller-cwl/topmed_variant_calling_pipeline.cwl index 96a3f5d..ea905b4 100755 --- a/variant-caller/sbg-variant-caller-cwl/topmed_variant_calling_pipeline.cwl +++ b/variant-caller/sbg-variant-caller-cwl/topmed_variant_calling_pipeline.cwl @@ -78,7 +78,7 @@ outputs: outputSource: - topmed_freeze3_calling/vcf_output 'sbg:fileTypes': GZ - type: 'File[]?' + type: File? 'sbg:x': 421.19287109375 'sbg:y': -622.8525390625 - id: vcf_index_output diff --git a/variant-caller/variant-caller-wdl-checker/topmed_freeze3_calling_checker.wdl b/variant-caller/variant-caller-wdl-checker/topmed_freeze3_calling_checker.wdl index 46cfc52..784149b 100644 --- a/variant-caller/variant-caller-wdl-checker/topmed_freeze3_calling_checker.wdl +++ b/variant-caller/variant-caller-wdl-checker/topmed_freeze3_calling_checker.wdl @@ -1,5 +1,5 @@ -import "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.22.0/variant-caller/variant-caller-wdl/topmed_freeze3_calling.wdl" as TopMed_variantcaller -import "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.22.0/variant-caller/variant-caller-wdl-checker/topmed-variantcaller-checker.wdl" as checker +import "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.23.0/variant-caller/variant-caller-wdl/topmed_freeze3_calling.wdl" as TopMed_variantcaller +import "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.23.0/variant-caller/variant-caller-wdl-checker/topmed-variantcaller-checker.wdl" as checker workflow checkerWorkflow { File inputTruthVCFFile diff --git a/variant-caller/variant-caller-wdl/calculate_contamination.wdl b/variant-caller/variant-caller-wdl/calculate_contamination.wdl index bb756b6..7a56d59 100644 --- a/variant-caller/variant-caller-wdl/calculate_contamination.wdl +++ b/variant-caller/variant-caller-wdl/calculate_contamination.wdl @@ -10,6 +10,10 @@ workflow calulateDNAContamination { File ref_fasta File ref_fasta_index + Int? CalcContamination_CPUs + Int CalcContamination_CPUs_default = select_first([CalcContamination_CPUs, 2]) + + # Optional input to increase all disk sizes in case of outlier sample with strange size behavior Int? increase_disk_size @@ -35,6 +39,7 @@ workflow calulateDNAContamination { reference_genome = reference_genome, + CalcContamination_CPUs_default = CalcContamination_CPUs_default, disk_size = cram_size + reference_size + + additional_disk, docker_image = docker_image @@ -56,6 +61,7 @@ workflow calulateDNAContamination { String reference_genome + Int CalcContamination_CPUs_default Float disk_size String docker_image @@ -107,7 +113,7 @@ workflow calulateDNAContamination { } runtime { memory: "10 GB" - cpu: "32" + cpu: sub(CalcContamination_CPUs_default, "\\..*", "") disks: "local-disk " + sub(disk_size, "\\..*", "") + " HDD" zones: "us-central1-a us-central1-b us-east1-d us-central1-c us-central1-f us-east1-c" docker: docker_image diff --git a/variant-caller/variant-caller-wdl/topmed_freeze3_calling.wdl b/variant-caller/variant-caller-wdl/topmed_freeze3_calling.wdl index 7c73206..54d8cc4 100644 --- a/variant-caller/variant-caller-wdl/topmed_freeze3_calling.wdl +++ b/variant-caller/variant-caller-wdl/topmed_freeze3_calling.wdl @@ -1,4 +1,4 @@ -import "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.22.0/variant-caller/variant-caller-wdl/calculate_contamination.wdl" as getDNAContamination +import "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.23.0/variant-caller/variant-caller-wdl/calculate_contamination.wdl" as getDNAContamination ## This is the U of Michigan variant caller workflow WDL for the workflow code located here: ## https://github.com/statgen/topmed_freeze3_calling @@ -17,6 +17,14 @@ workflow TopMedVariantCaller { Boolean? calculate_DNA_contamination Boolean calculate_contamination = select_first([calculate_DNA_contamination, true]) + Int? SumCRAMs_CPUs + Int SumCRAMs_CPUs_default = select_first([SumCRAMs_CPUs, 2]) + + Int? CalcContamination_CPUs + Int CalcContamination_CPUs_default = select_first([CalcContamination_CPUs, 2]) + + Int? VariantCaller_CPUs + Int VariantCaller_CPUs_default = select_first([VariantCaller_CPUs, 32]) Array[File] input_crai_files Array[File] input_cram_files @@ -157,6 +165,7 @@ workflow TopMedVariantCaller { input_crams = input_cram_files, input_crais = input_crai_files, disk_size = reference_size + additional_disk, + SumCRAMs_CPUs_default = SumCRAMs_CPUs_default, docker_image = docker_image } @@ -169,7 +178,9 @@ workflow TopMedVariantCaller { input_crai_file = cram_or_crai_file.right, ref_fasta = ref_hs38DH_fa, - ref_fasta_index = ref_hs38DH_fa_fai + ref_fasta_index = ref_hs38DH_fa_fai, + + CalcContamination_CPUs = CalcContamination_CPUs_default } } @@ -187,6 +198,7 @@ workflow TopMedVariantCaller { input_crais = input_crai_files, input_crams = input_cram_files, disk_size = sumCRAMSizes.total_size + reference_size + additional_disk, + VariantCaller_CPUs_default = VariantCaller_CPUs_default, docker_image = docker_image, ref_1000G_omni2_5_b38_sites_PASS_vcf_gz = ref_1000G_omni2_5_b38_sites_PASS_vcf_gz, @@ -259,6 +271,7 @@ workflow TopMedVariantCaller { task sumCRAMSizes { Array[File] input_crams Array[File] input_crais + Int SumCRAMs_CPUs_default Float disk_size String docker_image @@ -290,7 +303,7 @@ workflow TopMedVariantCaller { } runtime { memory: "10 GB" - cpu: "16" + cpu: sub(SumCRAMs_CPUs_default, "\\..*", "") disks: "local-disk " + sub(disk_size, "\\..*", "") + " HDD" zones: "us-central1-a us-central1-b us-east1-d us-central1-c us-central1-f us-east1-c" docker: docker_image @@ -320,6 +333,7 @@ workflow TopMedVariantCaller { Array[File] input_crams Float disk_size + Int VariantCaller_CPUs_default String docker_image File ref_1000G_omni2_5_b38_sites_PASS_vcf_gz @@ -634,7 +648,7 @@ workflow TopMedVariantCaller { } runtime { memory: "10 GB" - cpu: "16" + cpu: sub(VariantCaller_CPUs_default, "\\..*", "") disks: "local-disk " + sub(disk_size, "\\..*", "") + " HDD" zones: "us-central1-a us-central1-b us-east1-d us-central1-c us-central1-f us-east1-c" docker: docker_image