diff --git a/README.md b/README.md
index 3dc28bb3..fcdb85a5 100644
--- a/README.md
+++ b/README.md
@@ -205,8 +205,8 @@ These files will be output for each sample defined in the cohort.
| Array[File] | sample_hiphase_blocks | Phase block list written by [HiPhase](https://github.com/PacificBiosciences/HiPhase/blob/main/docs/user_guide.md#phase-block-file---blocks-file) | |
| Array[File] | sample_hiphase_haplotags | Per-read haplotag information, written by [HiPhase](https://github.com/PacificBiosciences/HiPhase/blob/main/docs/user_guide.md#haplotag-file---haplotag-file) | |
| Array[[IndexData](https://github.com/PacificBiosciences/wdl-common/blob/main/wdl/structs.wdl)] | merged_haplotagged_bam | Aligned (by [pbmm2](https://github.com/PacificBiosciences/pbmm2)), haplotagged (by [HiPhase](https://github.com/PacificBiosciences/HiPhase/blob/main/docs/user_guide.md#haplotagged-bam-files)) reads (with index) | |
-| Array[File] | haplotagged_bam_mosdepth_summary | [mosdepth](https://github.com/brentp/mosdepth) summary of median depths per chromosome | |
-| Array[File] | haplotagged_bam_mosdepth_region_bed | [mosdepth](https://github.com/brentp/mosdepth) BED of median coverage depth per 500 bp window | |
+| Array[File] | mosdepth_summary | [mosdepth](https://github.com/brentp/mosdepth) summary of median depths per chromosome | |
+| Array[File] | mosdepth_region_bed | mosdepth BED of median coverage depth per 500 bp window | |
| Array[[IndexData](https://github.com/PacificBiosciences/wdl-common/blob/main/wdl/structs.wdl)] | trgt_repeat_vcf | Tandem repeat genotypes from [TRGT](https://github.com/PacificBiosciences/trgt/blob/main/docs/vcf_files.md) (with index) | |
| Array[[IndexData](https://github.com/PacificBiosciences/wdl-common/blob/main/wdl/structs.wdl)] | trgt_spanning_reads | Fragments of HiFi reads spanning loci genotyped by TRGT (with index) | |
| Array[File] | trgt_dropouts | Regions with insufficient coverage for TRGT genotyping | |
@@ -259,7 +259,7 @@ The Docker image used by a particular step of the workflow can be identified by
| deepvariant | User-defined; default is version [1.6.0](https://github.com/google/deepvariant/releases/tag/v1.6.0) | [DeepVariant GitHub](https://github.com/google/deepvariant) |
| glnexus | - [glnexus v1.4.3](https://github.com/dnanexus-rnd/GLnexus/releases/tag/v1.4.3) | [GLnexus GitHub](https://github.com/dnanexus-rnd/GLnexus) |
| hificnv | - [HiFiCNV v0.1.7](https://github.com/PacificBiosciences/HiFiCNV/releases/tag/v0.1.7)<br>- [bcftools 1.16](https://github.com/samtools/bcftools/releases/tag/1.16) | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/0b0fbe939648087e9fdea4497ae08dc76538ebf0/docker/hificnv) |
-| hiphase | - [HiPhase 1.0.0](https://github.com/PacificBiosciences/HiPhase/releases/tag/v1.0.0)<br>- [samtools 1.18](https://github.com/samtools/samtools/releases/tag/1.18)<br>- [bcftools 1.18](https://github.com/samtools/bcftools/releases/tag/1.18) | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/d26db6204409dfeff56e169cdba0cc14bc272f15/docker/hiphase) |
+| hiphase | - [HiPhase 1.1.0](https://github.com/PacificBiosciences/HiPhase/releases/tag/v1.1.0)<br>- [samtools 1.18](https://github.com/samtools/samtools/releases/tag/1.18)<br>- [bcftools 1.18](https://github.com/samtools/bcftools/releases/tag/1.18) | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/02f2bae9d10504c587990995fba3aa7335f910f8/docker/hiphase) |
| htslib | - [htslib 1.14](https://github.com/samtools/htslib/releases/tag/1.14) | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/3560fcc5a84e044067cea9c9a7669cfc2659178e/docker/htslib) |
| mosdepth | - [mosdepth 0.2.9](https://github.com/brentp/mosdepth/releases/tag/v0.2.9) | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/3560fcc5a84e044067cea9c9a7669cfc2659178e/docker/mosdepth) |
| paraphase | - [minimap2 2.26](https://github.com/lh3/minimap2/releases/tag/v2.26)<br>- [samtools 1.18](https://github.com/samtools/samtools/releases/tag/1.18)<br>- [paraphase 3.0.0](https://github.com/PacificBiosciences/paraphase) | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/4f510e5f434cc138577853f56558b90e059fd770/docker/paraphase) |
@@ -270,7 +270,7 @@ The Docker image used by a particular step of the workflow can be identified by
| samtools | - [samtools 1.14](https://github.com/samtools/samtools/releases/tag/1.14) | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/3560fcc5a84e044067cea9c9a7669cfc2659178e/docker/samtools) |
| slivar | - [slivar 0.2.2](https://github.com/brentp/slivar/releases/tag/v0.2.2)<br>- [bcftools 1.14](https://github.com/samtools/bcftools/releases/tag/1.14)<br>- [vcfpy 0.13.3](https://github.com/bihealth/vcfpy/releases/tag/v0.13.3)<br>- [pysam 0.19.1](https://github.com/pysam-developers/pysam/releases/tag/v0.19.1) | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/3560fcc5a84e044067cea9c9a7669cfc2659178e/docker/slivar) |
| svpack | - [svpack 36180ae6](https://github.com/PacificBiosciences/svpack/tree/a82598ebc4013bf32e70295b83b380ada6302c4a)<br>- [htslib 1.18](https://github.com/samtools/htslib/releases/tag/1.18)<br>- [pysam 0.21.0](https://github.com/pysam-developers/pysam/releases/tag/v0.21.0) | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/8edbc516abc0ff43ac279b48018003923721b054/docker/svpack) |
-| trgt | - [trgt 0.8.0](https://github.com/PacificBiosciences/trgt/releases/tag/v0.7.0)<br>- [samtools 1.18](https://github.com/samtools/samtools/releases/tag/1.18)<br>- [bcftools 1.18](https://github.com/samtools/bcftools/releases/tag/1.18)<br>- [pysam 0.21.0](https://github.com/pysam-developers/pysam/releases/tag/v0.21.0) | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/3bbc033f8f942b10a304b4fa907957a789c73ef7/docker/trgt) |
+| trgt | - [trgt 0.8.0](https://github.com/PacificBiosciences/trgt/releases/tag/v0.8.0)<br>- [samtools 1.18](https://github.com/samtools/samtools/releases/tag/1.18)<br>- [bcftools 1.18](https://github.com/samtools/bcftools/releases/tag/1.18)<br>- [pysam 0.21.0](https://github.com/pysam-developers/pysam/releases/tag/v0.21.0) | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/3bbc033f8f942b10a304b4fa907957a789c73ef7/docker/trgt) |
---
diff --git a/wdl-ci.config.json b/wdl-ci.config.json
index 179ef52d..81c3da87 100644
--- a/wdl-ci.config.json
+++ b/wdl-ci.config.json
@@ -348,7 +348,7 @@
"tasks": {
"pbsv_discover": {
"key": "pbsv_discover",
- "digest": "lbv7nwockw3wcbkfvapzoc2wv7fcodnw",
+ "digest": "winvogvhhjzlbxhsknadxpvwfj6f37wm",
"tests": [
{
"inputs": {
@@ -654,7 +654,7 @@
},
"deepvariant_postprocess_variants": {
"key": "deepvariant_postprocess_variants",
- "digest": "ey7zqpajaeesvsg372rehhjmkpqld2qx",
+ "digest": "nxh4wiylfi7ngy6cxe65ovrzrwdoutja",
"tests": [
{
"inputs": {
@@ -881,7 +881,7 @@
"tasks": {
"run_hiphase": {
"key": "run_hiphase",
- "digest": "6k2rtel3k6747xhcfnjufqfzgcnb7g5v",
+ "digest": "6qxqz7p56asf3z6f6cqwebdsd57fs4lh",
"tests": [
{
"inputs": {
diff --git a/workflows/main.wdl b/workflows/main.wdl
index 8197c567..1a8272a3 100644
--- a/workflows/main.wdl
+++ b/workflows/main.wdl
@@ -115,8 +115,8 @@ workflow humanwgs {
Array[File] sample_hiphase_blocks = sample_analysis.hiphase_blocks
Array[File] sample_hiphase_haplotags = sample_analysis.hiphase_haplotags
Array[IndexData] merged_haplotagged_bam = sample_analysis.merged_haplotagged_bam
- Array[File] haplotagged_bam_mosdepth_summary = sample_analysis.haplotagged_bam_mosdepth_summary
- Array[File] haplotagged_bam_mosdepth_region_bed = sample_analysis.haplotagged_bam_mosdepth_region_bed
+ Array[File] mosdepth_summary = sample_analysis.mosdepth_summary
+ Array[File] mosdepth_region_bed = sample_analysis.mosdepth_region_bed
# per sample trgt outputs
Array[IndexData] trgt_spanning_reads = sample_analysis.trgt_spanning_reads
diff --git a/workflows/sample_analysis/sample_analysis.wdl b/workflows/sample_analysis/sample_analysis.wdl
index 4f62c82f..3d859910 100644
--- a/workflows/sample_analysis/sample_analysis.wdl
+++ b/workflows/sample_analysis/sample_analysis.wdl
@@ -49,27 +49,8 @@ workflow sample_analysis {
}
}
- call DeepVariant.deepvariant {
- input:
- sample_id = sample.sample_id,
- aligned_bams = aligned_bam,
- reference_fasta = reference.fasta,
- reference_name = reference.name,
- deepvariant_version = deepvariant_version,
- custom_deepvariant_model_tar = custom_deepvariant_model_tar,
- default_runtime_attributes = default_runtime_attributes
- }
-
- call bcftools {
- input:
- vcf = deepvariant.vcf.data,
- stats_params = "--apply-filters PASS --samples ~{sample.sample_id}",
- reference = reference.fasta.data,
- runtime_attributes = default_runtime_attributes
- }
-
scatter (shard_index in range(length(pbsv_splits))) {
- Array[String] region_set = pbsv_splits[shard_index]
+ Array[String] region_set = pbsv_splits[shard_index]
call PbsvCall.pbsv_call {
input:
@@ -84,6 +65,7 @@ workflow sample_analysis {
}
}
+ # concatenate pbsv vcfs
call ConcatVcf.concat_vcf {
input:
vcfs = pbsv_call.pbsv_vcf,
@@ -92,45 +74,46 @@ workflow sample_analysis {
runtime_attributes = default_runtime_attributes
}
- IndexData zipped_pbsv_vcf = {
- "data": concat_vcf.concatenated_vcf,
- "data_index": concat_vcf.concatenated_vcf_index
- }
-
- call HiPhase.hiphase {
- # vcfs order: small variants, SVs
- input:
- id = sample.sample_id,
- refname = reference.name,
- sample_ids = [sample.sample_id],
- vcfs = [deepvariant.vcf, zipped_pbsv_vcf],
- bams = aligned_bam,
- haplotag = true,
- reference_fasta = reference.fasta,
- default_runtime_attributes = default_runtime_attributes
- }
-
- # merge haplotagged bams if there are multiple
- if (length(hiphase.haplotagged_bams) > 1) {
- scatter (bam_object in hiphase.haplotagged_bams) {
+ # merge aligned bams if there are multiple
+ if (length(aligned_bam) > 1) {
+ scatter (bam_object in aligned_bam) {
File bam_to_merge = bam_object.data
}
call merge_bams {
input:
bams = bam_to_merge,
- output_bam_name = "~{sample.sample_id}.~{reference.name}.haplotagged.bam",
+ output_bam_name = "~{sample.sample_id}.~{reference.name}.bam",
runtime_attributes = default_runtime_attributes
}
}
- # select the merged bam if it exists, otherwise select the first (only) haplotagged bam
- File haplotagged_bam = select_first([merge_bams.merged_bam, hiphase.haplotagged_bams[0].data])
- File haplotagged_bam_index = select_first([merge_bams.merged_bam_index, hiphase.haplotagged_bams[0].data_index])
+ # select the merged bam if it exists, otherwise select the first (only) aligned bam
+ File aligned_bam_data = select_first([merge_bams.merged_bam, aligned_bam[0].data])
+ File aligned_bam_index = select_first([merge_bams.merged_bam_index, aligned_bam[0].data_index])
call Mosdepth.mosdepth {
input:
- aligned_bam = haplotagged_bam,
- aligned_bam_index = haplotagged_bam_index,
+ aligned_bam = aligned_bam_data,
+ aligned_bam_index = aligned_bam_index,
+ runtime_attributes = default_runtime_attributes
+ }
+
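+	# call small variants with DeepVariant from the per-movie aligned BAMs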
+ call DeepVariant.deepvariant {
+ input:
+ sample_id = sample.sample_id,
+ aligned_bams = aligned_bam,
+ reference_fasta = reference.fasta,
+ reference_name = reference.name,
+ deepvariant_version = deepvariant_version,
+ custom_deepvariant_model_tar = custom_deepvariant_model_tar,
+ default_runtime_attributes = default_runtime_attributes
+ }
+
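+	# small variant stats and runs of homozygosity with bcftools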
+ call bcftools {
+ input:
+ vcf = deepvariant.vcf.data,
+ stats_params = "--apply-filters PASS --samples ~{sample.sample_id}",
+ reference = reference.fasta.data,
runtime_attributes = default_runtime_attributes
}
@@ -138,18 +121,41 @@ workflow sample_analysis {
input:
sample_id = sample.sample_id,
sex = sample.sex,
- bam = haplotagged_bam,
- bam_index = haplotagged_bam_index,
+ bam = aligned_bam_data,
+ bam_index = aligned_bam_index,
reference = reference.fasta.data,
reference_index = reference.fasta.data_index,
tandem_repeat_bed = reference.trgt_tandem_repeat_bed,
+ output_prefix = "~{sample.sample_id}.~{reference.name}",
runtime_attributes = default_runtime_attributes
}
+ call HiPhase.hiphase {
+ # vcfs order: small variants, SVs, TRGT
+ input:
+ id = sample.sample_id,
+ refname = reference.name,
+ sample_ids = [sample.sample_id],
+ vcfs = [
+ deepvariant.vcf,
+ {"data": concat_vcf.concatenated_vcf, "data_index": concat_vcf.concatenated_vcf_index},
+ {"data": trgt.repeat_vcf, "data_index": trgt.repeat_vcf_index}
+ ],
+ bams = [{"data": aligned_bam_data, "data_index": aligned_bam_index}],
+ haplotag = true,
+ reference_fasta = reference.fasta,
+ default_runtime_attributes = default_runtime_attributes
+ }
+
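+	# a single merged BAM is passed to HiPhase, so exactly one haplotagged BAM is returned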
+ IndexData haplotagged_bam = {
+ "data": hiphase.haplotagged_bams[0].data,
+ "data_index": hiphase.haplotagged_bams[0].data_index
+ }
+
call coverage_dropouts {
input:
- bam = haplotagged_bam,
- bam_index = haplotagged_bam_index,
+ bam = haplotagged_bam.data,
+ bam_index = haplotagged_bam.data_index,
tandem_repeat_bed = reference.trgt_tandem_repeat_bed,
output_prefix = "~{sample.sample_id}.~{reference.name}",
runtime_attributes = default_runtime_attributes
@@ -157,8 +163,8 @@ workflow sample_analysis {
call cpg_pileup {
input:
- bam = haplotagged_bam,
- bam_index = haplotagged_bam_index,
+ bam = haplotagged_bam.data,
+ bam_index = haplotagged_bam.data_index,
output_prefix = "~{sample.sample_id}.~{reference.name}",
reference = reference.fasta.data,
reference_index = reference.fasta.data_index,
@@ -168,8 +174,8 @@ workflow sample_analysis {
call paraphase {
input:
sample_id = sample.sample_id,
- bam = haplotagged_bam,
- bam_index = haplotagged_bam_index,
+ bam = haplotagged_bam.data,
+ bam_index = haplotagged_bam.data_index,
reference = reference.fasta.data,
reference_index = reference.fasta.data_index,
out_directory = "~{sample.sample_id}.paraphase",
@@ -180,8 +186,8 @@ workflow sample_analysis {
input:
sample_id = sample.sample_id,
sex = sample.sex,
- bam = haplotagged_bam,
- bam_index = haplotagged_bam_index,
+ bam = haplotagged_bam.data,
+ bam_index = haplotagged_bam.data_index,
phased_vcf = hiphase.phased_vcfs[0].data,
phased_vcf_index = hiphase.phased_vcfs[0].data_index,
reference = reference.fasta.data,
@@ -195,33 +201,36 @@ workflow sample_analysis {
}
output {
- # per movie stats, alignments, and svsigs
+		# per movie stats and alignments
Array[File] bam_stats = pbmm2_align.bam_stats
Array[File] read_length_summary = pbmm2_align.read_length_summary
Array[File] read_quality_summary = pbmm2_align.read_quality_summary
Array[IndexData] aligned_bams = aligned_bam
+
+		# phased_vcfs output order from HiPhase: small variants, SVs, TRGT
+
+ # per sample structural variant signatures and calls
+ IndexData phased_sv_vcf = hiphase.phased_vcfs[1]
Array[File] svsigs = pbsv_discover.svsig
# per sample small variant calls
+ IndexData phased_small_variant_vcf = hiphase.phased_vcfs[0]
IndexData small_variant_gvcf = deepvariant.gvcf
File small_variant_vcf_stats = bcftools.stats
File small_variant_roh_out = bcftools.roh_out
File small_variant_roh_bed = bcftools.roh_bed
- # per sample final phased variant calls and haplotagged alignments
- # phased_vcfs order: small variants, SVs
- IndexData phased_small_variant_vcf = hiphase.phased_vcfs[0]
- IndexData phased_sv_vcf = hiphase.phased_vcfs[1]
+ # per sample phasing stats and haplotagged alignments
File hiphase_stats = hiphase.hiphase_stats
File hiphase_blocks = hiphase.hiphase_blocks
File hiphase_haplotags = select_first([hiphase.hiphase_haplotags])
- IndexData merged_haplotagged_bam = {"data": haplotagged_bam, "data_index": haplotagged_bam_index}
- File haplotagged_bam_mosdepth_summary = mosdepth.summary
- File haplotagged_bam_mosdepth_region_bed = mosdepth.region_bed
+ IndexData merged_haplotagged_bam = haplotagged_bam
+ File mosdepth_summary = mosdepth.summary
+ File mosdepth_region_bed = mosdepth.region_bed
# per sample trgt outputs
+ IndexData trgt_repeat_vcf = hiphase.phased_vcfs[2]
IndexData trgt_spanning_reads = {"data": trgt.spanning_reads, "data_index": trgt.spanning_reads_index}
- IndexData trgt_repeat_vcf = {"data": trgt.repeat_vcf, "data_index": trgt.repeat_vcf_index}
File trgt_dropouts = coverage_dropouts.trgt_dropouts
# per sample cpg outputs
@@ -446,12 +455,13 @@ task trgt {
File reference_index
File tandem_repeat_bed
+ String output_prefix
+
RuntimeAttributes runtime_attributes
}
Boolean sex_defined = defined(sex)
String karyotype = if select_first([sex, "FEMALE"]) == "MALE" then "XY" else "XX"
- String bam_basename = basename(bam, ".bam")
Int threads = 4
Int disk_size = ceil((size(bam, "GB") + size(reference, "GB")) * 2 + 20)
@@ -468,37 +478,37 @@ task trgt {
--genome ~{reference} \
--repeats ~{tandem_repeat_bed} \
--reads ~{bam} \
- --output-prefix ~{bam_basename}.trgt
+ --output-prefix ~{output_prefix}.trgt
bcftools --version
bcftools sort \
--output-type z \
- --output ~{bam_basename}.trgt.sorted.vcf.gz \
- ~{bam_basename}.trgt.vcf.gz
+ --output ~{output_prefix}.trgt.sorted.vcf.gz \
+ ~{output_prefix}.trgt.vcf.gz
bcftools index \
--threads ~{threads - 1} \
--tbi \
- ~{bam_basename}.trgt.sorted.vcf.gz
+ ~{output_prefix}.trgt.sorted.vcf.gz
samtools --version
samtools sort \
-@ ~{threads - 1} \
- -o ~{bam_basename}.trgt.spanning.sorted.bam \
- ~{bam_basename}.trgt.spanning.bam
+ -o ~{output_prefix}.trgt.spanning.sorted.bam \
+ ~{output_prefix}.trgt.spanning.bam
samtools index \
-@ ~{threads - 1} \
- ~{bam_basename}.trgt.spanning.sorted.bam
+ ~{output_prefix}.trgt.spanning.sorted.bam
>>>
output {
- File spanning_reads = "~{bam_basename}.trgt.spanning.sorted.bam"
- File spanning_reads_index = "~{bam_basename}.trgt.spanning.sorted.bam.bai"
- File repeat_vcf = "~{bam_basename}.trgt.sorted.vcf.gz"
- File repeat_vcf_index = "~{bam_basename}.trgt.sorted.vcf.gz.tbi"
+ File spanning_reads = "~{output_prefix}.trgt.spanning.sorted.bam"
+ File spanning_reads_index = "~{output_prefix}.trgt.spanning.sorted.bam.bai"
+ File repeat_vcf = "~{output_prefix}.trgt.sorted.vcf.gz"
+ File repeat_vcf_index = "~{output_prefix}.trgt.sorted.vcf.gz.tbi"
}
runtime {
diff --git a/workflows/wdl-common b/workflows/wdl-common
index ec28a7f6..2523b4d2 160000
--- a/workflows/wdl-common
+++ b/workflows/wdl-common
@@ -1 +1 @@
-Subproject commit ec28a7f674fc382aa13d1b16cc1f4027b695464a
+Subproject commit 2523b4d2703859db55eebaa2497ac24a324178cf