Skip to content

Commit

Permalink
Fixing symbolic decomposition to BND
Browse files Browse the repository at this point in the history
Always assumes DUPs are tandem, also fixed formatting error in DEL
  • Loading branch information
ACEnglish committed Jan 9, 2025
1 parent 66b382c commit 9838f76
Show file tree
Hide file tree
Showing 12 changed files with 70 additions and 63 deletions.
Binary file modified repo_utils/answer_key/bench/bench_bnd_decomp/fn.vcf.gz
Binary file not shown.
Binary file modified repo_utils/answer_key/bench/bench_bnd_decomp/fn.vcf.gz.tbi
Binary file not shown.
Binary file modified repo_utils/answer_key/bench/bench_bnd_decomp/fp.vcf.gz
Binary file not shown.
Binary file modified repo_utils/answer_key/bench/bench_bnd_decomp/fp.vcf.gz.tbi
Binary file not shown.
47 changes: 22 additions & 25 deletions repo_utils/answer_key/bench/bench_bnd_decomp/log.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
2025-01-08 20:17:40,376 [INFO] Truvari v5.0.0
2025-01-08 20:17:40,377 [INFO] Command /Users/english/code/truvari/truvari/__main__.py bench -b repo_utils/test_files/variants/bnd.base.vcf.gz -c repo_utils/test_files/variants/bnd.comp2.vcf.gz --sizemax 1000000000 -p 0 --pick multi -o test_results/bench_bnd_decomp/
2025-01-08 20:17:40,377 [INFO] Params:
2025-01-09 16:25:00,733 [INFO] Truvari v5.0.0
2025-01-09 16:25:00,733 [INFO] Command /data/truvari/__main__.py bench -b repo_utils/test_files/variants/bnd.base.vcf.gz -c repo_utils/test_files/variants/bnd.comp2.vcf.gz --sizemax 1000000000 -p 0 --pick multi -o test_results/bench_bnd_decomp/
2025-01-09 16:25:00,734 [INFO] Params:
{
"base": "/Users/english/code/truvari/repo_utils/test_files/variants/bnd.base.vcf.gz",
"comp": "/Users/english/code/truvari/repo_utils/test_files/variants/bnd.comp2.vcf.gz",
"base": "/data/repo_utils/test_files/variants/bnd.base.vcf.gz",
"comp": "/data/repo_utils/test_files/variants/bnd.comp2.vcf.gz",
"output": "test_results/bench_bnd_decomp/",
"includebed": null,
"extend": 0,
Expand Down Expand Up @@ -35,33 +35,30 @@
"short_circuit": false,
"skip_gt": false
}
2025-01-08 20:17:40,408 [WARNING] 193 contigs present in comparison VCF header are not in baseline VCF.
2025-01-08 20:17:45,964 [INFO] Zipped 30098 variants Counter({'comp': 29902, 'base': 196})
2025-01-08 20:17:45,965 [INFO] 20659 chunks of 30098 variants Counter({'comp': 29902, 'base': 186, '__filtered': 10})
2025-01-08 20:17:49,303 [INFO] Stats: {
"TP-base": 144,
"TP-comp": 126,
"FP": 28404,
"FN": 42,
"precision": 0.004416403785488959,
"recall": 0.7741935483870968,
"f1": 0.008782706432809682,
2025-01-09 16:25:00,783 [WARNING] 193 contigs present in comparison VCF header are not in baseline VCF.
2025-01-09 16:25:12,483 [INFO] Zipped 30098 variants Counter({'comp': 29902, 'base': 196})
2025-01-09 16:25:12,483 [INFO] 20659 chunks of 30098 variants Counter({'comp': 29902, 'base': 186, '__filtered': 10})
2025-01-09 16:25:15,625 [INFO] Stats: {
"TP-base": 140,
"TP-comp": 124,
"FP": 28406,
"FN": 46,
"precision": 0.0043463021381002455,
"recall": 0.7526881720430108,
"f1": 0.008642698114935937,
"base cnt": 186,
"comp cnt": 28530,
"TP-comp_TP-gt": 1,
"TP-comp_FP-gt": 125,
"TP-comp_FP-gt": 123,
"TP-base_TP-gt": 1,
"TP-base_FP-gt": 143,
"gt_concordance": 0.007936507936507936,
"TP-base_FP-gt": 139,
"gt_concordance": 0.008064516129032258,
"gt_matrix": {
"(0, 1)": {
"(0, 0)": 132,
"(0, 1)": 1
},
"(0, 0, 1)": {
"(0, 0)": 3
},
"(0, 1, 0)": {
"(0, 0)": 2
},
"(0, 0, 0, 1)": {
Expand All @@ -70,12 +67,12 @@
"(0, 0, 1, 0)": {
"(0, 0)": 1
},
"(0, 1, 1)": {
"(0, 0)": 2
"(0, 1, 0)": {
"(0, 0)": 1
},
"(1, 0)": {
"(0, 0)": 2
}
}
}
2025-01-08 20:17:49,303 [INFO] Finished bench
2025-01-09 16:25:15,626 [INFO] Finished bench
2 changes: 1 addition & 1 deletion repo_utils/answer_key/bench/bench_bnd_decomp/params.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"base": "/Users/english/code/truvari/repo_utils/test_files/variants/bnd.base.vcf.gz", "comp": "/Users/english/code/truvari/repo_utils/test_files/variants/bnd.comp2.vcf.gz", "output": "test_results/bench_bnd_decomp/", "includebed": null, "extend": 0, "debug": false, "reference": null, "refdist": 500, "pctseq": 0.0, "pctsize": 0.7, "pctovl": 0.0, "typeignore": false, "no_roll": true, "chunksize": 1000, "bSample": "HG008-T", "cSample": "HG008_NP_PB_germline", "dup_to_ins": false, "bnddist": 100, "sizemin": 50, "sizefilt": 30, "sizemax": 1000000000, "passonly": false, "no_ref": false, "pick": "multi", "ignore_monref": true, "check_multi": true, "check_monref": true, "no_single_bnd": true, "write_resolved": false, "decompose": true, "short_circuit": false, "skip_gt": false}
{"base": "/data/repo_utils/test_files/variants/bnd.base.vcf.gz", "comp": "/data/repo_utils/test_files/variants/bnd.comp2.vcf.gz", "output": "test_results/bench_bnd_decomp/", "includebed": null, "extend": 0, "debug": false, "reference": null, "refdist": 500, "pctseq": 0.0, "pctsize": 0.7, "pctovl": 0.0, "typeignore": false, "no_roll": true, "chunksize": 1000, "bSample": "HG008-T", "cSample": "HG008_NP_PB_germline", "dup_to_ins": false, "bnddist": 100, "sizemin": 50, "sizefilt": 30, "sizemax": 1000000000, "passonly": false, "no_ref": false, "pick": "multi", "ignore_monref": true, "check_multi": true, "check_monref": true, "no_single_bnd": true, "write_resolved": false, "decompose": true, "short_circuit": false, "skip_gt": false}
27 changes: 12 additions & 15 deletions repo_utils/answer_key/bench/bench_bnd_decomp/summary.json
Original file line number Diff line number Diff line change
@@ -1,27 +1,24 @@
{
"TP-base": 144,
"TP-comp": 126,
"FP": 28404,
"FN": 42,
"precision": 0.004416403785488959,
"recall": 0.7741935483870968,
"f1": 0.008782706432809682,
"TP-base": 140,
"TP-comp": 124,
"FP": 28406,
"FN": 46,
"precision": 0.0043463021381002455,
"recall": 0.7526881720430108,
"f1": 0.008642698114935937,
"base cnt": 186,
"comp cnt": 28530,
"TP-comp_TP-gt": 1,
"TP-comp_FP-gt": 125,
"TP-comp_FP-gt": 123,
"TP-base_TP-gt": 1,
"TP-base_FP-gt": 143,
"gt_concordance": 0.007936507936507936,
"TP-base_FP-gt": 139,
"gt_concordance": 0.008064516129032258,
"gt_matrix": {
"(0, 1)": {
"(0, 0)": 132,
"(0, 1)": 1
},
"(0, 0, 1)": {
"(0, 0)": 3
},
"(0, 1, 0)": {
"(0, 0)": 2
},
"(0, 0, 0, 1)": {
Expand All @@ -30,8 +27,8 @@
"(0, 0, 1, 0)": {
"(0, 0)": 1
},
"(0, 1, 1)": {
"(0, 0)": 2
"(0, 1, 0)": {
"(0, 0)": 1
},
"(1, 0)": {
"(0, 0)": 2
Expand Down
Binary file modified repo_utils/answer_key/bench/bench_bnd_decomp/tp-base.vcf.gz
Binary file not shown.
Binary file modified repo_utils/answer_key/bench/bench_bnd_decomp/tp-base.vcf.gz.tbi
Binary file not shown.
Binary file modified repo_utils/answer_key/bench/bench_bnd_decomp/tp-comp.vcf.gz
Binary file not shown.
Binary file modified repo_utils/answer_key/bench/bench_bnd_decomp/tp-comp.vcf.gz.tbi
Binary file not shown.
57 changes: 35 additions & 22 deletions truvari/variant_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,62 +290,75 @@ def cpx_match(self, other):

def decompose(self):
"""
Decompose symbolic DEL, DUP, and INV variants into BNDs
Returns a list of new BND variant records
Stores decomposed variants in self.decomposed
Decompose Symbolic Variants into BND Records
This method decomposes symbolic structural variants (SVs) of `<DEL>`, `<DUP>`, and `<INV>` into
Breakend (BND) records. The decomposed variants are stored internally and returned as a list.
Raises:
ValueError: If the variant is not symbolic.
Returns:
list: A list of new BND variant records.
Notes:
- For `INV` (Inversion) variants, four BND records are created, two at each inversion breakpoint.
- For `DEL` (Deletion) variants, two BND records are created to represent the deletion breakpoints.
- For `DUP` (Duplication) variants, two BND records are created to represent the duplication breakpoints.
- Assumes `DUP` variants are of type `DUP:TANDEM`.
"""
if not self.is_symbolic():
raise ValueError("Can only decompose symbolic variants")
if self.decomp_repr:

# No need to make twice
if self.decomp_repr is not None:
return self.decomp_repr

svtype = self.var_type()
chrom = self.chrom
pos = self.pos
end = self.end
ret = []

if svtype == truvari.SV.INV:
record1 = self.copy()
record1.alts = (f"[{chrom}:{end}[N",)
record1.alts = (f"[{self.chrom}:{self.end}[N",)
record1.info["SVTYPE"] = "BND"

record2 = self.copy()
record2.alts = (f"N]{chrom}:{end}]",)
record2.alts = (f"N]{self.chrom}:{self.end}]",)
record2.info["SVTYPE"] = "BND"

record3 = self.copy()
record3.pos = end
record3.alts = (f"N]{chrom}:{pos}]",)
record3.pos = self.end
record3.alts = (f"N]{self.chrom}:{self.pos}]",)
record3.info["SVTYPE"] = "BND"

record4 = self.copy()
record4.pos = end
record4.alts = (f"[{chrom}:{pos}[N",)
record4.pos = self.end
record4.alts = (f"[{self.chrom}:{self.pos}[N",)
record4.info["SVTYPE"] = "BND"

ret = [record1, record2, record3, record4]

elif svtype == truvari.SV.DEL:
record1 = self.copy()
record1.alts = (f"N]chr{chrom}:{end}]",)
record1.alts = (f"N]{self.chrom}:{self.end}]",)
record1.info["SVTYPE"] = "BND"

record2 = self.copy()
record2.pos = end
record2.alts = (f"[chr{chrom}:{pos}[N",)
record2.pos = self.end
record2.alts = (f"[{self.chrom}:{self.pos}[N",)
record2.info["SVTYPE"] = "BND"

ret = [record1, record2]

elif svtype == truvari.SV.DUP:
# Assumes DUP:TANDEM
record1 = self.copy()
record1.alts = (f"]{chrom}:{end}]N",)
record1.info["SVTYPE"] == "BND"
record1.alts = (f"N[{self.chrom}:{self.end}[",)
record1.info["SVTYPE"] = "BND"

record2 = self.copy()
record2.pos = end
record2.alts = (f"N[{chrom}:{pos}[",)
record2.pos = self.end
record2.alts = (f"]{self.chrom}:{self.pos}]N",)
record2.info["SVTYPE"] = "BND"

ret = [record1, record2]
Expand All @@ -356,7 +369,7 @@ def decompose(self):
def distance(self, other):
"""
Calculate the start and end distances of the pair. Negative distances
indicate entryA is upstream of entryB
indicate self is upstream of other
:param `other`: Other to compare
:type `other`: :class:`truvari.VariantRecord`
Expand Down Expand Up @@ -669,7 +682,7 @@ def resolve(self, ref):

def same_type(self, other, dup_to_ins=False):
"""
Check if entryA svtype == entryB svtype
Check if self.var_type() == other.var_type() with extra handling for dup-to-ins
:param `other`: Other entry to compare with
:type `other`: :class:`truvari.VariantRecord`
Expand Down

0 comments on commit 9838f76

Please sign in to comment.