diff --git a/repo_utils/answer_key/phab/phab_result.vcf.gz b/repo_utils/answer_key/phab/phab_result.vcf.gz index 38ccaf41..64267d56 100644 Binary files a/repo_utils/answer_key/phab/phab_result.vcf.gz and b/repo_utils/answer_key/phab/phab_result.vcf.gz differ diff --git a/repo_utils/answer_key/refine/refine_output_one/log.txt b/repo_utils/answer_key/refine/refine_output_one/log.txt index bfb7ced7..672b1248 100644 --- a/repo_utils/answer_key/refine/refine_output_one/log.txt +++ b/repo_utils/answer_key/refine/refine_output_one/log.txt @@ -1,6 +1,6 @@ -2023-11-01 14:04:28,717 [INFO] Truvari v4.1.1.dev0+a08d9a7.uc -2023-11-01 14:04:28,719 [INFO] Command /data/truvari/__main__.py bench -b repo_utils/test_files/refine_data/hg002_base.vcf.gz -c repo_utils/test_files/refine_data/hg002_comp.vcf.gz --includebed repo_utils/test_files/refine_data/h1_hc_tr_hg002.bed -s 5 -o test_results/refine_output_one -2023-11-01 14:04:28,720 [INFO] Params: +2023-12-19 15:33:50,086 [INFO] Truvari v4.2.0rc1 +2023-12-19 15:33:50,088 [INFO] Command /data/truvari/__main__.py bench -b repo_utils/test_files/refine_data/hg002_base.vcf.gz -c repo_utils/test_files/refine_data/hg002_comp.vcf.gz --includebed repo_utils/test_files/refine_data/h1_hc_tr_hg002.bed -s 5 -o test_results/refine_output_one +2023-12-19 15:33:50,089 [INFO] Params: { "base": "/data/repo_utils/test_files/refine_data/hg002_base.vcf.gz", "comp": "/data/repo_utils/test_files/refine_data/hg002_comp.vcf.gz", @@ -28,10 +28,10 @@ "check_monref": true, "check_multi": true } -2023-11-01 14:04:28,793 [INFO] Including 225 bed regions -2023-11-01 14:04:30,301 [INFO] Zipped 7158 variants Counter({'comp': 5303, 'base': 1855}) -2023-11-01 14:04:30,302 [INFO] 212 chunks of 7158 variants Counter({'__filtered': 6120, 'base': 587, 'comp': 451}) -2023-11-01 14:04:30,471 [INFO] Stats: { +2023-12-19 15:33:50,173 [INFO] Including 225 bed regions +2023-12-19 15:33:51,903 [INFO] Zipped 7158 variants Counter({'comp': 5303, 'base': 1855}) +2023-12-19 15:33:51,904 [INFO] 212 chunks of 7158 variants Counter({'__filtered': 6120, 'base': 587, 'comp': 451}) +2023-12-19 15:33:52,077 [INFO] Stats: { "TP-base": 387, "TP-comp": 387, "FP": 64, @@ -83,4 +83,4 @@ } } } -2023-11-01 14:04:30,472 [INFO] Finished bench +2023-12-19 15:33:52,078 [INFO] Finished bench diff --git a/repo_utils/answer_key/refine/refine_output_one/phab.output.vcf.gz b/repo_utils/answer_key/refine/refine_output_one/phab.output.vcf.gz index 96164456..12e40b35 100644 Binary files a/repo_utils/answer_key/refine/refine_output_one/phab.output.vcf.gz and b/repo_utils/answer_key/refine/refine_output_one/phab.output.vcf.gz differ diff --git a/repo_utils/answer_key/refine/refine_output_one/phab.output.vcf.gz.tbi b/repo_utils/answer_key/refine/refine_output_one/phab.output.vcf.gz.tbi index 1fe966ae..4c6a2607 100644 Binary files a/repo_utils/answer_key/refine/refine_output_one/phab.output.vcf.gz.tbi and b/repo_utils/answer_key/refine/refine_output_one/phab.output.vcf.gz.tbi differ diff --git a/repo_utils/answer_key/refine/refine_output_one/phab_bench/candidate.refine.bed b/repo_utils/answer_key/refine/refine_output_one/phab_bench/candidate.refine.bed index 355b2b4a..da120214 100644 --- a/repo_utils/answer_key/refine/refine_output_one/phab_bench/candidate.refine.bed +++ b/repo_utils/answer_key/refine/refine_output_one/phab_bench/candidate.refine.bed @@ -1,9 +1,5 @@ chr20 5040476 5040477 chr20 5041941 5042433 -chr20 7720952 7720968 chr20 16257854 16259205 -chr20 24682040 24682100 -chr20 51953819 51953820 -chr20 55627638 55628305 -chr20 63028079 63029030 -chr20 64134990 64135242 \ No newline at end of file +chr20 32723044 32723045 +chr20 51953819 51953820 \ No newline at end of file diff --git a/repo_utils/answer_key/refine/refine_output_one/phab_bench/fn.vcf.gz b/repo_utils/answer_key/refine/refine_output_one/phab_bench/fn.vcf.gz index 886198e7..73f87a20 100644 Binary files a/repo_utils/answer_key/refine/refine_output_one/phab_bench/fn.vcf.gz and b/repo_utils/answer_key/refine/refine_output_one/phab_bench/fn.vcf.gz differ diff --git a/repo_utils/answer_key/refine/refine_output_one/phab_bench/fn.vcf.gz.tbi b/repo_utils/answer_key/refine/refine_output_one/phab_bench/fn.vcf.gz.tbi index 594d962f..31e4f39e 100644 Binary files a/repo_utils/answer_key/refine/refine_output_one/phab_bench/fn.vcf.gz.tbi and b/repo_utils/answer_key/refine/refine_output_one/phab_bench/fn.vcf.gz.tbi differ diff --git a/repo_utils/answer_key/refine/refine_output_one/phab_bench/fp.vcf.gz b/repo_utils/answer_key/refine/refine_output_one/phab_bench/fp.vcf.gz index 4da7646d..63d62d0c 100644 Binary files a/repo_utils/answer_key/refine/refine_output_one/phab_bench/fp.vcf.gz and b/repo_utils/answer_key/refine/refine_output_one/phab_bench/fp.vcf.gz differ diff --git a/repo_utils/answer_key/refine/refine_output_one/phab_bench/fp.vcf.gz.tbi b/repo_utils/answer_key/refine/refine_output_one/phab_bench/fp.vcf.gz.tbi index 0c9b564a..f5300778 100644 Binary files a/repo_utils/answer_key/refine/refine_output_one/phab_bench/fp.vcf.gz.tbi and b/repo_utils/answer_key/refine/refine_output_one/phab_bench/fp.vcf.gz.tbi differ diff --git a/repo_utils/answer_key/refine/refine_output_one/phab_bench/params.json b/repo_utils/answer_key/refine/refine_output_one/phab_bench/params.json index 781e66c6..5463076f 100644 --- a/repo_utils/answer_key/refine/refine_output_one/phab_bench/params.json +++ b/repo_utils/answer_key/refine/refine_output_one/phab_bench/params.json @@ -1 +1 @@ -{"base": "test_results/refine_output_one/phab.output.vcf.gz", "comp": "test_results/refine_output_one/phab.output.vcf.gz", "output": "test_results/refine_output_one/phab_bench", "includebed": "/tmp/c7gxuj5a.bed", "extend": 0, "debug": false, "reference": null, "refdist": 500, "pctseq": 0.7, "minhaplen": 50, "pctsize": 0.7, "pctovl": 0.0, "typeignore": false, "chunksize": 1000, "bSample": "syndip", "cSample": "p:HG002", "dup_to_ins": false, "sizemin": 5, "sizefilt": 5, "sizemax": 50000, "passonly": false, "no_ref": "a", "pick": "single", "check_monref": true, "check_multi": true} \ No newline at end of file +{"base": "test_results/refine_output_one/phab.output.vcf.gz", "comp": "test_results/refine_output_one/phab.output.vcf.gz", "output": "test_results/refine_output_one/phab_bench", "includebed": "/tmp/yw_a31jd.bed", "extend": 0, "debug": false, "reference": null, "refdist": 500, "pctseq": 0.7, "minhaplen": 50, "pctsize": 0.7, "pctovl": 0.0, "typeignore": false, "chunksize": 1000, "bSample": "syndip", "cSample": "p:HG002", "dup_to_ins": false, "sizemin": 5, "sizefilt": 5, "sizemax": 50000, "passonly": false, "no_ref": "a", "pick": "single", "check_monref": true, "check_multi": true} \ No newline at end of file diff --git a/repo_utils/answer_key/refine/refine_output_one/phab_bench/summary.json b/repo_utils/answer_key/refine/refine_output_one/phab_bench/summary.json index 0d92566a..e25434c6 100644 --- a/repo_utils/answer_key/refine/refine_output_one/phab_bench/summary.json +++ b/repo_utils/answer_key/refine/refine_output_one/phab_bench/summary.json @@ -1,48 +1,47 @@ { - "TP-base": 379, - "TP-comp": 379, - "FP": 7, - "FN": 3, - "precision": 0.9818652849740933, - "recall": 0.9921465968586387, - "f1": 0.9869791666666666, - "base cnt": 382, - "comp cnt": 386, - "TP-comp_TP-gt": 379, + "TP-base": 381, + "TP-comp": 381, + "FP": 4, + "FN": 2, + "precision": 0.9896103896103896, + "recall": 0.9947780678851175, + "f1": 0.9921875, + "base cnt": 383, + "comp cnt": 385, + "TP-comp_TP-gt": 381, "TP-comp_FP-gt": 0, - "TP-base_TP-gt": 379, + "TP-base_TP-gt": 381, "TP-base_FP-gt": 0, "gt_concordance": 1.0, "gt_matrix": { "(0, 1)": { - "(1, 0)": 143 + "(1, 0)": 145 }, "(1, 0)": { - "(0, 1)": 184, - "(1, 0)": 1 + "(0, 1)": 184 }, "(1, 1)": { - "(1, 1)": 51 + "(1, 1)": 52 } }, "weighted": { "sequence": { - "TP": 384.0702000260353, - "FP": 4.3610000014305115, - "FN": 0.5952999591827393, - "precision": 0.9887727865291919, - "recall": 0.9984524217555106, - "f1": 0.9935890297287107, - "total": 389 + "TP": 382.03810000419617, + "FP": 3.716600000858307, + "FN": 1.2736999988555908, + "precision": 0.9903653798623592, + "recall": 0.9966771176915361, + "f1": 0.9935112243223944, + "total": 387 }, "size": { - "TP": 384.2482999712229, - "FP": 4.497000023722649, - "FN": 0.2547000050544739, - "precision": 0.9884320144223451, - "recall": 0.9993375864295723, - "f1": 0.9938548846095698, - "total": 389 + "TP": 382.14099998772144, + "FP": 3.820500001311302, + "FN": 1.0385000109672546, + "precision": 0.990101344301388, + "recall": 0.9972897819142965, + "f1": 0.9936825627390087, + "total": 387 } } } \ No newline at end of file diff --git a/repo_utils/answer_key/refine/refine_output_one/phab_bench/tp-base.vcf.gz b/repo_utils/answer_key/refine/refine_output_one/phab_bench/tp-base.vcf.gz index 2a2d3326..ec079583 100644 Binary files a/repo_utils/answer_key/refine/refine_output_one/phab_bench/tp-base.vcf.gz and b/repo_utils/answer_key/refine/refine_output_one/phab_bench/tp-base.vcf.gz differ diff --git a/repo_utils/answer_key/refine/refine_output_one/phab_bench/tp-base.vcf.gz.tbi b/repo_utils/answer_key/refine/refine_output_one/phab_bench/tp-base.vcf.gz.tbi index 03d9aa8f..db84618f 100644 Binary files a/repo_utils/answer_key/refine/refine_output_one/phab_bench/tp-base.vcf.gz.tbi and b/repo_utils/answer_key/refine/refine_output_one/phab_bench/tp-base.vcf.gz.tbi differ diff --git a/repo_utils/answer_key/refine/refine_output_one/phab_bench/tp-comp.vcf.gz b/repo_utils/answer_key/refine/refine_output_one/phab_bench/tp-comp.vcf.gz index f37cfd32..913f7adb 100644 Binary files a/repo_utils/answer_key/refine/refine_output_one/phab_bench/tp-comp.vcf.gz and b/repo_utils/answer_key/refine/refine_output_one/phab_bench/tp-comp.vcf.gz differ diff --git a/repo_utils/answer_key/refine/refine_output_one/phab_bench/tp-comp.vcf.gz.tbi b/repo_utils/answer_key/refine/refine_output_one/phab_bench/tp-comp.vcf.gz.tbi index 09951382..7ebf847d 100644 Binary files a/repo_utils/answer_key/refine/refine_output_one/phab_bench/tp-comp.vcf.gz.tbi and b/repo_utils/answer_key/refine/refine_output_one/phab_bench/tp-comp.vcf.gz.tbi differ diff --git a/repo_utils/answer_key/refine/refine_output_one/refine.log.txt b/repo_utils/answer_key/refine/refine_output_one/refine.log.txt index e72704e7..597f25d5 100644 --- a/repo_utils/answer_key/refine/refine_output_one/refine.log.txt +++ b/repo_utils/answer_key/refine/refine_output_one/refine.log.txt @@ -1,6 +1,6 @@ -2023-11-01 14:04:32,128 [INFO] Truvari v4.1.1.dev0+a08d9a7.uc -2023-11-01 14:04:32,129 [INFO] Command /data/truvari/__main__.py refine -u -f repo_utils/test_files/refine_data/chr20.fa.gz test_results/refine_output_one -2023-11-01 14:04:32,131 [INFO] Params: +2023-12-19 15:33:53,701 [INFO] Truvari v4.2.0rc1 +2023-12-19 15:33:53,702 [INFO] Command /data/truvari/__main__.py refine -u -f repo_utils/test_files/refine_data/chr20.fa.gz test_results/refine_output_one +2023-12-19 15:33:53,703 [INFO] Params: { "benchdir": "test_results/refine_output_one", "reference": "repo_utils/test_files/refine_data/chr20.fa.gz", @@ -13,17 +13,17 @@ "mafft_params": "--auto --thread 1", "debug": false } -2023-11-01 14:04:32,132 [INFO] Setting up regions -2023-11-01 14:04:32,154 [INFO] Evaluating 225 regions -2023-11-01 14:04:32,712 [INFO] 85 regions to be refined -2023-11-01 14:04:32,757 [INFO] Preparing regions -2023-11-01 14:04:32,769 [INFO] Extracting haplotypes -2023-11-01 14:05:28,548 [WARNING] /usr/local/lib/python3.10/dist-packages/coverage/control.py:883: CoverageWarning:No data was collected. (no-data-collected) -2023-11-01 14:05:28,553 [WARNING] /usr/local/lib/python3.10/dist-packages/coverage/control.py:883: CoverageWarning:No data was collected. (no-data-collected) -2023-11-01 14:05:28,557 [WARNING] /usr/local/lib/python3.10/dist-packages/coverage/control.py:883: CoverageWarning:No data was collected. (no-data-collected) -2023-11-01 14:05:28,651 [INFO] Harmonizing variants -2023-11-01 14:05:31,650 [INFO] Running bench -2023-11-01 14:05:31,716 [INFO] Including 85 bed regions -2023-11-01 14:05:32,942 [INFO] Zipped 4154 variants Counter({'base': 2077, 'comp': 2077}) -2023-11-01 14:05:32,943 [INFO] 85 chunks of 4154 variants Counter({'__filtered': 3386, 'comp': 386, 'base': 382}) -2023-11-01 14:05:33,483 [INFO] Finished refine +2023-12-19 15:33:53,704 [INFO] Setting up regions +2023-12-19 15:33:53,725 [INFO] Evaluating 225 regions +2023-12-19 15:33:54,307 [INFO] 85 regions to be refined +2023-12-19 15:33:54,344 [INFO] Preparing regions +2023-12-19 15:33:54,354 [INFO] Extracting haplotypes +2023-12-19 15:33:55,039 [WARNING] /usr/local/lib/python3.10/dist-packages/coverage/control.py:883: CoverageWarning:No data was collected. (no-data-collected) +2023-12-19 15:33:55,041 [WARNING] /usr/local/lib/python3.10/dist-packages/coverage/control.py:883: CoverageWarning:No data was collected. (no-data-collected) +2023-12-19 15:33:55,154 [WARNING] /usr/local/lib/python3.10/dist-packages/coverage/control.py:883: CoverageWarning:No data was collected. (no-data-collected) +2023-12-19 15:33:55,162 [INFO] Harmonizing variants +2023-12-19 15:33:57,833 [INFO] Running bench +2023-12-19 15:33:57,887 [INFO] Including 85 bed regions +2023-12-19 15:33:59,098 [INFO] Zipped 4106 variants Counter({'base': 2053, 'comp': 2053}) +2023-12-19 15:33:59,100 [INFO] 86 chunks of 4106 variants Counter({'__filtered': 3338, 'comp': 385, 'base': 383}) +2023-12-19 15:33:59,647 [INFO] Finished refine diff --git a/repo_utils/answer_key/refine/refine_output_one/refine.region_summary.json b/repo_utils/answer_key/refine/refine_output_one/refine.region_summary.json index d9692fde..1abab8da 100644 --- a/repo_utils/answer_key/refine/refine_output_one/refine.region_summary.json +++ b/repo_utils/answer_key/refine/refine_output_one/refine.region_summary.json @@ -1,18 +1,18 @@ { - "TP": 194, - "TN": 15, - "FP": 13, - "FN": 3, - "base P": 201, - "base N": 24, + "TP": 199, + "TN": 14, + "FP": 10, + "FN": 2, + "base P": 202, + "base N": 23, "comp P": 210, "comp N": 15, - "PPV": 0.9238095238095239, - "TPR": 0.9651741293532339, - "TNR": 0.625, - "NPV": 1.0, - "ACC": 0.9288888888888889, - "BA": 0.7950870646766169, - "F1": 0.9440389294403894, + "PPV": 0.9476190476190476, + "TPR": 0.9851485148514851, + "TNR": 0.6086956521739131, + "NPV": 0.9333333333333333, + "ACC": 0.9466666666666667, + "BA": 0.7969220835126991, + "F1": 0.9660194174757282, "UND": 0 } \ No newline at end of file diff --git a/repo_utils/answer_key/refine/refine_output_one/refine.regions.txt b/repo_utils/answer_key/refine/refine_output_one/refine.regions.txt index c0de0125..4c665b54 100644 --- a/repo_utils/answer_key/refine/refine_output_one/refine.regions.txt +++ b/repo_utils/answer_key/refine/refine_output_one/refine.regions.txt @@ -19,7 +19,7 @@ chr20 5554104 5554185 0 0 0 0 False 0 0 0 0 TN chr20 5962327 5962410 0 0 0 0 False 0 0 0 0 TN chr20 7435215 7435749 2 2 0 0 False 2 2 0 0 TP chr20 7719330 7719414 1 1 0 0 False 1 1 0 0 TP -chr20 7720911 7721003 1 1 1 0 True 1 1 1 0 FN +chr20 7720911 7721003 1 1 1 0 True 1 1 0 0 TP chr20 7721059 7721319 0 0 0 0 False 0 0 0 0 TN chr20 8661911 8662657 1 1 3 1 True 9 9 0 0 TP chr20 9015548 9016039 1 1 0 0 False 1 1 0 0 TP @@ -55,7 +55,7 @@ chr20 23267965 23268087 2 2 0 0 False 2 2 0 0 TP chr20 23560883 23561191 1 1 2 2 True 3 3 0 0 TP chr20 23942431 23942955 1 1 0 0 False 1 1 0 0 TP chr20 24407983 24408997 3 3 1 0 True 2 2 0 0 TP -chr20 24681988 24682277 2 2 1 0 True 1 1 0 1 FP +chr20 24681988 24682277 2 2 1 0 True 2 2 0 0 TP chr20 24836081 24836384 2 2 0 0 False 2 2 0 0 TP chr20 24956961 24957198 2 2 0 0 False 2 2 0 0 TP chr20 24982027 24982423 1 1 0 0 False 1 1 0 0 TP @@ -66,7 +66,7 @@ chr20 29401872 29402283 0 0 0 0 False 0 0 0 0 TN chr20 29799755 29799978 0 0 0 0 False 0 0 0 0 TN chr20 30404513 30406201 0 0 0 0 False 0 0 0 0 TN chr20 31689079 31689527 2 2 0 0 False 2 2 0 0 TP -chr20 32722424 32723212 0 0 1 0 True 0 0 0 0 TN +chr20 32722424 32723212 0 0 1 0 True 0 0 1 0 FN chr20 32723469 32724637 0 0 0 0 False 0 0 0 0 TN chr20 32724927 32724999 0 0 0 0 False 0 0 0 0 TN chr20 34205619 34205960 1 1 0 0 False 1 1 0 0 TP @@ -113,7 +113,7 @@ chr20 53913028 53913352 2 2 0 0 False 2 2 0 0 TP chr20 54690771 54690873 2 2 0 0 False 2 2 0 0 TP chr20 55532159 55533378 1 1 0 0 False 1 1 0 0 TP chr20 55624482 55625807 6 6 6 0 True 11 11 0 0 TP -chr20 55627344 55628450 7 7 4 0 True 7 7 0 1 FP +chr20 55627344 55628450 7 7 4 0 True 8 8 0 0 TP chr20 55943919 55945807 2 2 2 1 True 5 5 0 0 TP chr20 56280422 56282114 4 4 5 1 True 10 10 0 0 TP chr20 56284258 56284407 2 2 0 0 False 2 2 0 0 TP @@ -172,7 +172,7 @@ chr20 62875172 62875556 2 2 3 0 True 3 3 0 0 TP chr20 62975788 62976224 1 1 0 0 False 1 1 0 0 TP chr20 62986310 62986805 2 2 0 0 False 2 2 0 0 TP chr20 63027997 63028166 2 2 1 0 True 4 4 0 0 TP -chr20 63028876 63029402 2 2 0 1 True 1 1 0 1 FP +chr20 63028876 63029402 2 2 0 1 True 2 2 0 0 TP chr20 63049033 63049412 3 3 1 0 True 2 2 0 0 TP chr20 63068409 63068799 2 2 0 0 False 2 2 0 0 TP chr20 63103249 63103492 1 1 0 0 False 1 1 0 0 TP @@ -219,8 +219,8 @@ chr20 64090704 64091389 0 0 0 2 False 0 0 0 2 FP chr20 64096658 64097164 0 0 0 2 False 0 0 0 2 FP chr20 64125109 64127974 3 3 3 0 True 12 12 0 0 TP chr20 64131804 64133955 5 5 8 1 True 24 24 0 0 TP -chr20 64134883 64136386 3 3 1 0 True 4 4 1 0 FN +chr20 64134883 64136386 3 3 1 0 True 4 4 0 0 TP chr20 64157894 64158976 2 2 0 0 False 2 2 0 0 TP -chr20 64173409 64176529 4 4 7 3 True 18 18 0 0 TP +chr20 64173409 64176529 4 4 7 3 True 17 17 0 0 TP chr20 64201854 64202098 3 3 0 0 False 3 3 0 0 TP chr20 64233048 64233545 2 2 0 0 False 2 2 0 0 TP diff --git a/repo_utils/answer_key/refine/refine_output_one/refine.variant_summary.json b/repo_utils/answer_key/refine/refine_output_one/refine.variant_summary.json index e61085c2..edc91450 100644 --- a/repo_utils/answer_key/refine/refine_output_one/refine.variant_summary.json +++ b/repo_utils/answer_key/refine/refine_output_one/refine.variant_summary.json @@ -1,11 +1,11 @@ { - "TP-base": 591, - "TP-comp": 591, - "FP": 18, - "FN": 3, - "precision": 0.9704433497536946, - "recall": 0.9949494949494949, - "f1": 0.9825436408977556, - "base cnt": 594, - "comp cnt": 609 + "TP-base": 593, + "TP-comp": 593, + "FP": 15, + "FN": 2, + "precision": 0.975328947368421, + "recall": 0.9966386554621849, + "f1": 0.9858686616791356, + "base cnt": 595, + "comp cnt": 608 } \ No newline at end of file diff --git a/repo_utils/answer_key/refine/refine_output_three/log.txt b/repo_utils/answer_key/refine/refine_output_three/log.txt index c4110bc2..e6bd2d31 100644 --- a/repo_utils/answer_key/refine/refine_output_three/log.txt +++ b/repo_utils/answer_key/refine/refine_output_three/log.txt @@ -1,6 +1,6 @@ -2023-11-01 14:06:18,612 [INFO] Truvari v4.1.1.dev0+a08d9a7.uc -2023-11-01 14:06:18,613 [INFO] Command /data/truvari/__main__.py bench -b repo_utils/test_files/refine_data/hg002_base.vcf.gz -c repo_utils/test_files/refine_data/hg002_comp.vcf.gz --includebed repo_utils/test_files/refine_data/h1_hc_tr_hg002.bed -s 5 -o test_results/refine_output_three -2023-11-01 14:06:18,614 [INFO] Params: +2023-12-19 15:34:12,422 [INFO] Truvari v4.2.0rc1 +2023-12-19 15:34:12,423 [INFO] Command /data/truvari/__main__.py bench -b repo_utils/test_files/refine_data/hg002_base.vcf.gz -c repo_utils/test_files/refine_data/hg002_comp.vcf.gz --includebed repo_utils/test_files/refine_data/h1_hc_tr_hg002.bed -s 5 -o test_results/refine_output_three +2023-12-19 15:34:12,425 [INFO] Params: { "base": "/data/repo_utils/test_files/refine_data/hg002_base.vcf.gz", "comp": "/data/repo_utils/test_files/refine_data/hg002_comp.vcf.gz", @@ -28,10 +28,10 @@ "check_monref": true, "check_multi": true } -2023-11-01 14:06:18,683 [INFO] Including 225 bed regions -2023-11-01 14:06:20,271 [INFO] Zipped 7158 variants Counter({'comp': 5303, 'base': 1855}) -2023-11-01 14:06:20,272 [INFO] 212 chunks of 7158 variants Counter({'__filtered': 6120, 'base': 587, 'comp': 451}) -2023-11-01 14:06:20,478 [INFO] Stats: { +2023-12-19 15:34:12,508 [INFO] Including 225 bed regions +2023-12-19 15:34:14,248 [INFO] Zipped 7158 variants Counter({'comp': 5303, 'base': 1855}) +2023-12-19 15:34:14,250 [INFO] 212 chunks of 7158 variants Counter({'__filtered': 6120, 'base': 587, 'comp': 451}) +2023-12-19 15:34:14,462 [INFO] Stats: { "TP-base": 387, "TP-comp": 387, "FP": 64, @@ -83,4 +83,4 @@ } } } -2023-11-01 14:06:20,479 [INFO] Finished bench +2023-12-19 15:34:14,463 [INFO] Finished bench diff --git a/repo_utils/answer_key/refine/refine_output_three/phab.output.vcf.gz b/repo_utils/answer_key/refine/refine_output_three/phab.output.vcf.gz index a934c83d..d42e536f 100644 Binary files a/repo_utils/answer_key/refine/refine_output_three/phab.output.vcf.gz and b/repo_utils/answer_key/refine/refine_output_three/phab.output.vcf.gz differ diff --git a/repo_utils/answer_key/refine/refine_output_three/phab.output.vcf.gz.tbi b/repo_utils/answer_key/refine/refine_output_three/phab.output.vcf.gz.tbi index 0f71574c..d423dd20 100644 Binary files a/repo_utils/answer_key/refine/refine_output_three/phab.output.vcf.gz.tbi and b/repo_utils/answer_key/refine/refine_output_three/phab.output.vcf.gz.tbi differ diff --git a/repo_utils/answer_key/refine/refine_output_three/phab_bench/fn.vcf.gz b/repo_utils/answer_key/refine/refine_output_three/phab_bench/fn.vcf.gz index d19ac02d..10f87aaf 100644 Binary files a/repo_utils/answer_key/refine/refine_output_three/phab_bench/fn.vcf.gz and b/repo_utils/answer_key/refine/refine_output_three/phab_bench/fn.vcf.gz differ diff --git a/repo_utils/answer_key/refine/refine_output_three/phab_bench/fn.vcf.gz.tbi b/repo_utils/answer_key/refine/refine_output_three/phab_bench/fn.vcf.gz.tbi index 7d794f6f..4d0fb4ae 100644 Binary files a/repo_utils/answer_key/refine/refine_output_three/phab_bench/fn.vcf.gz.tbi and b/repo_utils/answer_key/refine/refine_output_three/phab_bench/fn.vcf.gz.tbi differ diff --git a/repo_utils/answer_key/refine/refine_output_three/phab_bench/fp.vcf.gz b/repo_utils/answer_key/refine/refine_output_three/phab_bench/fp.vcf.gz index ae9977d6..0f35b523 100644 Binary files a/repo_utils/answer_key/refine/refine_output_three/phab_bench/fp.vcf.gz and b/repo_utils/answer_key/refine/refine_output_three/phab_bench/fp.vcf.gz differ diff --git a/repo_utils/answer_key/refine/refine_output_three/phab_bench/fp.vcf.gz.tbi b/repo_utils/answer_key/refine/refine_output_three/phab_bench/fp.vcf.gz.tbi index 08a57ccc..834228c5 100644 Binary files a/repo_utils/answer_key/refine/refine_output_three/phab_bench/fp.vcf.gz.tbi and b/repo_utils/answer_key/refine/refine_output_three/phab_bench/fp.vcf.gz.tbi differ diff --git a/repo_utils/answer_key/refine/refine_output_three/phab_bench/params.json b/repo_utils/answer_key/refine/refine_output_three/phab_bench/params.json index 43197620..db270a03 100644 --- a/repo_utils/answer_key/refine/refine_output_three/phab_bench/params.json +++ b/repo_utils/answer_key/refine/refine_output_three/phab_bench/params.json @@ -1 +1 @@ -{"base": "test_results/refine_output_three/phab.output.vcf.gz", "comp": "test_results/refine_output_three/phab.output.vcf.gz", "output": "test_results/refine_output_three/phab_bench", "includebed": "/tmp/ce30u761.bed", "extend": 0, "debug": false, "reference": null, "refdist": 500, "pctseq": 0.7, "minhaplen": 50, "pctsize": 0.7, "pctovl": 0.0, "typeignore": false, "chunksize": 1000, "bSample": "syndip", "cSample": "p:HG002", "dup_to_ins": false, "sizemin": 5, "sizefilt": 5, "sizemax": 50000, "passonly": false, "no_ref": "a", "pick": "single", "check_monref": true, "check_multi": true} \ No newline at end of file +{"base": "test_results/refine_output_three/phab.output.vcf.gz", "comp": "test_results/refine_output_three/phab.output.vcf.gz", "output": "test_results/refine_output_three/phab_bench", "includebed": "/tmp/fqzb0b6u.bed", "extend": 0, "debug": false, "reference": null, "refdist": 500, "pctseq": 0.7, "minhaplen": 50, "pctsize": 0.7, "pctovl": 0.0, "typeignore": false, "chunksize": 1000, "bSample": "syndip", "cSample": "p:HG002", "dup_to_ins": false, "sizemin": 5, "sizefilt": 5, "sizemax": 50000, "passonly": false, "no_ref": "a", "pick": "single", "check_monref": true, "check_multi": true} \ No newline at end of file diff --git a/repo_utils/answer_key/refine/refine_output_three/phab_bench/summary.json b/repo_utils/answer_key/refine/refine_output_three/phab_bench/summary.json index a9fff9f2..c89425b0 100644 --- a/repo_utils/answer_key/refine/refine_output_three/phab_bench/summary.json +++ b/repo_utils/answer_key/refine/refine_output_three/phab_bench/summary.json @@ -29,21 +29,21 @@ }, "weighted": { "sequence": { - "TP": 120.4527998296544, - "FP": 9.857900094240904, - "FN": 17.068500188179314, - "precision": 0.9243508008168312, - "recall": 0.8758846797843979, - "f1": 0.8994653354032435, + "TP": 121.03439995925874, + "FP": 9.64990008994937, + "FN": 16.475100067444146, + "precision": 0.9261586886388359, + "recall": 0.8801893682673207, + "f1": 0.9025890973244014, "total": 139 }, "size": { - "TP": 117.62869990803301, + "TP": 117.65679998137057, "FP": 11.260100107640028, - "FN": 19.847500076517463, - "precision": 0.9126370940976192, - "recall": 0.8556295556703785, - "f1": 0.8832143855831983, + "FN": 19.819400003179908, + "precision": 0.9126561366285918, + "recall": 0.855833955219833, + "f1": 0.8833321880249987, "total": 139 } } diff --git a/repo_utils/answer_key/refine/refine_output_three/phab_bench/tp-base.vcf.gz b/repo_utils/answer_key/refine/refine_output_three/phab_bench/tp-base.vcf.gz index 101e2160..8316f0f7 100644 Binary files a/repo_utils/answer_key/refine/refine_output_three/phab_bench/tp-base.vcf.gz and b/repo_utils/answer_key/refine/refine_output_three/phab_bench/tp-base.vcf.gz differ diff --git a/repo_utils/answer_key/refine/refine_output_three/phab_bench/tp-base.vcf.gz.tbi b/repo_utils/answer_key/refine/refine_output_three/phab_bench/tp-base.vcf.gz.tbi index b830017a..bcccf78b 100644 Binary files a/repo_utils/answer_key/refine/refine_output_three/phab_bench/tp-base.vcf.gz.tbi and b/repo_utils/answer_key/refine/refine_output_three/phab_bench/tp-base.vcf.gz.tbi differ diff --git a/repo_utils/answer_key/refine/refine_output_three/phab_bench/tp-comp.vcf.gz b/repo_utils/answer_key/refine/refine_output_three/phab_bench/tp-comp.vcf.gz index 3a400e55..1877d571 100644 Binary files a/repo_utils/answer_key/refine/refine_output_three/phab_bench/tp-comp.vcf.gz and b/repo_utils/answer_key/refine/refine_output_three/phab_bench/tp-comp.vcf.gz differ diff --git a/repo_utils/answer_key/refine/refine_output_three/phab_bench/tp-comp.vcf.gz.tbi b/repo_utils/answer_key/refine/refine_output_three/phab_bench/tp-comp.vcf.gz.tbi index 43dc3ea1..899b5452 100644 Binary files a/repo_utils/answer_key/refine/refine_output_three/phab_bench/tp-comp.vcf.gz.tbi and b/repo_utils/answer_key/refine/refine_output_three/phab_bench/tp-comp.vcf.gz.tbi differ diff --git a/repo_utils/answer_key/refine/refine_output_three/refine.log.txt b/repo_utils/answer_key/refine/refine_output_three/refine.log.txt index 861343e0..82c2c64d 100644 --- a/repo_utils/answer_key/refine/refine_output_three/refine.log.txt +++ b/repo_utils/answer_key/refine/refine_output_three/refine.log.txt @@ -1,6 +1,6 @@ -2023-11-01 14:06:22,093 [INFO] Truvari v4.1.1.dev0+a08d9a7.uc -2023-11-01 14:06:22,094 [INFO] Command /data/truvari/__main__.py refine --recount -U -r test_results/refine_output_three/candidate.refine.bed -f repo_utils/test_files/refine_data/chr20.fa.gz test_results/refine_output_three -2023-11-01 14:06:22,095 [INFO] Params: +2023-12-19 15:34:16,211 [INFO] Truvari v4.2.0rc1 +2023-12-19 15:34:16,213 [INFO] Command /data/truvari/__main__.py refine --recount -U -r test_results/refine_output_three/candidate.refine.bed -f repo_utils/test_files/refine_data/chr20.fa.gz test_results/refine_output_three +2023-12-19 15:34:16,214 [INFO] Params: { "benchdir": "test_results/refine_output_three", "reference": "repo_utils/test_files/refine_data/chr20.fa.gz", @@ -13,14 +13,16 @@ "mafft_params": "--auto --thread 1", "debug": false } -2023-11-01 14:06:22,096 [INFO] Setting up regions -2023-11-01 14:06:22,144 [INFO] 92 --regions reduced to 92 after intersecting with 225 from --includebed -2023-11-01 14:06:22,523 [INFO] 41 regions to be refined -2023-11-01 14:06:22,532 [INFO] Preparing regions -2023-11-01 14:06:22,537 [INFO] Extracting haplotypes -2023-11-01 14:06:23,106 [INFO] Harmonizing variants -2023-11-01 14:06:24,655 [INFO] Running bench -2023-11-01 14:06:24,706 [INFO] Including 41 bed regions -2023-11-01 14:06:25,093 [INFO] Zipped 2844 variants Counter({'base': 1422, 'comp': 1422}) -2023-11-01 14:06:25,094 [INFO] 35 chunks of 2844 variants Counter({'__filtered': 2595, 'base': 136, 'comp': 113}) -2023-11-01 14:06:26,429 [INFO] Finished refine +2023-12-19 15:34:16,216 [INFO] Setting up regions +2023-12-19 15:34:16,264 [INFO] 92 --regions reduced to 92 after intersecting with 225 from --includebed +2023-12-19 15:34:16,681 [INFO] 41 regions to be refined +2023-12-19 15:34:16,691 [INFO] Preparing regions +2023-12-19 15:34:16,696 [INFO] Extracting haplotypes +2023-12-19 15:34:17,133 [WARNING] /usr/local/lib/python3.10/dist-packages/coverage/control.py:883: CoverageWarning:No data was collected. (no-data-collected) +2023-12-19 15:34:17,155 [WARNING] /usr/local/lib/python3.10/dist-packages/coverage/control.py:883: CoverageWarning:No data was collected. (no-data-collected) +2023-12-19 15:34:17,169 [INFO] Harmonizing variants +2023-12-19 15:34:18,825 [INFO] Running bench +2023-12-19 15:34:18,872 [INFO] Including 41 bed regions +2023-12-19 15:34:19,325 [INFO] Zipped 2824 variants Counter({'base': 1412, 'comp': 1412}) +2023-12-19 15:34:19,327 [INFO] 35 chunks of 2824 variants Counter({'__filtered': 2573, 'base': 136, 'comp': 115}) +2023-12-19 15:34:20,774 [INFO] Finished refine diff --git a/repo_utils/answer_key/refine/refine_output_three/refine.region_summary.json b/repo_utils/answer_key/refine/refine_output_three/refine.region_summary.json index 1be00983..6003cac7 100644 --- a/repo_utils/answer_key/refine/refine_output_three/refine.region_summary.json +++ b/repo_utils/answer_key/refine/refine_output_three/refine.region_summary.json @@ -1,18 +1,18 @@ { - "TP": 15, + "TP": 17, "TN": 6, - "FP": 25, + "FP": 23, "FN": 58, "base P": 76, "base N": 16, "comp P": 85, "comp N": 7, - "PPV": 0.17647058823529413, - "TPR": 0.19736842105263158, + "PPV": 0.2, + "TPR": 0.2236842105263158, "TNR": 0.375, "NPV": 0.8571428571428571, - "ACC": 0.22826086956521738, - "BA": 0.2861842105263158, - "F1": 0.1863354037267081, + "ACC": 0.25, + "BA": 0.2993421052631579, + "F1": 0.2111801242236025, "UND": 0 } \ No newline at end of file diff --git a/repo_utils/answer_key/refine/refine_output_three/refine.regions.txt b/repo_utils/answer_key/refine/refine_output_three/refine.regions.txt index 4ac24492..1498de92 100644 --- a/repo_utils/answer_key/refine/refine_output_three/refine.regions.txt +++ b/repo_utils/answer_key/refine/refine_output_three/refine.regions.txt @@ -15,8 +15,8 @@ chr20 16395201 16395373 3 3 1 0 False 3 3 1 0 FN chr20 17081293 17081365 2 2 1 0 False 2 2 1 0 FN chr20 18209139 18210134 3 3 2 1 True 3 3 0 1 FP chr20 20296014 20296330 2 2 2 1 True 5 5 0 0 TP -chr20 20320339 20320519 1 1 2 1 True 1 1 0 1 FP -chr20 20337285 20337624 1 1 6 2 True 2 2 0 1 FP +chr20 20320339 20320519 1 1 2 1 True 3 3 0 0 TP +chr20 20337285 20337624 1 1 6 2 True 1 1 0 0 TP chr20 20354912 20355435 3 3 1 0 False 3 3 1 0 FN chr20 20356530 20357810 4 4 8 1 True 8 8 2 0 FN chr20 21120298 21120461 2 2 1 0 False 2 2 1 0 FN @@ -46,7 +46,7 @@ chr20 53204099 53204252 2 2 2 1 True 0 0 0 0 TN chr20 55624808 55625652 6 6 6 0 False 6 6 6 0 FN chr20 55627638 55628305 7 7 4 0 False 7 7 4 0 FN chr20 55944272 55945175 2 2 2 1 True 1 1 2 0 FN -chr20 56280541 56281913 4 4 5 1 True 3 3 2 1 FN,FP +chr20 56280541 56281913 4 4 5 1 True 4 4 2 1 FN,FP chr20 57090868 57091166 1 1 2 0 False 1 1 2 0 FN chr20 57110450 57110593 2 2 1 0 False 2 2 1 0 FN chr20 57190256 57190428 0 0 3 1 True 0 0 0 0 TN @@ -67,7 +67,7 @@ chr20 62057602 62058768 1 1 3 1 True 1 1 0 0 TP chr20 62270413 62270827 1 1 3 1 True 3 3 0 0 TP chr20 62321396 62321730 2 2 3 0 False 2 2 3 0 FN chr20 62349641 62349826 1 1 5 1 True 0 0 2 1 FN,FP -chr20 62360410 62360602 0 0 8 2 True 0 0 6 1 FN,FP +chr20 62360410 62360602 0 0 8 2 True 2 2 2 1 FN,FP chr20 62830650 62830697 2 2 1 1 True 1 1 0 0 TP chr20 62875241 62875404 2 2 3 0 False 2 2 3 0 FN chr20 63028066 63029030 4 4 1 1 True 2 2 0 0 TP diff --git a/repo_utils/answer_key/refine/refine_output_three/refine.variant_summary.json b/repo_utils/answer_key/refine/refine_output_three/refine.variant_summary.json index 1a23dd9a..c4e13143 100644 --- a/repo_utils/answer_key/refine/refine_output_three/refine.variant_summary.json +++ b/repo_utils/answer_key/refine/refine_output_three/refine.variant_summary.json @@ -1,11 +1,11 @@ { - "TP-base": 404, - "TP-comp": 404, - "FP": 37, - "FN": 112, - "precision": 0.9160997732426304, - "recall": 0.7829457364341085, - "f1": 0.8443051201671891, + "TP-base": 408, + "TP-comp": 408, + "FP": 35, + "FN": 108, + "precision": 0.9209932279909706, + "recall": 0.7906976744186046, + "f1": 0.8508863399374348, "base cnt": 516, - "comp cnt": 441 + "comp cnt": 443 } \ No newline at end of file diff --git a/repo_utils/answer_key/refine/refine_output_two/candidate.refine.bed b/repo_utils/answer_key/refine/refine_output_two/candidate.refine.bed new file mode 100644 index 00000000..3da282b1 --- /dev/null +++ b/repo_utils/answer_key/refine/refine_output_two/candidate.refine.bed @@ -0,0 +1,92 @@ +chr20 278929 279069 +chr20 641912 642420 +chr20 2240960 2241290 +chr20 4032357 4033228 +chr20 5040476 5040477 +chr20 5041941 5042268 +chr20 7720952 7720968 +chr20 8661944 8662119 +chr20 10802727 10802844 +chr20 13848272 13848544 +chr20 14862054 14862644 +chr20 16257854 16259205 +chr20 16395201 16395373 +chr20 17081293 17081365 +chr20 18209139 18210134 +chr20 20296014 20296330 +chr20 20320339 20320519 +chr20 20337285 20337624 +chr20 20354912 20355435 +chr20 20356530 20357810 +chr20 21120298 21120461 +chr20 21721451 21721646 +chr20 22082266 22083905 +chr20 23155578 23155857 +chr20 23560939 23561098 +chr20 24408073 24408820 +chr20 24682066 24682125 +chr20 25781790 25781791 +chr20 32723044 32723045 +chr20 34235898 34235981 +chr20 35539212 35539582 +chr20 35580686 35580756 +chr20 37361785 37361886 +chr20 38123799 38124003 +chr20 38463997 38464344 +chr20 41196370 41196495 +chr20 41257714 41258003 +chr20 44764150 44764203 +chr20 45600655 45600695 +chr20 48449794 48450385 +chr20 49834182 49834469 +chr20 50775646 50775832 +chr20 51953819 51953820 +chr20 53204099 53204252 +chr20 55624808 55625652 +chr20 55627638 55628305 +chr20 55944272 55945175 +chr20 56280541 56281913 +chr20 57090868 57091166 +chr20 57110450 57110593 +chr20 57190256 57190428 +chr20 57350856 57350920 +chr20 57949001 57949346 +chr20 59384366 59384743 +chr20 60314443 60314711 +chr20 60703005 60703087 +chr20 61100921 61102405 +chr20 61201822 61202242 +chr20 61282925 61283479 +chr20 61289662 61290273 +chr20 61329345 61329441 +chr20 61562109 61562252 +chr20 61744401 61744592 +chr20 61783958 61784698 +chr20 62057602 62058768 +chr20 62270413 62270827 +chr20 62321396 62321730 +chr20 62349641 62349826 +chr20 62360410 62360602 +chr20 62830650 62830697 +chr20 62875241 62875404 +chr20 63028066 63029030 +chr20 63049093 63049159 +chr20 63154687 63154921 +chr20 63167473 63167564 +chr20 63221509 63221721 +chr20 63372214 63372400 +chr20 63491957 63492390 +chr20 63535751 63536002 +chr20 63559415 63559719 +chr20 63641847 63642015 +chr20 63693449 63693732 +chr20 63770936 63771014 +chr20 63948594 63948653 +chr20 63964805 63966113 +chr20 64065882 64065883 +chr20 64090733 64091007 +chr20 64097039 64097040 +chr20 64125360 64127875 +chr20 64131913 64133856 +chr20 64134990 64135308 +chr20 64173438 64176330 \ No newline at end of file diff --git a/repo_utils/answer_key/refine/refine_output_two/fn.vcf.gz b/repo_utils/answer_key/refine/refine_output_two/fn.vcf.gz new file mode 100644 index 00000000..1bf37b27 Binary files /dev/null and b/repo_utils/answer_key/refine/refine_output_two/fn.vcf.gz differ diff --git a/repo_utils/answer_key/refine/refine_output_two/fn.vcf.gz.tbi b/repo_utils/answer_key/refine/refine_output_two/fn.vcf.gz.tbi new file mode 100644 index 00000000..ebacc167 Binary files /dev/null and b/repo_utils/answer_key/refine/refine_output_two/fn.vcf.gz.tbi differ diff --git a/repo_utils/answer_key/refine/refine_output_two/fp.vcf.gz b/repo_utils/answer_key/refine/refine_output_two/fp.vcf.gz new file mode 100644 index 00000000..6f125f07 Binary files /dev/null and b/repo_utils/answer_key/refine/refine_output_two/fp.vcf.gz differ diff --git a/repo_utils/answer_key/refine/refine_output_two/fp.vcf.gz.tbi b/repo_utils/answer_key/refine/refine_output_two/fp.vcf.gz.tbi new file mode 100644 index 00000000..a440ae4b Binary files /dev/null and b/repo_utils/answer_key/refine/refine_output_two/fp.vcf.gz.tbi differ diff --git a/repo_utils/answer_key/refine/refine_output_two/log.txt b/repo_utils/answer_key/refine/refine_output_two/log.txt new file mode 100644 index 00000000..f9c2859f --- /dev/null +++ b/repo_utils/answer_key/refine/refine_output_two/log.txt @@ -0,0 +1,86 @@ +2023-12-19 15:34:01,502 [INFO] Truvari v4.2.0rc1 +2023-12-19 15:34:01,504 [INFO] Command /data/truvari/__main__.py bench -b repo_utils/test_files/refine_data/hg002_base.vcf.gz -c repo_utils/test_files/refine_data/hg002_comp.vcf.gz --includebed repo_utils/test_files/refine_data/h1_hc_tr_hg002.bed -s 5 -o test_results/refine_output_two +2023-12-19 15:34:01,505 [INFO] Params: +{ + "base": "/data/repo_utils/test_files/refine_data/hg002_base.vcf.gz", + "comp": "/data/repo_utils/test_files/refine_data/hg002_comp.vcf.gz", + "output": "test_results/refine_output_two", + "includebed": "/data/repo_utils/test_files/refine_data/h1_hc_tr_hg002.bed", + "extend": 0, + "debug": false, + "reference": null, + "refdist": 500, + "pctseq": 0.7, + "minhaplen": 50, + "pctsize": 0.7, + "pctovl": 0.0, + "typeignore": false, + "chunksize": 1000, + "bSample": "syndip", + "cSample": "HG002", + "dup_to_ins": false, + "sizemin": 5, + "sizefilt": 5, + "sizemax": 50000, + "passonly": false, + "no_ref": false, + "pick": "single", + "check_monref": true, + "check_multi": true +} +2023-12-19 15:34:01,573 [INFO] Including 225 bed regions +2023-12-19 15:34:03,210 [INFO] Zipped 7158 variants Counter({'comp': 5303, 'base': 1855}) +2023-12-19 15:34:03,212 [INFO] 212 chunks of 7158 variants Counter({'__filtered': 6120, 'base': 587, 'comp': 451}) +2023-12-19 15:34:03,413 [INFO] Stats: { + "TP-base": 387, + "TP-comp": 387, + "FP": 64, + "FN": 200, + "precision": 0.8580931263858093, + "recall": 0.6592844974446337, + "f1": 0.745664739884393, + "base cnt": 587, + "comp cnt": 451, + "TP-comp_TP-gt": 382, + "TP-comp_FP-gt": 5, + "TP-base_TP-gt": 382, + "TP-base_FP-gt": 5, + "gt_concordance": 0.9870801033591732, + "gt_matrix": { + "(1, 0)": { + "(0, 1)": 203, + "(1, 0)": 13, + "(1, 1)": 1 + }, + "(0, 1)": { + "(1, 0)": 146, + "(0, 1)": 11 + }, + "(1, 1)": { + "(1, 1)": 9, + "(1, 0)": 2, + "(0, 1)": 2 + } + }, + "weighted": { + "sequence": { + "TP": 500.5166001608595, + "FP": 39.91689974069595, + "FN": 87.50939983222634, + "precision": 0.9261391091633528, + "recall": 0.8511810705083529, + "f1": 0.88707942147253, + "total": 604 + }, + "size": { + "TP": 485.1211999114603, + "FP": 44.722199864685535, + "FN": 102.43430008925498, + "precision": 0.9155935510689002, + "recall": 0.8256602140748741, + "f1": 0.8683044166382059, + "total": 604 + } + } +} +2023-12-19 15:34:03,415 [INFO] Finished bench diff --git a/repo_utils/answer_key/refine/refine_output_two/params.json b/repo_utils/answer_key/refine/refine_output_two/params.json new file mode 100644 index 00000000..596718ee --- /dev/null +++ b/repo_utils/answer_key/refine/refine_output_two/params.json @@ -0,0 +1 @@ +{"base": "/data/repo_utils/test_files/refine_data/hg002_base.vcf.gz", "comp": "/data/repo_utils/test_files/refine_data/hg002_comp.vcf.gz", "output": "test_results/refine_output_two", "includebed": "/data/repo_utils/test_files/refine_data/h1_hc_tr_hg002.bed", "extend": 0, "debug": false, "reference": null, "refdist": 500, "pctseq": 0.7, "minhaplen": 50, "pctsize": 0.7, "pctovl": 0.0, "typeignore": false, "chunksize": 1000, "bSample": "syndip", "cSample": "HG002", "dup_to_ins": false, "sizemin": 5, "sizefilt": 5, "sizemax": 50000, "passonly": false, "no_ref": false, "pick": "single", "check_monref": true, "check_multi": true} \ No newline at end of file diff --git a/repo_utils/answer_key/refine/refine_output_two/phab.output.vcf.gz b/repo_utils/answer_key/refine/refine_output_two/phab.output.vcf.gz new file mode 100644 index 00000000..e132884d Binary files /dev/null and b/repo_utils/answer_key/refine/refine_output_two/phab.output.vcf.gz differ diff --git a/repo_utils/answer_key/refine/refine_output_two/phab.output.vcf.gz.tbi b/repo_utils/answer_key/refine/refine_output_two/phab.output.vcf.gz.tbi new file mode 100644 index 00000000..47ccbd36 Binary files /dev/null and b/repo_utils/answer_key/refine/refine_output_two/phab.output.vcf.gz.tbi differ diff --git a/repo_utils/answer_key/refine/refine_output_two/phab_bench/candidate.refine.bed b/repo_utils/answer_key/refine/refine_output_two/phab_bench/candidate.refine.bed new file mode 100644 index 00000000..1d31809d --- /dev/null +++ b/repo_utils/answer_key/refine/refine_output_two/phab_bench/candidate.refine.bed @@ -0,0 +1,2 @@ +chr20 5040476 5040477 +chr20 32723044 32723045 \ No newline at end of file diff --git a/repo_utils/answer_key/refine/refine_output_two/phab_bench/fn.vcf.gz b/repo_utils/answer_key/refine/refine_output_two/phab_bench/fn.vcf.gz new file mode 100644 index 00000000..fe8f9b3e Binary files /dev/null and b/repo_utils/answer_key/refine/refine_output_two/phab_bench/fn.vcf.gz differ diff --git a/repo_utils/answer_key/refine/refine_output_two/phab_bench/fn.vcf.gz.tbi b/repo_utils/answer_key/refine/refine_output_two/phab_bench/fn.vcf.gz.tbi new file mode 100644 index 00000000..426f818b Binary files /dev/null and b/repo_utils/answer_key/refine/refine_output_two/phab_bench/fn.vcf.gz.tbi differ diff --git a/repo_utils/answer_key/refine/refine_output_two/phab_bench/fp.vcf.gz b/repo_utils/answer_key/refine/refine_output_two/phab_bench/fp.vcf.gz new file mode 100644 index 00000000..9f8ea569 Binary files /dev/null and b/repo_utils/answer_key/refine/refine_output_two/phab_bench/fp.vcf.gz differ diff --git a/repo_utils/answer_key/refine/refine_output_two/phab_bench/fp.vcf.gz.tbi b/repo_utils/answer_key/refine/refine_output_two/phab_bench/fp.vcf.gz.tbi new file mode 100644 index 00000000..a74137fb Binary files /dev/null and b/repo_utils/answer_key/refine/refine_output_two/phab_bench/fp.vcf.gz.tbi differ diff --git a/repo_utils/answer_key/refine/refine_output_two/phab_bench/params.json b/repo_utils/answer_key/refine/refine_output_two/phab_bench/params.json new file mode 100644 index 00000000..4023be31 --- /dev/null +++ b/repo_utils/answer_key/refine/refine_output_two/phab_bench/params.json @@ -0,0 +1 @@ +{"base": "test_results/refine_output_two/phab.output.vcf.gz", "comp": "test_results/refine_output_two/phab.output.vcf.gz", "output": "test_results/refine_output_two/phab_bench", "includebed": "/tmp/rbha7u9j.bed", "extend": 0, "debug": false, "reference": null, "refdist": 500, "pctseq": 0.7, "minhaplen": 50, "pctsize": 0.7, "pctovl": 0.0, "typeignore": false, "chunksize": 1000, "bSample": "syndip", "cSample": "p:HG002", "dup_to_ins": false, "sizemin": 5, "sizefilt": 5, "sizemax": 50000, "passonly": false, "no_ref": "a", "pick": "single", "check_monref": true, "check_multi": true} \ No newline at end of file diff --git a/repo_utils/answer_key/refine/refine_output_two/phab_bench/summary.json b/repo_utils/answer_key/refine/refine_output_two/phab_bench/summary.json new file mode 100644 index 00000000..8d850191 --- /dev/null +++ b/repo_utils/answer_key/refine/refine_output_two/phab_bench/summary.json @@ -0,0 +1,47 @@ +{ + "TP-base": 263, + "TP-comp": 263, + "FP": 2, + "FN": 1, + "precision": 0.9924528301886792, + "recall": 0.9962121212121212, + "f1": 0.9943289224952742, + "base cnt": 264, + "comp cnt": 265, + "TP-comp_TP-gt": 263, + "TP-comp_FP-gt": 0, + "TP-base_TP-gt": 263, + "TP-base_FP-gt": 0, + "gt_concordance": 1.0, + "gt_matrix": { + "(0, 1)": { + "(1, 0)": 106 + }, + "(1, 0)": { + "(0, 1)": 112 + }, + "(1, 1)": { + "(1, 1)": 45 + } + }, + "weighted": { + "sequence": { + "TP": 262.9908000230789, + "FP": 2.0091999769210815, + "FN": 1.0091999769210815, + "precision": 0.9924181132946375, + "recall": 0.9961772728146929, + "f1": 0.9942941399738333, + "total": 266 + }, + "size": { + "TP": 263.0, + "FP": 2.0, + "FN": 1.0, + "precision": 0.9924528301886792, + "recall": 0.9962121212121212, + "f1": 0.9943289224952742, + "total": 266 + } + } +} \ No newline at end of file diff --git a/repo_utils/answer_key/refine/refine_output_two/phab_bench/tp-base.vcf.gz b/repo_utils/answer_key/refine/refine_output_two/phab_bench/tp-base.vcf.gz new file mode 100644 index 00000000..08484ee2 Binary files /dev/null and b/repo_utils/answer_key/refine/refine_output_two/phab_bench/tp-base.vcf.gz differ diff --git a/repo_utils/answer_key/refine/refine_output_two/phab_bench/tp-base.vcf.gz.tbi b/repo_utils/answer_key/refine/refine_output_two/phab_bench/tp-base.vcf.gz.tbi new file mode 100644 index 00000000..cc1fb34c Binary files /dev/null and b/repo_utils/answer_key/refine/refine_output_two/phab_bench/tp-base.vcf.gz.tbi differ diff --git a/repo_utils/answer_key/refine/refine_output_two/phab_bench/tp-comp.vcf.gz b/repo_utils/answer_key/refine/refine_output_two/phab_bench/tp-comp.vcf.gz new file mode 100644 index 00000000..58afeef9 Binary files /dev/null and b/repo_utils/answer_key/refine/refine_output_two/phab_bench/tp-comp.vcf.gz differ diff --git a/repo_utils/answer_key/refine/refine_output_two/phab_bench/tp-comp.vcf.gz.tbi b/repo_utils/answer_key/refine/refine_output_two/phab_bench/tp-comp.vcf.gz.tbi new file mode 100644 index 00000000..cc1fb34c Binary files /dev/null and b/repo_utils/answer_key/refine/refine_output_two/phab_bench/tp-comp.vcf.gz.tbi differ diff --git a/repo_utils/answer_key/refine/refine_output_two/refine.log.txt b/repo_utils/answer_key/refine/refine_output_two/refine.log.txt new file mode 100644 index 00000000..885798db --- /dev/null +++ b/repo_utils/answer_key/refine/refine_output_two/refine.log.txt @@ -0,0 +1,28 @@ +2023-12-19 15:34:05,536 [INFO] Truvari v4.2.0rc1 +2023-12-19 15:34:05,537 [INFO] Command /data/truvari/__main__.py refine -u -U -r repo_utils/test_files/refine_data/h2_hc_tr_hg002.bed -f repo_utils/test_files/refine_data/chr20.fa.gz test_results/refine_output_two +2023-12-19 15:34:05,539 [INFO] Params: +{ + "benchdir": "test_results/refine_output_two", + "reference": "repo_utils/test_files/refine_data/chr20.fa.gz", + "regions": "repo_utils/test_files/refine_data/h2_hc_tr_hg002.bed", + "use_original_vcfs": true, + "use_region_coords": true, + "recount": false, + "threads": 4, + "align": "mafft", + "mafft_params": "--auto --thread 1", + "debug": false +} +2023-12-19 15:34:05,540 [INFO] Setting up regions +2023-12-19 15:34:05,618 [INFO] 247 --regions reduced to 129 after intersecting with 225 from --includebed +2023-12-19 15:34:06,241 [INFO] 52 regions to be refined +2023-12-19 15:34:06,275 [INFO] Preparing regions +2023-12-19 15:34:06,283 [INFO] Extracting haplotypes +2023-12-19 15:34:06,821 [WARNING] /usr/local/lib/python3.10/dist-packages/coverage/control.py:883: CoverageWarning:No data was collected. (no-data-collected) +2023-12-19 15:34:06,823 [WARNING] /usr/local/lib/python3.10/dist-packages/coverage/control.py:883: CoverageWarning:No data was collected. (no-data-collected) +2023-12-19 15:34:06,859 [INFO] Harmonizing variants +2023-12-19 15:34:08,877 [INFO] Running bench +2023-12-19 15:34:08,927 [INFO] Including 52 bed regions +2023-12-19 15:34:09,801 [INFO] Zipped 2598 variants Counter({'base': 1299, 'comp': 1299}) +2023-12-19 15:34:09,802 [INFO] 53 chunks of 2598 variants Counter({'__filtered': 2069, 'comp': 265, 'base': 264}) +2023-12-19 15:34:10,338 [INFO] Finished refine diff --git a/repo_utils/answer_key/refine/refine_output_two/refine.region_summary.json b/repo_utils/answer_key/refine/refine_output_two/refine.region_summary.json index de10db40..2125f6d6 100644 --- a/repo_utils/answer_key/refine/refine_output_two/refine.region_summary.json +++ b/repo_utils/answer_key/refine/refine_output_two/refine.region_summary.json @@ -1,18 +1,18 @@ { - "TP": 118, - "TN": 3, - "FP": 7, + "TP": 121, + "TN": 2, + "FP": 5, "FN": 1, - "base P": 121, - "base N": 8, + "base P": 122, + "base N": 7, "comp P": 126, "comp N": 3, - "PPV": 0.9365079365079365, - "TPR": 0.9752066115702479, - "TNR": 0.375, - "NPV": 1.0, - "ACC": 0.937984496124031, - "BA": 0.6751033057851239, - "F1": 0.9554655870445344, + "PPV": 0.9603174603174603, + "TPR": 0.9918032786885246, + "TNR": 0.2857142857142857, + "NPV": 0.6666666666666666, + "ACC": 0.9534883720930233, + "BA": 0.6387587822014051, + "F1": 0.9758064516129032, "UND": 0 } \ No newline at end of file diff --git a/repo_utils/answer_key/refine/refine_output_two/refine.regions.txt b/repo_utils/answer_key/refine/refine_output_two/refine.regions.txt index 1d4dd76e..fc8fd4c9 100644 --- a/repo_utils/answer_key/refine/refine_output_two/refine.regions.txt +++ b/repo_utils/answer_key/refine/refine_output_two/refine.regions.txt @@ -33,13 +33,13 @@ chr20 22882702 22882830 2 2 0 0 False 2 2 0 0 TP chr20 23155504 23155975 3 3 2 2 True 5 5 0 0 TP chr20 23267965 23268087 2 2 0 0 False 2 2 0 0 TP chr20 23942431 23942955 1 1 0 0 False 1 1 0 0 TP -chr20 24681988 24682277 2 2 1 0 True 1 1 0 1 FP +chr20 24681988 24682277 2 2 1 0 True 2 2 0 0 TP chr20 24836081 24836384 2 2 0 0 False 2 2 0 0 TP chr20 24956961 24957198 2 2 0 0 False 2 2 0 0 TP chr20 25067059 25068112 3 3 0 0 False 3 3 0 0 TP chr20 25561369 25562321 2 2 0 0 False 2 2 0 0 TP chr20 31689079 31689527 2 2 0 0 False 2 2 0 0 TP -chr20 32722424 32723212 0 0 1 0 True 0 0 0 0 TN +chr20 32722424 32723212 0 0 1 0 True 0 0 1 0 FN chr20 32723469 32724637 0 0 0 0 False 0 0 0 0 TN chr20 32724927 32724999 0 0 0 0 False 0 0 0 0 TN chr20 34205619 34205960 1 1 0 0 False 1 1 0 0 TP @@ -97,7 +97,7 @@ chr20 62830612 62830838 2 2 1 1 True 2 2 0 0 TP chr20 62875172 62875556 2 2 3 0 True 3 3 0 0 TP chr20 62986310 62986805 2 2 0 0 False 2 2 0 0 TP chr20 63027997 63028166 2 2 1 0 True 4 4 0 0 TP -chr20 63028876 63029402 2 2 0 1 True 1 1 0 1 FP +chr20 63028876 63029402 2 2 0 1 True 2 2 0 0 TP chr20 63049033 63049412 3 3 1 0 True 2 2 0 0 TP chr20 63068409 63068799 2 2 0 0 False 2 2 0 0 TP chr20 63103249 63103492 1 1 0 0 False 1 1 0 0 TP @@ -125,6 +125,6 @@ chr20 64090704 64091389 0 0 0 2 False 0 0 0 2 FP chr20 64096658 64097164 0 0 0 2 False 0 0 0 2 FP chr20 64125109 64127974 3 3 3 0 True 12 12 0 0 TP chr20 64131804 64133955 5 5 8 1 True 24 24 0 0 TP -chr20 64134883 64136386 3 3 1 0 True 4 4 1 0 FN -chr20 64173409 64176529 4 4 7 3 True 18 18 0 0 TP +chr20 64134883 64136386 3 3 1 0 True 4 4 0 0 TP +chr20 64173409 64176529 4 4 7 3 True 17 17 0 0 TP chr20 64233048 64233545 2 2 0 0 False 2 2 0 0 TP diff --git a/repo_utils/answer_key/refine/refine_output_two/refine.variant_summary.json b/repo_utils/answer_key/refine/refine_output_two/refine.variant_summary.json index 6a256452..24067482 100644 --- a/repo_utils/answer_key/refine/refine_output_two/refine.variant_summary.json +++ b/repo_utils/answer_key/refine/refine_output_two/refine.variant_summary.json @@ -1,11 +1,11 @@ { - "TP-base": 405, - "TP-comp": 405, - "FP": 12, + "TP-base": 406, + "TP-comp": 406, + "FP": 10, "FN": 1, - "precision": 0.9712230215827338, - "recall": 0.9975369458128078, - "f1": 0.9842041312272175, - "base cnt": 406, - "comp cnt": 417 + "precision": 0.9759615384615384, + "recall": 0.9975429975429976, + "f1": 0.9866342648845686, + "base cnt": 407, + "comp cnt": 416 } \ No newline at end of file diff --git a/repo_utils/answer_key/refine/refine_output_two/summary.json b/repo_utils/answer_key/refine/refine_output_two/summary.json new file mode 100644 index 00000000..7c012dba --- /dev/null +++ b/repo_utils/answer_key/refine/refine_output_two/summary.json @@ -0,0 +1,52 @@ +{ + "TP-base": 387, + "TP-comp": 387, + "FP": 64, + "FN": 200, + "precision": 0.8580931263858093, + "recall": 0.6592844974446337, + "f1": 0.745664739884393, + "base cnt": 587, + "comp cnt": 451, + "TP-comp_TP-gt": 382, + "TP-comp_FP-gt": 5, + "TP-base_TP-gt": 382, + "TP-base_FP-gt": 5, + "gt_concordance": 0.9870801033591732, + "gt_matrix": { + "(1, 0)": { + "(0, 1)": 203, + "(1, 0)": 13, + "(1, 1)": 1 + }, + "(0, 1)": { + "(1, 0)": 146, + "(0, 1)": 11 + }, + "(1, 1)": { + "(1, 1)": 9, + "(1, 0)": 2, + "(0, 1)": 2 + } + }, + "weighted": { + "sequence": { + "TP": 500.5166001608595, + "FP": 39.91689974069595, + "FN": 87.50939983222634, + "precision": 0.9261391091633528, + "recall": 0.8511810705083529, + "f1": 0.88707942147253, + "total": 604 + }, + "size": { + "TP": 485.1211999114603, + "FP": 44.722199864685535, + "FN": 102.43430008925498, + "precision": 0.9155935510689002, + "recall": 0.8256602140748741, + "f1": 0.8683044166382059, + "total": 604 + } + } +} \ No newline at end of file diff --git a/repo_utils/answer_key/refine/refine_output_two/tp-base.vcf.gz b/repo_utils/answer_key/refine/refine_output_two/tp-base.vcf.gz new file mode 100644 index 00000000..22990202 Binary files /dev/null and b/repo_utils/answer_key/refine/refine_output_two/tp-base.vcf.gz differ diff --git a/repo_utils/answer_key/refine/refine_output_two/tp-base.vcf.gz.tbi b/repo_utils/answer_key/refine/refine_output_two/tp-base.vcf.gz.tbi new file mode 100644 index 00000000..8e71cb40 Binary files /dev/null and b/repo_utils/answer_key/refine/refine_output_two/tp-base.vcf.gz.tbi differ diff --git a/repo_utils/answer_key/refine/refine_output_two/tp-comp.vcf.gz b/repo_utils/answer_key/refine/refine_output_two/tp-comp.vcf.gz new file mode 100644 index 00000000..58ac980f Binary files /dev/null and b/repo_utils/answer_key/refine/refine_output_two/tp-comp.vcf.gz differ diff --git a/repo_utils/answer_key/refine/refine_output_two/tp-comp.vcf.gz.tbi b/repo_utils/answer_key/refine/refine_output_two/tp-comp.vcf.gz.tbi new file mode 100644 index 00000000..3e2f5910 Binary files /dev/null and b/repo_utils/answer_key/refine/refine_output_two/tp-comp.vcf.gz.tbi differ diff --git a/truvari/phab.py b/truvari/phab.py index 568cb36d..a8b8186b 100644 --- a/truvari/phab.py +++ b/truvari/phab.py @@ -93,6 +93,7 @@ def incorporate(consensus_sequence, entry, correction): consensus_sequence[position:position + ref_len] = list(entry.alts[0]) return correction + (alt_len - ref_len) + def make_consensus(data, ref_fn): """ Creates consensus sequence from variants @@ -120,11 +121,11 @@ def make_consensus(data, ref_fn): # Checks - doesn't overlap previous position correction[1] = incorporate(haps[1], entry, correction[1]) # turn into fasta. - ret[ref] = f">{o_samp}_1_{ref}\n{''.join(haps[0])}\n>{o_samp}_2_{ref}\n{''.join(haps[1])}\n".encode() + ret[ref] = f">{o_samp}_1_{ref}\n{''.join(haps[0])}\n>{o_samp}_2_{ref}\n{''.join(haps[1])}\n".encode( + ) return ret - def make_haplotype_jobs(base_vcf, bSamples=None, comp_vcf=None, cSamples=None, prefix_comp=False): """ Sets up sample parameters for extract haplotypes @@ -173,22 +174,6 @@ def fasta_reader(fa_str, name_entries=True): yield cur_name, cur_entry.read() -def extract_haplotypes(data, ref_fn): - """ - Deprecated - bcftools consensus isn't great - Given a data tuple of VCF, sample name, and prefix bool - Call bcftools consensus - Returns list of tuples [location, fastaentry] for every haplotype, - so locations are duplicated - """ - vcf_fn, sample, prefix, hap = data - prefix = 'p:' if prefix else '' - cmd = (f"--sample {sample} --prefix {prefix}{sample}_{hap}_ " - f"-H{hap} -f {ref_fn} {vcf_fn}").split(' ') - # Can't return generator from process - return list(fasta_reader(bcftools.consensus(*cmd))) - - def collect_haplotypes(ref_haps_fn, hap_jobs, threads): """ Calls extract haplotypes for every hap_job