-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcount_shared_vs_VAF.py
100 lines (86 loc) · 2.91 KB
/
count_shared_vs_VAF.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Mar 6 14:42:05 2019
@author: lpsmith
"""
import csv
import lucianSNPLibrary as lsl
mutation_file = "snv_plus_indels.twoPlus.20181030.csv"
def getPatientSampleMap():
patientSampleMap = {}
samplePatientMap = {}
callfile = open("calling_evidence.tsv", "r")
for line in callfile:
if "Patient" in line:
continue
(patient, sample) = line.rstrip().split()[0:2]
patientSampleMap[sample] = patient
if patient not in samplePatientMap:
samplePatientMap[patient] = []
samplePatientMap[patient].append(sample)
return patientSampleMap, samplePatientMap
mutations = {}
with open(mutation_file, 'r') as csvfile:
for lvec in csv.reader(csvfile):
if "DNANum" in lvec[0]:
continue
(sample, __, __, chr, pos, ref, alt, is_snv, is_2p) = lvec[0:9]
if (is_2p=="f"):
continue
ref = int(lvec[-2])
alt = int(lvec[-1])
VAF = alt/(ref+alt)
# if ("N" in sample):
# continue
if sample not in mutations:
mutations[sample] = {}
if chr not in mutations[sample]:
mutations[sample][chr] = {}
pos = int(pos)
mutations[sample][chr][pos] = (ref, alt, VAF)
(patientSampleMap, samplePatientMap) = getPatientSampleMap()
shared_high = 0
shared_low = 0
shared_both = 0
private_high = 0
private_low = 0
for patient in samplePatientMap:
patientmuts = {}
for sample in samplePatientMap[patient]:
for chr in mutations[sample]:
if chr not in patientmuts:
patientmuts[chr] = {}
for pos in mutations[sample][chr]:
if pos not in patientmuts[chr]:
patientmuts[chr][pos] = {}
patientmuts[chr][pos][sample] = mutations[sample][chr][pos]
for chr in patientmuts:
for pos in patientmuts[chr]:
samples = list(patientmuts[chr][pos].keys())
canon_alt = patientmuts[chr][pos][samples[0]][1]
nhigh = 0
nlow = 0
for sample in samples:
if patientmuts[chr][pos][sample][1] == canon_alt:
if patientmuts[chr][pos][sample][2] >= 0.25:
nhigh += 1
else:
nlow += 1
if nhigh + nlow == 1:
if nhigh==1:
private_high += 1
else:
private_low += 1
else:
if nhigh > 0 and nlow > 0:
shared_both +=1
elif nhigh > 0:
shared_high += 1
else:
shared_low += 1
print(str(shared_high), "shared high")
print(str(shared_low), "shared low")
print(str(shared_both), "shared both")
print(str(private_high), "private high")
print(str(private_low), "private low")