-
Notifications
You must be signed in to change notification settings - Fork 0
/
Data.py
135 lines (108 loc) · 3.6 KB
/
Data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
from collections import defaultdict
from Population import Population
class SimpleStat:
"""
a single population statistic
"""
def __init__(self, f=None):
"""
f is a function f(a,...) that computes the pairwise stat
from data a,b
"""
self.f = f
self.stats = {}
def __setitem__(self, pop, data):
if type(data) is np.float_ or type(data) is float:
self.stats[pop] = data
else:
self.stats[pop] = f(data)
def __getitem__(self,pop):
return self.stats[pop]
def __iter__(self):
return self.stats.__iter__()
class PWStat(SimpleStat):
def __init__(self, f=None):
"""
f is a function f(a,b,...) that computes the pairwise stat
from data a,b
"""
SimpleStat.__init__(self,f=f)
def __setitem__(self, pops, data):
"""there are two variations of this function:
if data is a tuple, it is assumed that the stat has to
be calculated from data[0], data[1]. Otherwise, the data
is directly assigned
"""
if type(data) is tuple:
self.stats[pops] = self.f(data[0], data[1])
else:
self.stats[pops] = data
def __getitem__(self, pops):
return self.stats[pops]
def __iter__(self):
return self.stats.__iter__()
class AntiCommutativePWStat(PWStat):
def __getitem__(self,pops):
if pops[::-1] in self.stats:
return - self.stats[ pops[::-1] ]
else:
return self.stats[pops]
class CommutativePWStat(PWStat):
def __getitem__(self,pops):
if pops[::-1] in self.stats:
return self.stats[ pops[::-1] ]
else:
return self.stats[pops]
def pw_psi(v1, v2):
""" calculates pw psi from allele frequency vectors v1 and v2
"""
psi, shared_snp = 0.0, 0
for i,snp1 in enumerate(v1):
snp2 = v2[i]
if snp1 and snp2:
shared_snp +=1
psi = snp[i] - snp[j]
if shared_snp == 0:
return 0
return psi / shared_snp
def psi_sum(pw_psi):
"""
returns a SimpleStat object representing the sum of psi for
a population, e.g. for ordering purposes
@param pw_psi : the pairwise psi vlaues
@type pw_psi: a PWStat object
"""
psi = defaultdict( lambda : 0)
for p1,p2 in pw_psi:
psi[p1] += pw_psi[p1,p2]
psi[p2] -= pw_psi[p1,p2]
return dict(psi)
def psi_sum_cluster(pw_psi, sList):
psi = defaultdict( lambda : 0)
for i,s1 in enumerate(sList):
for s2 in sList[i+1:]:
if s1.cluster == s2.cluster:
psi[s1.pop] += pw_psi[s1.pop, s2.pop]
psi[s2.pop] -= pw_psi[s1.pop, s2.pop]
else:
psi[s1.pop] += 0
psi[s2.pop] += 0
#print s1.cluster, s2.cluster
return dict(psi)
def mkHeterozygosityFun(n):
def Heterozygosity(v1):
"""calculates heterozygosity if v1 is the allele freq in [0;1]"""
h = 0
for i, f in enumerate(v1):
h += 2*f*(n-f)
return f/len(v1)
return Heterozygosity
def order(stat ,keyfun=lambda x:x):
"""
returns the order of the elements in a sorted list
@param stat: the statistic for which the order is to be established
@type stat: a SimpleStat object
"""
k, v = stat.keys(), stat.values()
if keyfun is None: keyfun = lambda x:x
return dict([(i[1][0],i[0]) for i in enumerate(sorted([i for i in stat.iteritems()],key=lambda i: keyfun(i[1])))])