-
Notifications
You must be signed in to change notification settings - Fork 20
/
Copy pathwnngip.py
90 lines (78 loc) · 3.74 KB
/
wnngip.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
'''
[1] van Laarhoven, Twan, Sander B. Nabuurs, and Elena Marchiori. "Gaussian interaction profile kernels for predicting drug-target interaction." Bioinformatics 27.21 (2011): 3036-3043.
[2] van Laarhoven, Twan, and Elena Marchiori. "Predicting drug-target interactions for new drug compounds using a weighted nearest neighbor profile." PloS one 8.6 (2013): e66952.
Default Parameters:
T = 0.7 (the parameter T in Section [2])
sigma = 1.0
alpha = 0.5
gamma = 1.0
'''
import numpy as np
from sklearn.metrics.pairwise import rbf_kernel
from sklearn.metrics import precision_recall_curve, roc_curve
from sklearn.metrics import auc
class WNNGIP:
def __init__(self, T=0.7, sigma=1, alpha=0.5, gamma=1.0):
self.T = T # the decay parameter
self.sigma = sigma # the regularization parameter
self.alpha = alpha # the weight parameter used in combining different kernels
self.gamma = gamma # the bandwidth of the GIP kernel
def preprocess_wnn(self, R, S, train_inx, new_inx, drug=True):
for d in new_inx:
ii = np.argsort(S[d, train_inx])[::-1]
inx = train_inx[ii]
for i in xrange(inx.size):
w = self.T**(i)
if w >= 1e-4:
if drug:
R[d, :] += w*R[inx[i], :]
else:
R[:, d] += w*R[:, inx[i]]
else:
break
def rls_kron_train(self, R, Kd, Kt):
m, n = R.shape
ld, vd = np.linalg.eig(Kd)
lt, vt = np.linalg.eig(Kt)
vec = ld.reshape((ld.size, 1))*lt.reshape((1, lt.size))
vec = vec.reshape((1, vec.size))
x = vec*(1.0/(vec+self.sigma))
y = np.dot(np.dot(vt.T, R.T), vd)
y = y.reshape((1, y.size))
z = (x*y).reshape((n, m)) # need to check
self.predictR = np.dot(np.dot(vd, z.T), vt.T)
def kernel_combination(self, R, S, new_inx, bandwidth):
K = self.alpha*S+(1.0-self.alpha)*rbf_kernel(R, gamma=bandwidth)
K[new_inx, :] = S[new_inx, :]
K[:, new_inx] = S[:, new_inx]
return K
def fix_model(self, W, intMat, drugMat, targetMat, seed=None, epsilon=0.1):
R = W*intMat
m, n = intMat.shape
x, y = np.where(R > 0)
# Enforce the positive definite property of similarity matrix
drugMat = (drugMat+drugMat.T)/2 + epsilon*np.eye(m)
targetMat = (targetMat+targetMat.T)/2 + epsilon*np.eye(n)
train_drugs = np.array(list(set(x.tolist())), dtype=np.int32)
train_targets = np.array(list(set(y.tolist())), dtype=np.int32)
new_drugs = np.array(list(set(xrange(m)) - set(x.tolist())), dtype=np.int32)
new_targets = np.array(list(set(xrange(n)) - set(y.tolist())), dtype=np.int32)
drug_bw = self.gamma*m/len(x)
target_bw = self.gamma*n/len(x)
Kd = self.kernel_combination(R, drugMat, new_drugs, drug_bw)
Kt = self.kernel_combination(R.T, targetMat, new_targets, target_bw)
self.preprocess_wnn(R, drugMat, train_drugs, new_drugs, True)
self.preprocess_wnn(R, targetMat, train_targets, new_targets, False)
self.rls_kron_train(R, Kd, Kt)
def predict_scores(self, test_data, N):
inx = np.array(test_data)
return self.predictR[inx[:, 0], inx[:, 1]]
def evaluation(self, test_data, test_label):
scores = self.predictR[test_data[:, 0], test_data[:, 1]]
prec, rec, thr = precision_recall_curve(test_label, scores)
aupr_val = auc(rec, prec)
fpr, tpr, thr = roc_curve(test_label, scores)
auc_val = auc(fpr, tpr)
return aupr_val, auc_val
def __str__(self):
return "Model: RLSWNN, T:%s, sigma:%s, alpha:%s, gamma:%s" % (self.T, self.sigma, self.alpha, self.gamma)