active.py
#!/bin/python
# coding: utf-8
import os
import sys
import common
import math
import random
import numpy as np
import scipy.sparse as sp
import logging, Logger

# Activation function list:
#   1. linear (no activation)
#   2. sigmoid
#   3. tanh
#   4. relu
# Numerically stable sigmoid; modifies A in place and returns it.
def logit(A):
    # For non-negative entries use 1 / (1 + exp(-a)); for negative entries use
    # the equivalent exp(a) / (1 + exp(a)) form to avoid overflow in exp().
    l = A[A >= 0.0]
    A[A >= 0.0] = 1 / (1.0 + np.exp(-l))
    s = A[A < 0.0]
    A[A < 0.0] = np.exp(s) / (1.0 + np.exp(s))
    return A
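
# A minimal usage sketch for logit() (example values assumed; results rounded):
#   >>> logit(np.array([-2.0, 0.0, 2.0]))
#   array([ 0.11920292,  0.5       ,  0.88079708])
# Note that the input array is modified in place as well as returned.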
# Apply the activation function to A (dense inputs are modified in place);
# idx is accepted for interface compatibility but is currently unused.
def active(A, active_type=common.act.sgmoid, idx=None):
    if common.act.linear == active_type:
        pass
    elif common.act.sgmoid == active_type:
        if sp.isspmatrix(A):
            A.data = logit(A.data)
        else:
            A = logit(A)
    elif common.act.tanh == active_type:
        A = np.tanh(A)
    elif common.act.relu == active_type:
        if sp.isspmatrix(A):
            A.data[A.data < 0] = 0
        else:
            A[A < 0] = 0
    else:
        logger = logging.getLogger(Logger.project_name)
        logger.error("Unrecognized activation function: %s" % active_type)
        raise Exception("Unrecognized activation function: %s" % active_type)
    return A
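
# A minimal usage sketch for active(), assuming the common.act constants used
# above (example values assumed; dense inputs are modified in place):
#   >>> active(np.array([-1.0, 0.5, 2.0]), common.act.relu)
#   array([ 0. ,  0.5,  2. ])
#   >>> active(np.array([0.0]), common.act.sgmoid)
#   array([ 0.5])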
# @loss_type: which loss function to evaluate
def loss(A, Y, loss_type=common.lo.negative_log_likelihood, idx=None):
    # if idx is not specified, weight every entry equally
    if idx is None:
        idx = np.ones(Y.shape)
    # negative_log_likelihood
    # loss = -y*log(f(x)) - (1-y)*log(1-f(x))
    if common.lo.negative_log_likelihood == loss_type:
        return np.sum(-Y * np.log(A) * idx - (1 - Y) * np.log(1 - A) * idx)
    # least_square
    # loss = (y - f(x))^2
    elif common.lo.least_square == loss_type:
        return np.sum((Y - A) * (Y - A) * idx)
    # weighted_approximate_rank_pairwise
    elif common.lo.weighted_approximate_rank_pairwise == loss_type:
        # the WARP loss value is not computed here; 0.0 is returned as a placeholder
        return 0.0
    else:
        logger = logging.getLogger(Logger.project_name)
        logger.error("Unrecognized loss function: %s" % loss_type)
        raise Exception("Unrecognized loss function: %s" % loss_type)
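
# A minimal usage sketch for the negative log-likelihood branch of loss()
# (example values assumed):
#   A = np.array([[0.9, 0.1]])   # predicted probabilities
#   Y = np.array([[1.0, 0.0]])   # labels
#   loss(A, Y) == -np.log(0.9) - np.log(1 - 0.1)   # ~= 0.2107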
# @grad_type: grad() computes the gradient of the given function, which may be
#   either a loss function or an activation function.
def grad(A, Y=None, grad_type=common.grad.sgmoid_negative_log_likelihood):
    ########################## checks
    if ((common.grad.sgmoid_negative_log_likelihood == grad_type or
         common.grad.linear_least_square == grad_type or
         common.grad.linear_weighted_approximate_rank_pairwise == grad_type)
            and Y is None):
        logger = logging.getLogger(Logger.project_name)
        logger.error("Y must not be None when computing the gradient"
                     " of loss function %s" % grad_type)
        raise Exception("Y must not be None when computing the gradient"
                        " of loss function %s" % grad_type)
    if Y is not None:
        m, n = Y.shape
        m1, n1 = A.shape
        if m != m1 or n != n1:
            logger = logging.getLogger(Logger.project_name)
            logger.error("Y.shape (%d,%d) != A.shape (%d,%d)" % (m, n, m1, n1))
            raise Exception("Y.shape (%d,%d) != A.shape (%d,%d)" % (m, n, m1, n1))
    ############################ gradient of loss
    if common.grad.sgmoid_negative_log_likelihood == grad_type:
        if sp.isspmatrix(A):
            nonzero = A.nonzero()
            if len(nonzero[0]) != 0:
                # gradient restricted to the nonzero pattern of A
                R = A.copy()
                R.data = A.data - np.asarray(Y[A.nonzero()])[0, :]
                return R
            else:
                return A
        else:
            return A - Y
    elif common.grad.linear_least_square == grad_type:
        return 2 * (A - Y)
    elif common.grad.linear_weighted_approximate_rank_pairwise == grad_type:
        grad_loss = np.zeros(A.shape)
        c = Y.nonzero()
        m, n = Y.shape
        # remember the positive (nonzero) cells of Y
        dic = dict()
        for i in xrange(len(c[0])):
            dic["%d_%d" % (c[0][i], c[1][i])] = 1
        for i in xrange(len(c[0])):
            x = c[0][i]
            y = c[1][i]
            try:
                av = A[x][y]
            except IndexError:
                logger = logging.getLogger(Logger.project_name)
                logger.warning("Index out of range in av = A[x][y]")
                continue
            # sample up to 100 candidate columns; stop at the first margin violation
            for j in xrange(100):
                y1 = int(random.random() * n)
                try:
                    av1 = A[x][y1]
                except IndexError:
                    logger = logging.getLogger(Logger.project_name)
                    logger.warning("Index out of range in av1 = A[x][y1]")
                    continue
                if "%d_%d" % (x, y1) in dic:
                    continue
                if av1 + 1 > av:
                    # margin violated: accumulate the pairwise gradient
                    grad_loss[x][y] += -1.0
                    grad_loss[x][y1] += 1.0
                    break
        return sp.csr_matrix(grad_loss)
    # gradient of activation (A is assumed to hold the activation output)
    elif common.grad.sgmoid == grad_type:
        return A * (1 - A)
    elif common.grad.linear == grad_type:
        return np.ones(A.shape)
    elif common.grad.tanh == grad_type:
        return 1 - A * A
    elif common.grad.relu == grad_type:
        A[A < 0] = 0
        A[A > 0] = 1
        return A
    else:
        logger = logging.getLogger(Logger.project_name)
        logger.error("Unrecognized grad target function: %s" % grad_type)
        raise Exception("Unrecognized grad target function: %s" % grad_type)
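
# A minimal self-check sketch for the activation-gradient branches of grad(),
# which need no Y argument. The common.grad constants are the ones referenced
# above; the sample values are assumed for illustration only.
if __name__ == "__main__":
    A = np.array([[0.2, 0.5, 0.8]])
    print grad(A.copy(), grad_type=common.grad.sgmoid)   # a * (1 - a)
    print grad(A.copy(), grad_type=common.grad.tanh)     # 1 - a^2
    print grad(A.copy(), grad_type=common.grad.relu)     # 0/1 indicator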