# Python 3.5.2 |Anaconda 3.0 (64-bit)|
# -*- coding: utf-8 -*-
"""
Last edited: 15-03-2017
Author: Yannis Zachos ([email protected])
Forked by: Jeremias Knoblauch ([email protected])
Description: Implements the Multinomial Dirichlet (MD) model.
"""
import numpy as np
from probability_model import ProbabilityModel
from scipy.special import gammaln
import scipy.misc
import math
from cp_probability_model import CpModel
class MDGPModel(ProbabilityModel):
"""The Multinomial Dirichlet (MD) model.
Statistical model:
Let Y_t be a S1xS2 matrix of data, \LAMBDA a S1xS2 matrix of
normalised intensities, and \ALPHA a S1xS2 matrix of prior hyperparameters.
Let S1xS2 = k.
$Y_t,\LAMBDA \sim \mathcal{Multinomial}(\LAMBDA;n) *
\mathcal{Dirichlet}(\LAMBDA|\ALPHA)$
where n = sum_{i=1}^{k} Y_{it} for a fixed time t.
In the remainder, \ALPHA are called 'prior_alphas'.
Inputs at creation:
S1, S2: int;
give the dimensions of the lattice. The one-dimensional case
corresponds to S1=1 and S2=1.
prior_alphas: float numpy array; dimension S1xS2
stores the parameters of the prior. Default is a vector of 1s.
auto_prior_update: boolean
Indicates whether the prior_mean and prior_var should be updated
to the posterior expectation and posterior variance at each time
step. Default is False.
Internal Attributes:
sums: float numpy array; dimension (t+1)xS1xS2 at time t.
sum_{i=1}^{t} Y_{ij}
gives the sum for each of the S1*S2 series for each possible
run length r=0,1,...t-1, >t-1. at each time point t. Since this
model corresponds to the special case where we assume that there
is no spatial information, the sums numpy array is stored in a
lattice again. (I.e., has two dimensions)
This object stores at position r the posterior means corresponding
to the run length of r=0,1,...,t at each time t.
joint_log_probabilities: float numpy array; dimension t+1 at time t
stores the joint log probabilities (y_{1:t}, r_t| q) for this
model q and all potential run lengths r_t = 0,1,...t-1
model_log_evidence: float;
stores in a scalar the evidence for this model, i.e. the joint
probability (y_{1:t}| q) for this model q.
"""
def __init__(self, prior_alphas,
S1, S2, auto_prior_update=False):
"""Construct a naive MDGP probability model, providing a prior
as ...
NOTE: The Input must always be given as a lattices, i.e.
we have to input evenjoint_log_probabilities the one-dimensional model in 2-dimensional form.
If it is given in different form, simply convert it.
"""
"""initialize all the prior quantities"""
self.prior_alphas = prior_alphas.reshape(S1,S2)
self.auto_prior_update = auto_prior_update
"""initialize all the quantities that we compute through time"""
self.S1, self.S2 = S1, S2
self.sums = None
self.joint_log_probabilities = None
self.model_log_evidence = -np.inf
self.retained_run_lengths = None
self.has_lags = False
self.hyperparameter_optimization = False
self.generalized_bayes_rld = "kullback_leibler"
def initialization(self, y, cp_model, model_prior):
"""function which is only called ONCE, namely when the very first
lattice of observations y is obtained/observed. Called from the
'update_all_joint_log_probabilities' function within the Detector
object containing the MDGPModel object.
"""
"""Get the new observation into the right shape"""
y = y.reshape(self.S1, self.S2)
"""Evaluate the pmf corresponding to the observations, and
multiply by the *model_prior* of the corresponding model. The model
prior is stored by the Detector object, and passed from there to the
ProbabilityModel object only when the joint probabilities
are updated"""
""" sum_process is the sum_{i=1}^{k} y_{it}, where each y_{it} are counts of process i at time t"""
sum_process = sum(y)
updated_alphas = y + self.prior_alphas
""" Compute an ugly term included in the expression of the log marginal pmf.
See derivation of the marginal."""
""" Compute log marginal pmf - i.e. the log_evidence """
sum_updated_alphas = sum(updated_alphas)
positive = gammaln(sum_process+1) + gammaln(sum_updated_alphas) + sum(gammaln(y+updated_alphas))
negative = sum(gammaln(y+1)) + sum(gammaln(updated_alphas)) + gammaln(sum_process+sum_updated_alphas)
evidence = positive - negative
""" evidence is a singleton array and therefore use the flattened
version by selecting its first element."""
self.model_log_evidence = np.log(model_prior) + evidence[0]
"""Get the hazard/probability of CP using *cp_model* as passed
to the probability model from the containing Detector object. Obtain
the *joint_probabilities*. Use the 'pmf' function of the CP model only
at time 0. This 'pmf' function is only used this once and NOWHERE ELSE,
since the hazard function (i.e., the conditional probability of
        r_t = r | r_{t-1} = r') can be used from here on."""
"""Ensure that we do not get np.log(0)=np.inf by perturbation"""
if cp_model.pmf_0(1) == 0:
epsilon = 0.005
else:
epsilon = 0
"""Get log-probs for r_1=0 or r_1>0. Typically, we assume that the
first observation corresponds to a CP (i.e. P(r_1 = 0) = 1),
but this need not be the case in general."""
r_equal_0 = (self.model_log_evidence +
np.log(cp_model.pmf_0(0) + epsilon))
r_larger_0 = (self.model_log_evidence +
np.log(cp_model.pmf_0(1)+ epsilon))
self.joint_log_probabilities = np.array([r_equal_0, r_larger_0])
self.retained_run_lengths = np.array([0,0])
"""Update the sufficient statistics.
For the sum, use the same entry as sufficient statistic for both,
r=0 and r>0, because we have no observation at t=0."""
self.sums = np.array([y,y])
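    # Illustrative note (a worked case of the initialisation above): if the
    # changepoint prior puts all its mass on a changepoint at the first
    # observation, i.e. cp_model.pmf_0(0) = 1 and cp_model.pmf_0(1) = 0, then
    # epsilon = 0.005 and joint_log_probabilities becomes
    # [model_log_evidence + log(1.005), model_log_evidence + log(0.005)],
    # i.e. essentially all probability mass sits on r_1 = 0.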
def evaluate_predictive_log_distribution(self, y, t):
"""Returns the log densities of *y* using the predictive posteriors
for all possible run-lengths r=0,1,...,t-1,>t-1 as currently stored by
virtue of the sufficient statistics.
See derivations for the expression of the posterior predictive.
The corresponding density is computed for all run-lengths and
returned in a np array"""
"""Get the new observation into the right shape"""
y = y.reshape(self.S1, self.S2)
""" sum_process is the sum_{i=1}^{k} y_{t,i}, where each y_{t,i} are counts of process i at time t"""
sum_process = sum(y)
updated_alphas = self.prior_alphas + self.sums
sum_updated_alphas = np.sum(updated_alphas,axis=1)
positive = gammaln(sum_process+1) + gammaln(sum_updated_alphas) + np.sum(gammaln(y+updated_alphas),axis=1)
negative = np.sum(gammaln(y+1),axis=0) + np.sum(gammaln(updated_alphas),axis=1) + gammaln(sum_process+sum_updated_alphas)
        difference = positive - negative
"""" Flatten the posterior predictive to match the dimensions
of the corresponding object in the detector"""
#print('posterior_predictive',np.ndarray.flatten(difference).shape)
#print(np.ndarray.flatten(difference))
return np.ndarray.flatten(difference)
def evaluate_log_prior_predictive(self, y, t):
"""Returns the prior log density of the predictive distribution
for all possible run-lengths r=0,1,...,t-1,>t-1.
This is the case when no other data has been processed.
First, reshape the data as always.
"""
y = y.reshape(self.S1, self.S2)
""" sum_process is the sum_{i=1}^{k} y_{i,t}, where each y_{i,t} are counts of process i at time t"""
sum_process = sum(y)
updated_alphas = self.prior_alphas + y
sum_updated_alphas = sum(updated_alphas)
""" Compute the prior predictive in the same fashion as the posterior
predictive but without using updated quantities, such as model parameters
and sufficient statistics."""
positive = gammaln(sum_process+1) + gammaln(sum_updated_alphas) + sum(gammaln(y+updated_alphas))
negative = sum(gammaln(y+1)) + sum(gammaln(updated_alphas)) + gammaln(sum_process+sum_updated_alphas)
difference = positive - negative
return np.ndarray.flatten(difference)
def update_predictive_distributions(self, y, t, r_evaluations = None):
"""Takes the next observation, *y*, at time *t* and updates the
sufficient statistics/means corresponding to all potential
run-lengths r=0,1,...,t-1,>t-1.
        Since the two entries corresponding to r=t-1 and r>t-1 always share
        the same sufficient statistic in this model, both are simply updated
        with the new observation.
"""
"""STEP 0: Get the new observation into the right shape"""
y = y.reshape(self.S1, self.S2)
"""STEP 1: SUMS
The sums are updated.
Note that this requires the knowledge of the true posterior
variances.
Update the sums from t-r to t to the sums from t-r to
t+1 and add the t+1 th observation as the sum for r=0."""
fac = self.retained_run_lengths[:,np.newaxis, np.newaxis]
self.sums += y*fac # for r > 0
self.sums = np.insert(self.sums, 0, y, axis=0) # for r = 0
"""STEP 2: Update retained_run_lengths in two steps: First by advancing
the run-lengths, second by adding the 0-run-length from this run."""
self.retained_run_lengths += 1
self.retained_run_lengths = np.insert(self.retained_run_lengths, 0, 0)
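    # Illustrative trace (with the additive sums update above): after
    # observations y1, y2, y3 the retained run lengths are [0, 1, 2, 2] and
    # sums holds [y3, y2+y3, y1+y2+y3, y1+y2+y3], i.e. the r-th entry is the
    # sum of the last r+1 observations, with the final entry duplicated for
    # r > t-1.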
def get_posterior_expectation(self, t, r_list=None):
""" NOTE: THE POSTERIOR EXPECTATION OF THE PARAMETERS IS BEING RETURNED"""
"""get the predicted value/expectation from the current posteriors
at time point t, for all possible run-lengths"""
"""get the weights for the prior. Last two entries of w are the same
by the usual r=t-1 and r>t-1 logic."""
#print('t',t,'prior_alpha',self.prior_alpha.shape,'sums',self.sums.shape)
        num = self.prior_alphas + self.sums
        """Normalise within each run length: the Dirichlet posterior mean of
        each cell is its alpha divided by the total concentration."""
        den = np.sum(num, axis=(1, 2))[:, np.newaxis, np.newaxis]
        den[den == 0] += 0.005
        output = num/den
        return output
    def get_posterior_variance(self, t, r_list=None):
        """get the predicted variance from the current posteriors at
        time point t, for all possible run-lengths."""
        """STEP 0: store how many covariance matrices you have to return"""
        run_length_num = self.retained_run_lengths.shape[0]
        """STEP 1: Flatten each S1xS2 matrix of posterior alphas into an
        S1*S2 vector and get the total concentration per run length"""
        alphas = (self.prior_alphas + self.sums).reshape(
                run_length_num, self.S1*self.S2)
        alpha_0 = np.sum(alphas, axis=1)
        """STEP 2: Fill in the Dirichlet posterior covariances,
        Cov(p_i, p_j) = (delta_ij*alpha_i*alpha_0 - alpha_i*alpha_j) /
                        (alpha_0^2 * (alpha_0 + 1))"""
        covariance = np.zeros((run_length_num, self.S1*self.S2,
                               self.S1*self.S2))
        for r in range(run_length_num):
            denominator = (alpha_0[r]**2) * (alpha_0[r] + 1)
            covariance[r] = (np.diag(alphas[r]*alpha_0[r]) -
                             np.outer(alphas[r], alphas[r]))/denominator
        return covariance
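    # Worked example for the covariance above: for a single run length with
    # flattened alphas = [2, 1, 1], alpha_0 = 4 and the denominator is
    # 4^2 * 5 = 80, so e.g. Var(p_1) = 2*(4-2)/80 = 0.05 and
    # Cov(p_1, p_2) = -2*1/80 = -0.025.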
def prior_update(self, t, r_list=None):
"""update the prior expectation & variance to be the posterior
expectation and variances weighted by the run-length distribution"""
self.pred_exp = np.sum((self.get_posterior_expectation(t) *
(np.exp(self.joint_log_probabilities - self.model_log_evidence)
[:,np.newaxis, np.newaxis])), axis=0)
"""cannot use get_posterior_variance here, because that returns the
covariance matrix in the global format. I need it in the local (Naive)
format here though"""
posterior_variance = self.get_posterior_variance(t)
self.pred_var = np.sum(posterior_variance *
np.exp(self.joint_log_probabilities - self.model_log_evidence)
[:,np.newaxis, np.newaxis], axis=0)
"""finally, update the prior mean and prior var"""
self.prior_mean = self.pred_exp
self.prior_var = self.pred_var
def prior_log_density(self, y):
""" NOTE: HAS NOT BEEN DEBUGGED YET"""
"""Computes the log-density of *y* under the prior."""
"""Get the new observation into the right shape"""
y = y.reshape(self.S1, self.S2)
sum_process = sum(y)
updated_alphas = y + self.prior_alphas
        num = gammaln(sum_process + 1)
        den = np.sum(gammaln(y+1))
        term = np.sum(num-den)
""" Compute log marginal pmf - i.e. the log_evidence """
positive = gammaln(sum(self.prior_alphas)) + term + np.sum(gammaln(updated_alphas))
negative = sum(gammaln(self.prior_alphas)) + gammaln(self.S1*self.S2*sum_process + sum(self.prior_alphas))
        evidence = positive - negative
        return evidence
def trimmer(self, kept_run_lengths):
        """Trim the relevant quantities for the MDGP model"""
        self.joint_log_probabilities = (
                self.joint_log_probabilities[kept_run_lengths])
        self.sums = self.sums[kept_run_lengths,:,:]
self.retained_run_lengths = (self.retained_run_lengths[kept_run_lengths])
self.model_log_evidence = scipy.misc.logsumexp(self.joint_log_probabilities)
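# A minimal usage sketch, assuming the repository's probability_model and
# cp_probability_model modules are importable. CpModel(100) below is an
# assumed constructor signature (constant-hazard changepoint model) and may
# differ from the actual class in this repository.
if __name__ == "__main__":
    S1, S2 = 4, 1                      # four count series in a 4x1 lattice
    prior_alphas = np.ones((S1, S2))   # flat Dirichlet prior
    model = MDGPModel(prior_alphas, S1, S2)
    y_first = np.array([3, 0, 1, 2]).reshape(S1, S2)
    # prior predictive log density of the first observation
    print(model.evaluate_log_prior_predictive(y_first, t=1))
    # To run the full recursion, pass a changepoint model and a model prior:
    #   cp_model = CpModel(100)   # assumed signature
    #   model.initialization(y_first, cp_model, model_prior=1.0)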