-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpredict_utils.py
119 lines (88 loc) · 3.88 KB
/
predict_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from data import data
def reject_outliers(data, m=1.):
""" Sets the value of outlayers to nan
Args:
data (np.array): the data from which to remove outlayers
m (float): threshold on outlayers as m * std_dev(data)
"""
standardized_data = np.abs(data - np.mean(data, axis=0))
outlayer_threshold = m * np.std(data, axis=0)
outlayers = standardized_data > outlayer_threshold
# data[ outlayers ] = standardized_data[outlayers]
data[ outlayers ] = np.nan
print('nr of outlayers: ', np.sum(outlayers))
return data
def predict(model, country, population, days_to_predict, params = None, std_dev=1.):
""" Use the epidemic model to predict what's next
Args:
model: one of the epidemic models
country: (str) which country to predict
population: (int) population of the country
days_to_predict: (int) nr of days to predict
params: (list) [OPTIONAL] containing the params of the model for each
timestep to predict
std_dev (float) standard deviation of uncertainty to plot in graph
"""
X, _, dates = data.getDataset(
init_population = population,
type=model.type(), country=country)
X = X[0,:,:] # first sample
# get the last day
S = X[-1, 0]
I = X[-1, 1]
R = X[-1, 2]
C = X[-1, 3]
# Average more predictions (models have bayesian params)
# TODO: this procedure can be vectorized
y_pred_accumulator = []
for _ in range(1000):
if params == None: # use default params of the model
y_pred = model.predict(S, I, R, C, timesteps=days_to_predict)
else: # Use different params, for each day
assert len(params) == days_to_predict
SIRC = [S, I, R, C]
y_pred = []
for d in range(days_to_predict):
model.set_params(**params[d])
SIRC = model.step(None, SIRC)
y_pred.append(SIRC)
y_pred = np.array(y_pred)
y_pred_accumulator.append(y_pred)
y_pred_accumulator = np.array(y_pred_accumulator)
y_pred_accumulator = reject_outliers(y_pred_accumulator, m=std_dev)
# Max, Min, Mean ignoring NaNs (--> outlayers)
worst_case = np.nanmax(y_pred_accumulator, axis=0)
best_case = np.nanmin(y_pred_accumulator, axis=0)
average_case = np.nanmean(y_pred_accumulator, axis=0)
timeseries_best = np.concatenate([X, best_case], axis=0)
timeseries_worst = np.concatenate([X, worst_case], axis=0)
timeseries_average = np.concatenate([X, average_case], axis=0)
# Adding the population to the values
timeseries_best *= population
timeseries_worst *= population
timeseries_average *= population
# add predicted values to dates
current = dates[-1]
for i in range(days_to_predict):
dates.append( current + timedelta(days = i+1))
dates = [d.strftime('%d/%m/%y') for d in dates]
plt.figure(figsize=(15,10))
plt.title('Italy - COVID-19')
# Plot average
plt.plot(timeseries_average[:,1], label='Infected', color='orange')
plt.plot(timeseries_average[:,2], label='Recovered', color='g')
plt.plot(timeseries_average[:,3], label='Casualties', color='r')
# "Confidence interval"
x = np.arange(0, len(timeseries_best))
plt.fill_between(x, timeseries_best[:,1], timeseries_worst[:,1], color='orange', alpha=0.2)
plt.fill_between(x, timeseries_best[:,2], timeseries_worst[:,2], color='g', alpha=0.2)
plt.fill_between(x, timeseries_best[:,3], timeseries_worst[:,3], color='r', alpha=0.2)
plt.xticks(x[::5], dates[::5], rotation=90)
plt.grid(True)
plt.legend()
plt.plot()
plt.savefig(str(country)+".png")
plt.show()