# -*- coding: utf-8 -*-
"""Illinois COVID-19 Curve ML Model.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1EoWUyPP7sD8F6L5SW2h-tHDDW3AyKusP
"""
# Commented out IPython magic to ensure Python compatibility.
# Run on TensorFlow 2.x
# %tensorflow_version 2.x
from __future__ import absolute_import, division, print_function, unicode_literals
# Import relevant modules
import json, requests
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import PolynomialFeatures
# The following lines adjust the granularity of reporting.
pd.options.display.max_rows = 10
pd.options.display.float_format = "{:.1f}".format
print("Imported modules.")
covid_data = json.loads(requests.get('https://www.dph.illinois.gov/sitefiles/COVIDHistoricalTestResults.json?nocache=1').text)
stateTestingResults = covid_data["state_testing_results"]["values"]
indices = np.arange(1, len(stateTestingResults))
# Daily new cases = difference between consecutive days' cumulative confirmed_cases
changeInCasesByDay = []
for i in range(1, len(stateTestingResults)):
    changeInCases = stateTestingResults[i]["confirmed_cases"] - stateTestingResults[i-1]["confirmed_cases"]
    changeInCasesByDay.append(changeInCases)
train_df = pd.DataFrame({"Day": indices, "Change in Cases": changeInCasesByDay})
print("Imported data.")
# Creating dataset
x = list(train_df["Day"])
y = list(train_df["Change in Cases"])
# Converting to numpy arrays
x = np.array(x)
y = np.array(y)
# Scaling dataset
x = x/max(x)
y = y/max(y)
# Display dataset
plt.scatter(x,y)
plt.show()
print('Created dataset.')
def build_model(learning_rate):
    # A single Dense(1) layer on the 5 polynomial feature columns [1, x, x^2, x^3, x^4]
    # is a linear model in those features, i.e. degree-4 polynomial regression.
    model = tf.keras.Sequential()
    model.add(keras.layers.Dense(units=1, input_shape=[5]))
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='mean_squared_error')
    return model
def train_model(model, feature, label, epochs, batch_size):
    history = model.fit(x=feature, y=label, batch_size=batch_size, epochs=epochs, verbose=True)
    trained_weight = model.get_weights()[0]
    trained_bias = model.get_weights()[1]
    hist_loss = history.history['loss']
    return hist_loss, trained_weight, trained_bias
print('Created functions that build and train model.')
def plot_model_loss(hist_loss):
    plt.plot(hist_loss)
    plt.xlabel('Epochs')
    plt.ylabel('MSE Loss')
    plt.title('MSE Loss vs Epochs Run')
    plt.show()
def plot_model(hist_loss, x_poly):
    # Uses the globally defined model, x and y created above.
    mse = hist_loss[-1]
    y_hat = model.predict(x_poly)
    plt.figure(figsize=(12, 7))
    plt.title('ML Curve Fit')
    plt.scatter(x, y, label='Actual Data')
    plt.plot(x, y_hat, color='red', label='Predicted Curve', linewidth=4.0)
    plt.xlabel('Days', fontsize=20)
    plt.ylabel('Change in Cases', fontsize=20)
    plt.text(0, 0.70, 'MSE = {:.3f}'.format(mse), fontsize=20)
    plt.grid(True)
    plt.legend(fontsize=20)
    plt.show()
print('Created functions that plot the model fit and model loss.')
poly = PolynomialFeatures(degree=4)
x_poly = poly.fit_transform(x.reshape(-1,1))
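# Sanity check (optional): a degree-4 PolynomialFeatures transform of a single
# column expands each day into the 5 columns [1, x, x^2, x^3, x^4], which is
# why build_model() uses input_shape=[5].
assert x_poly.shape == (len(x), 5)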
learning_rate = 0.005
epochs = 750
batch_size = 10
model = build_model(learning_rate)
hist_loss, trained_weight, trained_bias = train_model(model, x_poly, y, epochs, batch_size)
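# The fitted curve is linear in the polynomial feature columns, so the learned
# degree-4 coefficients (on the scaled data) can be read straight off the
# single Dense layer.
print("Learned coefficients:", trained_weight.flatten())
print("Learned bias:", trained_bias)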
plot_model_loss(hist_loss)
plot_model(hist_loss, x_poly)
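# Optional usage sketch: predicting the daily change in cases for a few day
# indices (30, 60 and 90 are hypothetical examples). Inputs must be scaled and
# expanded with the same poly transform used for training, and predictions are
# rescaled back to case counts with the maxima from train_df. Note that a
# polynomial fit like this extrapolates poorly outside the observed day range.
x_max = train_df["Day"].max()
y_max = train_df["Change in Cases"].max()
sample_days = np.array([30, 60, 90])
sample_poly = poly.transform((sample_days / x_max).reshape(-1, 1))
predicted_change = model.predict(sample_poly).flatten() * y_max
for day, pred in zip(sample_days, predicted_change):
    print("Day {}: predicted change in cases = {:.0f}".format(day, pred))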