Skip to content

Commit

Permalink
Merge pull request #227 from Urban-Analytics/nm_sensitivity_testing
Browse files Browse the repository at this point in the history
Nm sensitivity testing
  • Loading branch information
nickmalleson authored Dec 1, 2020
2 parents 576f99b + 92f75b5 commit 64619df
Show file tree
Hide file tree
Showing 18 changed files with 6,972 additions and 2,314 deletions.
File renamed without changes.
4,630 changes: 4,630 additions & 0 deletions experiments/calibration/abc.ipynb

Large diffs are not rendered by default.

Binary file added experiments/calibration/abc2.db
Binary file not shown.
File renamed without changes.
2,043 changes: 2,043 additions & 0 deletions experiments/calibration/calibration.ipynb

Large diffs are not rendered by default.

273 changes: 273 additions & 0 deletions experiments/calibration/calibration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,273 @@
#!/usr/bin/env python
# coding: utf-8

# # Sensitivity Analysis with the OpenCL RAMP model

# ### Import opencl modules

# In[1]:


import multiprocessing as mp
import numpy as np
import yaml # pyyaml library for reading the parameters.yml file
import os
import pandas as pd
import unittest
import pickle
import copy
import random
import matplotlib.pyplot as plt
import scipy.stats as stats

from microsim.opencl.ramp.run import run_headless
from microsim.opencl.ramp.snapshot_convertor import SnapshotConvertor
from microsim.opencl.ramp.snapshot import Snapshot
from microsim.opencl.ramp.params import Params, IndividualHazardMultipliers, LocationHazardMultipliers
from microsim.opencl.ramp.simulator import Simulator
from microsim.opencl.ramp.disease_statuses import DiseaseStatus

import sys
sys.path.append('..')
#import experiments_functions # For the ones outside the class
from opencl_runner import OpenCLRunner # Some additional notebook-specific functions required (functions.py)

# Useful for connecting to this kernel
#%connect_info


# ### Setup params for all runs

# Read the parameters file

# Prepare the parameters for the OpenCL model. (See [main.py](https://github.com/Urban-Analytics/RAMP-UA/blob/052861cc51be5bc1827c85bb827209f0df73c685/microsim/main.py#L262) for an example of how this is done in the code).

# In[2]:


PARAMETERS_FILE = os.path.join("../../","model_parameters", "default.yml")
PARAMS = OpenCLRunner.create_parameters(parameters_file=PARAMETERS_FILE)


# ### Get snapshot path
# **NB** this is the path to the OpenCL snapshot file generated by running `microsim/main.py`. You need to initilaise the model at least once to create the snapshot. The following says 'run in opencl mode and stop once initialisation has finished':
#
# ```
# python microsim/main.py -ocl -init
# ```

# In[3]:


OPENCL_DIR = "../../microsim/opencl"
SNAPSHOT_FILEPATH = os.path.join(OPENCL_DIR, "snapshots", "cache.npz")
assert os.path.isfile(SNAPSHOT_FILEPATH), f"Snapshot doesn't exist: {SNAPSHOT_FILEPATH}"


# ## Observation Data
#
# Read the real observations (number of hospital admissions in Devon) that will be used to calibrate the model. See the [README](./observation_data/README.md) for information about how these observations were obtained. They aren't the raw cases, it's actually a model that was fitted to the lagged cases. They need to be made cumulative as this is how they will be compared to the model.

# In[4]:


# New per day:
gam_cases = pd.read_csv(os.path.join("../../", "gam_cases.csv"), header=0, names=["Day", "Cases"], )

# Cumulative
OBSERVATIONS = pd.DataFrame( {"Day": gam_cases['Day'], "Cases": gam_cases.cumsum()['Cases']} )

assert OBSERVATIONS.tail(1)['Cases'].values[0] == sum(gam_cases['Cases'])
print(f"Total cases: {sum(gam_cases['Cases'])}")


# ## Run default (manually calibrated) model
#
# This shows what happens with the 'default' (manually calibrated) model

# In[5]:


ITERATIONS = 100 # Number of iterations to run for
NUM_SEED_DAYS = 10 # Number of days to seed the population
USE_GPU = False
STORE_DETAILED_COUNTS = False
REPETITIONS = 5

assert ITERATIONS < len(OBSERVATIONS), f"Have more iterations ({ITERATIONS}) than observations ({len(OBSERVATIONS)})."

# Initialise the class so that its ready to run the model.
# This isn't actually necessary immediately as the `run_opencl_model_multi` function is a static method
# so doesn't read any of the class parameters, but the init is necessary
# for calibration later when some parameters can't be passed to the run function directly
OpenCLRunner.init(
iterations = ITERATIONS,
repetitions = REPETITIONS,
observations = OBSERVATIONS,
use_gpu = USE_GPU,
store_detailed_counts = STORE_DETAILED_COUNTS,
parameters_file = PARAMETERS_FILE,
opencl_dir = OPENCL_DIR,
snapshot_filepath = SNAPSHOT_FILEPATH
)


# In[6]:



# #### Approximate Bayesian Computation - Multiple Parameters
#
# As above, but this time with multiple parameters

# In[7]:


import pyabc
# Also quieten down the pyopencl info messages (just print errors)
import logging
logging.getLogger("pyopencl").setLevel(logging.ERROR)

# Also need a new distance function that extracts the data from dataframes.
def distance(sim,obs):
fit = OpenCLRunner.fit_l2(sim["data"],obs["data"])
print(fit)
return fit


# Define the priors. This time make them all normal distributions, but will decorate them later to make sure they are positive. (_For some reason there is an error thrown if they are decorated first_)

# In[122]:


current_risk_beta_rv = pyabc.RV("norm", 0.05, 0.5)
presymptomatic_rv = pyabc.RV("norm", 0.5, 0.5)
asymptomatic_rv = pyabc.RV("norm", 0.5, 0.5)
symptomatic_rv = pyabc.RV("norm", 0.5, 0.5)

# Note, could create the distribution here (currently done below), then plot the priors directly using, e.g.
# y= p riors['current_risk_beta_prior'].pdf(x)
# but for some reason decorating them with the LowerBoundDecorator breaks the call to pdf()

x = np.linspace(-0 ,5, 150)
lines = plt.plot(x, pyabc.Distribution(param=current_risk_beta_rv).pdf({"param": x}),
label = "current_risk_beta", lw = 3)
lines = plt.plot(x, pyabc.Distribution(param=presymptomatic_rv).pdf({"param": x}),
label = "presymptomatic_prior", lw = 3)
lines = plt.plot(x, pyabc.Distribution(param=asymptomatic_rv).pdf({"param": x}),
label = "asymptomatic_prior", lw = 3)
lines = plt.plot(x, pyabc.Distribution(param=symptomatic_rv).pdf({"param": x}),
label = "symptomatic_prior", lw = 3)

plt.autoscale(tight=True)

plt.axvline(x=0.5, ls='--', color="black", label="x=1")
plt.title("Priors")
plt.ylim(0,1)
plt.legend(title=r"$\alpha$ parameter");


# In[125]:


# Decorate the RVs so that they wont go below 0 and create the prior distribution

priors = pyabc.Distribution(
current_risk_beta = pyabc.LowerBoundDecorator(current_risk_beta_rv, 0.0),
presymptomatic = pyabc.LowerBoundDecorator(presymptomatic_rv, 0.0),
asymptomatic = pyabc.LowerBoundDecorator(asymptomatic_rv, 0.0),
symptomatic = pyabc.LowerBoundDecorator(symptomatic_rv, 0.0)
)

#current_risk_beta_prior = pyabc.LowerBoundDecorator(current_risk_beta_prior, 0.0)
#presymptomatic_prior = pyabc.LowerBoundDecorator(presymptomatic_prior, 0.0)
#asymptomatic_prior = pyabc.LowerBoundDecorator(asymptomatic_prior, 0.0)
#symptomatic_prior = pyabc.LowerBoundDecorator(symptomatic_prior, 0.0)


# Define the ABC algorithm. **NOTE: I have had to define one model for each prior, not sure what the implications of this are, e.g. if the interations between the parameters are nonlinear**

# In[124]:


abc = pyabc.ABCSMC(
models=OpenCLRunner.run_model_with_params_abc, # Model (could be a list)
parameter_priors=priors, # Priors (could be a list)
distance_function=distance, # Distance function
sampler = pyabc.sampler.SingleCoreSampler() # Single core because the model is parallelised
)


# Define observations

# In[74]:


# 'Real' cumulative cases:
y_observed = OBSERVATIONS.loc[:ITERATIONS-1, "Cases"].values


# Where to store results
#

# In[ ]:


db_path = ("sqlite:///" + os.path.join(".", "abc2.db"))

run_id = abc.new(db_path, {"data": y_observed}) # (ID only matters if multiple runs stored is same DB)

# Run ABC

# In[ ]:


# Only use 1 iteration for speed while testing
#OpenCLRunner.update(repetitions=1)

#history = abc.run(minimum_epsilon=.1, max_nr_populations=10)
history = abc.run(max_nr_populations=5)

#OpenCLRunner.update(repetitions=REPETITIONS)

# The history object only works if it has the associated database too
with open("./optimisation_result-abc2.pkl", "wb" ) as f:
pickle.dump( history, f)


# Can load the history pickle file, but note that you will also need the sqlite database ('abc.db')

# In[25]:


with open( "./optimisation_result-abc2.pkl", "rb" ) as f:
history = pickle.load(f)


# In[ ]:





# In[ ]:





# In[ ]:





# In[ ]:





# #### Machine learning based (neural) density estimation
#
# Something that Sebastion Schmon is experimenting with that I want to try. SBI (simulation-based inference, https://www.mackelab.org/sbi/). I think the idea is that you train a neural network to learn the model, then use that to generate a posterior.
3 changes: 3 additions & 0 deletions experiments/calibration/convert_nb.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
jupyter nbconvert --to script calibration.ipynb
echo "Script converted, running ... "
nohup python calibration.py &
30 changes: 23 additions & 7 deletions experiments/opencl_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,8 +338,17 @@ def run_model_with_params(cls, input_params: List, return_full_details=False):
return fitness

@classmethod
def run_model_with_params_abc(cls, input_params_dict: dict):
"""TEMP to work with ABC. Parameters are passed in as a dictionary"""
def run_model_with_params_abc(cls, input_params_dict: dict, return_full_details=False):
"""
TEMP to work with ABC. Parameters are passed in as a dictionary.
:param return_full_details: If True then rather than just returning the normal results,
it returns a tuple of the following:
(fitness value, simulated results, observations, the Params object, summaries list)
:return: The number of cumulative new infections per day (as a list value in a
dictionary as required by the pyabc package) unless return_full_details is True.
"""

if not cls.initialised:
raise Exception("The OpenCLRunner class needs to be initialised first. "
"Call the OpenCLRunner.init() function")
Expand All @@ -363,9 +372,16 @@ def run_model_with_params_abc(cls, input_params_dict: dict):
)

summaries = [x[0] for x in results]
# Return the expexted counts in a dictionary
results = OpenCLRunner.get_cumulative_new_infections(summaries)
# Get the cumulative number of new infections per day (i.e. simulated results)
sim = OpenCLRunner.get_cumulative_new_infections(summaries)
print(f"Ran Model. {str(input_params_dict)} ("
f"{[round(params.individual_hazard_multipliers[i],3) for i in [0,1,2] ]}) "
f"Sum result: {sum(results)}")
return {"data": results}
f"Sum result: {sum(sim)})")

if return_full_details:
# Can compare these to the observations to get a fitness
obs = cls.OBSERVATIONS.loc[:cls.ITERATIONS - 1, "Cases"].values
assert len(sim) == len(obs)
fitness = OpenCLRunner.fit_l2(sim, obs)
return fitness, sim, obs, params, summaries
else: # Return the expected counts in a dictionary
return {"data": sim}
Binary file removed experiments/sensitivity_analysis/abc2.db
Binary file not shown.
3 changes: 0 additions & 3 deletions experiments/sensitivity_analysis/convert_nb.sh

This file was deleted.

Binary file not shown.
Binary file not shown.
Loading

0 comments on commit 64619df

Please sign in to comment.