Merge pull request #227 from Urban-Analytics/nm_sensitivity_testing

Nm sensitivity testing
Urban-Analytics · Dec 1, 2020 · 64619df · 64619df
2 parents 576f99b + 92f75b5
commit 64619df
Show file tree

Hide file tree

Showing 18 changed files with 6,972 additions and 2,314 deletions.
diff --git a/experiments/sensitivity_analysis/abc.db → experiments/calibration/abc.db b/experiments/sensitivity_analysis/abc.db → experiments/calibration/abc.db
diff --git a/experiments/calibration/abc.ipynb b/experiments/calibration/abc.ipynb
diff --git a/experiments/calibration/abc2.db b/experiments/calibration/abc2.db
diff --git a/...ments/sensitivity_analysis/abc_test.ipynb → experiments/calibration/abc_test.ipynb b/...ments/sensitivity_analysis/abc_test.ipynb → experiments/calibration/abc_test.ipynb
diff --git a/experiments/calibration/calibration.ipynb b/experiments/calibration/calibration.ipynb
diff --git a/experiments/calibration/calibration.py b/experiments/calibration/calibration.py
@@ -0,0 +1,273 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+# # Sensitivity Analysis with the OpenCL RAMP model
+
+# ### Import opencl modules
+
+# In[1]:
+
+
+import multiprocessing as mp
+import numpy as np
+import yaml # pyyaml library for reading the parameters.yml file
+import os
+import pandas as pd
+import unittest
+import pickle
+import copy
+import random
+import matplotlib.pyplot as plt
+import scipy.stats as stats
+
+from microsim.opencl.ramp.run import run_headless
+from microsim.opencl.ramp.snapshot_convertor import SnapshotConvertor
+from microsim.opencl.ramp.snapshot import Snapshot
+from microsim.opencl.ramp.params import Params, IndividualHazardMultipliers, LocationHazardMultipliers
+from microsim.opencl.ramp.simulator import Simulator
+from microsim.opencl.ramp.disease_statuses import DiseaseStatus
+
+import sys
+sys.path.append('..')
+#import experiments_functions  # For the ones outside the class
+from opencl_runner import OpenCLRunner # Some additional notebook-specific functions required (functions.py)
+
+# Useful for connecting to this kernel
+#%connect_info
+
+
+# ### Setup params for all runs
+
+# Read the parameters file
+
+# Prepare the parameters for the OpenCL model. (See [main.py](https://github.com/Urban-Analytics/RAMP-UA/blob/052861cc51be5bc1827c85bb827209f0df73c685/microsim/main.py#L262) for an example of how this is done in the code). 
+
+# In[2]:
+
+
+PARAMETERS_FILE = os.path.join("../../","model_parameters", "default.yml")
+PARAMS = OpenCLRunner.create_parameters(parameters_file=PARAMETERS_FILE)
+
+
+# ### Get snapshot path
+# **NB** this is the path to the OpenCL snapshot file generated by running `microsim/main.py`. You need to initilaise the model at least once to create the snapshot. The following says 'run in opencl mode and stop once initialisation has finished':
+# 
+# ```
+# python microsim/main.py -ocl -init
+# ```
+
+# In[3]:
+
+
+OPENCL_DIR = "../../microsim/opencl"
+SNAPSHOT_FILEPATH = os.path.join(OPENCL_DIR, "snapshots", "cache.npz")
+assert os.path.isfile(SNAPSHOT_FILEPATH), f"Snapshot doesn't exist: {SNAPSHOT_FILEPATH}"
+
+
+# ## Observation Data
+# 
+# Read the real observations (number of hospital admissions in Devon) that will be used to calibrate the model. See the [README](./observation_data/README.md) for information about how these observations were obtained. They aren't the raw cases, it's actually a model that was fitted to the lagged cases. They need to be made cumulative as this is how they will be compared to the model.
+
+# In[4]:
+
+
+# New per day:
+gam_cases = pd.read_csv(os.path.join("../../", "gam_cases.csv"), header=0, names=["Day", "Cases"], )
+
+# Cumulative
+OBSERVATIONS = pd.DataFrame( {"Day": gam_cases['Day'], "Cases": gam_cases.cumsum()['Cases']} )
+
+assert OBSERVATIONS.tail(1)['Cases'].values[0] == sum(gam_cases['Cases'])
+print(f"Total cases: {sum(gam_cases['Cases'])}")
+
+
+# ## Run default (manually calibrated) model
+#
+# This shows what happens with the 'default' (manually calibrated) model
+
+# In[5]:
+
+
+ITERATIONS = 100  # Number of iterations to run for
+NUM_SEED_DAYS = 10  # Number of days to seed the population
+USE_GPU = False
+STORE_DETAILED_COUNTS = False
+REPETITIONS = 5
+
+assert ITERATIONS < len(OBSERVATIONS),     f"Have more iterations ({ITERATIONS}) than observations ({len(OBSERVATIONS)})."
+
+# Initialise the class so that its ready to run the model.
+# This isn't actually necessary immediately as the `run_opencl_model_multi` function is a static method
+# so doesn't read any of the class parameters, but the init is necessary
+# for calibration later when some parameters can't be passed to the run function directly
+OpenCLRunner.init(
+    iterations = ITERATIONS,
+    repetitions = REPETITIONS,
+    observations = OBSERVATIONS,
+    use_gpu = USE_GPU,
+    store_detailed_counts = STORE_DETAILED_COUNTS,
+    parameters_file = PARAMETERS_FILE,
+    opencl_dir = OPENCL_DIR,
+    snapshot_filepath = SNAPSHOT_FILEPATH
+)
+
+
+# In[6]:
+
+
+
+# #### Approximate Bayesian Computation - Multiple Parameters
+#
+# As above, but this time with multiple parameters
+
+# In[7]:
+
+
+import pyabc
+# Also quieten down the pyopencl info messages (just print errors)
+import logging
+logging.getLogger("pyopencl").setLevel(logging.ERROR)
+
+# Also need a new distance function that extracts the data from dataframes.
+def distance(sim,obs):
+    fit = OpenCLRunner.fit_l2(sim["data"],obs["data"])
+    print(fit)
+    return fit
+
+
+# Define the priors. This time make them all normal distributions, but will decorate them later to make sure they are positive. (_For some reason there is an error thrown if they are decorated first_) 
+
+# In[122]:
+
+
+current_risk_beta_rv = pyabc.RV("norm", 0.05, 0.5)
+presymptomatic_rv = pyabc.RV("norm", 0.5, 0.5)
+asymptomatic_rv = pyabc.RV("norm", 0.5, 0.5)
+symptomatic_rv = pyabc.RV("norm", 0.5, 0.5)
+
+# Note, could create the distribution here (currently done below), then plot the priors directly using, e.g. 
+#   y= p riors['current_risk_beta_prior'].pdf(x)
+# but for some reason decorating them with the LowerBoundDecorator breaks the call to pdf()
+
+x = np.linspace(-0 ,5, 150)
+lines = plt.plot(x, pyabc.Distribution(param=current_risk_beta_rv).pdf({"param": x}),
+                 label = "current_risk_beta", lw = 3)
+lines = plt.plot(x, pyabc.Distribution(param=presymptomatic_rv).pdf({"param": x}),
+                 label = "presymptomatic_prior", lw = 3)
+lines = plt.plot(x, pyabc.Distribution(param=asymptomatic_rv).pdf({"param": x}),
+                 label = "asymptomatic_prior", lw = 3)
+lines = plt.plot(x, pyabc.Distribution(param=symptomatic_rv).pdf({"param": x}),
+                 label = "symptomatic_prior", lw = 3)
+
+plt.autoscale(tight=True)
+
+plt.axvline(x=0.5, ls='--', color="black", label="x=1")
+plt.title("Priors")
+plt.ylim(0,1)
+plt.legend(title=r"$\alpha$ parameter");
+
+
+# In[125]:
+
+
+# Decorate the RVs so that they wont go below 0 and create the prior distribution 
+
+priors = pyabc.Distribution(
+    current_risk_beta = pyabc.LowerBoundDecorator(current_risk_beta_rv, 0.0),
+    presymptomatic = pyabc.LowerBoundDecorator(presymptomatic_rv, 0.0),
+    asymptomatic = pyabc.LowerBoundDecorator(asymptomatic_rv, 0.0),
+    symptomatic = pyabc.LowerBoundDecorator(symptomatic_rv, 0.0)
+    )
+
+#current_risk_beta_prior = pyabc.LowerBoundDecorator(current_risk_beta_prior, 0.0)
+#presymptomatic_prior = pyabc.LowerBoundDecorator(presymptomatic_prior, 0.0)
+#asymptomatic_prior = pyabc.LowerBoundDecorator(asymptomatic_prior, 0.0)
+#symptomatic_prior = pyabc.LowerBoundDecorator(symptomatic_prior, 0.0)
+
+
+# Define the ABC algorithm. **NOTE: I have had to define one model for each prior, not sure what the implications of this are, e.g. if the interations between the parameters are nonlinear**
+
+# In[124]:
+
+
+abc = pyabc.ABCSMC(
+    models=OpenCLRunner.run_model_with_params_abc, # Model (could be a list)
+    parameter_priors=priors, # Priors (could be a list)
+    distance_function=distance,  # Distance function
+    sampler = pyabc.sampler.SingleCoreSampler()  # Single core because the model is parallelised
+    )
+
+
+# Define observations
+
+# In[74]:
+
+
+# 'Real' cumulative cases: 
+y_observed = OBSERVATIONS.loc[:ITERATIONS-1, "Cases"].values
+
+
+# Where to store results
+# 
+
+# In[ ]:
+
+
+db_path = ("sqlite:///" + os.path.join(".", "abc2.db"))
+
+run_id = abc.new(db_path, {"data": y_observed})  # (ID only matters if multiple runs stored is same DB)
+
+# Run ABC
+
+# In[ ]:
+
+
+# Only use 1 iteration for speed while testing
+#OpenCLRunner.update(repetitions=1)
+
+#history = abc.run(minimum_epsilon=.1, max_nr_populations=10)
+history = abc.run(max_nr_populations=5)
+
+#OpenCLRunner.update(repetitions=REPETITIONS)
+
+# The history object only works if it has the associated database too
+with open("./optimisation_result-abc2.pkl", "wb" ) as f:
+    pickle.dump( history, f)
+
+
+# Can load the history pickle file, but note that you will also need the sqlite database ('abc.db')
+
+# In[25]:
+
+
+with open( "./optimisation_result-abc2.pkl", "rb" ) as f:
+    history = pickle.load(f)
+
+
+# In[ ]:
+
+
+
+
+
+# In[ ]:
+
+
+
+
+
+# In[ ]:
+
+
+
+
+
+# In[ ]:
+
+
+
+
+
+# #### Machine learning based (neural) density estimation
+# 
+# Something that Sebastion Schmon is experimenting with that I want to try. SBI (simulation-based inference, https://www.mackelab.org/sbi/). I think the idea is that you train a neural network to learn the model, then use that to generate a posterior.
diff --git a/experiments/calibration/convert_nb.sh b/experiments/calibration/convert_nb.sh
@@ -0,0 +1,3 @@
+jupyter nbconvert --to script calibration.ipynb
+echo "Script converted, running ... "
+nohup python calibration.py &
diff --git a/...ivity_analysis/observation_data/README.md → ...ts/calibration/observation_data/README.md b/...ivity_analysis/observation_data/README.md → ...ts/calibration/observation_data/README.md
diff --git a/...analysis/observation_data/devon_cases.csv → ...ibration/observation_data/devon_cases.csv b/...analysis/observation_data/devon_cases.csv → ...ibration/observation_data/devon_cases.csv
diff --git a/...ity_analysis/observation_data/gam_cases.R → .../calibration/observation_data/gam_cases.R b/...ity_analysis/observation_data/gam_cases.R → .../calibration/observation_data/gam_cases.R
diff --git a/...s/observation_data/getUKCovidTimeSeries.R → ...n/observation_data/getUKCovidTimeSeries.R b/...s/observation_data/getUKCovidTimeSeries.R → ...n/observation_data/getUKCovidTimeSeries.R
diff --git a/experiments/opencl_runner.py b/experiments/opencl_runner.py
@@ -338,8 +338,17 @@ def run_model_with_params(cls, input_params: List, return_full_details=False):
             return fitness
 
     @classmethod
-    def run_model_with_params_abc(cls, input_params_dict: dict):
-        """TEMP to work with ABC. Parameters are passed in as a dictionary"""
+    def run_model_with_params_abc(cls, input_params_dict: dict, return_full_details=False):
+        """
+        TEMP to work with ABC. Parameters are passed in as a dictionary.
+
+        :param return_full_details: If True then rather than just returning the normal results,
+            it returns a tuple of the following:
+             (fitness value, simulated results, observations, the Params object, summaries list)
+        :return: The number of cumulative new infections per day (as a list value in a
+            dictionary as required by the pyabc package) unless return_full_details is True.
+        """
+
         if not cls.initialised:
             raise Exception("The OpenCLRunner class needs to be initialised first. "
                             "Call the OpenCLRunner.init() function")
@@ -363,9 +372,16 @@ def run_model_with_params_abc(cls, input_params_dict: dict):
         )
 
         summaries = [x[0] for x in results]
-        # Return the expexted counts in a dictionary
-        results = OpenCLRunner.get_cumulative_new_infections(summaries)
+        # Get the cumulative number of new infections per day (i.e. simulated results)
+        sim = OpenCLRunner.get_cumulative_new_infections(summaries)
         print(f"Ran Model. {str(input_params_dict)} ("
-              f"{[round(params.individual_hazard_multipliers[i],3) for i in [0,1,2] ]}) "
-              f"Sum result: {sum(results)}")
-        return {"data": results}
+              f"Sum result: {sum(sim)})")
+
+        if return_full_details:
+            # Can compare these to the observations to get a fitness
+            obs = cls.OBSERVATIONS.loc[:cls.ITERATIONS - 1, "Cases"].values
+            assert len(sim) == len(obs)
+            fitness = OpenCLRunner.fit_l2(sim, obs)
+            return fitness, sim, obs, params, summaries
+        else:  # Return the expected counts in a dictionary
+            return {"data": sim}
diff --git a/experiments/sensitivity_analysis/abc2.db b/experiments/sensitivity_analysis/abc2.db
diff --git a/experiments/sensitivity_analysis/convert_nb.sh b/experiments/sensitivity_analysis/convert_nb.sh
diff --git a/experiments/sensitivity_analysis/optimisation_result-abc.pkl b/experiments/sensitivity_analysis/optimisation_result-abc.pkl
diff --git a/experiments/sensitivity_analysis/optimisation_result-minimize.pkl b/experiments/sensitivity_analysis/optimisation_result-minimize.pkl