diff --git a/docs/__pycache__.html b/docs/__pycache__.html
new file mode 100644
index 0000000..4b349a0
--- /dev/null
+++ b/docs/__pycache__.html
@@ -0,0 +1,235 @@
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
import emcee
import yaml
import ssme_function as ssme
import tellurium as te
import os
import shutil
import datetime
import logging
import corner
import utility_functions as uf


def run_sampler(config_fname):
    """
    Run a Bayesian sampler (emcee) based on a configuration file to infer model parameters.

    This function sets up and runs an MCMC sampler based on the provided configuration file.
    The sampler is used to infer model parameters from observed data. The function also produces
    preliminary visualizations of the MCMC results, including trace plots, corner plots, and
    flux prediction plots.

    Args:
        config_fname (str): Path to the configuration YAML file. This file should contain all
                            necessary settings and parameters for the run, including the model file,
                            data file, solver arguments, Bayesian inference settings, optimization
                            settings, and random seed.

    Outputs:
        - Creates a new directory for the run, containing:
            - Copies of the model, data, and configuration files.
            - A log file with run details.
            - An MCMC trace plot.
            - An MCMC corner plot.
            - A flux prediction plot.
            - Flat samples and log likelihoods as CSV files.

    Notes:
        - The configuration file should be structured appropriately with all necessary fields.
        - The function uses the `emcee` library for MCMC sampling.
        - The function uses the `corner` library for corner plots.
        - The function assumes the use of a RoadRunner model for simulations.
        - The function handles both standard and extended models based on the 'extended' flag
          in the configuration.
    """

    ##### Setup #####
    # Load the config.yaml file
    with open(config_fname, "r") as f:
        config = yaml.safe_load(f)

    # get run configuration settings
    run_name = config["run_name"]
    model_file = config['model_file']
    data_file = config['data_file']
    simulation_kwargs = config['solver_arguments']
    inference_settings = config['bayesian_inference']
    optimization_settings = config['optimization']
    seed = config['random_seed']
    np.random.seed(seed)
    extended = config['bayesian_inference']['extended']

    # get parameter names, values, and boundaries
    # (the standard and extended models differ only in how many trailing
    # parameters are split off from the rate constants)
    k_names = config['solver_arguments']['rate_constant_names']
    log10_p_nom = [d['nominal'] for d in config['bayesian_inference']['parameters']]
    p_names = [d['name'] for d in config['bayesian_inference']['parameters']]
    p_lb = [d['bounds'][0] for d in config['bayesian_inference']['parameters']]
    p_ub = [d['bounds'][1] for d in config['bayesian_inference']['parameters']]
    p_bounds = list(zip(p_lb, p_ub))
    p_nom = [10**d['nominal'] for d in config['bayesian_inference']['parameters']]
    if extended:
        k_nom = p_nom[:-6]  # extended model: last 6 entries are 5 nuisance parameters plus the noise sigma
    else:
        k_nom = p_nom[:-1]  # last parameter is the noise sigma
    sigma_nom = p_nom[-1]

    # get assay initial conditions
    initial_conditions = config['solver_arguments']['species_initial_concentrations']
    initial_conditions_scale = config['solver_arguments']['species_initial_concentrations_scale']
    buffer_concentration_scale = config['solver_arguments']['buffer_concentration_scale']

    # create a new directory for the run
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    output_fname = f'run_{run_name}_r{seed}'
    output_dir = output_fname + '_' + timestamp
    os.mkdir(output_dir)
    shutil.copy(model_file, output_dir)
    shutil.copy(data_file, output_dir)
    shutil.copy(config_fname, output_dir)

    # create logging file
    logger = logging.getLogger('log')
    logger.setLevel(logging.INFO)
    file_handler = logging.FileHandler(os.path.join(output_dir, 'log_file.log'))
    file_handler.setLevel(logging.INFO)
    log_format = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    file_handler.setFormatter(log_format)
    logger.addHandler(file_handler)
    logger.info(f"output_dir: {output_dir}")
    logger.info(f"seed: {seed}")

    # load RoadRunner model and simulate the assay at nominal values
    rr_model = te.loadSBMLModel(model_file)
    data_nom = ssme.simulate_assay(rr_model, k_nom, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs)
    y_nom = data_nom[1]  # (time, output)

    # load data, plot trace, and calculate the reference log-likelihood
    y_obs = np.loadtxt(data_file, delimiter=',')
    plt.figure(figsize=(8, 6))
    plt.title('Net ion influx (M/s) vs simulation step')
    plt.plot(y_nom, label='y_nom')
    plt.plot(y_obs, 'o', label='y_obs', alpha=0.3)
    plt.legend()
    plt.savefig(os.path.join(output_dir, 'net_ion_influx_trace_nom.png'))
    log_like_nom = uf.log_like(log10_p_nom, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs)
    logger.info(f"log_like_nom: {log_like_nom}")

    # set sampler parameters
    n_walkers = int(config['bayesian_inference']['mcmc_algorithm_arguments']['n_walkers'])
    n_steps = int(config['bayesian_inference']['mcmc_algorithm_arguments']['n_steps'])
    n_dim = len(p_names)
    thin = int(config['bayesian_inference']['mcmc_algorithm_arguments']['thin'])
    burn_in = int(config['bayesian_inference']['mcmc_algorithm_arguments']['burn_in'])
    logger.info(f"n_walkers: {n_walkers}")
    logger.info(f"n_steps: {n_steps}")
    logger.info(f"n_dim: {n_dim}")
    logger.info(f"thin: {thin}")
    logger.info(f"burn_in: {burn_in}")

    # get sampler starting points
    p_0 = uf.get_p0(p_bounds, n_walkers)

    ##### Run Sampler #####
    if extended:
        logger.info("using extended log posterior")
        log_post = uf.log_post_wrapper_extended
    else:
        log_post = uf.log_post_wrapper
    sampler = emcee.EnsembleSampler(n_walkers, n_dim, log_post,
                                    args=[rr_model,
                                          y_obs,
                                          initial_conditions,
                                          initial_conditions_scale,
                                          buffer_concentration_scale,
                                          simulation_kwargs,
                                          p_lb,
                                          p_ub])
    logger.info("starting MCMC")
    sampler.run_mcmc(p_0, n_steps, progress=True)
    logger.info("finished MCMC")

    ##### Store data #####
    samples = sampler.get_chain(discard=burn_in, thin=thin)
    flat_samples = sampler.get_chain(discard=burn_in, thin=thin, flat=True)
    log_likelihoods = sampler.get_log_prob(discard=burn_in, thin=thin)
    flat_log_likelihoods = sampler.get_log_prob(discard=burn_in, thin=thin, flat=True)
    np.savetxt(os.path.join(output_dir, 'flat_samples.csv'), flat_samples, delimiter=',')
    np.savetxt(os.path.join(output_dir, 'flat_log_likelihoods.csv'), flat_log_likelihoods, delimiter=',')
    logger.info("saved samples and log likelihoods")

    ##### Preliminary data viz #####
    # MCMC trace plot
    fig, axes = plt.subplots(n_dim, 1, figsize=(10, 12 + (0.25 * n_dim)), sharex=True)
    axes = np.atleast_1d(axes)  # guard for the single-parameter case
    labels = p_names
    plt.suptitle('MCMC sampling trace')
    for i in range(n_dim):
        ax = axes[i]
        ax.plot(samples[:, :, i], "k", alpha=0.3)
        ax.set_ylabel(labels[i])
        ax.yaxis.set_label_coords(-0.1, 0.5)
        ax.axhline(log10_p_nom[i], color='red')
    axes[-1].set_xlabel("step number")
    plt.savefig(os.path.join(output_dir, 'mcmc_trace.png'))

    # corner plot
    fig = corner.corner(
        flat_samples, labels=p_names, truths=log10_p_nom, range=p_bounds,
    )
    plt.savefig(os.path.join(output_dir, 'mcmc_corner.png'))

    # flux prediction plot
    plt.figure(figsize=(10, 8))
    inds = np.random.randint(len(flat_samples), size=100)
    for j, ind in enumerate(inds):
        p_sample = flat_samples[ind]
        # match the k_nom slicing above: drop sigma (and the nuisance parameters for the extended model)
        k_tmp = [10**v for v in (p_sample[:-6] if extended else p_sample[:-1])]
        res_tmp = ssme.simulate_assay(rr_model, k_tmp, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs)
        y_tmp = res_tmp[1]
        # label only the first posterior draw so the legend has a single 'y_pred' entry
        plt.plot(y_tmp, color='blue', alpha=0.1, label='y_pred' if j == 0 else None)
    plt.plot(y_obs, 'o', label='y_obs', alpha=0.3, color='orange')
    plt.legend(fontsize=14)
    plt.xlabel("time step")
    plt.ylabel("net ion influx (M/s)")
    plt.savefig(os.path.join(output_dir, 'net_ion_influx_pred.png'))
    logger.info("plotted analysis")

    return None


if __name__ == '__main__':

    ##### Adjust this if needed #####
    config_fname = "./example/antiporter_1_1_12D_cycle1_config.yaml"
    run_sampler(config_fname)
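For orientation, here is a minimal sketch of the configuration structure this script reads, inferred from the keys accessed above. All field values and parameter names are illustrative placeholders, not values from the repository:

run_name: antiporter_1_1_12D_cycle1
model_file: ./example/model.xml        # path to the SBML model (placeholder)
data_file: ./example/data.csv          # observed trace as CSV (placeholder)
random_seed: 0
solver_arguments:
  rate_constant_names: [k1_f, k1_r]    # placeholder rate constant names
  species_initial_concentrations: [1.0e-3, 1.0e-3]
  species_initial_concentrations_scale: [1.0, 1.0]
  buffer_concentration_scale: [1.0, 1.0]
bayesian_inference:
  extended: false                      # if true, the last six parameters are five nuisance terms plus sigma
  parameters:                          # 'nominal' values are on a log10 scale
    - name: log10_k1_f
      nominal: 8.0
      bounds: [6.0, 12.0]
    - name: log10_sigma                # the last parameter is the noise sigma
      nominal: -10.0
      bounds: [-12.0, -8.0]
  mcmc_algorithm_arguments:
    n_walkers: 100
    n_steps: 5000
    thin: 10
    burn_in: 1000
optimization:
  method: differential_evolution
  tuning: true
  n_tuning_steps: 10
  n_replicas: 5

The optimizer script below reads the same configuration, using the `optimization` block for its method, tuning, and replica settings.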
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
import yaml
import ssme_function as ssme
import tellurium as te
import os
import shutil
import datetime
import logging
import time
from tqdm import tqdm
from scipy.optimize import basinhopping, dual_annealing, shgo, minimize
import inspect
import utility_functions as uf


def run_optimizer(config_fname):
    """
    Run maximum likelihood estimation based on a configuration file.

    This function performs both (random) hyperparameter tuning and replica maximum likelihood
    runs for the algorithm specified in the configuration file. Currently the
    "differential_evolution", "basinhopping", "dual_annealing", "shgo", "Nelder-Mead", "Powell",
    "CG", "L-BFGS-B", "COBYLA", "direct", and "SLSQP" methods are supported.

    Args:
        config_fname (str): Path to the configuration YAML file. This file should contain all
                            necessary settings and parameters for the run, including the model file,
                            data file, solver arguments, Bayesian inference settings, optimization
                            settings, and random seed.

    Outputs:
        - Creates a new directory for the run, containing:
            - Copies of the model, data, and configuration files.
            - A log file with run details.
            - A flux trace plot.
            - An optimization plot.
            - Tuning results.
            - Optimization results.

    Notes:
        - The configuration file should be structured appropriately with all necessary fields
          (see the example folder).
        - The function uses the `scipy` library for maximum likelihood estimation.
        - The function assumes the use of a RoadRunner model for simulations.
        - The function handles both standard and extended models based on the 'extended' flag
          in the configuration.
    """

    ##### Setup #####
    # Load the config.yaml file
    with open(config_fname, "r") as f:
        config = yaml.safe_load(f)

    # get run configuration settings
    run_name = config["run_name"]
    model_file = config['model_file']
    data_file = config['data_file']
    simulation_kwargs = config['solver_arguments']
    inference_settings = config['bayesian_inference']
    optimization_settings = config['optimization']
    mle_method = optimization_settings['method']
    seed = config['random_seed']
    np.random.seed(seed)

    # get parameter names, values, and boundaries
    k_names = config['solver_arguments']['rate_constant_names']
    log10_p_nom = [d['nominal'] for d in config['bayesian_inference']['parameters']]
    p_names = [d['name'] for d in config['bayesian_inference']['parameters']]
    p_lb = [d['bounds'][0] for d in config['bayesian_inference']['parameters']]
    p_ub = [d['bounds'][1] for d in config['bayesian_inference']['parameters']]
    p_bounds = list(zip(p_lb, p_ub))
    p_nom = [10**d['nominal'] for d in config['bayesian_inference']['parameters']]
    k_nom = p_nom[:-1]
    sigma_nom = p_nom[-1]

    # get assay initial conditions
    initial_conditions = config['solver_arguments']['species_initial_concentrations']
    initial_conditions_scale = config['solver_arguments']['species_initial_concentrations_scale']
    buffer_concentration_scale = config['solver_arguments']['buffer_concentration_scale']

    # create a new directory for the run
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    output_fname = f'run_{run_name}_r{seed}'
    output_dir = output_fname + '_' + timestamp
    os.mkdir(output_dir)
    shutil.copy(model_file, output_dir)
    shutil.copy(data_file, output_dir)
    shutil.copy(config_fname, output_dir)

    # create logging file
    logger = logging.getLogger('log')
    logger.setLevel(logging.INFO)
    file_handler = logging.FileHandler(os.path.join(output_dir, 'log_file.log'))
    file_handler.setLevel(logging.INFO)
    log_format = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    file_handler.setFormatter(log_format)
    logger.addHandler(file_handler)
    logger.info(f"output_dir: {output_dir}")
    logger.info(f"seed: {seed}")

    # load RoadRunner model and simulate the assay at nominal values
    rr_model = te.loadSBMLModel(model_file)
    data_nom = ssme.simulate_assay(rr_model, k_nom, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs)
    y_nom = data_nom[1]  # (time, output)

    # load data, plot trace, and calculate the reference log-likelihood
    y_obs = np.loadtxt(data_file, delimiter=',')
    plt.figure(figsize=(8, 6))
    plt.title('Net ion influx (M/s) vs simulation step')
    plt.plot(y_nom, label='y_nom')
    plt.plot(y_obs, 'o', label='y_obs', alpha=0.3)
    plt.legend()
    plt.savefig(os.path.join(output_dir, 'net_ion_influx_trace_nom.png'))
    log_like_nom = uf.log_like(log10_p_nom, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs)
    logger.info(f"log_like_nom: {log_like_nom}")

    ##### Run MLE optimization #####
    tuning = config['optimization']['tuning']
    extended = config['bayesian_inference']['extended']
    n_dim = len(p_names)
    logger.info(f"n_dim: {n_dim}")
    if extended:
        logger.info("using extended nll")
        optimization_func = uf.negative_log_likelihood_wrapper_extended
    else:
        optimization_func = uf.negative_log_likelihood_wrapper

    optimization_func_args = [rr_model,
                              y_obs,
                              initial_conditions,
                              initial_conditions_scale,
                              buffer_concentration_scale,
                              simulation_kwargs,
                              p_lb,
                              p_ub]
    x = []
    y = []
    s = []
    nf = []
    t = []
    n_trials = optimization_settings['n_replicas']
    n_tuning_steps = config['optimization']['n_tuning_steps']

    # TODO: refactor; the per-method blocks below are repetitive
    # (see the dispatch-table sketch after this module)
    # run hyperparameter tuning
    if tuning:
        logger.info("Starting optimization hyperparameter tuning")

        initial_guess = uf.get_p0(p_bounds, 1)[0]  # use the same start point for each algorithm
        logger.info(f"Initial guess: {initial_guess}")

        best_fun_value = np.inf
        best_hyperparams = None
        best_result = None
        tuning_results = []

        for i in tqdm(range(n_tuning_steps)):
            logger.info(f"Starting optimization hyperparameter tuning run {i}")
            t0 = time.time()

            if mle_method == "differential_evolution":
                # Define possible hyperparameter ranges
                strategy_choices = ['best1bin', 'best1exp', 'rand1exp', 'randtobest1exp', 'currenttobest1exp', 'best2exp', 'rand2exp', 'randtobest1bin', 'currenttobest1bin', 'best2bin', 'rand2bin', 'rand1bin']
                popsize_range = (25, 50)
                mutation_range = (0, 1.9)
                recombination_range = (0, 1)
                tol_range = (1e-5, 1e-3)
                maxiter_range = (100, 1000)

                if i == 0:
                    # Use default hyperparameters for the first iteration
                    signature = inspect.signature(sp.optimize.differential_evolution)
                    parameters = signature.parameters
                    strategy = parameters["strategy"].default
                    popsize = parameters["popsize"].default
                    mutation = parameters["mutation"].default
                    recombination = parameters["recombination"].default
                    tol = parameters["tol"].default
                    maxiter = parameters["maxiter"].default
                else:
                    # Sample random hyperparameters
                    # (cast to plain Python types so they serialize cleanly with yaml.dump)
                    strategy = str(np.random.choice(strategy_choices))
                    popsize = int(np.random.randint(*popsize_range))
                    mutation = float(np.random.uniform(*mutation_range))
                    recombination = float(np.random.uniform(*recombination_range))
                    tol = float(np.random.uniform(*tol_range))
                    maxiter = int(np.random.randint(*maxiter_range))

                # Run optimization with hyperparameters
                tune_res = sp.optimize.differential_evolution(optimization_func, p_bounds,
                                                              args=(rr_model, y_obs, initial_conditions,
                                                                    initial_conditions_scale, buffer_concentration_scale,
                                                                    simulation_kwargs, p_lb, p_ub),
                                                              strategy=strategy, popsize=popsize, mutation=mutation,
                                                              recombination=recombination, tol=tol, maxiter=maxiter)

                # the default mutation is a tuple; store it as a list for YAML
                mutation_to_store = list(mutation) if isinstance(mutation, tuple) else mutation
                # Store results
                current_hyperparams = {
                    "strategy": str(strategy),
                    "popsize": popsize,
                    "mutation": mutation_to_store,
                    "recombination": recombination,
                    "tol": tol,
                    "maxiter": maxiter
                }

            elif mle_method == "basinhopping":
                # Define possible hyperparameter ranges for basinhopping
                niter_range = (50, 500)        # number of basin hopping iterations
                T_range = (0.5, 5.0)           # temperature for the acceptance criterion
                stepsize_range = (0.1, 2.0)    # step size for random displacement
                interval_range = (10, 100)     # interval for updating the step size
                niter_success_range = (5, 50)  # number of iterations for the success check

                if i == 0:
                    # Use default hyperparameters for the first iteration
                    signature = inspect.signature(sp.optimize.basinhopping)
                    parameters = signature.parameters
                    niter = parameters["niter"].default
                    T = parameters["T"].default
                    stepsize = parameters["stepsize"].default
                    interval = parameters["interval"].default
                    niter_success = parameters["niter_success"].default
                else:
                    # Sample random hyperparameters for basinhopping
                    niter = int(np.random.randint(*niter_range))
                    T = float(np.random.uniform(*T_range))
                    stepsize = float(np.random.uniform(*stepsize_range))
                    interval = int(np.random.randint(*interval_range))
                    niter_success = int(np.random.randint(*niter_success_range))

                # Run optimization with hyperparameters
                minimizer_kwargs = {"args": (rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub), "bounds": p_bounds}
                tune_res = sp.optimize.basinhopping(optimization_func, initial_guess, minimizer_kwargs=minimizer_kwargs, niter=niter, T=T, stepsize=stepsize, interval=interval, niter_success=niter_success)

                # Store results
                current_hyperparams = {
                    "niter": niter,
                    "T": T,
                    "stepsize": stepsize,
                    "interval": interval,
                    "niter_success": niter_success
                }

            elif mle_method == "dual_annealing":
                # Define possible hyperparameter ranges
                initial_temp_range = (0.01, 50000)
                restart_temp_ratio_range = (0, 1)
                visit_range = (1, 3)
                accept_range = (-1e4, -5)
                maxiter_range = (100, 1000)
                maxfun_range = (1000, int(1e7))  # integer bounds for np.random.randint
                no_local_search_choices = [True, False]

                if i == 0:
                    # Use default hyperparameters for the first iteration
                    signature = inspect.signature(sp.optimize.dual_annealing)
                    parameters = signature.parameters
                    initial_temp = parameters["initial_temp"].default
                    restart_temp_ratio = parameters["restart_temp_ratio"].default
                    visit = parameters["visit"].default
                    accept = parameters["accept"].default
                    maxiter = parameters["maxiter"].default
                    maxfun = parameters["maxfun"].default
                    no_local_search = parameters["no_local_search"].default
                else:
                    # Sample random hyperparameters
                    initial_temp = float(np.random.uniform(*initial_temp_range))
                    restart_temp_ratio = float(np.random.uniform(*restart_temp_ratio_range))
                    visit = float(np.random.uniform(*visit_range))
                    accept = float(np.random.uniform(*accept_range))
                    maxiter = int(np.random.randint(*maxiter_range))
                    maxfun = int(np.random.randint(*maxfun_range))
                    no_local_search = bool(np.random.choice(no_local_search_choices))

                # Run optimization with hyperparameters
                tune_res = sp.optimize.dual_annealing(optimization_func, bounds=p_bounds,
                                                      args=(rr_model, y_obs, initial_conditions,
                                                            initial_conditions_scale, buffer_concentration_scale,
                                                            simulation_kwargs, p_lb, p_ub),
                                                      initial_temp=initial_temp, restart_temp_ratio=restart_temp_ratio,
                                                      visit=visit, accept=accept, maxiter=maxiter, maxfun=maxfun,
                                                      no_local_search=no_local_search)
                # Store results
                current_hyperparams = {
                    "initial_temp": initial_temp,
                    "restart_temp_ratio": restart_temp_ratio,
                    "visit": visit,
                    "accept": accept,
                    "maxiter": maxiter,
                    "maxfun": maxfun,
                    "no_local_search": bool(no_local_search)
                }

            elif mle_method == "shgo":
                # Define possible hyperparameter ranges
                n_range = (25, 500)
                iters_range = (1, 10)
                sampling_methods = ["simplicial", "halton", "sobol"]
                maxiter_range = (100, 1000)
                maxev_range = (100, 10000)
                maxtime_range = (0.1, 10)
                minimize_every_iter_choices = [True, False]
                local_iter_range = (10, 100)
                infty_constraints_choices = [True, False]

                if i == 0:
                    # Use default hyperparameters for the first iteration
                    signature = inspect.signature(sp.optimize.shgo)
                    parameters = signature.parameters
                    n = parameters["n"].default
                    iters = parameters["iters"].default
                    sampling_method = parameters["sampling_method"].default

                    options_defaults = parameters["options"].default
                    if options_defaults is None:
                        options_defaults = {}

                    maxiter = options_defaults.get("maxiter", maxiter_range[0])
                    maxev = options_defaults.get("maxev", maxev_range[0])
                    maxtime = options_defaults.get("maxtime", maxtime_range[0])
                    minimize_every_iter = options_defaults.get("minimize_every_iter", True)
                    local_iter = options_defaults.get("local_iter", local_iter_range[0])
                    infty_constraints = options_defaults.get("infty_constraints", True)
                else:
                    # Sample random hyperparameters
                    n = int(np.random.randint(*n_range))
                    iters = int(np.random.randint(*iters_range))
                    sampling_method = str(np.random.choice(sampling_methods))
                    maxiter = int(np.random.randint(*maxiter_range))
                    maxev = int(np.random.randint(*maxev_range))
                    maxtime = float(np.random.uniform(*maxtime_range))
                    minimize_every_iter = bool(np.random.choice(minimize_every_iter_choices))
                    local_iter = int(np.random.randint(*local_iter_range))
                    infty_constraints = bool(np.random.choice(infty_constraints_choices))

                # Run optimization with hyperparameters
                tune_res = sp.optimize.shgo(optimization_func, bounds=p_bounds,
                                            args=(rr_model, y_obs, initial_conditions,
                                                  initial_conditions_scale, buffer_concentration_scale,
                                                  simulation_kwargs, p_lb, p_ub),
                                            n=n, iters=iters, sampling_method=sampling_method,
                                            options={"maxiter": maxiter, "maxev": maxev, "maxtime": maxtime,
                                                     "minimize_every_iter": minimize_every_iter, "local_iter": local_iter,
                                                     "infty_constraints": infty_constraints})

                # Store results
                current_hyperparams = {
                    "n": n,
                    "iters": iters,
                    "sampling_method": sampling_method,
                    "maxiter": maxiter,
                    "maxev": maxev,
                    "maxtime": maxtime,
                    "minimize_every_iter": bool(minimize_every_iter),
                    "local_iter": local_iter,
                    "infty_constraints": bool(infty_constraints)
                }

            elif mle_method == "Nelder-Mead":
                # Define possible hyperparameter ranges
                maxiter_range = (1000, 10000)
                maxfev_range = (1000, 10000)
                xatol_range = (1e-8, 1e-4)
                fatol_range = (1e-8, 1e-4)
                adaptive_choices = [True, False]

                if i == 0:
                    # Use default hyperparameters for the first iteration
                    # (hard-coded values based on the scipy documentation / common settings)
                    maxiter = len(p_bounds) * 200  # N * 200, where N is the number of variables
                    maxfev = len(p_bounds) * 200   # same as maxiter by default
                    xatol = 0.0001
                    fatol = 0.0001
                    adaptive = True
                else:
                    # Sample random hyperparameters
                    maxiter = int(np.random.randint(*maxiter_range))
                    maxfev = int(np.random.randint(*maxfev_range))
                    xatol = float(np.random.uniform(*xatol_range))
                    fatol = float(np.random.uniform(*fatol_range))
                    adaptive = bool(np.random.choice(adaptive_choices))

                # Run optimization with hyperparameters
                tune_res = sp.optimize.minimize(optimization_func, x0=initial_guess,
                                                args=(rr_model, y_obs, initial_conditions,
                                                      initial_conditions_scale, buffer_concentration_scale,
                                                      simulation_kwargs, p_lb, p_ub),
                                                method='Nelder-Mead',
                                                options={"maxiter": maxiter, "maxfev": maxfev, "xatol": xatol, "fatol": fatol, "adaptive": adaptive})

                # Store results
                current_hyperparams = {
                    "maxiter": maxiter,
                    "maxfev": maxfev,
                    "xatol": xatol,
                    "fatol": fatol,
                    "adaptive": bool(adaptive)
                }

            elif mle_method == "Powell":
                # Define possible hyperparameter ranges
                maxiter_range = (len(p_bounds) * 1000, len(p_bounds) * 10000)
                maxfev_range = (len(p_bounds) * 1000, len(p_bounds) * 10000)
                xtol_range = (1e-8, 1e-4)
                ftol_range = (1e-8, 1e-4)

                if i == 0:
                    # Use default hyperparameters for the first iteration
                    # (hard-coded values based on the scipy documentation / common settings)
                    maxiter = len(p_bounds) * 1000  # N * 1000, where N is the number of variables
                    maxfev = len(p_bounds) * 1000   # same as maxiter by default
                    xtol = 0.0001
                    ftol = 0.0001
                else:
                    # Sample random hyperparameters
                    maxiter = int(np.random.randint(*maxiter_range))
                    maxfev = int(np.random.randint(*maxfev_range))
                    xtol = float(np.random.uniform(*xtol_range))
                    ftol = float(np.random.uniform(*ftol_range))

                # Run optimization with hyperparameters
                tune_res = sp.optimize.minimize(optimization_func, x0=initial_guess,
                                                args=(rr_model, y_obs, initial_conditions,
                                                      initial_conditions_scale, buffer_concentration_scale,
                                                      simulation_kwargs, p_lb, p_ub),
                                                method='Powell',
                                                options={"maxiter": maxiter, "maxfev": maxfev, "xtol": xtol, "ftol": ftol})

                # Store results
                current_hyperparams = {
                    "maxiter": maxiter,
                    "maxfev": maxfev,
                    "xtol": xtol,
                    "ftol": ftol
                }

            elif mle_method == "CG":
                # Define possible hyperparameter ranges
                maxiter_range = (len(p_bounds) * 1000, len(p_bounds) * 10000)
                gtol_range = (1e-8, 1e-4)
                norm_choices = [np.inf, -np.inf, 1, 2]
                eps_range = (1e-8, 1e-4)

                if i == 0:
                    # Hard-coded default values
                    maxiter = len(p_bounds) * 1000
                    gtol = 0.0001
                    norm = np.inf
                    eps = 0.0001
                else:
                    # Sample random hyperparameters
                    maxiter = int(np.random.randint(*maxiter_range))
                    gtol = float(np.random.uniform(*gtol_range))
                    norm = np.random.choice(norm_choices)
                    eps = float(np.random.uniform(*eps_range))

                # Run optimization with hyperparameters
                tune_res = sp.optimize.minimize(optimization_func, x0=initial_guess,
                                                args=(rr_model, y_obs, initial_conditions,
                                                      initial_conditions_scale, buffer_concentration_scale,
                                                      simulation_kwargs, p_lb, p_ub),
                                                method='CG',
                                                options={"maxiter": maxiter, "gtol": gtol, "norm": norm, "eps": eps})

                # store the norm as a plain float so it serializes cleanly
                if norm == np.inf:
                    norm_to_save = float('inf')
                elif norm == -np.inf:
                    norm_to_save = float('-inf')
                else:
                    norm_to_save = float(norm)

                # Store results
                current_hyperparams = {
                    "maxiter": maxiter,
                    "gtol": gtol,
                    "norm": norm_to_save,
                    "eps": eps
                }

            elif mle_method == "L-BFGS-B":
                # Define possible hyperparameter ranges
                maxcor_range = (5, 20)
                maxfun_range = (len(p_bounds) * 1000, len(p_bounds) * 10000)
                maxiter_range = (len(p_bounds) * 1000, len(p_bounds) * 10000)
                ftol_range = (1e-10, 1e-4)
                gtol_range = (1e-10, 1e-4)
                eps_range = (1e-10, 1e-4)
                maxls_range = (10, 100)

                if i == 0:
                    # Hard-coded default values
                    maxcor = 10
                    maxfun = len(p_bounds) * 1000
                    maxiter = len(p_bounds) * 1000
                    ftol = 2.2e-9  # ftol = factr * numpy.finfo(float).eps where factr = 1e7
                    gtol = 1e-5
                    eps = 1e-8
                    maxls = 20
                else:
                    # Sample random hyperparameters
                    maxcor = int(np.random.randint(*maxcor_range))
                    maxfun = int(np.random.randint(*maxfun_range))
                    maxiter = int(np.random.randint(*maxiter_range))
                    ftol = float(np.random.uniform(*ftol_range))
                    gtol = float(np.random.uniform(*gtol_range))
                    eps = float(np.random.uniform(*eps_range))
                    maxls = int(np.random.randint(*maxls_range))

                # Run optimization with hyperparameters
                tune_res = sp.optimize.minimize(optimization_func, x0=initial_guess,
                                                args=(rr_model, y_obs, initial_conditions,
                                                      initial_conditions_scale, buffer_concentration_scale,
                                                      simulation_kwargs, p_lb, p_ub),
                                                method='L-BFGS-B',
                                                bounds=p_bounds,  # necessary for L-BFGS-B
                                                options={"maxcor": maxcor, "maxfun": maxfun, "maxiter": maxiter, "ftol": ftol,
                                                         "gtol": gtol, "eps": eps, "maxls": maxls})

                # Store results
                current_hyperparams = {
                    "maxcor": maxcor,
                    "maxfun": maxfun,
                    "maxiter": maxiter,
                    "ftol": ftol,
                    "gtol": gtol,
                    "eps": eps,
                    "maxls": maxls
                }

            elif mle_method == "COBYLA":
                # Define possible hyperparameter ranges
                rhobeg_range = (0.01, 1.0)
                tol_range = (1e-10, 1e-4)
                maxiter_range = (len(p_bounds) * 1000, len(p_bounds) * 10000)
                catol_range = (1e-10, 1e-4)

                if i == 0:
                    # Hard-coded default values
                    rhobeg = 0.1
                    tol = 1e-6
                    maxiter = len(p_bounds) * 1000
                    catol = 1e-6
                else:
                    # Sample random hyperparameters
                    rhobeg = float(np.random.uniform(*rhobeg_range))
                    tol = float(np.random.uniform(*tol_range))
                    maxiter = int(np.random.randint(*maxiter_range))
                    catol = float(np.random.uniform(*catol_range))

                # Run optimization with hyperparameters
                tune_res = sp.optimize.minimize(optimization_func, x0=initial_guess,
                                                args=(rr_model, y_obs, initial_conditions,
                                                      initial_conditions_scale, buffer_concentration_scale,
                                                      simulation_kwargs, p_lb, p_ub),
                                                method='COBYLA',
                                                options={"rhobeg": rhobeg, "tol": tol, "maxiter": maxiter, "catol": catol, "disp": False})  # disp=False suppresses convergence messages

                # Store results
                current_hyperparams = {
                    "rhobeg": rhobeg,
                    "tol": tol,
                    "maxiter": maxiter,
                    "catol": catol
                }

            elif mle_method == "direct":
                # Define possible hyperparameter ranges
                eps_range = (1e-6, 1e-2)
                maxfun_range = (10000, 100000)
                maxiter_range = (500, 2000)
                locally_biased_choices = [True, False]
                f_min_rtol_range = (1e-6, 1e-2)
                vol_tol_range = (1e-20, 1e-10)
                len_tol_range = (1e-8, 1e-4)

                if i == 0:
                    # Use the default values from the sp.optimize.direct function
                    signature = inspect.signature(sp.optimize.direct)
                    parameters = signature.parameters
                    eps = parameters["eps"].default
                    maxfun = parameters["maxfun"].default
                    maxiter = parameters["maxiter"].default
                    locally_biased = parameters["locally_biased"].default
                    f_min_rtol = parameters["f_min_rtol"].default
                    vol_tol = parameters["vol_tol"].default
                    len_tol = parameters["len_tol"].default
                else:
                    # Sample random hyperparameters
                    eps = float(np.random.uniform(*eps_range))
                    maxfun = int(np.random.randint(*maxfun_range))
                    maxiter = int(np.random.randint(*maxiter_range))
                    locally_biased = bool(np.random.choice(locally_biased_choices))
                    f_min_rtol = float(np.random.uniform(*f_min_rtol_range))
                    vol_tol = float(np.random.uniform(*vol_tol_range))
                    len_tol = float(np.random.uniform(*len_tol_range))

                # Run optimization with hyperparameters
                tune_res = sp.optimize.direct(optimization_func, p_bounds,
                                              args=(rr_model, y_obs, initial_conditions,
                                                    initial_conditions_scale, buffer_concentration_scale,
                                                    simulation_kwargs, p_lb, p_ub),
                                              eps=eps, maxfun=maxfun, maxiter=maxiter, locally_biased=bool(locally_biased),
                                              f_min_rtol=f_min_rtol, vol_tol=vol_tol, len_tol=len_tol)

                # Store results
                current_hyperparams = {
                    "eps": eps,
                    "maxfun": maxfun,
                    "maxiter": maxiter,
                    "locally_biased": bool(locally_biased),
                    "f_min_rtol": f_min_rtol,
                    "vol_tol": vol_tol,
                    "len_tol": len_tol
                }

            elif mle_method == "SLSQP":
                # Define possible hyperparameter ranges
                ftol_range = (1e-10, 1e-4)
                eps_range = (1e-10, 1e-4)
                maxiter_range = (len(p_bounds) * 1000, len(p_bounds) * 10000)

                if i == 0:
                    # Hard-coded default values
                    ftol = 1e-6
                    eps = 1e-8
                    maxiter = len(p_bounds) * 1000
                else:
                    # Sample random hyperparameters
                    ftol = float(np.random.uniform(*ftol_range))
                    eps = float(np.random.uniform(*eps_range))
                    maxiter = int(np.random.randint(*maxiter_range))

                # Run optimization with hyperparameters
                tune_res = sp.optimize.minimize(optimization_func, x0=initial_guess,
                                                args=(rr_model, y_obs, initial_conditions,
                                                      initial_conditions_scale, buffer_concentration_scale,
                                                      simulation_kwargs, p_lb, p_ub),
                                                method='SLSQP',
                                                bounds=p_bounds,
                                                options={"ftol": ftol, "eps": eps, "maxiter": maxiter, "disp": False})

                # Store results
                current_hyperparams = {
                    "ftol": ftol,
                    "eps": eps,
                    "maxiter": maxiter,
                }

            else:
                raise ValueError(f"invalid MLE method: {mle_method}")

            tuning_results.append({
                "hyperparameters": current_hyperparams,
                "function_value": float(tune_res.fun),  # convert numpy float to Python float if needed
                "x_values": [v.item() if isinstance(v, np.generic) else v for v in tune_res.x],  # convert numpy array to Python list
                "success": bool(tune_res.success),
                "func_calls": int(tune_res.nfev),  # convert numpy int to Python int if needed
                "duration": time.time() - t0
            })

            if tune_res.fun < best_fun_value:
                best_fun_value = tune_res.fun
                best_hyperparams = current_hyperparams
                best_result = tune_res

            logger.info(f"Hyperparameters: {current_hyperparams}")
            logger.info(f"Func value: {tune_res.fun}")
            logger.info(f"Time (s): {time.time() - t0}\n")

        logger.info(f"Best function value: {best_fun_value}")
        logger.info(f"Best hyperparameters: {best_hyperparams}\n")

        with open(os.path.join(output_dir, 'tuning_results.yaml'), 'w') as file:
            yaml.dump(tuning_results, file, sort_keys=False)

    # start MLE optimization runs
    for i in tqdm(range(n_trials)):
        logger.info(f"starting optimization run {i}")
        t0 = time.time()
        initial_guess = uf.get_p0(p_bounds, 1)[0]
        logger.info(f"initial guess: {initial_guess}")

        if mle_method == "differential_evolution":
            if tuning:
                res = sp.optimize.differential_evolution(optimization_func, p_bounds, args=tuple(optimization_func_args), **best_hyperparams)
            else:
                res = sp.optimize.differential_evolution(optimization_func, p_bounds, args=tuple(optimization_func_args))
        elif mle_method == "basinhopping":
            minimizer_kwargs = {"args": tuple(optimization_func_args), "bounds": p_bounds}
            if tuning:
                res = sp.optimize.basinhopping(optimization_func, initial_guess, minimizer_kwargs=minimizer_kwargs, **best_hyperparams)
            else:
                res = sp.optimize.basinhopping(optimization_func, initial_guess, minimizer_kwargs=minimizer_kwargs)
        elif mle_method == "dual_annealing":
            if tuning:
                res = sp.optimize.dual_annealing(optimization_func, p_bounds, args=tuple(optimization_func_args), **best_hyperparams)
            else:
                res = sp.optimize.dual_annealing(optimization_func, p_bounds, args=tuple(optimization_func_args))
        elif mle_method == "shgo":
            if tuning:
                # shgo takes most tuned values via `options`, not as keyword arguments
                shgo_options = {k: best_hyperparams[k] for k in ("maxiter", "maxev", "maxtime",
                                                                 "minimize_every_iter", "local_iter", "infty_constraints")}
                res = sp.optimize.shgo(optimization_func, p_bounds, args=tuple(optimization_func_args),
                                       n=best_hyperparams["n"], iters=best_hyperparams["iters"],
                                       sampling_method=best_hyperparams["sampling_method"], options=shgo_options)
            else:
                res = sp.optimize.shgo(optimization_func, p_bounds, args=tuple(optimization_func_args))
        elif mle_method == "Nelder-Mead":
            if tuning:
                res = sp.optimize.minimize(optimization_func, initial_guess, args=tuple(optimization_func_args), bounds=p_bounds, method="Nelder-Mead", options=best_hyperparams)
            else:
                res = sp.optimize.minimize(optimization_func, initial_guess, args=tuple(optimization_func_args), bounds=p_bounds, method="Nelder-Mead")
        elif mle_method == "Powell":
            if tuning:
                res = sp.optimize.minimize(optimization_func, initial_guess, args=tuple(optimization_func_args), bounds=p_bounds, method="Powell", options=best_hyperparams)
            else:
                res = sp.optimize.minimize(optimization_func, initial_guess, args=tuple(optimization_func_args), bounds=p_bounds, method="Powell")
        elif mle_method == "CG":
            if tuning:
                res = sp.optimize.minimize(optimization_func, initial_guess, args=tuple(optimization_func_args), method="CG", options=best_hyperparams)
            else:
                res = sp.optimize.minimize(optimization_func, initial_guess, args=tuple(optimization_func_args), method="CG")
        elif mle_method == "L-BFGS-B":
            if tuning:
                res = sp.optimize.minimize(optimization_func, initial_guess, args=tuple(optimization_func_args), bounds=p_bounds, method="L-BFGS-B", options=best_hyperparams)
            else:
                res = sp.optimize.minimize(optimization_func, initial_guess, args=tuple(optimization_func_args), bounds=p_bounds, method="L-BFGS-B")
        elif mle_method == "COBYLA":
            if tuning:
                res = sp.optimize.minimize(optimization_func, initial_guess, args=tuple(optimization_func_args), method="COBYLA", options=best_hyperparams)
            else:
                res = sp.optimize.minimize(optimization_func, initial_guess, args=tuple(optimization_func_args), method="COBYLA")
        elif mle_method == "direct":
            if tuning:
                res = sp.optimize.direct(optimization_func, p_bounds, args=tuple(optimization_func_args), **best_hyperparams)
            else:
                res = sp.optimize.direct(optimization_func, p_bounds, args=tuple(optimization_func_args))
        elif mle_method == "SLSQP":
            if tuning:
                res = sp.optimize.minimize(optimization_func, initial_guess, args=tuple(optimization_func_args), method="SLSQP", options=best_hyperparams)
            else:
                res = sp.optimize.minimize(optimization_func, initial_guess, args=tuple(optimization_func_args), method="SLSQP")
        else:
            raise ValueError(f"invalid MLE method: {mle_method}")

        dt = time.time() - t0
        logger.info(f"method: {mle_method}")
        logger.info(f"x: {res.x}")
        logger.info(f"func: {res.fun}")
        logger.info(f"success: {res.success}")
        logger.info(f"nfev: {res.nfev}")
        logger.info(f"time (s): {dt}\n")
        x.append(res.x)
        y.append(res.fun)
        s.append(res.success)
        nf.append(res.nfev)
        t.append(dt)

    ##### Save and plot the data #####
    results_list = [x, y, s, nf, t]

    y_data_list = [y, nf, t]
    fig, axes = plt.subplots(len(y_data_list), 1, figsize=(10, 12), sharex=True)
    trial_idx = [int(i + 1) for i in range(n_trials)]
    y_labels = ['log-likelihood', 'n likelihood evaluations', 'runtime (s)']
    plt.suptitle('Maximum likelihood optimization runs')

    for i in range(len(y_data_list)):
        ax = axes[i]
        ax.set_ylabel(y_labels[i])
        ax.yaxis.set_label_coords(-0.1, 0.5)
        if y_labels[i] == 'log-likelihood':
            ax.axhline(log_like_nom, color='red')
            ax.plot(trial_idx, [-1 * j for j in y_data_list[i]], '-o')  # negate the minimized NLL to plot log-likelihood
        else:
            ax.plot(trial_idx, y_data_list[i], '-o')
    axes[-1].set_xlabel("replica number")
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, 'optimization_plot.png'))

    # Convert each NumPy value in results_list to a standard Python type for YAML
    results_dict = {
        'run': output_fname,
        'method': mle_method,
        'x': [arr.tolist() if isinstance(arr, np.ndarray) else arr for arr in results_list[0]],
        'fun': [float(v) for v in results_list[1]],
        'success': [bool(v) for v in results_list[2]],
        'nfev': [int(v) for v in results_list[3]],
        't': [float(v) for v in results_list[4]]
    }

    with open(os.path.join(output_dir, 'optimization_results.yaml'), 'w') as file:
        yaml.dump(results_dict, file, sort_keys=False)


if __name__ == '__main__':

    ##### Adjust this if needed #####
    config_fname = "./example/antiporter_1_1_12D_cycle1_config.yaml"
    run_optimizer(config_fname)
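The `# TODO` in the tuning section flags the per-method blocks as repetitive. One possible refactor, sketched here under the assumption that the methods differ mainly in how their hyperparameters are sampled, is a dispatch table mapping each method name to a sampler function. The names `sample_de_hyperparams`, `sample_nelder_mead_hyperparams`, `HYPERPARAM_SAMPLERS`, and `sample_hyperparams` are hypothetical and not part of the repository:

import numpy as np


def sample_de_hyperparams(rng):
    # mirrors a subset of the differential_evolution ranges used in the tuning loop above;
    # returns plain Python types so the dict serializes cleanly with yaml.dump
    return {
        "strategy": str(rng.choice(["best1bin", "best1exp", "rand1bin"])),
        "popsize": int(rng.integers(25, 50)),
        "mutation": float(rng.uniform(0, 1.9)),
        "recombination": float(rng.uniform(0, 1)),
        "tol": float(rng.uniform(1e-5, 1e-3)),
        "maxiter": int(rng.integers(100, 1000)),
    }


def sample_nelder_mead_hyperparams(rng):
    # mirrors the Nelder-Mead ranges used in the tuning loop above
    return {
        "maxiter": int(rng.integers(1000, 10000)),
        "maxfev": int(rng.integers(1000, 10000)),
        "xatol": float(rng.uniform(1e-8, 1e-4)),
        "fatol": float(rng.uniform(1e-8, 1e-4)),
        "adaptive": bool(rng.choice([True, False])),
    }


HYPERPARAM_SAMPLERS = {
    "differential_evolution": sample_de_hyperparams,
    "Nelder-Mead": sample_nelder_mead_hyperparams,
    # ... one sampler per supported method
}


def sample_hyperparams(mle_method, rng=None):
    """Return a plain-Python dict of sampled hyperparameters for the given method."""
    rng = rng if rng is not None else np.random.default_rng()
    try:
        return HYPERPARAM_SAMPLERS[mle_method](rng)
    except KeyError:
        raise ValueError(f"invalid MLE method: {mle_method}")

With a table like this, each tuning iteration would reduce to one `sample_hyperparams(mle_method)` call followed by a single optimizer invocation, instead of one hand-written branch per method.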
+233 niter = np.random.randint(*niter_range) +234 T = np.random.uniform(*T_range) +235 stepsize = np.random.uniform(*stepsize_range) +236 interval = np.random.randint(*interval_range) +237 niter_success = np.random.randint(*niter_success_range) +238 +239 # Run optimization with hyperparameters +240 minimizer_kwargs = {"args": (rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub), "bounds": p_bounds} +241 tune_res = sp.optimize.basinhopping(optimization_func, initial_guess, minimizer_kwargs=minimizer_kwargs, niter=niter, T=T, stepsize=stepsize, interval=interval, niter_success=niter_success) +242 +243 # Store results +244 current_hyperparams = { +245 "niter": niter, +246 "T": T, +247 "stepsize": stepsize, +248 "interval": interval, +249 "niter_success": niter_success +250 } +251 +252 elif mle_method == "dual_annealing": +253 # Define possible hyperparameter ranges +254 initial_temp_range = (0.01, 50000) +255 restart_temp_ratio_range = (0, 1) +256 visit_range = (1, 3) +257 accept_range = (-1e4, -5) +258 maxiter_range = (100, 1000) +259 maxfun_range = (1000, 1e7) +260 no_local_search_choices = [True, False] +261 +262 if i == 0: +263 # Use default hyperparameters for the first iteration +264 signature = inspect.signature(sp.optimize.dual_annealing) +265 parameters = signature.parameters +266 initial_temp = parameters["initial_temp"].default +267 restart_temp_ratio = parameters["restart_temp_ratio"].default +268 visit = parameters["visit"].default +269 accept = parameters["accept"].default +270 maxiter = parameters["maxiter"].default +271 maxfun = parameters["maxfun"].default +272 no_local_search = parameters["no_local_search"].default +273 else: +274 # Sample random hyperparameters +275 initial_temp = np.random.uniform(*initial_temp_range) +276 restart_temp_ratio = np.random.uniform(*restart_temp_ratio_range) +277 visit = np.random.uniform(*visit_range) +278 accept = np.random.uniform(*accept_range) +279 maxiter = np.random.randint(*maxiter_range) +280 maxfun = np.random.randint(*maxfun_range) +281 no_local_search = np.random.choice(no_local_search_choices) +282 +283 # Run optimization with hyperparameters +284 tune_res = sp.optimize.dual_annealing(optimization_func, bounds=p_bounds, +285 args=(rr_model, y_obs, initial_conditions, +286 initial_conditions_scale, buffer_concentration_scale, +287 simulation_kwargs, p_lb, p_ub), +288 initial_temp=initial_temp, restart_temp_ratio=restart_temp_ratio, +289 visit=visit, accept=accept, maxiter=maxiter, maxfun=maxfun, +290 no_local_search=no_local_search) +291 # Store results +292 current_hyperparams = { +293 "initial_temp": initial_temp, +294 "restart_temp_ratio": restart_temp_ratio, +295 "visit": visit, +296 "accept": accept, +297 "maxiter": maxiter, +298 "maxfun": maxfun, +299 "no_local_search": bool(no_local_search) +300 } +301 +302 elif mle_method == "shgo": +303 # Define possible hyperparameter ranges +304 n_range = (25, 500) +305 iters_range = (1, 10) +306 sampling_methods = ["simplicial", "halton", "sobol"] +307 maxiter_range = (100, 1000) +308 maxev_range = (100, 10000) +309 maxtime_range = (0.1, 10) +310 minimize_every_iter_choices = [True, False] +311 local_iter_range = (10, 100) +312 infty_constraints_choices = [True, False] +313 +314 if i == 0: +315 # Use default hyperparameters for the first iteration +316 signature = inspect.signature(sp.optimize.shgo) +317 parameters = signature.parameters +318 n = parameters["n"].default +319 iters = parameters["iters"].default +320 
sampling_method = parameters["sampling_method"].default +321 +322 options_defaults = parameters["options"].default +323 if options_defaults is None: +324 options_defaults = {} +325 +326 maxiter = options_defaults.get("maxiter", maxiter_range[0]) +327 maxev = options_defaults.get("maxev", maxev_range[0]) +328 maxtime = options_defaults.get("maxtime", maxtime_range[0]) +329 minimize_every_iter = options_defaults.get("minimize_every_iter", True) +330 local_iter = options_defaults.get("local_iter", local_iter_range[0]) +331 infty_constraints = options_defaults.get("infty_constraints", True) +332 else: +333 # Sample random hyperparameters +334 n = np.random.randint(*n_range) +335 iters = np.random.randint(*iters_range) +336 sampling_method = np.random.choice(sampling_methods) +337 maxiter = np.random.randint(*maxiter_range) +338 maxev = np.random.randint(*maxev_range) +339 maxtime = np.random.uniform(*maxtime_range) +340 minimize_every_iter = np.random.choice(minimize_every_iter_choices) +341 local_iter = np.random.randint(*local_iter_range) +342 infty_constraints = np.random.choice(infty_constraints_choices) +343 +344 # Run optimization with hyperparameters +345 tune_res = sp.optimize.shgo(optimization_func, bounds=p_bounds, +346 args=(rr_model, y_obs, initial_conditions, +347 initial_conditions_scale, buffer_concentration_scale, +348 simulation_kwargs, p_lb, p_ub), +349 n=n, iters=iters, sampling_method=sampling_method, +350 options={"maxiter": maxiter, "maxev": maxev, "maxtime": maxtime, +351 "minimize_every_iter": minimize_every_iter, "local_iter": local_iter, +352 "infty_constraints": infty_constraints}) +353 +354 # Store results +355 current_hyperparams = { +356 "n": n, +357 "iters": iters, +358 "sampling_method": sampling_method, +359 "maxiter": maxiter, +360 "maxev": maxev, +361 "maxtime": maxtime, +362 "minimize_every_iter": bool(minimize_every_iter), +363 "local_iter": local_iter, +364 "infty_constraints": bool(infty_constraints) +365 } +366 +367 elif mle_method == "Nelder-Mead": +368 # Define possible hyperparameter ranges +369 maxiter_range = (1000, 10000) +370 maxfev_range = (1000, 10000) +371 xatol_range = (1e-8, 1e-4) +372 fatol_range = (1e-8, 1e-4) +373 adaptive_choices = [True, False] +374 +375 if i == 0: # Use default hyperparameters for the first iteration +376 # Hard-coded default values based on scipy documentation / common values +377 maxiter = len(p_bounds) * 200 # N * 200, where N is the number of variables +378 maxfev = len(p_bounds) * 200 # Same as maxiter by default +379 xatol = 0.0001 +380 fatol = 0.0001 +381 adaptive = True +382 else: +383 # Sample random hyperparameters +384 maxiter = np.random.randint(*maxiter_range) +385 maxfev = np.random.randint(*maxfev_range) +386 xatol = np.random.uniform(*xatol_range) +387 fatol = np.random.uniform(*fatol_range) +388 adaptive = np.random.choice(adaptive_choices) +389 +390 # Run optimization with hyperparameters +391 tune_res = sp.optimize.minimize(optimization_func, x0=initial_guess, args=(rr_model, y_obs, initial_conditions, +392 initial_conditions_scale, buffer_concentration_scale, +393 simulation_kwargs, p_lb, p_ub), +394 method='Nelder-Mead', +395 options={"maxiter": maxiter, "maxfev": maxfev, "xatol": xatol, "fatol": fatol, "adaptive": adaptive}) +396 +397 +398 # Store results +399 current_hyperparams = { +400 "maxiter": maxiter, +401 "maxfev": maxfev, +402 "xatol": xatol, +403 "fatol": fatol, +404 "adaptive": bool(adaptive) +405 } +406 +407 elif mle_method == "Powell": +408 # Define possible hyperparameter ranges +409 
maxiter_range = (len(p_bounds) * 1000, len(p_bounds) * 10000) +410 maxfev_range = (len(p_bounds) * 1000, len(p_bounds) * 10000) +411 xtol_range = (1e-8, 1e-4) +412 ftol_range = (1e-8, 1e-4) +413 +414 if i == 0: # Use default hyperparameters for the first iteration +415 # Hard-coded default values based on scipy documentation / common values +416 maxiter = len(p_bounds) * 1000 # N * 1000, where N is the number of variables +417 maxfev = len(p_bounds) * 1000 # Same as maxiter by default +418 xtol = 0.0001 # Placeholder value based on common settings +419 ftol = 0.0001 # Placeholder value based on common settings +420 else: +421 # Sample random hyperparameters +422 maxiter = np.random.randint(*maxiter_range) +423 maxfev = np.random.randint(*maxfev_range) +424 xtol = np.random.uniform(*xtol_range) +425 ftol = np.random.uniform(*ftol_range) +426 +427 # Run optimization with hyperparameters +428 tune_res = sp.optimize.minimize(optimization_func, x0=initial_guess, args=(rr_model, y_obs, initial_conditions, +429 initial_conditions_scale, buffer_concentration_scale, +430 simulation_kwargs, p_lb, p_ub), +431 method='Powell', +432 options={"maxiter": maxiter, "maxfev": maxfev, "xtol": xtol, "ftol": ftol}) +433 +434 # Store results +435 current_hyperparams = { +436 "maxiter": maxiter, +437 "maxfev": maxfev, +438 "xtol": xtol, +439 "ftol": ftol +440 } +441 +442 +443 elif mle_method == "CG": +444 # Define possible hyperparameter ranges +445 maxiter_range = (len(p_bounds) * 1000, len(p_bounds) * 10000) +446 gtol_range = (1e-8, 1e-4) +447 norm_choices = [np.inf, -np.inf, 1, 2] +448 eps_range = (1e-8, 1e-4) +449 +450 if i == 0: # Use default hyperparameters for the first iteration +451 # Hard-coded default values +452 maxiter = len(p_bounds) * 1000 +453 gtol = 0.0001 +454 norm = np.inf +455 eps = 0.0001 +456 else: +457 # Sample random hyperparameters +458 maxiter = np.random.randint(*maxiter_range) +459 gtol = np.random.uniform(*gtol_range) +460 norm = np.random.choice(norm_choices) +461 eps = np.random.uniform(*eps_range) +462 +463 # Run optimization with hyperparameters +464 tune_res = sp.optimize.minimize(optimization_func, x0=initial_guess, args=(rr_model, y_obs, initial_conditions, +465 initial_conditions_scale, buffer_concentration_scale, +466 simulation_kwargs, p_lb, p_ub), +467 method='CG', +468 options={"maxiter": maxiter, "gtol": gtol, "norm": norm, "eps": eps}) +469 +470 if norm == np.inf: +471 norm_to_save = float('inf') +472 elif norm == -np.inf: +473 norm_to_save = float('-inf') +474 else: +475 norm_to_save = float(norm) +476 +477 # Store results +478 current_hyperparams = { +479 "maxiter": maxiter, +480 "gtol": gtol, +481 "norm": norm_to_save, +482 "eps": eps +483 } +484 +485 elif mle_method == "L-BFGS-B": +486 # Define possible hyperparameter ranges +487 maxcor_range = (5, 20) +488 maxfun_range = (len(p_bounds) * 1000, len(p_bounds) * 10000) +489 maxiter_range = (len(p_bounds) * 1000, len(p_bounds) * 10000) +490 ftol_range = (1e-10, 1e-4) +491 gtol_range = (1e-10, 1e-4) +492 eps_range = (1e-10, 1e-4) +493 maxls_range = (10, 100) +494 +495 if i == 0: # Use default hyperparameters for the first iteration +496 # Hard-coded default values +497 maxcor = 10 +498 maxfun = len(p_bounds) * 1000 +499 maxiter = len(p_bounds) * 1000 +500 ftol = 2.2e-9 # ftol = factr * numpy.finfo(float).eps where factr = 1e7. 
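# Background on the hard-coded ftol above: the modern scipy.optimize.minimize
# interface exposes ftol directly in `options`, while the legacy fmin_l_bfgs_b
# interface took `factr` instead, with ftol = factr * numpy.finfo(float).eps.
# The default factr = 1e7 therefore corresponds to ftol ≈ 2.2e-9.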
+501 gtol = 1e-5 +502 eps = 1e-8 +503 maxls = 20 +504 else: +505 # Sample random hyperparameters +506 maxcor = np.random.randint(*maxcor_range) +507 maxfun = np.random.randint(*maxfun_range) +508 maxiter = np.random.randint(*maxiter_range) +509 ftol = np.random.uniform(*ftol_range) +510 gtol = np.random.uniform(*gtol_range) +511 eps = np.random.uniform(*eps_range) +512 maxls = np.random.randint(*maxls_range) +513 +514 # Run optimization with hyperparameters +515 tune_res = sp.optimize.minimize(optimization_func, x0=initial_guess, args=(rr_model, y_obs, initial_conditions, +516 initial_conditions_scale, buffer_concentration_scale, +517 simulation_kwargs, p_lb, p_ub), +518 method='L-BFGS-B', +519 bounds=p_bounds, # Necessary for L-BFGS-B +520 options={"maxcor": maxcor, "maxfun": maxfun, "maxiter": maxiter, "ftol": ftol, +521 "gtol": gtol, "eps": eps, "maxls": maxls}) +522 +523 # Store results +524 current_hyperparams = { +525 "maxcor": maxcor, +526 "maxfun": maxfun, +527 "maxiter": maxiter, +528 "ftol": ftol, +529 "gtol": gtol, +530 "eps": eps, +531 "maxls": maxls +532 } +533 +534 elif mle_method == "COBYLA": +535 # Define possible hyperparameter ranges +536 rhobeg_range = (0.01, 1.0) +537 tol_range = (1e-10, 1e-4) +538 maxiter_range = (len(p_bounds) * 1000, len(p_bounds) * 10000) +539 catol_range = (1e-10, 1e-4) +540 +541 if i == 0: # Use default hyperparameters for the first iteration +542 # Hard-coded default values +543 rhobeg = 0.1 +544 tol = 1e-6 +545 maxiter = len(p_bounds) * 1000 +546 catol = 1e-6 +547 else: +548 # Sample random hyperparameters +549 rhobeg = np.random.uniform(*rhobeg_range) +550 tol = np.random.uniform(*tol_range) +551 maxiter = np.random.randint(*maxiter_range) +552 catol = np.random.uniform(*catol_range) +553 +554 # Run optimization with hyperparameters +555 tune_res = sp.optimize.minimize(optimization_func, x0=initial_guess, args=(rr_model, y_obs, initial_conditions, +556 initial_conditions_scale, buffer_concentration_scale, +557 simulation_kwargs, p_lb, p_ub), +558 method='COBYLA', +559 options={"rhobeg": rhobeg, "tol": tol, "maxiter": maxiter, "catol": catol, "disp": False}) # disp set to True for convergence messages +560 +561 # Store results +562 current_hyperparams = { +563 "rhobeg": rhobeg, +564 "tol": tol, +565 "maxiter": maxiter, +566 "catol": catol +567 } +568 +569 +570 elif mle_method == "direct": +571 # Define possible hyperparameter ranges +572 eps_range = (1e-6, 1e-2) +573 maxfun_range = (10000, 100000) +574 maxiter_range = (500, 2000) +575 locally_biased_choices = [True, False] +576 f_min_rtol_range = (1e-6, 1e-2) +577 vol_tol_range = (1e-20, 1e-10) +578 len_tol_range = (1e-8, 1e-4) +579 +580 if i == 0: # Use default hyperparameters for the first iteration +581 # Get the default values from the sp.optimize.direct function +582 signature = inspect.signature(sp.optimize.direct) +583 parameters = signature.parameters +584 +585 default_eps = parameters["eps"].default +586 default_maxfun = parameters["maxfun"].default +587 default_maxiter = parameters["maxiter"].default +588 default_locally_biased = parameters["locally_biased"].default +589 default_f_min_rtol = parameters["f_min_rtol"].default +590 default_vol_tol = parameters["vol_tol"].default +591 default_len_tol = parameters["len_tol"].default +592 eps = default_eps +593 maxfun = default_maxfun +594 maxiter = default_maxiter +595 locally_biased = default_locally_biased +596 f_min_rtol = default_f_min_rtol +597 vol_tol = default_vol_tol +598 len_tol = default_len_tol +599 else: +600 # Sample random 
hyperparameters +601 eps = np.random.uniform(*eps_range) +602 maxfun = np.random.randint(*maxfun_range) +603 maxiter = np.random.randint(*maxiter_range) +604 locally_biased = np.random.choice(locally_biased_choices) +605 f_min_rtol = np.random.uniform(*f_min_rtol_range) +606 vol_tol = np.random.uniform(*vol_tol_range) +607 len_tol = np.random.uniform(*len_tol_range) +608 +609 # Run optimization with hyperparameters +610 tune_res = sp.optimize.direct(optimization_func, p_bounds, args=(rr_model, y_obs, initial_conditions, +611 initial_conditions_scale, buffer_concentration_scale, +612 simulation_kwargs, p_lb, p_ub), +613 eps=eps, maxfun=maxfun, maxiter=maxiter, locally_biased=bool(locally_biased), +614 f_min_rtol=f_min_rtol, vol_tol=vol_tol, len_tol=len_tol) +615 +616 # Store results +617 current_hyperparams = { +618 "eps": eps, +619 "maxfun": maxfun, +620 "maxiter": maxiter, +621 "locally_biased": bool(locally_biased), +622 "f_min_rtol": f_min_rtol, +623 "vol_tol": vol_tol, +624 "len_tol": len_tol +625 } +626 +627 elif mle_method == "SLSQP": +628 # Define possible hyperparameter ranges +629 ftol_range = (1e-10, 1e-4) +630 eps_range = (1e-10, 1e-4) +631 maxiter_range = (len(p_bounds) * 1000, len(p_bounds) * 10000) +632 +633 if i == 0: # Use default hyperparameters for the first iteration +634 # Hard-coded default values +635 ftol = 1e-6 +636 eps = 1e-8 +637 maxiter = len(p_bounds) * 1000 +638 +639 else: +640 # Sample random hyperparameters +641 ftol = np.random.uniform(*ftol_range) +642 eps = np.random.uniform(*eps_range) +643 maxiter = np.random.randint(*maxiter_range) +644 +645 # Run optimization with hyperparameters +646 tune_res = sp.optimize.minimize(optimization_func, x0=initial_guess, args=(rr_model, y_obs, initial_conditions, +647 initial_conditions_scale, buffer_concentration_scale, +648 simulation_kwargs, p_lb, p_ub), +649 method='SLSQP', +650 bounds=p_bounds, +651 options={"ftol": ftol, "eps": eps, "maxiter": maxiter, "disp": False,}) +652 +653 # Store results +654 current_hyperparams = { +655 "ftol": ftol, +656 "eps": eps, +657 "maxiter": maxiter, +658 } +659 +660 else: +661 print('error: invalid MLE algorithm.') +662 assert(1==0) +663 +664 tuning_results.append({ +665 "hyperparameters": current_hyperparams, +666 "function_value": float(tune_res.fun), # convert numpy float to python float if needed +667 "x_values": [i.item() if isinstance(i, np.generic) else i for i in tune_res.x], # convert numpy array to python list +668 "success": bool(tune_res.success), +669 "func_calls": int(tune_res.nfev), # convert numpy int to python int if needed +670 "duration": time.time() - t0 +671 }) +672 +673 if tune_res.fun < best_fun_value: +674 best_fun_value = tune_res.fun +675 best_hyperparams = current_hyperparams +676 best_result = tune_res +677 +678 logger.info(f"Hyperparameters: {current_hyperparams}") +679 logger.info(f"Func value: {tune_res.fun}") +680 logger.info(f"Time (s): {time.time() - t0}\n") +681 +682 +683 +684 logger.info(f"Best function value: {best_fun_value}") +685 logger.info(f"Best hyperparameters: {best_hyperparams}\n") +686 +687 with open(os.path.join(output_dir, f'tuning_results.yaml'), 'w') as file: +688 yaml.dump(tuning_results, file, sort_keys=False) +689 +690 +691 # start MLE optimization runs +692 for i in tqdm(range(n_trials)): +693 logger.info(f"starting optimization run {i}") +694 t0 = time.time() +695 initial_guess = uf.get_p0(p_bounds,1)[0] +696 logger.info(f"initial guess: {initial_guess}") +697 +698 if mle_method == "differential_evolution": +699 if tuning: 
+700 res = sp.optimize.differential_evolution(optimization_func, p_bounds, args=optimization_func_args, **best_hyperparams) +701 else: +702 res = sp.optimize.differential_evolution(optimization_func, p_bounds, args=optimization_func_args) +703 elif mle_method == "basinhopping": +704 if tuning: +705 minimizer_kwargs = {"args": (rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub), "bounds": p_bounds} +706 res = sp.optimize.basinhopping(optimization_func, initial_guess, minimizer_kwargs=minimizer_kwargs, **best_hyperparams) +707 else: +708 minimizer_kwargs = {"args": (rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub), "bounds": p_bounds} +709 res = sp.optimize.basinhopping(optimization_func, initial_guess, minimizer_kwargs=minimizer_kwargs) +710 elif mle_method == "dual_annealing": +711 if tuning: +712 res = sp.optimize.dual_annealing(optimization_func, p_bounds, args=optimization_func_args, **best_hyperparams) +713 else: +714 res = sp.optimize.dual_annealing(optimization_func, p_bounds, args=optimization_func_args) +715 elif mle_method == "shgo": +716 if tuning: +717 res = sp.optimize.shgo(optimization_func, p_bounds, args=optimization_func_args, **best_hyperparams) +718 else: +719 res = sp.optimize.shgo(optimization_func, p_bounds, args=optimization_func_args) +720 elif mle_method == "Nelder-Mead": +721 if tuning: +722 res = sp.optimize.minimize(optimization_func, initial_guess, args=(rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub), bounds=p_bounds, method="Nelder-Mead", options=best_hyperparams) +723 else: +724 res = sp.optimize.minimize(optimization_func, initial_guess, args=(rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub), bounds=p_bounds, method="Nelder-Mead") +725 elif mle_method == "Powell": +726 if tuning: +727 res = sp.optimize.minimize(optimization_func, initial_guess, args=(rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub), bounds=p_bounds, method="Powell", options=best_hyperparams) +728 else: +729 res = sp.optimize.minimize(optimization_func, initial_guess, args=(rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub), bounds=p_bounds, method="Powell") +730 elif mle_method == "CG": +731 if tuning: +732 res = sp.optimize.minimize(optimization_func, initial_guess, args=(rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub), method="CG", options=best_hyperparams) +733 else: +734 res = sp.optimize.minimize(optimization_func, initial_guess, args=(rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub), method="CG", ) +735 elif mle_method == "L-BFGS-B": +736 if tuning: +737 res = sp.optimize.minimize(optimization_func, initial_guess, args=(rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub), bounds=p_bounds, method="L-BFGS-B", options=best_hyperparams) +738 else: +739 res = sp.optimize.minimize(optimization_func, initial_guess, args=(rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub), 
bounds=p_bounds, method="L-BFGS-B") +740 elif mle_method == "COBYLA": +741 if tuning: +742 res = sp.optimize.minimize(optimization_func, initial_guess, args=(rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub), method="COBYLA", options=best_hyperparams) +743 else: +744 res = sp.optimize.minimize(optimization_func, initial_guess, args=(rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub), method="COBYLA") +745 elif mle_method == "direct": +746 if tuning: +747 res = sp.optimize.direct(optimization_func, p_bounds, args=(rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub), **best_hyperparams) +748 else: +749 res = sp.optimize.direct(optimization_func, p_bounds, args=(rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub)) +750 elif mle_method == "SLSQP": +751 if tuning: +752 res = sp.optimize.minimize(optimization_func, initial_guess, args=(rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub), method="SLSQP", options=best_hyperparams) +753 else: +754 res = sp.optimize.minimize(optimization_func, initial_guess, args=(rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub), method="SLSQP") +755 else: +756 print('invalid MLE method.') +757 assert(1==0) +758 +759 +760 dt = time.time() - t0 +761 logger.info(f"method: {mle_method}") +762 logger.info(f"x: {res.x}") +763 logger.info(f"func: {res.fun}") +764 logger.info(f"success: {res.success}") +765 logger.info(f"nfev: {res.nfev}") +766 logger.info(f"time (s): {dt}\n") +767 x.append(res.x) +768 y.append(res.fun) +769 s.append(res.success) +770 nf.append(res.nfev) +771 t.append(dt) +772 +773 ##### Save and plot the data ##### +774 results_list=[x,y,s,nf,t] +775 +776 y_data_list = [y,nf,t] +777 fig, axes = plt.subplots(len(y_data_list), 1, figsize=(10, 12), sharex=True) +778 trial_idx = [int(i+1) for i in range(n_trials)] +779 y_labels = ['log-likelihood', 'n likelihood evaluations', 'runtime (s)'] +780 plt.suptitle('Maximum likelihood optimization runs') +781 +782 for i in range(len(y_data_list)): +783 ax = axes[i] +784 ax.set_ylabel(y_labels[i]) +785 ax.yaxis.set_label_coords(-0.1, 0.5) +786 if y_labels[i] == 'log-likelihood': +787 ax.axhline(log_like_nom, color='red') +788 ax.plot(trial_idx, [-1*j for j in y_data_list[i]], '-o', ) +789 else: +790 ax.plot(trial_idx, y_data_list[i], '-o',) +791 axes[-1].set_xlabel("replica number") +792 plt.tight_layout() +793 plt.savefig(os.path.join(output_dir, f'optimization_plot.png')) +794 +795 # Convert each NumPy array in results_list to a standard Python list +796 results_dict = { +797 'run': output_fname, +798 'method': mle_method, +799 'x': [arr.tolist() if isinstance(arr, np.ndarray) else arr for arr in results_list[0]], +800 'fun': [float(i) for i in results_list[1]], +801 'success': [bool(i) for i in results_list[2]], +802 'nfev': results_list[3], +803 't': results_list[4] +804 } +805 +806 with open(os.path.join(output_dir, f'optimization_results.yaml'), 'w') as file: +807 yaml.dump(results_dict, file, sort_keys=False) +
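The `### Fix this! too repetitive` note in the tuning section flags that every method repeats the same pattern: take defaults on the first iteration, sample random hyperparameters afterwards, run, and record. One possible direction is to collapse the branches into a declarative table of hyperparameter spaces. The sketch below is only an illustration of that idea; `HYPERPARAM_SPACES` and `sample_hyperparams` are hypothetical names, not part of this codebase, and only two methods are filled in.

import numpy as np
import scipy as sp

# Hypothetical refactor sketch: each entry maps a hyperparameter name to a
# callable that draws a random value, replacing one if/elif branch per method.
HYPERPARAM_SPACES = {
    "differential_evolution": {
        "strategy": lambda rng: str(rng.choice(["best1bin", "best1exp", "rand1bin"])),
        "popsize": lambda rng: int(rng.integers(25, 50)),
        "mutation": lambda rng: float(rng.uniform(0, 1.9)),
        "recombination": lambda rng: float(rng.uniform(0, 1)),
        "tol": lambda rng: float(rng.uniform(1e-5, 1e-3)),
        "maxiter": lambda rng: int(rng.integers(100, 1000)),
    },
    "SLSQP": {
        "ftol": lambda rng: float(rng.uniform(1e-10, 1e-4)),
        "eps": lambda rng: float(rng.uniform(1e-10, 1e-4)),
        "maxiter": lambda rng: int(rng.integers(1000, 10000)),
    },
}

def sample_hyperparams(method, rng):
    """Draw one random hyperparameter setting for the given method."""
    return {name: draw(rng) for name, draw in HYPERPARAM_SPACES[method].items()}

# Example use inside the tuning loop:
#   rng = np.random.default_rng(seed)
#   hyperparams = sample_hyperparams("differential_evolution", rng)
#   tune_res = sp.optimize.differential_evolution(
#       optimization_func, p_bounds, args=tuple(optimization_func_args), **hyperparams)

Casting each draw to a built-in type (str, int, float) as above would also remove the manual numpy-to-Python conversions currently done before yaml.dump.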
1import matplotlib.pyplot as plt + 2import numpy as np + 3import scipy as sp + 4import pocomc as pmc + 5import yaml + 6import ssme_function as ssme + 7import tellurium as te + 8import os + 9import shutil + 10import datetime + 11import logging + 12import corner + 13import arviz as az + 14import os + 15import utility_functions as uf + 16 + 17 + 18os.environ['KMP_DUPLICATE_LIB_OK']='True' # may be needed depending on installation of math library + 19 + 20 + 21def run_sampler(config_fname): + 22 """ + 23 Run a Bayesian sampler (pocoMC) based on a configuration file to infer model parameters. + 24 + 25 This function sets up and runs a Bayesian sampler (MCMC) based on the provided configuration file. + 26 The sampler is used to infer model parameters from observed data. The function also provides + 27 preliminary data visualization of the MCMC results, including trace plots, corner plots, and + 28 flux prediction plots. Additionally, it calculates the log evidence using both Sequential Monte Carlo + 29 and Bridge Sampling methods. + 30 + 31 Args: + 32 config_fname (str): Path to the configuration YAML file. This file should contain all + 33 necessary settings and parameters for the run, including model file, + 34 data file, solver arguments, Bayesian inference settings, optimization + 35 settings, and random seed. + 36 + 37 Outputs: + 38 - Creates a new directory for the run, containing: + 39 - Copied model, data, and configuration files. + 40 - Log file with run details. + 41 - MCMC run and trace plot. + 42 - MCMC corner plot. + 43 - Flux prediction plot. + 44 - Flat samples and log likelihoods as CSV files. + 45 + 46 Notes: + 47 - The configuration file should be structured appropriately with all necessary fields. + 48 - The function uses the `pmc` library for MCMC sampling. + 49 - The function uses the `pmc` library for corner plots. + 50 - The function assumes the use of the RoadRunner model for simulations. + 51 - The function handles both standard and extended models based on the 'extended' flag in the configuration. 
+ 52 """ + 53 + 54 ##### Setup ##### + 55 # Load the config.yaml file + 56 with open(config_fname, "r") as f: + 57 config = yaml.safe_load(f) + 58 + 59 # get run configuration settings + 60 run_name = config["run_name"] + 61 model_file = config['model_file'] + 62 data_file = config['data_file'] + 63 simulation_kwargs = config['solver_arguments'] + 64 inference_settings = config['bayesian_inference'] + 65 optimization_settings = config['optimization'] + 66 seed = config['random_seed'] + 67 np.random.seed(seed) + 68 extended = config['bayesian_inference']['extended'] + 69 + 70 if extended == False: + 71 # get parameter names, values, and boundaries + 72 k_names = config['solver_arguments']['rate_constant_names'] + 73 log10_p_nom = [d['nominal'] for d in config['bayesian_inference']['parameters']] + 74 p_names = [d['name'] for d in config['bayesian_inference']['parameters']] + 75 p_lb = [d['bounds'][0] for d in config['bayesian_inference']['parameters']] + 76 p_ub = [d['bounds'][1] for d in config['bayesian_inference']['parameters']] + 77 p_bounds = list(zip(p_lb,p_ub)) + 78 p_nom = [10**d['nominal'] for d in config['bayesian_inference']['parameters']] + 79 k_nom = p_nom[:-1] + 80 sigma_nom = p_nom[-1] + 81 + 82 # get assay initial conditions + 83 initial_conditions = config['solver_arguments']['species_initial_concentrations'] + 84 initial_conditions_scale = config['solver_arguments']['species_initial_concentrations_scale'] + 85 buffer_concentration_scale = config['solver_arguments']['buffer_concentration_scale'] + 86 else: + 87 # get parameter names, values, and boundaries + 88 k_names = config['solver_arguments']['rate_constant_names'] + 89 log10_p_nom = [d['nominal'] for d in config['bayesian_inference']['parameters']] + 90 p_names = [d['name'] for d in config['bayesian_inference']['parameters']] + 91 p_lb = [d['bounds'][0] for d in config['bayesian_inference']['parameters']] + 92 p_ub = [d['bounds'][1] for d in config['bayesian_inference']['parameters']] + 93 p_bounds = list(zip(p_lb,p_ub)) + 94 p_nom = [10**d['nominal'] for d in config['bayesian_inference']['parameters']] + 95 k_nom = p_nom[:-6] + 96 sigma_nom = p_nom[-1] + 97 + 98 # get assay initial conditions + 99 initial_conditions = config['solver_arguments']['species_initial_concentrations'] +100 initial_conditions_scale = config['solver_arguments']['species_initial_concentrations_scale'] +101 buffer_concentration_scale = config['solver_arguments']['buffer_concentration_scale'] +102 +103 # create new directory for runs +104 timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f') +105 output_fname = f'run_{run_name}_r{seed}' +106 output_dir = output_fname + '_' + timestamp +107 os.mkdir(output_dir) +108 shutil.copy(model_file, output_dir) +109 shutil.copy(data_file, output_dir) +110 shutil.copy(config_fname, output_dir) +111 +112 # create logging file +113 logger = logging.getLogger('log') +114 logger.setLevel(logging.INFO) +115 file_handler = logging.FileHandler(os.path.join(output_dir, f'log_file.log')) +116 file_handler.setLevel(logging.INFO) +117 log_format = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') +118 file_handler.setFormatter(log_format) +119 logger.addHandler(file_handler) +120 logger.info(f"output_dir: {output_dir}") +121 logger.info(f"seed: {seed}") +122 +123 # load roadrunner model and simulate assay at nominal values +124 rr_model = te.loadSBMLModel(model_file) +125 data_nom = ssme.simulate_assay(rr_model, k_nom, initial_conditions, initial_conditions_scale, 
buffer_concentration_scale, simulation_kwargs) +126 +127 y_nom = data_nom[1] # (time, output) +128 +129 +130 # Load data, plot trace, and calculate log-likelihood reference +131 y_obs = np.loadtxt(data_file, delimiter=',') +132 plt.figure(figsize=(8,6)) +133 plt.title('Current (A) vs simulation step') +134 plt.plot(y_nom, label='y_nom') +135 plt.plot(y_obs, 'o', label='y_obs', alpha=0.3) +136 plt.legend() +137 plt.savefig(os.path.join(output_dir, 'current_trace_nom.png')) +138 log_like_nom = uf.log_like(log10_p_nom, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs) +139 logger.info(f"log_like_nom: {log_like_nom}") +140 +141 # set sampler parameters +142 n_walkers = int(config['bayesian_inference']['mcmc_algorithm_arguments']['n_walkers']) +143 ess = config['bayesian_inference']['mcmc_algorithm_arguments']['ess'] +144 gamma = config['bayesian_inference']['mcmc_algorithm_arguments']['gamma'] +145 additional_samples = config['bayesian_inference']['mcmc_algorithm_arguments']['additional_samples'] +146 n_max = config['bayesian_inference']['mcmc_algorithm_arguments']['n_max'] +147 n_dim = len(p_names) +148 thin = int(config['bayesian_inference']['mcmc_algorithm_arguments']['thin']) +149 burn_in = int(config['bayesian_inference']['mcmc_algorithm_arguments']['burn_in']) +150 logger.info(f"n_walkers: {n_walkers}") +151 logger.info(f"ess: {ess}") +152 logger.info(f"gamma: {gamma}") +153 logger.info(f"additional samples: {additional_samples}") +154 logger.info(f"n_max: {n_max}") +155 logger.info(f"n_dim: {n_dim}") +156 logger.info(f"thin: {thin}") +157 logger.info(f"burn_in: {burn_in}") +158 +159 # get sampler starting points +160 p_0 = uf.get_p0(p_bounds, n_walkers) +161 +162 +163 ##### Run Sampler ##### +164 +165 if extended: +166 sampler = pmc.Sampler( +167 n_walkers, +168 n_dim, +169 log_likelihood=uf.log_like_extended, +170 log_prior=uf.log_prior, +171 log_likelihood_args= [rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs], +172 log_prior_args= [p_lb, p_ub], +173 infer_vectorization=False, +174 bounds=np.array(p_bounds), +175 random_state=seed, +176 diagonal = True, +177 ) +178 else: +179 sampler = pmc.Sampler( +180 n_walkers, +181 n_dim, +182 log_likelihood=uf.log_like, +183 log_prior=uf.log_prior, +184 log_likelihood_args= [rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs], +185 log_prior_args= [p_lb, p_ub], +186 infer_vectorization=False, +187 bounds=np.array(p_bounds), +188 random_state=seed, +189 diagonal = True, +190 ) +191 logger.info(f"starting MCMC") +192 sampler.run(prior_samples = p_0, +193 ess = ess, +194 gamma = gamma, +195 n_max = n_max +196 ) +197 logger.info(f"finished MCMC") +198 sampler.add_samples(additional_samples) +199 logger.info(f"finished additional MCMC") +200 +201 ##### Store data ##### +202 results = sampler.results +203 logger.info(f"total n steps: {np.sum(results['steps'])}") +204 +205 np.savetxt(os.path.join(output_dir, f'log_evidence.csv'), results['logz'], delimiter=',') +206 np.savetxt(os.path.join(output_dir, f'samples.csv'), results['samples'], delimiter=',') +207 np.savetxt(os.path.join(output_dir, f'log_likelihood.csv'), results['loglikelihood'], delimiter=',') +208 logger.info(f"saved data") +209 +210 ##### bridge sampling for log evidence calculation ##### +211 logz_bs, logz_bs_error = sampler.bridge_sampling() +212 logger.info(f"logZ estimated using SMC: {results['logz'][-1]}") 
+213 logger.info(f"logZ estimated using Bridge Sampling: {logz_bs} +- {logz_bs_error}") +214 +215 +216 ##### Preliminary data viz ##### +217 # run plot +218 fig = pmc.plotting.run(results) +219 plt.savefig(os.path.join(output_dir, f'run_plot.png')) +220 plt.close() +221 +222 # trace plot +223 fig = pmc.plotting.trace(results) +224 plt.savefig(os.path.join(output_dir, f'trace_plot.png')) +225 plt.close() +226 +227 # corner plot +228 fig = pmc.plotting.corner(results,labels=p_names, truths=log10_p_nom, range=p_bounds) +229 plt.savefig(os.path.join(output_dir, f'mcmc_corner.png')) +230 +231 # flux prediction plot +232 plt.figure(figsize=(10,8)) +233 inds = np.random.randint(len(results['samples']), size=100) +234 for ind in inds: +235 p_sample = results['samples'][ind] +236 k_tmp = [10**i for i in p_sample[:-1]] +237 res_tmp = ssme.simulate_assay(rr_model, k_tmp, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs) +238 y_tmp = res_tmp[1] +239 plt.plot(y_tmp, color='blue', alpha=0.1) +240 plt.plot(y_tmp, color='blue', alpha=0.1, label='y_pred') +241 plt.plot(y_obs, 'o', label='y_obs', alpha=0.3,color='orange') +242 plt.legend(fontsize=14) +243 plt.xlabel("time step") +244 plt.ylabel("net ion influx (M/s)"); +245 plt.savefig(os.path.join(output_dir, f'net_ion_influx_pred.png')) +246 logger.info(f"plotted analysis") +247 +248 +249 +250if __name__ == '__main__': +251 +252 ##### Adjust this if needed ##### +253 config_fname = "./example/antiporter_1_1_12D_cycle1_config.yaml" +254 run_sampler(config_fname) +255 +
22def run_sampler(config_fname): + 23 """ + 24 Run a Bayesian sampler (pocoMC) based on a configuration file to infer model parameters. + 25 + 26 This function sets up and runs a Bayesian sampler (MCMC) based on the provided configuration file. + 27 The sampler is used to infer model parameters from observed data. The function also provides + 28 preliminary data visualization of the MCMC results, including trace plots, corner plots, and + 29 flux prediction plots. Additionally, it calculates the log evidence using both Sequential Monte Carlo + 30 and Bridge Sampling methods. + 31 + 32 Args: + 33 config_fname (str): Path to the configuration YAML file. This file should contain all + 34 necessary settings and parameters for the run, including model file, + 35 data file, solver arguments, Bayesian inference settings, optimization + 36 settings, and random seed. + 37 + 38 Outputs: + 39 - Creates a new directory for the run, containing: + 40 - Copied model, data, and configuration files. + 41 - Log file with run details. + 42 - MCMC run and trace plot. + 43 - MCMC corner plot. + 44 - Flux prediction plot. + 45 - Flat samples and log likelihoods as CSV files. + 46 + 47 Notes: + 48 - The configuration file should be structured appropriately with all necessary fields. + 49 - The function uses the `pmc` library for MCMC sampling. + 50 - The function uses the `pmc` library for corner plots. + 51 - The function assumes the use of the RoadRunner model for simulations. + 52 - The function handles both standard and extended models based on the 'extended' flag in the configuration. + 53 """ + 54 + 55 ##### Setup ##### + 56 # Load the config.yaml file + 57 with open(config_fname, "r") as f: + 58 config = yaml.safe_load(f) + 59 + 60 # get run configuration settings + 61 run_name = config["run_name"] + 62 model_file = config['model_file'] + 63 data_file = config['data_file'] + 64 simulation_kwargs = config['solver_arguments'] + 65 inference_settings = config['bayesian_inference'] + 66 optimization_settings = config['optimization'] + 67 seed = config['random_seed'] + 68 np.random.seed(seed) + 69 extended = config['bayesian_inference']['extended'] + 70 + 71 if extended == False: + 72 # get parameter names, values, and boundaries + 73 k_names = config['solver_arguments']['rate_constant_names'] + 74 log10_p_nom = [d['nominal'] for d in config['bayesian_inference']['parameters']] + 75 p_names = [d['name'] for d in config['bayesian_inference']['parameters']] + 76 p_lb = [d['bounds'][0] for d in config['bayesian_inference']['parameters']] + 77 p_ub = [d['bounds'][1] for d in config['bayesian_inference']['parameters']] + 78 p_bounds = list(zip(p_lb,p_ub)) + 79 p_nom = [10**d['nominal'] for d in config['bayesian_inference']['parameters']] + 80 k_nom = p_nom[:-1] + 81 sigma_nom = p_nom[-1] + 82 + 83 # get assay initial conditions + 84 initial_conditions = config['solver_arguments']['species_initial_concentrations'] + 85 initial_conditions_scale = config['solver_arguments']['species_initial_concentrations_scale'] + 86 buffer_concentration_scale = config['solver_arguments']['buffer_concentration_scale'] + 87 else: + 88 # get parameter names, values, and boundaries + 89 k_names = config['solver_arguments']['rate_constant_names'] + 90 log10_p_nom = [d['nominal'] for d in config['bayesian_inference']['parameters']] + 91 p_names = [d['name'] for d in config['bayesian_inference']['parameters']] + 92 p_lb = [d['bounds'][0] for d in config['bayesian_inference']['parameters']] + 93 p_ub = [d['bounds'][1] for d in 
config['bayesian_inference']['parameters']] + 94 p_bounds = list(zip(p_lb,p_ub)) + 95 p_nom = [10**d['nominal'] for d in config['bayesian_inference']['parameters']] + 96 k_nom = p_nom[:-6] + 97 sigma_nom = p_nom[-1] + 98 + 99 # get assay initial conditions +100 initial_conditions = config['solver_arguments']['species_initial_concentrations'] +101 initial_conditions_scale = config['solver_arguments']['species_initial_concentrations_scale'] +102 buffer_concentration_scale = config['solver_arguments']['buffer_concentration_scale'] +103 +104 # create new directory for runs +105 timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f') +106 output_fname = f'run_{run_name}_r{seed}' +107 output_dir = output_fname + '_' + timestamp +108 os.mkdir(output_dir) +109 shutil.copy(model_file, output_dir) +110 shutil.copy(data_file, output_dir) +111 shutil.copy(config_fname, output_dir) +112 +113 # create logging file +114 logger = logging.getLogger('log') +115 logger.setLevel(logging.INFO) +116 file_handler = logging.FileHandler(os.path.join(output_dir, f'log_file.log')) +117 file_handler.setLevel(logging.INFO) +118 log_format = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') +119 file_handler.setFormatter(log_format) +120 logger.addHandler(file_handler) +121 logger.info(f"output_dir: {output_dir}") +122 logger.info(f"seed: {seed}") +123 +124 # load roadrunner model and simulate assay at nominal values +125 rr_model = te.loadSBMLModel(model_file) +126 data_nom = ssme.simulate_assay(rr_model, k_nom, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs) +127 +128 y_nom = data_nom[1] # (time, output) +129 +130 +131 # Load data, plot trace, and calculate log-likelihood reference +132 y_obs = np.loadtxt(data_file, delimiter=',') +133 plt.figure(figsize=(8,6)) +134 plt.title('Current (A) vs simulation step') +135 plt.plot(y_nom, label='y_nom') +136 plt.plot(y_obs, 'o', label='y_obs', alpha=0.3) +137 plt.legend() +138 plt.savefig(os.path.join(output_dir, 'current_trace_nom.png')) +139 log_like_nom = uf.log_like(log10_p_nom, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs) +140 logger.info(f"log_like_nom: {log_like_nom}") +141 +142 # set sampler parameters +143 n_walkers = int(config['bayesian_inference']['mcmc_algorithm_arguments']['n_walkers']) +144 ess = config['bayesian_inference']['mcmc_algorithm_arguments']['ess'] +145 gamma = config['bayesian_inference']['mcmc_algorithm_arguments']['gamma'] +146 additional_samples = config['bayesian_inference']['mcmc_algorithm_arguments']['additional_samples'] +147 n_max = config['bayesian_inference']['mcmc_algorithm_arguments']['n_max'] +148 n_dim = len(p_names) +149 thin = int(config['bayesian_inference']['mcmc_algorithm_arguments']['thin']) +150 burn_in = int(config['bayesian_inference']['mcmc_algorithm_arguments']['burn_in']) +151 logger.info(f"n_walkers: {n_walkers}") +152 logger.info(f"ess: {ess}") +153 logger.info(f"gamma: {gamma}") +154 logger.info(f"additional samples: {additional_samples}") +155 logger.info(f"n_max: {n_max}") +156 logger.info(f"n_dim: {n_dim}") +157 logger.info(f"thin: {thin}") +158 logger.info(f"burn_in: {burn_in}") +159 +160 # get sampler starting points +161 p_0 = uf.get_p0(p_bounds, n_walkers) +162 +163 +164 ##### Run Sampler ##### +165 +166 if extended: +167 sampler = pmc.Sampler( +168 n_walkers, +169 n_dim, +170 log_likelihood=uf.log_like_extended, +171 log_prior=uf.log_prior, +172 log_likelihood_args= 
[rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs], +173 log_prior_args= [p_lb, p_ub], +174 infer_vectorization=False, +175 bounds=np.array(p_bounds), +176 random_state=seed, +177 diagonal = True, +178 ) +179 else: +180 sampler = pmc.Sampler( +181 n_walkers, +182 n_dim, +183 log_likelihood=uf.log_like, +184 log_prior=uf.log_prior, +185 log_likelihood_args= [rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs], +186 log_prior_args= [p_lb, p_ub], +187 infer_vectorization=False, +188 bounds=np.array(p_bounds), +189 random_state=seed, +190 diagonal = True, +191 ) +192 logger.info(f"starting MCMC") +193 sampler.run(prior_samples = p_0, +194 ess = ess, +195 gamma = gamma, +196 n_max = n_max +197 ) +198 logger.info(f"finished MCMC") +199 sampler.add_samples(additional_samples) +200 logger.info(f"finished additional MCMC") +201 +202 ##### Store data ##### +203 results = sampler.results +204 logger.info(f"total n steps: {np.sum(results['steps'])}") +205 +206 np.savetxt(os.path.join(output_dir, f'log_evidence.csv'), results['logz'], delimiter=',') +207 np.savetxt(os.path.join(output_dir, f'samples.csv'), results['samples'], delimiter=',') +208 np.savetxt(os.path.join(output_dir, f'log_likelihood.csv'), results['loglikelihood'], delimiter=',') +209 logger.info(f"saved data") +210 +211 ##### bridge sampling for log evidence calculation ##### +212 logz_bs, logz_bs_error = sampler.bridge_sampling() +213 logger.info(f"logZ estimated using SMC: {results['logz'][-1]}") +214 logger.info(f"logZ estimated using Bridge Sampling: {logz_bs} +- {logz_bs_error}") +215 +216 +217 ##### Preliminary data viz ##### +218 # run plot +219 fig = pmc.plotting.run(results) +220 plt.savefig(os.path.join(output_dir, f'run_plot.png')) +221 plt.close() +222 +223 # trace plot +224 fig = pmc.plotting.trace(results) +225 plt.savefig(os.path.join(output_dir, f'trace_plot.png')) +226 plt.close() +227 +228 # corner plot +229 fig = pmc.plotting.corner(results,labels=p_names, truths=log10_p_nom, range=p_bounds) +230 plt.savefig(os.path.join(output_dir, f'mcmc_corner.png')) +231 +232 # flux prediction plot +233 plt.figure(figsize=(10,8)) +234 inds = np.random.randint(len(results['samples']), size=100) +235 for ind in inds: +236 p_sample = results['samples'][ind] +237 k_tmp = [10**i for i in p_sample[:-1]] +238 res_tmp = ssme.simulate_assay(rr_model, k_tmp, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs) +239 y_tmp = res_tmp[1] +240 plt.plot(y_tmp, color='blue', alpha=0.1) +241 plt.plot(y_tmp, color='blue', alpha=0.1, label='y_pred') +242 plt.plot(y_obs, 'o', label='y_obs', alpha=0.3,color='orange') +243 plt.legend(fontsize=14) +244 plt.xlabel("time step") +245 plt.ylabel("net ion influx (M/s)"); +246 plt.savefig(os.path.join(output_dir, f'net_ion_influx_pred.png')) +247 logger.info(f"plotted analysis") +
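Both sampler configurations pass `uf.log_prior` together with `[p_lb, p_ub]`. The utility-function docs below describe it only as calculating the log prior; given that the prior is later described as uninformative with a boundary constraint, a plausible minimal form is a uniform box prior. This is an assumption about, not a copy of, the actual utility_functions code.

import numpy as np

def log_prior_sketch(params, param_lb, param_ub):
    """Unnormalized uniform box prior: 0.0 inside the bounds, -inf outside."""
    params = np.asarray(params)
    inside = np.all(params >= np.asarray(param_lb)) and np.all(params <= np.asarray(param_ub))
    return 0.0 if inside else -np.inf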
Run a Bayesian sampler (emcee) based on a configuration file to infer model parameters.
\n\nThis function sets up and runs a Bayesian sampler (MCMC) based on the provided configuration file.\nThe sampler is used to infer model parameters from observed data. The function also provides\npreliminary data visualization of the MCMC results, including trace plots, corner plots, and \nflux prediction plots.
\n\n\n\n\n\n
\n- Creates a new directory for the run, containing:\n
\n\n
- Copied model, data, and configuration files.
\n- Log file with run details.
\n- MCMC trace plot.
\n- MCMC corner plot.
\n- Flux prediction plot.
\n- Flat samples and log likelihoods as CSV files.
\n
\n\n", "signature": "(config_fname):", "funcdef": "def"}, {"fullname": "run_optimizer", "modulename": "run_optimizer", "kind": "module", "doc": "\n"}, {"fullname": "run_optimizer.run_optimizer", "modulename": "run_optimizer", "qualname": "run_optimizer", "kind": "function", "doc": "\n
\n- The configuration file should be structured appropriately with all necessary fields.
\n- The function uses the
\nemcee
library for MCMC sampling.- The function uses the
\ncorner
library for corner plots.- The function assumes the use of the RoadRunner model for simulations.
\n- The function handles both standard and extended models based on the 'extended' flag in the configuration.
\n
Run maximum likelihood estimation based on a configuration file.
\n\nThis function will do both (random) hyperparameter tuning and replica maximum likelihood runs for an algorithm specified in the configuration file.\nCurrently the \"differential_evolution\", \"basinhopping\", \"dual_annealing\", \"shgo\", \"Nelder-Mead\", \"Powell\", \"CG\", \"L-BFGS-B\", \"COBYLA\", \"direct\" and \"SLSQP\"\nmethods are supported.
\n\n\n\n\n\n
\n- Creates a new directory for the run, containing:\n
\n\n
- Copied model, data, and configuration files.
\n- Log file with run details.
\n- Flux trace plot.
\n- Optimization plot.
\n- Tuning results.
\n- Optimization results.
\n
\n\n", "signature": "(config_fname):", "funcdef": "def"}, {"fullname": "run_pocomc", "modulename": "run_pocomc", "kind": "module", "doc": "\n"}, {"fullname": "run_pocomc.run_sampler", "modulename": "run_pocomc", "qualname": "run_sampler", "kind": "function", "doc": "\n
\n- The configuration file should be structured appropriately with all necessary fields (see example folder).
\n- The function uses the
\nscipy
library for maximum likelihood estimation.- The function assumes the use of the RoadRunner model for simulations.
\n- The function handles both standard and extended models based on the 'extended' flag in the configuration.
\n
Run a Bayesian sampler (pocoMC) based on a configuration file to infer model parameters.
\n\nThis function sets up and runs a Bayesian sampler (MCMC) based on the provided configuration file.\nThe sampler is used to infer model parameters from observed data. The function also provides\npreliminary data visualization of the MCMC results, including trace plots, corner plots, and \nflux prediction plots. Additionally, it calculates the log evidence using both Sequential Monte Carlo \nand Bridge Sampling methods.
\n\n\n\n\n\n
\n- Creates a new directory for the run, containing:\n
\n\n
- Copied model, data, and configuration files.
\n- Log file with run details.
\n- MCMC run and trace plot.
\n- MCMC corner plot.
\n- Flux prediction plot.
\n- Flat samples and log likelihoods as CSV files.
\n
\n\n", "signature": "(config_fname):", "funcdef": "def"}, {"fullname": "ssme_function", "modulename": "ssme_function", "kind": "module", "doc": "\n"}, {"fullname": "ssme_function.simulate_assay", "modulename": "ssme_function", "qualname": "simulate_assay", "kind": "function", "doc": "\n
Notes:
    - The configuration file should be structured appropriately with all necessary fields.
    - The function uses the `pocomc` library for MCMC sampling.
    - The function uses the `pocomc` library for corner plots.
    - The function assumes the use of the RoadRunner model for simulations.
    - The function handles both standard and extended models based on the 'extended' flag in the configuration.
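All of the entry points above read the same YAML configuration layout. Parsed with yaml.safe_load, a minimal standard (non-extended) configuration reduces to a nested dict like the sketch below; the field names are the ones the code reads, but every value shown is illustrative:

config = {
    'run_name': 'antiporter_demo',        # hypothetical run label
    'model_file': 'antiporter.xml',       # hypothetical SBML model
    'data_file': 'synthetic_data.csv',    # hypothetical observed data
    'random_seed': 42,
    'solver_arguments': {
        'rate_constant_names': ['k1_f', 'k1_r'],
        'species_initial_concentrations': [1e-5],
        'species_initial_concentrations_scale': [1.0],
        'buffer_concentration_scale': [1.0, 1.0],
        # ...plus the simulation keys documented under ssme_function.simulate_assay
    },
    'bayesian_inference': {
        'extended': False,
        'parameters': [  # log10-scaled; the noise parameter sigma is last
            {'name': 'log10_k1_f', 'nominal': 8.0, 'bounds': [3.0, 12.0]},
            {'name': 'log10_k1_r', 'nominal': 3.0, 'bounds': [-1.0, 8.0]},
            {'name': 'log10_sigma', 'nominal': -10.0, 'bounds': [-13.0, -7.0]},
        ],
    },
    'optimization': {},  # settings for run_optimizer; fields not shown here
}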
diff --git a/docs/ssme_function.html b/docs/ssme_function.html
new file mode 100644
index 0000000..55ce90a
--- /dev/null
+++ b/docs/ssme_function.html
@@ -0,0 +1,429 @@
import roadrunner
import numpy as np


def simulate_assay(rr, rate_constants, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments):
    """
    Simulate the SSME assay using the provided rate constants, initial conditions, and solver arguments.

    Args:
        rr (object): RoadRunner object for simulating biochemical network models (e.g. transporter cycle).
        rate_constants (list): List of rate constant values. Must be in the same order as rate constant names in solver arguments.
        initial_conditions (list): List of initial species concentrations.
        initial_conditions_scale (list): List of scaling factors for the initial species concentrations.
        buffer_concentration_scale (list): List of scaling factors for the buffer concentrations.
        solver_arguments (dict): Dictionary containing the following keys:
            - roadrunner_solver_output_selections (list): Species or parameters to be included in the simulation output.
            - buffer_concentration_sequence (list of lists): Sequence of buffer concentrations for each assay stage.
            - time_per_stage (float): Duration of each assay stage.
            - n_points_per_stage (int): Number of data points per assay stage.
            - buffer_species_names (list): Names of buffer species.
            - solver_absolute_tolerance (float): Absolute tolerance for the solver.
            - solver_relative_tolerance (float): Relative tolerance for the solver.
            - remove_first_n (int): Number of initial data points to exclude from the results for each stage.
            - remove_after_n (int): Index after which to stop including data points in the results for each stage.
            - species_labels (list): Labels of species for which initial conditions are provided.
            - rate_constant_names (list): Names of rate constants in the model.

    Returns:
        results (ndarray): Array of simulation results.
    """
    # Load simulation settings
    roadrunner_solver_output_selections = solver_arguments['roadrunner_solver_output_selections']
    buffer_concentration_sequence = solver_arguments['buffer_concentration_sequence']
    time_per_stage = solver_arguments['time_per_stage']
    n_points_per_stage = solver_arguments['n_points_per_stage']
    buffer_species_names = solver_arguments['buffer_species_names']
    absolute_tolerance = solver_arguments['solver_absolute_tolerance']
    relative_tolerance = solver_arguments['solver_relative_tolerance']
    remove_first_n = solver_arguments['remove_first_n']
    remove_after_n = solver_arguments['remove_after_n']
    species_labels = solver_arguments['species_labels']
    rate_constant_names = solver_arguments['rate_constant_names']

    # Reset roadrunner model and set integrator tolerances
    rr.resetToOrigin()
    rr.integrator.absolute_tolerance = absolute_tolerance
    rr.integrator.relative_tolerance = relative_tolerance

    # Update model rate constants
    parameters = dict(zip(rate_constant_names, rate_constants))
    for name, value in parameters.items():
        rr[name] = value

    # Run synthetic SSME assay: iterate through the buffer concentration sequence
    results = []
    for i, solution in enumerate(buffer_concentration_sequence):  # Update buffer solution for each assay stage
        for j, label in enumerate(buffer_species_names):
            buffer_concentration_j = solution[j] * buffer_concentration_scale[j]  # scale buffer concentrations (obs = pred*scale_factor)
            setattr(rr, label, buffer_concentration_j)
        if i == 0:  # Set initial state concentrations for stage 1 (equilibration)
            for j, label in enumerate(species_labels):
                initial_concentration_j = initial_conditions[j] * initial_conditions_scale[j]
                setattr(rr, label, initial_concentration_j)
            rr.simulate(i, i + time_per_stage, n_points_per_stage, selections=roadrunner_solver_output_selections)  # Don't store equilibration results
        else:
            tmp = rr.simulate(i, i + time_per_stage, n_points_per_stage, selections=roadrunner_solver_output_selections)
            results.append(tmp[remove_first_n:remove_after_n])
    return np.vstack(results).T
diff --git a/docs/utility_functions.html b/docs/utility_functions.html
new file mode 100644
--- /dev/null
+++ b/docs/utility_functions.html
import numpy as np
import scipy as sp
import ssme_function as ssme
import time


def get_p0(b_list, n):
    """
    Generate initial samples uniformly distributed within given boundaries.

    Args:
        b_list (list of lists): Each sublist contains [lower bound, upper bound] for a parameter.
        n (int): Number of samples to generate.

    Returns:
        ndarray: Array of initial samples, shape (n, len(b_list)).
    """
    p0_array = np.transpose(np.array([np.random.uniform(b[0], b[1], n) for b in b_list]))  # re-arrange array for sampler
    return p0_array


def calc_normal_log_likelihood(y_obs, y_pred, sigma):
    """
    Calculate the log likelihood of a Normal distribution.

    Args:
        y_obs (ndarray): Observed values.
        y_pred (ndarray): Predicted values.
        sigma (float): Standard deviation.

    Returns:
        float: Log likelihood value.
    """
    y = sp.stats.norm.logpdf(y_obs, y_pred, sigma).sum()
    return y


def log_like(params, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments):
    """
    Calculate the log likelihood for given parameters and model.

    Args:
        params (list): List of log10-scaled parameters.
            - rate constant parameters are first
            - sigma is at index -1
        rr_model (object): RoadRunner model object.
        y_obs (ndarray): Observed data.
        initial_conditions (list): List of initial species concentrations.
        initial_conditions_scale (list): List of scaling factors for the initial species concentrations.
        buffer_concentration_scale (list): List of scaling factors for the buffer concentrations.
        solver_arguments (dict): Simulation settings; see ssme_function.simulate_assay for the full list of keys.

    Returns:
        float: Log likelihood value.
    """
    k = [10**i for i in params[:-1]]
    sigma = 10**params[-1]
    try:
        res = ssme.simulate_assay(rr_model, k, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments)
        y_pred = res[1]
        log_like = calc_normal_log_likelihood(y_obs, y_pred, sigma)
        return log_like
    except Exception:
        return -1e100  # arbitrarily large negative number --> 0 probability


def log_like_extended(params, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments):
    """
    Calculate the extended log likelihood considering additional nuisance parameters - buffer solution and protein concentrations, bias.

    Args:
        params (list): List of parameters.
            - rate constant parameters are first
            - buffer concentration scale factors (H_out and S_out) are at indices -5 and -4
            - transporter concentration scale factor is at index -3
            - bias factor is at index -2
            - sigma is at index -1
        rr_model (object): RoadRunner model object.
        y_obs (ndarray): Observed data.
        initial_conditions (list): List of initial species concentrations.
        initial_conditions_scale (list): List of scaling factors for the initial species concentrations.
        buffer_concentration_scale (list): List of scaling factors for the buffer concentrations.
        solver_arguments (dict): Simulation settings; see ssme_function.simulate_assay for the full list of keys.

    Returns:
        float: Log likelihood value.
    """
    k = [10**i for i in params[:-5]]
    sigma = 10**params[-1]
    bias = params[-2]

    # get concentration uncertainty
    H_out_buffer_scale = params[-5]
    S_out_buffer_scale = params[-4]
    initial_transporter_concentration_scale = params[-3]

    # set concentration uncertainty
    buffer_concentration_scale[0] = H_out_buffer_scale
    buffer_concentration_scale[1] = S_out_buffer_scale
    initial_conditions_scale[0] = initial_transporter_concentration_scale
    try:
        res = ssme.simulate_assay(rr_model, k, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments)
        y_pred = bias*res[1]
        log_like = calc_normal_log_likelihood(y_obs, y_pred, sigma)
        return log_like
    except Exception:
        return -1e100  # arbitrarily large negative number --> 0 probability


def log_prior(params, param_lb, param_ub):
    """
    Calculate the log prior for given parameters.

    Args:
        params (list): List of parameters.
        param_lb (list): Lower bounds for parameters.
        param_ub (list): Upper bounds for parameters.

    Returns:
        float: Log prior value.
    """
    params = np.asarray(params)  # ensure the comparisons below are element-wise
    if ((param_lb < params) & (params < param_ub)).all():
        return 0
    else:
        return -1e100  # arbitrarily large negative number --> 0 probability


def log_post_wrapper(params, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments, param_lb, param_ub):
    """
    Wrapper function to calculate the log posterior (log likelihood + log prior).

    Args:
        params (list): List of parameters.
        rr_model (object): RoadRunner model object.
        y_obs (ndarray): Observed values.
        initial_conditions (list): List of initial species concentrations.
        initial_conditions_scale (list): List of scaling factors for the initial species concentrations.
        buffer_concentration_scale (list): List of scaling factors for the buffer concentrations.
        solver_arguments (dict): Simulation settings; see ssme_function.simulate_assay for the full list of keys.
        param_lb (list): Lower bounds for parameters.
        param_ub (list): Upper bounds for parameters.

    Returns:
        float: Log posterior value.
    """
    logl = log_like(params, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments)
    logpr = log_prior(params, param_lb, param_ub)
    return logl + logpr


def log_post_wrapper_extended(params, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments, param_lb, param_ub):
    """
    Wrapper function to calculate the log posterior (log likelihood + log prior) considering additional nuisance parameters - buffer solution and protein concentrations, bias.

    Args:
        Same as log_post_wrapper; params additionally carries the nuisance parameters (see log_like_extended).

    Returns:
        float: Log posterior value.
    """
    logl = log_like_extended(params, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments)
    logpr = log_prior(params, param_lb, param_ub)
    return logl + logpr


def negative_log_likelihood_wrapper(params, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments, param_lb, param_ub):
    """
    Wrapper function to calculate the negative log likelihood.
    Technically the negative log-posterior, but the prior is uninformative so it is the same as the log-likelihood with a boundary constraint.

    Args:
        Same as log_post_wrapper.

    Returns:
        float: Negative log likelihood value.
    """
    logl = log_like(params, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments)
    logpr = log_prior(params, param_lb, param_ub)
    return -1*(logl + logpr)


def negative_log_likelihood_wrapper_extended(params, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments, param_lb, param_ub):
    """
    Wrapper function to calculate the extended negative log likelihood considering concentration uncertainty.
    Technically the negative log-posterior, but the prior is uninformative so it is the same as the log-likelihood with a boundary constraint.

    Args:
        Same as log_post_wrapper; params additionally carries the concentration-uncertainty nuisance parameters (see log_like_extended).

    Returns:
        float: Negative log likelihood value.
    """
    logl = log_like_extended(params, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments)
    logpr = log_prior(params, param_lb, param_ub)
    return -1*(logl + logpr)
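For reference, calc_normal_log_likelihood evaluates the i.i.d. Gaussian log likelihood that all of the wrappers above share (n is the number of observed points):

\log L \;=\; \sum_{t=1}^{n} \log \mathcal{N}\!\left(y_{\mathrm{obs},t} \mid y_{\mathrm{pred},t},\, \sigma^2\right) \;=\; -\frac{n}{2}\log\!\left(2\pi\sigma^2\right) \;-\; \frac{1}{2\sigma^2}\sum_{t=1}^{n}\left(y_{\mathrm{obs},t}-y_{\mathrm{pred},t}\right)^2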
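A minimal sketch of wiring these wrappers into emcee, in the spirit of run_sampler. The file names, bounds, and chain settings are illustrative, and solver_arguments must be filled in with the keys documented under ssme_function.simulate_assay:

import emcee
import numpy as np
import tellurium as te
import utility_functions as uf

rr_model = te.loadSBMLModel('antiporter.xml')            # hypothetical model file
y_obs = np.loadtxt('synthetic_data.csv', delimiter=',')  # hypothetical observed trace
initial_conditions = [1e-5]
initial_conditions_scale = [1.0]
buffer_concentration_scale = [1.0, 1.0]
solver_arguments = {}  # fill with the keys documented under ssme_function.simulate_assay

# log10-space bounds: two rate constants plus the noise sigma (last)
p_bounds = [(-1.0, 12.0), (-1.0, 12.0), (-13.0, -5.0)]
param_lb = np.array([b[0] for b in p_bounds])
param_ub = np.array([b[1] for b in p_bounds])

n_dim = len(p_bounds)
n_walkers = 2 * n_dim                 # emcee requires at least 2*n_dim walkers
p0 = uf.get_p0(p_bounds, n_walkers)   # shape (n_walkers, n_dim), uniform inside the bounds

sampler = emcee.EnsembleSampler(
    n_walkers, n_dim, uf.log_post_wrapper,
    args=(rr_model, y_obs, initial_conditions, initial_conditions_scale,
          buffer_concentration_scale, solver_arguments, param_lb, param_ub),
)
sampler.run_mcmc(p0, 1000, progress=True)
flat_samples = sampler.get_chain(discard=200, flat=True)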