diff --git a/docs/__pycache__.html b/docs/__pycache__.html new file mode 100644 index 0000000..4b349a0 --- /dev/null +++ b/docs/__pycache__.html @@ -0,0 +1,235 @@ + + + + + + + __pycache__ API documentation + + + + + + + + + +
+
+

+__pycache__

+ + + + + +
+
+ + \ No newline at end of file diff --git a/docs/index.html b/docs/index.html new file mode 100644 index 0000000..5c88052 --- /dev/null +++ b/docs/index.html @@ -0,0 +1,220 @@ + + + + + + + Module List – pdoc 14.0.0 + + + + + + + + + + +
+ + pdoc + + +
+
+ + \ No newline at end of file diff --git a/docs/run_emcee.html b/docs/run_emcee.html new file mode 100644 index 0000000..b09fe31 --- /dev/null +++ b/docs/run_emcee.html @@ -0,0 +1,743 @@ + + + + + + + run_emcee API documentation + + + + + + + + + +
+
+

+run_emcee

+ + + + + + +
  1import matplotlib.pyplot as plt 
+  2import numpy as np 
+  3import scipy as sp
+  4import emcee 
+  5import yaml
+  6import ssme_function as ssme
+  7import tellurium as te
+  8import os 
+  9import shutil
+ 10import datetime
+ 11import logging
+ 12import corner
+ 13import utility_functions as uf
+ 14
+ 15
+ 16def run_sampler(config_fname):
+ 17    """
+ 18    Run a Bayesian sampler (emcee) based on a configuration file to infer model parameters.
+ 19    
+ 20    This function sets up and runs a Bayesian sampler (MCMC) based on the provided configuration file.
+ 21    The sampler is used to infer model parameters from observed data. The function also provides
+ 22    preliminary data visualization of the MCMC results, including trace plots, corner plots, and 
+ 23    flux prediction plots.
+ 24    
+ 25    Args:
+ 26        config_fname (str): Path to the configuration YAML file. This file should contain all 
+ 27                            necessary settings and parameters for the run, including model file, 
+ 28                            data file, solver arguments, Bayesian inference settings, optimization 
+ 29                            settings, and random seed.
+ 30                            
+ 31    Outputs:
+ 32        - Creates a new directory for the run, containing:
+ 33            - Copied model, data, and configuration files.
+ 34            - Log file with run details.
+ 35            - MCMC trace plot.
+ 36            - MCMC corner plot.
+ 37            - Flux prediction plot.
+ 38            - Flat samples and log likelihoods as CSV files.
+ 39            
+ 40    Notes:
+ 41        - The configuration file should be structured appropriately with all necessary fields.
+ 42        - The function uses the `emcee` library for MCMC sampling.
+ 43        - The function uses the `corner` library for corner plots.
+ 44        - The function assumes the use of the RoadRunner model for simulations.
+ 45        - The function handles both standard and extended models based on the 'extended' flag in the configuration.
+ 46    """
+ 47
+ 48
+ 49    ##### Setup #####
+ 50    # Load the config.yaml file
+ 51    with open(config_fname, "r") as f:
+ 52        config = yaml.safe_load(f)
+ 53
+ 54    # get run configuration settings
+ 55    run_name = config["run_name"]
+ 56    model_file = config['model_file']
+ 57    data_file = config['data_file']
+ 58    simulation_kwargs = config['solver_arguments']
+ 59    inference_settings = config['bayesian_inference']
+ 60    optimization_settings = config['optimization']
+ 61    seed = config['random_seed']
+ 62    np.random.seed(seed)
+ 63    extended = config['bayesian_inference']['extended']
+ 64
+ 65    if extended == False:
+ 66        # get parameter names, values, and boundaries
+ 67        k_names = config['solver_arguments']['rate_constant_names']
+ 68        log10_p_nom = [d['nominal'] for d in config['bayesian_inference']['parameters']]
+ 69        p_names = [d['name'] for d in config['bayesian_inference']['parameters']]
+ 70        p_lb = [d['bounds'][0] for d in config['bayesian_inference']['parameters']]
+ 71        p_ub = [d['bounds'][1] for d in config['bayesian_inference']['parameters']]
+ 72        p_bounds = list(zip(p_lb,p_ub))
+ 73        p_nom = [10**d['nominal'] for d in config['bayesian_inference']['parameters']]
+ 74        k_nom = p_nom[:-1]  # last parameter is noise sigma
+ 75        sigma_nom = p_nom[-1]
+ 76
+ 77        # get assay initial conditions
+ 78        initial_conditions = config['solver_arguments']['species_initial_concentrations']
+ 79        initial_conditions_scale = config['solver_arguments']['species_initial_concentrations_scale']
+ 80        buffer_concentration_scale = config['solver_arguments']['buffer_concentration_scale']
+ 81    else:
+ 82        # get parameter names, values, and boundaries
+ 83        k_names = config['solver_arguments']['rate_constant_names']
+ 84        log10_p_nom = [d['nominal'] for d in config['bayesian_inference']['parameters']]
+ 85        p_names = [d['name'] for d in config['bayesian_inference']['parameters']]
+ 86        p_lb = [d['bounds'][0] for d in config['bayesian_inference']['parameters']]
+ 87        p_ub = [d['bounds'][1] for d in config['bayesian_inference']['parameters']]
+ 88        p_bounds = list(zip(p_lb,p_ub))
+ 89        p_nom = [10**d['nominal'] for d in config['bayesian_inference']['parameters']]
+ 90        k_nom = p_nom[:-6]  # extended model, last 5 parameters are nuisance parameters
+ 91        sigma_nom = p_nom[-1]
+ 92
+ 93        # get assay initial conditions
+ 94        initial_conditions = config['solver_arguments']['species_initial_concentrations']
+ 95        initial_conditions_scale = config['solver_arguments']['species_initial_concentrations_scale']
+ 96        buffer_concentration_scale = config['solver_arguments']['buffer_concentration_scale']
+ 97
+ 98    # create new directory for runs
+ 99    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
+100    output_fname = f'run_{run_name}_r{seed}'
+101    output_dir = output_fname + '_' + timestamp
+102    os.mkdir(output_dir)
+103    shutil.copy(model_file, output_dir)
+104    shutil.copy(data_file, output_dir)
+105    shutil.copy(config_fname, output_dir)
+106
+107    # create logging file 
+108    logger = logging.getLogger('log')
+109    logger.setLevel(logging.INFO)
+110    file_handler = logging.FileHandler(os.path.join(output_dir, f'log_file.log'))
+111    file_handler.setLevel(logging.INFO)
+112    log_format = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+113    file_handler.setFormatter(log_format)
+114    logger.addHandler(file_handler)
+115    logger.info(f"output_dir: {output_dir}")
+116    logger.info(f"seed: {seed}")
+117
+118    # load roadrunner model and simulate assay at nominal values
+119    rr_model = te.loadSBMLModel(model_file)
+120    data_nom = ssme.simulate_assay(rr_model, k_nom, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs)
+121    y_nom = data_nom[1]  # (time, output)
+122
+123    # Load data, plot trace, and calculate log-likelihood reference
+124    y_obs = np.loadtxt(data_file, delimiter=',')
+125    plt.figure(figsize=(8,6))
+126    plt.title('Net ion influx (M/s) vs simulation step')
+127    plt.plot(y_nom, label='y_nom')
+128    plt.plot(y_obs, 'o', label='y_obs', alpha=0.3)
+129    plt.legend()
+130    plt.savefig(os.path.join(output_dir, 'net_ion_influx_trace_nom.png'))
+131    log_like_nom = uf.log_like(log10_p_nom, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs)
+132    logger.info(f"log_like_nom: {log_like_nom}")
+133
+134    # set sampler parameters 
+135    n_walkers = int(config['bayesian_inference']['mcmc_algorithm_arguments']['n_walkers'])
+136    n_steps = int(config['bayesian_inference']['mcmc_algorithm_arguments']['n_steps'])
+137    n_dim = len(p_names)
+138    thin = int(config['bayesian_inference']['mcmc_algorithm_arguments']['thin'])
+139    burn_in = int(config['bayesian_inference']['mcmc_algorithm_arguments']['burn_in'])
+140    logger.info(f"n_walkers: {n_walkers}")
+141    logger.info(f"n_steps: {n_steps}")
+142    logger.info(f"n_dim: {n_dim}")
+143    logger.info(f"thin: {thin}")
+144    logger.info(f"burn_in: {burn_in}")
+145  
+146    # get sampler starting points
+147    p_0 = uf.get_p0(p_bounds, n_walkers) 
+148
+149    ##### Run Sampler #####
+150    if extended:
+151        logger.info(f"extended")
+152        sampler = emcee.EnsembleSampler(n_walkers, n_dim, uf.log_post_wrapper_extended, args=[rr_model, 
+153                                                                            y_obs, 
+154                                                                            initial_conditions, 
+155                                                                            initial_conditions_scale, 
+156                                                                            buffer_concentration_scale, 
+157                                                                            simulation_kwargs, 
+158                                                                            p_lb, 
+159                                                                            p_ub]
+160    )
+161    else:
+162        sampler = emcee.EnsembleSampler(n_walkers, n_dim, uf.log_post_wrapper, args=[rr_model, 
+163                                                                                y_obs, 
+164                                                                                initial_conditions, 
+165                                                                                initial_conditions_scale, 
+166                                                                                buffer_concentration_scale, 
+167                                                                                simulation_kwargs, 
+168                                                                                p_lb, 
+169                                                                                p_ub]
+170        )
+171    logger.info(f"starting MCMC")
+172    sampler.run_mcmc(p_0, n_steps, progress=True)
+173    logger.info(f"finished MCMC")
+174
+175    ##### Store data #####
+176    samples = sampler.get_chain(discard =burn_in, thin=thin)
+177    flat_samples = sampler.get_chain(discard=burn_in, thin=thin, flat=True)
+178    log_likelihoods = sampler.get_log_prob(discard = burn_in, thin=thin)
+179    flat_log_likelihoods = sampler.get_log_prob(discard = burn_in, thin=thin, flat=True)
+180    np.savetxt(os.path.join(output_dir, f'flat_samples.csv'), flat_samples, delimiter=',')
+181    np.savetxt(os.path.join(output_dir, f'flat_log_likelihoods.csv'), flat_log_likelihoods, delimiter=',')
+182    logger.info(f"saved samples and log likelihoods")
+183
+184    ##### Preliminary data viz #####
+185    # MCMC trace plot
+186    fig, axes = plt.subplots(n_dim, 1, figsize=(10, 12+(0.25*n_dim)), sharex=True)
+187    labels = p_names
+188    plt.suptitle('MCMC sampling trace')
+189    for i in range(n_dim):
+190        ax = axes[i]
+191        ax.plot(samples[:, :, i], "k", alpha=0.3)
+192        ax.set_ylabel(labels[i])
+193        ax.yaxis.set_label_coords(-0.1, 0.5)
+194        ax.axhline(log10_p_nom[i], color='red')
+195    axes[-1].set_xlabel("step number")
+196    plt.savefig(os.path.join(output_dir, f'mcmc_trace.png'))
+197
+198    # corner plot 
+199    fig = corner.corner(
+200        flat_samples, labels=p_names, truths=log10_p_nom, range=p_bounds,
+201    )
+202    plt.savefig(os.path.join(output_dir, f'mcmc_corner.png'))
+203
+204    # flux prediction plot
+205    plt.figure(figsize=(10,8))
+206    inds = np.random.randint(len(flat_samples), size=100)
+207    for ind in inds:
+208        p_sample = flat_samples[ind]
+209        k_tmp = [10**i for i in p_sample[:-1]]
+210        res_tmp = ssme.simulate_assay(rr_model, k_tmp, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs)
+211        y_tmp = res_tmp[1]
+212        plt.plot(y_tmp, color='blue', alpha=0.1)
+213    plt.plot(y_tmp, color='blue', alpha=0.1, label='y_pred')
+214    plt.plot(y_obs, 'o', label='y_obs', alpha=0.3,color='orange')
+215    plt.legend(fontsize=14)
+216    plt.xlabel("time step")
+217    plt.ylabel("net ion influx (M/s)");
+218    plt.savefig(os.path.join(output_dir, f'net_ion_influx_pred.png'))
+219    logger.info(f"plotted analysis")
+220
+221    return None 
+222
+223
+224
+225if __name__ == '__main__':
+226
+227    ##### Adjust this if needed ##### 
+228    config_fname = "./example/antiporter_1_1_12D_cycle1_config.yaml"
+229    run_sampler(config_fname)
+230    
+
+ + +
+
+ +
+ + def + run_sampler(config_fname): + + + +
+ +
 17def run_sampler(config_fname):
+ 18    """
+ 19    Run a Bayesian sampler (emcee) based on a configuration file to infer model parameters.
+ 20    
+ 21    This function sets up and runs a Bayesian sampler (MCMC) based on the provided configuration file.
+ 22    The sampler is used to infer model parameters from observed data. The function also provides
+ 23    preliminary data visualization of the MCMC results, including trace plots, corner plots, and 
+ 24    flux prediction plots.
+ 25    
+ 26    Args:
+ 27        config_fname (str): Path to the configuration YAML file. This file should contain all 
+ 28                            necessary settings and parameters for the run, including model file, 
+ 29                            data file, solver arguments, Bayesian inference settings, optimization 
+ 30                            settings, and random seed.
+ 31                            
+ 32    Outputs:
+ 33        - Creates a new directory for the run, containing:
+ 34            - Copied model, data, and configuration files.
+ 35            - Log file with run details.
+ 36            - MCMC trace plot.
+ 37            - MCMC corner plot.
+ 38            - Flux prediction plot.
+ 39            - Flat samples and log likelihoods as CSV files.
+ 40            
+ 41    Notes:
+ 42        - The configuration file should be structured appropriately with all necessary fields.
+ 43        - The function uses the `emcee` library for MCMC sampling.
+ 44        - The function uses the `corner` library for corner plots.
+ 45        - The function assumes the use of the RoadRunner model for simulations.
+ 46        - The function handles both standard and extended models based on the 'extended' flag in the configuration.
+ 47    """
+ 48
+ 49
+ 50    ##### Setup #####
+ 51    # Load the config.yaml file
+ 52    with open(config_fname, "r") as f:
+ 53        config = yaml.safe_load(f)
+ 54
+ 55    # get run configuration settings
+ 56    run_name = config["run_name"]
+ 57    model_file = config['model_file']
+ 58    data_file = config['data_file']
+ 59    simulation_kwargs = config['solver_arguments']
+ 60    inference_settings = config['bayesian_inference']
+ 61    optimization_settings = config['optimization']
+ 62    seed = config['random_seed']
+ 63    np.random.seed(seed)
+ 64    extended = config['bayesian_inference']['extended']
+ 65
+ 66    if extended == False:
+ 67        # get parameter names, values, and boundaries
+ 68        k_names = config['solver_arguments']['rate_constant_names']
+ 69        log10_p_nom = [d['nominal'] for d in config['bayesian_inference']['parameters']]
+ 70        p_names = [d['name'] for d in config['bayesian_inference']['parameters']]
+ 71        p_lb = [d['bounds'][0] for d in config['bayesian_inference']['parameters']]
+ 72        p_ub = [d['bounds'][1] for d in config['bayesian_inference']['parameters']]
+ 73        p_bounds = list(zip(p_lb,p_ub))
+ 74        p_nom = [10**d['nominal'] for d in config['bayesian_inference']['parameters']]
+ 75        k_nom = p_nom[:-1]  # last parameter is noise sigma
+ 76        sigma_nom = p_nom[-1]
+ 77
+ 78        # get assay initial conditions
+ 79        initial_conditions = config['solver_arguments']['species_initial_concentrations']
+ 80        initial_conditions_scale = config['solver_arguments']['species_initial_concentrations_scale']
+ 81        buffer_concentration_scale = config['solver_arguments']['buffer_concentration_scale']
+ 82    else:
+ 83        # get parameter names, values, and boundaries
+ 84        k_names = config['solver_arguments']['rate_constant_names']
+ 85        log10_p_nom = [d['nominal'] for d in config['bayesian_inference']['parameters']]
+ 86        p_names = [d['name'] for d in config['bayesian_inference']['parameters']]
+ 87        p_lb = [d['bounds'][0] for d in config['bayesian_inference']['parameters']]
+ 88        p_ub = [d['bounds'][1] for d in config['bayesian_inference']['parameters']]
+ 89        p_bounds = list(zip(p_lb,p_ub))
+ 90        p_nom = [10**d['nominal'] for d in config['bayesian_inference']['parameters']]
+ 91        k_nom = p_nom[:-6]  # extended model, last 5 parameters are nuisance parameters
+ 92        sigma_nom = p_nom[-1]
+ 93
+ 94        # get assay initial conditions
+ 95        initial_conditions = config['solver_arguments']['species_initial_concentrations']
+ 96        initial_conditions_scale = config['solver_arguments']['species_initial_concentrations_scale']
+ 97        buffer_concentration_scale = config['solver_arguments']['buffer_concentration_scale']
+ 98
+ 99    # create new directory for runs
+100    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
+101    output_fname = f'run_{run_name}_r{seed}'
+102    output_dir = output_fname + '_' + timestamp
+103    os.mkdir(output_dir)
+104    shutil.copy(model_file, output_dir)
+105    shutil.copy(data_file, output_dir)
+106    shutil.copy(config_fname, output_dir)
+107
+108    # create logging file 
+109    logger = logging.getLogger('log')
+110    logger.setLevel(logging.INFO)
+111    file_handler = logging.FileHandler(os.path.join(output_dir, f'log_file.log'))
+112    file_handler.setLevel(logging.INFO)
+113    log_format = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+114    file_handler.setFormatter(log_format)
+115    logger.addHandler(file_handler)
+116    logger.info(f"output_dir: {output_dir}")
+117    logger.info(f"seed: {seed}")
+118
+119    # load roadrunner model and simulate assay at nominal values
+120    rr_model = te.loadSBMLModel(model_file)
+121    data_nom = ssme.simulate_assay(rr_model, k_nom, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs)
+122    y_nom = data_nom[1]  # (time, output)
+123
+124    # Load data, plot trace, and calculate log-likelihood reference
+125    y_obs = np.loadtxt(data_file, delimiter=',')
+126    plt.figure(figsize=(8,6))
+127    plt.title('Net ion influx (M/s) vs simulation step')
+128    plt.plot(y_nom, label='y_nom')
+129    plt.plot(y_obs, 'o', label='y_obs', alpha=0.3)
+130    plt.legend()
+131    plt.savefig(os.path.join(output_dir, 'net_ion_influx_trace_nom.png'))
+132    log_like_nom = uf.log_like(log10_p_nom, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs)
+133    logger.info(f"log_like_nom: {log_like_nom}")
+134
+135    # set sampler parameters 
+136    n_walkers = int(config['bayesian_inference']['mcmc_algorithm_arguments']['n_walkers'])
+137    n_steps = int(config['bayesian_inference']['mcmc_algorithm_arguments']['n_steps'])
+138    n_dim = len(p_names)
+139    thin = int(config['bayesian_inference']['mcmc_algorithm_arguments']['thin'])
+140    burn_in = int(config['bayesian_inference']['mcmc_algorithm_arguments']['burn_in'])
+141    logger.info(f"n_walkers: {n_walkers}")
+142    logger.info(f"n_steps: {n_steps}")
+143    logger.info(f"n_dim: {n_dim}")
+144    logger.info(f"thin: {thin}")
+145    logger.info(f"burn_in: {burn_in}")
+146  
+147    # get sampler starting points
+148    p_0 = uf.get_p0(p_bounds, n_walkers) 
+149
+150    ##### Run Sampler #####
+151    if extended:
+152        logger.info(f"extended")
+153        sampler = emcee.EnsembleSampler(n_walkers, n_dim, uf.log_post_wrapper_extended, args=[rr_model, 
+154                                                                            y_obs, 
+155                                                                            initial_conditions, 
+156                                                                            initial_conditions_scale, 
+157                                                                            buffer_concentration_scale, 
+158                                                                            simulation_kwargs, 
+159                                                                            p_lb, 
+160                                                                            p_ub]
+161    )
+162    else:
+163        sampler = emcee.EnsembleSampler(n_walkers, n_dim, uf.log_post_wrapper, args=[rr_model, 
+164                                                                                y_obs, 
+165                                                                                initial_conditions, 
+166                                                                                initial_conditions_scale, 
+167                                                                                buffer_concentration_scale, 
+168                                                                                simulation_kwargs, 
+169                                                                                p_lb, 
+170                                                                                p_ub]
+171        )
+172    logger.info(f"starting MCMC")
+173    sampler.run_mcmc(p_0, n_steps, progress=True)
+174    logger.info(f"finished MCMC")
+175
+176    ##### Store data #####
+177    samples = sampler.get_chain(discard =burn_in, thin=thin)
+178    flat_samples = sampler.get_chain(discard=burn_in, thin=thin, flat=True)
+179    log_likelihoods = sampler.get_log_prob(discard = burn_in, thin=thin)
+180    flat_log_likelihoods = sampler.get_log_prob(discard = burn_in, thin=thin, flat=True)
+181    np.savetxt(os.path.join(output_dir, f'flat_samples.csv'), flat_samples, delimiter=',')
+182    np.savetxt(os.path.join(output_dir, f'flat_log_likelihoods.csv'), flat_log_likelihoods, delimiter=',')
+183    logger.info(f"saved samples and log likelihoods")
+184
+185    ##### Preliminary data viz #####
+186    # MCMC trace plot
+187    fig, axes = plt.subplots(n_dim, 1, figsize=(10, 12+(0.25*n_dim)), sharex=True)
+188    labels = p_names
+189    plt.suptitle('MCMC sampling trace')
+190    for i in range(n_dim):
+191        ax = axes[i]
+192        ax.plot(samples[:, :, i], "k", alpha=0.3)
+193        ax.set_ylabel(labels[i])
+194        ax.yaxis.set_label_coords(-0.1, 0.5)
+195        ax.axhline(log10_p_nom[i], color='red')
+196    axes[-1].set_xlabel("step number")
+197    plt.savefig(os.path.join(output_dir, f'mcmc_trace.png'))
+198
+199    # corner plot 
+200    fig = corner.corner(
+201        flat_samples, labels=p_names, truths=log10_p_nom, range=p_bounds,
+202    )
+203    plt.savefig(os.path.join(output_dir, f'mcmc_corner.png'))
+204
+205    # flux prediction plot
+206    plt.figure(figsize=(10,8))
+207    inds = np.random.randint(len(flat_samples), size=100)
+208    for ind in inds:
+209        p_sample = flat_samples[ind]
+210        k_tmp = [10**i for i in p_sample[:-1]]
+211        res_tmp = ssme.simulate_assay(rr_model, k_tmp, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs)
+212        y_tmp = res_tmp[1]
+213        plt.plot(y_tmp, color='blue', alpha=0.1)
+214    plt.plot(y_tmp, color='blue', alpha=0.1, label='y_pred')
+215    plt.plot(y_obs, 'o', label='y_obs', alpha=0.3,color='orange')
+216    plt.legend(fontsize=14)
+217    plt.xlabel("time step")
+218    plt.ylabel("net ion influx (M/s)");
+219    plt.savefig(os.path.join(output_dir, f'net_ion_influx_pred.png'))
+220    logger.info(f"plotted analysis")
+221
+222    return None 
+
+ + +

Run a Bayesian sampler (emcee) based on a configuration file to infer model parameters.

+ +

This function sets up and runs a Bayesian sampler (MCMC) based on the provided configuration file. +The sampler is used to infer model parameters from observed data. The function also provides +preliminary data visualization of the MCMC results, including trace plots, corner plots, and +flux prediction plots.

+ +
Arguments:
+ +
    +
  • config_fname (str): Path to the configuration YAML file. This file should contain all +necessary settings and parameters for the run, including model file, +data file, solver arguments, Bayesian inference settings, optimization +settings, and random seed.
  • +
+ +
Outputs:
+ +
+
    +
  • Creates a new directory for the run, containing: +
      +
    • Copied model, data, and configuration files.
    • +
    • Log file with run details.
    • +
    • MCMC trace plot.
    • +
    • MCMC corner plot.
    • +
    • Flux prediction plot.
    • +
    • Flat samples and log likelihoods as CSV files.
    • +
  • +
+
+ +
Notes:
+ +
+
    +
  • The configuration file should be structured appropriately with all necessary fields.
  • +
  • The function uses the emcee library for MCMC sampling.
  • +
  • The function uses the corner library for corner plots.
  • +
  • The function assumes the use of the RoadRunner model for simulations.
  • +
  • The function handles both standard and extended models based on the 'extended' flag in the configuration.
  • +
+
+
+ + +
+
+ + \ No newline at end of file diff --git a/docs/run_optimizer.html b/docs/run_optimizer.html new file mode 100644 index 0000000..79888dd --- /dev/null +++ b/docs/run_optimizer.html @@ -0,0 +1,1906 @@ + + + + + + + run_optimizer API documentation + + + + + + + + + +
+
+

+run_optimizer

+ + + + + + +
  1import matplotlib.pyplot as plt 
+  2import numpy as np 
+  3import scipy as sp
+  4import yaml
+  5import ssme_function as ssme
+  6import tellurium as te
+  7import os 
+  8import shutil
+  9import datetime
+ 10import logging
+ 11import time as time
+ 12from tqdm import tqdm
+ 13from scipy.optimize import basinhopping, dual_annealing, shgo, minimize
+ 14import inspect
+ 15import utility_functions as uf
+ 16
+ 17
+ 18
+ 19def run_optimizer(config_fname):
+ 20    """
+ 21    Run maximum likelihood estimation based on a configuration file.
+ 22    
+ 23    This function will do both (random) hyperparameter tuning and replica maximum likelihood runs for an algorithm specified in the configuration file.
+ 24    Currently the "differential_evolution", "basinhopping", "dual_annealing", "shgo", "Nelder-Mead", "Powell", "CG", "L-BFGS-B", "COBYLA", "direct" and "SLSQP"
+ 25    methods are supported.
+ 26    
+ 27    Args:
+ 28        config_fname (str): Path to the configuration YAML file. This file should contain all 
+ 29                            necessary settings and parameters for the run, including model file, 
+ 30                            data file, solver arguments, Bayesian inference settings, optimization 
+ 31                            settings, and random seed.
+ 32                            
+ 33    Outputs:
+ 34        - Creates a new directory for the run, containing:
+ 35            - Copied model, data, and configuration files.
+ 36            - Log file with run details.
+ 37            - Flux trace plot.
+ 38            - Optimization plot.
+ 39            - Tuning results.
+ 40            - Optimization results.
+ 41            
+ 42    Notes:
+ 43        - The configuration file should be structured appropriately with all necessary fields (see example folder).
+ 44        - The function uses the `scipy` library for maximum likelihood estimation.
+ 45        - The function assumes the use of the RoadRunner model for simulations.
+ 46        - The function handles both standard and extended models based on the 'extended' flag in the configuration.
+ 47    """
+ 48
+ 49    ##### Setup #####
+ 50    # Load the config.yaml file
+ 51    with open(config_fname, "r") as f:
+ 52        config = yaml.safe_load(f)
+ 53
+ 54    # get run configuration settings
+ 55    run_name = config["run_name"]
+ 56    model_file = config['model_file']
+ 57    data_file = config['data_file']
+ 58    simulation_kwargs = config['solver_arguments']
+ 59    inference_settings = config['bayesian_inference']
+ 60    optimization_settings = config['optimization']
+ 61    mle_method = optimization_settings['method']
+ 62    seed = config['random_seed']
+ 63    np.random.seed(seed)
+ 64
+ 65    # get parameter names, values, and boundaries
+ 66    k_names = config['solver_arguments']['rate_constant_names']
+ 67    log10_p_nom = [d['nominal'] for d in config['bayesian_inference']['parameters']]
+ 68    p_names = [d['name'] for d in config['bayesian_inference']['parameters']]
+ 69    p_lb = [d['bounds'][0] for d in config['bayesian_inference']['parameters']]
+ 70    p_ub = [d['bounds'][1] for d in config['bayesian_inference']['parameters']]
+ 71    p_bounds = list(zip(p_lb,p_ub))
+ 72    p_nom = [10**d['nominal'] for d in config['bayesian_inference']['parameters']]
+ 73    k_nom = p_nom[:-1]
+ 74    sigma_nom = p_nom[-1]
+ 75
+ 76    # get assay initial conditions
+ 77    initial_conditions = config['solver_arguments']['species_initial_concentrations']
+ 78    initial_conditions_scale = config['solver_arguments']['species_initial_concentrations_scale']
+ 79    buffer_concentration_scale = config['solver_arguments']['buffer_concentration_scale']
+ 80
+ 81    # create new directory for runs
+ 82    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
+ 83    output_fname = f'run_{run_name}_r{seed}'
+ 84    output_dir = output_fname + '_' + timestamp
+ 85    os.mkdir(output_dir)
+ 86    shutil.copy(model_file, output_dir)
+ 87    shutil.copy(data_file, output_dir)
+ 88    shutil.copy(config_fname, output_dir)
+ 89
+ 90    # create logging file 
+ 91    logger = logging.getLogger('log')
+ 92    logger.setLevel(logging.INFO)
+ 93    file_handler = logging.FileHandler(os.path.join(output_dir, f'log_file.log'))
+ 94    file_handler.setLevel(logging.INFO)
+ 95    log_format = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+ 96    file_handler.setFormatter(log_format)
+ 97    logger.addHandler(file_handler)
+ 98    logger.info(f"output_dir: {output_dir}")
+ 99    logger.info(f"seed: {seed}")
+100
+101    # load roadrunner model and simulate assay at nominal values
+102    rr_model = te.loadSBMLModel(model_file)
+103    data_nom = ssme.simulate_assay(rr_model, k_nom, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs)
+104    y_nom = data_nom[1]  # (time, output)
+105
+106    # Load data, plot trace, and calculate log-likelihood reference
+107    y_obs = np.loadtxt(data_file, delimiter=',')
+108    plt.figure(figsize=(8,6))
+109    plt.title('Net ion influx (M/s) vs simulation step')
+110    plt.plot(y_nom, label='y_nom')
+111    plt.plot(y_obs, 'o', label='y_obs', alpha=0.3)
+112    plt.legend()
+113    plt.savefig(os.path.join(output_dir, 'net_ion_influx_trace_nom.png'))
+114    log_like_nom = uf.log_like(log10_p_nom, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs)
+115    logger.info(f"log_like_nom: {log_like_nom}")
+116
+117    ##### Run MLE optimization #####
+118    tuning = config['optimization']['tuning']
+119    extended = config['bayesian_inference']['extended']
+120    n_dim = len(p_names)
+121    logger.info(f"n_dim: {n_dim}")
+122    if extended:
+123        logger.info(f"using extended nll")
+124        optimization_func = uf.negative_log_likelihood_wrapper_extended
+125    else:
+126        optimization_func = uf.negative_log_likelihood_wrapper
+127
+128    optimization_func_args = [rr_model, 
+129                                y_obs, 
+130                                initial_conditions, 
+131                                initial_conditions_scale, 
+132                                buffer_concentration_scale, 
+133                                simulation_kwargs, 
+134                                p_lb, 
+135                                p_ub
+136                            ]
+137    x = []
+138    y = []
+139    s = []
+140    nf = []
+141    t = []
+142    n_trials = optimization_settings['n_replicas']
+143    n_tuning_steps = config['optimization']['n_tuning_steps']
+144
+145    ### Fix this! too repetitive 
+146    # run hyperparameter tuning
+147    if tuning: 
+148        logger.info(f"Starting optimization hyperparameter tuning")
+149
+150        initial_guess = uf.get_p0(p_bounds, 1)[0]  # use the same start point for each algorithm
+151        logger.info(f"Initial guess: {initial_guess}")
+152
+153        best_fun_value = np.inf
+154        best_hyperparams = None
+155        best_result = None
+156        tuning_results = []
+157
+158        for i in tqdm(range(n_tuning_steps)):
+159            logger.info(f"Starting optimization hyperparameter tuning run {i}")
+160            t0 = time.time()
+161
+162
+163            if mle_method == "differential_evolution":
+164                # Define possible hyperparameter ranges
+165                strategy_choices = ['best1bin', 'best1exp', 'rand1exp', 'randtobest1exp', 'currenttobest1exp', 'best2exp', 'rand2exp', 'randtobest1bin', 'currenttobest1bin', 'best2bin', 'rand2bin', 'rand1bin']
+166                popsize_range = (25, 50)
+167                mutation_range = (0, 1.9)
+168                recombination_range = (0, 1)
+169                tol_range = (1e-5, 1e-3)
+170                maxiter_range = (100, 1000)  # Define a range for maxiter
+171
+172                if i == 0:
+173                    # Use default hyperparameters for the first iteration
+174                    signature = inspect.signature(sp.optimize.differential_evolution)
+175                    parameters = signature.parameters
+176                    strategy = parameters["strategy"].default
+177                    popsize = parameters["popsize"].default
+178                    mutation = parameters["mutation"].default
+179                    recombination = parameters["recombination"].default
+180                    tol = parameters["tol"].default
+181                    maxiter = parameters["maxiter"].default  # Get default maxiter
+182                else:
+183                    # Sample random hyperparameters
+184                    strategy = np.random.choice(strategy_choices)
+185                    popsize = np.random.randint(*popsize_range)
+186                    mutation = np.random.uniform(*mutation_range)
+187                    recombination = np.random.uniform(*recombination_range)
+188                    tol = np.random.uniform(*tol_range)
+189                    maxiter = np.random.randint(*maxiter_range)  # Sample maxiter
+190
+191                # Run optimization with hyperparameters
+192                tune_res = sp.optimize.differential_evolution(optimization_func, p_bounds, args=(rr_model, y_obs, initial_conditions, 
+193                                                                                initial_conditions_scale, buffer_concentration_scale, 
+194                                                                                simulation_kwargs, p_lb, p_ub), 
+195                                                                                strategy=strategy, popsize=popsize, mutation=mutation, 
+196                                                                                recombination=recombination, tol=tol, maxiter=maxiter)
+197                
+198
+199                if isinstance(mutation, tuple):
+200                    mutation_to_store = list(mutation)
+201                else:
+202                    mutation_to_store = mutation
+203                # Store results
+204                current_hyperparams = {
+205                    "strategy": str(strategy),
+206                    "popsize": popsize,
+207                    "mutation": mutation_to_store,
+208                    "recombination": recombination,
+209                    "tol": tol,
+210                    "maxiter": maxiter
+211                }
+212
+213            elif mle_method == "basinhopping":
+214                # Define possible hyperparameter ranges for basinhopping
+215                niter_range = (50, 500)  # Number of basin hopping iterations
+216                T_range = (0.5, 5.0)  # Temperature for acceptance criterion
+217                stepsize_range = (0.1, 2.0)  # Step size for random displacement
+218                interval_range = (10, 100)  # Interval for updating the step size
+219                niter_success_range = (5, 50)  # Number of iterations for success check
+220
+221                if i == 0:
+222                    # Use default hyperparameters for the first iteration
+223                    signature = inspect.signature(sp.optimize.basinhopping)
+224                    parameters = signature.parameters
+225                    niter = parameters["niter"].default
+226                    T = parameters["T"].default
+227                    stepsize = parameters["stepsize"].default
+228                    interval = parameters["interval"].default
+229                    niter_success = parameters["niter_success"].default
+230                else:
+231                    # Sample random hyperparameters for basinhopping
+232                    niter = np.random.randint(*niter_range)
+233                    T = np.random.uniform(*T_range)
+234                    stepsize = np.random.uniform(*stepsize_range)
+235                    interval = np.random.randint(*interval_range)
+236                    niter_success = np.random.randint(*niter_success_range)
+237
+238                # Run optimization with hyperparameters
+239                minimizer_kwargs = {"args": (rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub), "bounds": p_bounds}
+240                tune_res = sp.optimize.basinhopping(optimization_func, initial_guess, minimizer_kwargs=minimizer_kwargs, niter=niter, T=T, stepsize=stepsize, interval=interval, niter_success=niter_success)
+241
+242                # Store results
+243                current_hyperparams = {
+244                    "niter": niter,
+245                    "T": T,
+246                    "stepsize": stepsize,
+247                    "interval": interval,
+248                    "niter_success": niter_success
+249                }
+250
+251            elif mle_method == "dual_annealing":
+252                # Define possible hyperparameter ranges
+253                initial_temp_range = (0.01, 50000)
+254                restart_temp_ratio_range = (0, 1)
+255                visit_range = (1, 3)
+256                accept_range = (-1e4, -5)
+257                maxiter_range = (100, 1000)
+258                maxfun_range = (1000, 1e7)
+259                no_local_search_choices = [True, False]
+260
+261                if i == 0:
+262                    # Use default hyperparameters for the first iteration
+263                    signature = inspect.signature(sp.optimize.dual_annealing)
+264                    parameters = signature.parameters
+265                    initial_temp = parameters["initial_temp"].default
+266                    restart_temp_ratio = parameters["restart_temp_ratio"].default
+267                    visit = parameters["visit"].default
+268                    accept = parameters["accept"].default
+269                    maxiter = parameters["maxiter"].default
+270                    maxfun = parameters["maxfun"].default
+271                    no_local_search = parameters["no_local_search"].default
+272                else:
+273                    # Sample random hyperparameters
+274                    initial_temp = np.random.uniform(*initial_temp_range)
+275                    restart_temp_ratio = np.random.uniform(*restart_temp_ratio_range)
+276                    visit = np.random.uniform(*visit_range)
+277                    accept = np.random.uniform(*accept_range)
+278                    maxiter = np.random.randint(*maxiter_range)
+279                    maxfun = np.random.randint(*maxfun_range)
+280                    no_local_search = np.random.choice(no_local_search_choices)
+281
+282                # Run optimization with hyperparameters
+283                tune_res = sp.optimize.dual_annealing(optimization_func, bounds=p_bounds, 
+284                                                    args=(rr_model, y_obs, initial_conditions, 
+285                                                            initial_conditions_scale, buffer_concentration_scale, 
+286                                                            simulation_kwargs, p_lb, p_ub), 
+287                                                    initial_temp=initial_temp, restart_temp_ratio=restart_temp_ratio, 
+288                                                    visit=visit, accept=accept, maxiter=maxiter, maxfun=maxfun, 
+289                                                    no_local_search=no_local_search)
+290                # Store results
+291                current_hyperparams = {
+292                    "initial_temp": initial_temp,
+293                    "restart_temp_ratio": restart_temp_ratio,
+294                    "visit": visit,
+295                    "accept": accept,
+296                    "maxiter": maxiter,
+297                    "maxfun": maxfun,
+298                    "no_local_search": bool(no_local_search)
+299                }
+300
+301            elif mle_method == "shgo":
+302                # Define possible hyperparameter ranges
+303                n_range = (25, 500)  
+304                iters_range = (1, 10)  
+305                sampling_methods = ["simplicial", "halton", "sobol"]
+306                maxiter_range = (100, 1000)
+307                maxev_range = (100, 10000)  
+308                maxtime_range = (0.1, 10)  
+309                minimize_every_iter_choices = [True, False]
+310                local_iter_range = (10, 100)  
+311                infty_constraints_choices = [True, False]
+312                            
+313                if i == 0:
+314                    # Use default hyperparameters for the first iteration
+315                    signature = inspect.signature(sp.optimize.shgo)
+316                    parameters = signature.parameters
+317                    n = parameters["n"].default
+318                    iters = parameters["iters"].default
+319                    sampling_method = parameters["sampling_method"].default
+320                    
+321                    options_defaults = parameters["options"].default
+322                    if options_defaults is None:
+323                        options_defaults = {}
+324                    
+325                    maxiter = options_defaults.get("maxiter", maxiter_range[0])
+326                    maxev = options_defaults.get("maxev", maxev_range[0])
+327                    maxtime = options_defaults.get("maxtime", maxtime_range[0])
+328                    minimize_every_iter = options_defaults.get("minimize_every_iter", True)
+329                    local_iter = options_defaults.get("local_iter", local_iter_range[0])
+330                    infty_constraints = options_defaults.get("infty_constraints", True)
+331                else:
+332                    # Sample random hyperparameters
+333                    n = np.random.randint(*n_range)
+334                    iters = np.random.randint(*iters_range)
+335                    sampling_method = np.random.choice(sampling_methods)
+336                    maxiter = np.random.randint(*maxiter_range)
+337                    maxev = np.random.randint(*maxev_range)
+338                    maxtime = np.random.uniform(*maxtime_range)
+339                    minimize_every_iter = np.random.choice(minimize_every_iter_choices)
+340                    local_iter = np.random.randint(*local_iter_range)
+341                    infty_constraints = np.random.choice(infty_constraints_choices)
+342
+343                # Run optimization with hyperparameters
+344                tune_res = sp.optimize.shgo(optimization_func, bounds=p_bounds, 
+345                                            args=(rr_model, y_obs, initial_conditions, 
+346                                                initial_conditions_scale, buffer_concentration_scale, 
+347                                                simulation_kwargs, p_lb, p_ub), 
+348                                            n=n, iters=iters, sampling_method=sampling_method,
+349                                            options={"maxiter": maxiter, "maxev": maxev, "maxtime": maxtime,
+350                                                    "minimize_every_iter": minimize_every_iter, "local_iter": local_iter,
+351                                                    "infty_constraints": infty_constraints})
+352                            
+353                # Store results
+354                current_hyperparams = {
+355                    "n": n,
+356                    "iters": iters,
+357                    "sampling_method": sampling_method,
+358                    "maxiter": maxiter,
+359                    "maxev": maxev,
+360                    "maxtime": maxtime,
+361                    "minimize_every_iter": bool(minimize_every_iter),
+362                    "local_iter": local_iter,
+363                    "infty_constraints": bool(infty_constraints)
+364                }
+365
+366            elif mle_method == "Nelder-Mead":
+367                # Define possible hyperparameter ranges
+368                maxiter_range = (1000, 10000)
+369                maxfev_range = (1000, 10000)
+370                xatol_range = (1e-8, 1e-4)
+371                fatol_range = (1e-8, 1e-4)
+372                adaptive_choices = [True, False]
+373
+374                if i == 0:  # Use default hyperparameters for the first iteration
+375                    # Hard-coded default values based on scipy documentation / common values
+376                    maxiter = len(p_bounds) * 200  # N * 200, where N is the number of variables
+377                    maxfev = len(p_bounds) * 200  # Same as maxiter by default
+378                    xatol = 0.0001  
+379                    fatol = 0.0001  
+380                    adaptive = True
+381                else:
+382                    # Sample random hyperparameters
+383                    maxiter = np.random.randint(*maxiter_range)
+384                    maxfev = np.random.randint(*maxfev_range)
+385                    xatol = np.random.uniform(*xatol_range)
+386                    fatol = np.random.uniform(*fatol_range)
+387                    adaptive = np.random.choice(adaptive_choices)
+388
+389                # Run optimization with hyperparameters
+390                tune_res = sp.optimize.minimize(optimization_func, x0=initial_guess, args=(rr_model, y_obs, initial_conditions, 
+391                                                                              initial_conditions_scale, buffer_concentration_scale, 
+392                                                                              simulation_kwargs, p_lb, p_ub),
+393                                                method='Nelder-Mead', 
+394                                                options={"maxiter": maxiter, "maxfev": maxfev, "xatol": xatol, "fatol": fatol, "adaptive": adaptive})
+395
+396
+397                # Store results
+398                current_hyperparams = {
+399                    "maxiter": maxiter,
+400                    "maxfev": maxfev,
+401                    "xatol": xatol,
+402                    "fatol": fatol,
+403                    "adaptive": bool(adaptive)
+404                }
+405
+406            elif mle_method == "Powell":
+407                # Define possible hyperparameter ranges
+408                maxiter_range = (len(p_bounds) * 1000, len(p_bounds) * 10000)
+409                maxfev_range = (len(p_bounds) * 1000, len(p_bounds) * 10000)
+410                xtol_range = (1e-8, 1e-4)
+411                ftol_range = (1e-8, 1e-4)
+412
+413                if i == 0:  # Use default hyperparameters for the first iteration
+414                    # Hard-coded default values based on scipy documentation / common values
+415                    maxiter = len(p_bounds) * 1000  # N * 1000, where N is the number of variables
+416                    maxfev = len(p_bounds) * 1000  # Same as maxiter by default
+417                    xtol = 0.0001  # Placeholder value based on common settings
+418                    ftol = 0.0001  # Placeholder value based on common settings
+419                else:
+420                    # Sample random hyperparameters
+421                    maxiter = np.random.randint(*maxiter_range)
+422                    maxfev = np.random.randint(*maxfev_range)
+423                    xtol = np.random.uniform(*xtol_range)
+424                    ftol = np.random.uniform(*ftol_range)
+425
+426                # Run optimization with hyperparameters
+427                tune_res = sp.optimize.minimize(optimization_func, x0=initial_guess, args=(rr_model, y_obs, initial_conditions, 
+428                                                                                        initial_conditions_scale, buffer_concentration_scale, 
+429                                                                                        simulation_kwargs, p_lb, p_ub),
+430                                                method='Powell', 
+431                                                options={"maxiter": maxiter, "maxfev": maxfev, "xtol": xtol, "ftol": ftol})
+432
+433                # Store results
+434                current_hyperparams = {
+435                    "maxiter": maxiter,
+436                    "maxfev": maxfev,
+437                    "xtol": xtol,
+438                    "ftol": ftol
+439                }
+440
+441
+442            elif mle_method == "CG":
+443                # Define possible hyperparameter ranges
+444                maxiter_range = (len(p_bounds) * 1000, len(p_bounds) * 10000)
+445                gtol_range = (1e-8, 1e-4)
+446                norm_choices = [np.inf, -np.inf, 1, 2]  
+447                eps_range = (1e-8, 1e-4)  
+448
+449                if i == 0:  # Use default hyperparameters for the first iteration
+450                    # Hard-coded default values
+451                    maxiter = len(p_bounds) * 1000  
+452                    gtol = 0.0001  
+453                    norm = np.inf 
+454                    eps = 0.0001  
+455                else:
+456                    # Sample random hyperparameters
+457                    maxiter = np.random.randint(*maxiter_range)
+458                    gtol = np.random.uniform(*gtol_range)
+459                    norm = np.random.choice(norm_choices)
+460                    eps = np.random.uniform(*eps_range)
+461
+462                # Run optimization with hyperparameters
+463                tune_res = sp.optimize.minimize(optimization_func, x0=initial_guess, args=(rr_model, y_obs, initial_conditions, 
+464                                                                                    initial_conditions_scale, buffer_concentration_scale, 
+465                                                                                    simulation_kwargs, p_lb, p_ub),
+466                                                method='CG', 
+467                                                options={"maxiter": maxiter, "gtol": gtol, "norm": norm, "eps": eps})
+468
+469                if norm == np.inf:
+470                    norm_to_save = float('inf')
+471                elif norm == -np.inf:
+472                    norm_to_save = float('-inf')
+473                else:
+474                    norm_to_save = float(norm)
+475
+476                # Store results
+477                current_hyperparams = {
+478                    "maxiter": maxiter,
+479                    "gtol": gtol,
+480                    "norm": norm_to_save,
+481                    "eps": eps
+482                }
+483
+484            elif mle_method == "L-BFGS-B":
+485                # Define possible hyperparameter ranges
+486                maxcor_range = (5, 20) 
+487                maxfun_range = (len(p_bounds) * 1000, len(p_bounds) * 10000)
+488                maxiter_range = (len(p_bounds) * 1000, len(p_bounds) * 10000)
+489                ftol_range = (1e-10, 1e-4)
+490                gtol_range = (1e-10, 1e-4)
+491                eps_range = (1e-10, 1e-4)
+492                maxls_range = (10, 100) 
+493
+494                if i == 0:  # Use default hyperparameters for the first iteration
+495                    # Hard-coded default values
+496                    maxcor = 10  
+497                    maxfun = len(p_bounds) * 1000
+498                    maxiter = len(p_bounds) * 1000
+499                    ftol = 2.2e-9  # ftol = factr * numpy.finfo(float).eps where factr = 1e7.
+500                    gtol = 1e-5  
+501                    eps = 1e-8  
+502                    maxls = 20  
+503                else:
+504                    # Sample random hyperparameters
+505                    maxcor = np.random.randint(*maxcor_range)
+506                    maxfun = np.random.randint(*maxfun_range)
+507                    maxiter = np.random.randint(*maxiter_range)
+508                    ftol = np.random.uniform(*ftol_range)
+509                    gtol = np.random.uniform(*gtol_range)
+510                    eps = np.random.uniform(*eps_range)
+511                    maxls = np.random.randint(*maxls_range)
+512
+513                # Run optimization with hyperparameters
+514                tune_res = sp.optimize.minimize(optimization_func, x0=initial_guess, args=(rr_model, y_obs, initial_conditions, 
+515                                                                                        initial_conditions_scale, buffer_concentration_scale, 
+516                                                                                        simulation_kwargs, p_lb, p_ub),
+517                                                method='L-BFGS-B', 
+518                                                bounds=p_bounds,  # Necessary for L-BFGS-B
+519                                                options={"maxcor": maxcor, "maxfun": maxfun, "maxiter": maxiter, "ftol": ftol, 
+520                                                        "gtol": gtol, "eps": eps, "maxls": maxls})
+521
+522                # Store results
+523                current_hyperparams = {
+524                    "maxcor": maxcor,
+525                    "maxfun": maxfun,
+526                    "maxiter": maxiter,
+527                    "ftol": ftol,
+528                    "gtol": gtol,
+529                    "eps": eps,
+530                    "maxls": maxls
+531                }
+532
+533            elif mle_method == "COBYLA":
+534                # Define possible hyperparameter ranges
+535                rhobeg_range = (0.01, 1.0)  
+536                tol_range = (1e-10, 1e-4)
+537                maxiter_range = (len(p_bounds) * 1000, len(p_bounds) * 10000)
+538                catol_range = (1e-10, 1e-4)  
+539
+540                if i == 0:  # Use default hyperparameters for the first iteration
+541                    # Hard-coded default values
+542                    rhobeg = 0.1  
+543                    tol = 1e-6  
+544                    maxiter = len(p_bounds) * 1000
+545                    catol = 1e-6  
+546                else:
+547                    # Sample random hyperparameters
+548                    rhobeg = np.random.uniform(*rhobeg_range)
+549                    tol = np.random.uniform(*tol_range)
+550                    maxiter = np.random.randint(*maxiter_range)
+551                    catol = np.random.uniform(*catol_range)
+552
+553                # Run optimization with hyperparameters
+554                tune_res = sp.optimize.minimize(optimization_func, x0=initial_guess, args=(rr_model, y_obs, initial_conditions, 
+555                                                                                            initial_conditions_scale, buffer_concentration_scale, 
+556                                                                                            simulation_kwargs, p_lb, p_ub),
+557                                                method='COBYLA', 
+558                                                options={"rhobeg": rhobeg, "tol": tol, "maxiter": maxiter, "catol": catol, "disp": False})  # disp set to True for convergence messages
+559
+560                # Store results
+561                current_hyperparams = {
+562                    "rhobeg": rhobeg,
+563                    "tol": tol,
+564                    "maxiter": maxiter,
+565                    "catol": catol
+566                }
+567
+568
+569            elif mle_method == "direct":
+570                # Define possible hyperparameter ranges
+571                eps_range = (1e-6, 1e-2)
+572                maxfun_range = (10000, 100000)
+573                maxiter_range = (500, 2000)
+574                locally_biased_choices = [True, False]
+575                f_min_rtol_range = (1e-6, 1e-2)
+576                vol_tol_range = (1e-20, 1e-10)
+577                len_tol_range = (1e-8, 1e-4)
+578
+579                if i == 0:  # Use default hyperparameters for the first iteration
+580                    # Get the default values from the sp.optimize.direct function
+581                    signature = inspect.signature(sp.optimize.direct)
+582                    parameters = signature.parameters
+583
+584                    default_eps = parameters["eps"].default
+585                    default_maxfun = parameters["maxfun"].default
+586                    default_maxiter = parameters["maxiter"].default
+587                    default_locally_biased = parameters["locally_biased"].default
+588                    default_f_min_rtol = parameters["f_min_rtol"].default
+589                    default_vol_tol = parameters["vol_tol"].default
+590                    default_len_tol = parameters["len_tol"].default
+591                    eps = default_eps
+592                    maxfun = default_maxfun
+593                    maxiter = default_maxiter
+594                    locally_biased = default_locally_biased
+595                    f_min_rtol = default_f_min_rtol
+596                    vol_tol = default_vol_tol
+597                    len_tol = default_len_tol
+598                else:
+599                    # Sample random hyperparameters
+600                    eps = np.random.uniform(*eps_range)
+601                    maxfun = np.random.randint(*maxfun_range)
+602                    maxiter = np.random.randint(*maxiter_range)
+603                    locally_biased = np.random.choice(locally_biased_choices)
+604                    f_min_rtol = np.random.uniform(*f_min_rtol_range)
+605                    vol_tol = np.random.uniform(*vol_tol_range)
+606                    len_tol = np.random.uniform(*len_tol_range)
+607
+608                # Run optimization with hyperparameters
+609                tune_res = sp.optimize.direct(optimization_func, p_bounds, args=(rr_model, y_obs, initial_conditions, 
+610                                                                                initial_conditions_scale, buffer_concentration_scale, 
+611                                                                                simulation_kwargs, p_lb, p_ub),
+612                                            eps=eps, maxfun=maxfun, maxiter=maxiter, locally_biased=bool(locally_biased),
+613                                            f_min_rtol=f_min_rtol, vol_tol=vol_tol, len_tol=len_tol)
+614
+615                # Store results
+616                current_hyperparams = {
+617                    "eps": eps,
+618                    "maxfun": maxfun,
+619                    "maxiter": maxiter,
+620                    "locally_biased": bool(locally_biased),
+621                    "f_min_rtol": f_min_rtol,
+622                    "vol_tol": vol_tol,
+623                    "len_tol": len_tol
+624                }
+625
+626            elif mle_method == "SLSQP":
+627                # Define possible hyperparameter ranges
+628                ftol_range = (1e-10, 1e-4)
+629                eps_range = (1e-10, 1e-4)
+630                maxiter_range = (len(p_bounds) * 1000, len(p_bounds) * 10000)
+631
+632                if i == 0:  # Use default hyperparameters for the first iteration
+633                    # Hard-coded default values
+634                    ftol = 1e-6
+635                    eps = 1e-8
+636                    maxiter = len(p_bounds) * 1000
+637          
+638                else:
+639                    # Sample random hyperparameters
+640                    ftol = np.random.uniform(*ftol_range)
+641                    eps = np.random.uniform(*eps_range)
+642                    maxiter = np.random.randint(*maxiter_range)
+643
+644                # Run optimization with hyperparameters
+645                tune_res = sp.optimize.minimize(optimization_func, x0=initial_guess, args=(rr_model, y_obs, initial_conditions, 
+646                                                                                            initial_conditions_scale, buffer_concentration_scale, 
+647                                                                                            simulation_kwargs, p_lb, p_ub),
+648                                                method='SLSQP', 
+649                                                bounds=p_bounds,  
+650                                                options={"ftol": ftol, "eps": eps, "maxiter": maxiter, "disp": False,})
+651
+652                # Store results
+653                current_hyperparams = {
+654                    "ftol": ftol,
+655                    "eps": eps,
+656                    "maxiter": maxiter,
+657                }
+658
+659            else:
+660                print('error: invalid MLE algorithm.')
+661                assert(1==0)
+662
+663            tuning_results.append({
+664                "hyperparameters": current_hyperparams,
+665                "function_value": float(tune_res.fun),  # convert numpy float to python float if needed
+666                "x_values": [i.item() if isinstance(i, np.generic) else i for i in tune_res.x], # convert numpy array to python list
+667                "success": bool(tune_res.success),
+668                "func_calls": int(tune_res.nfev),       # convert numpy int to python int if needed
+669                "duration": time.time() - t0
+670            })
+671
+672            if tune_res.fun < best_fun_value:
+673                best_fun_value = tune_res.fun
+674                best_hyperparams = current_hyperparams
+675                best_result = tune_res
+676
+677            logger.info(f"Hyperparameters: {current_hyperparams}")
+678            logger.info(f"Func value: {tune_res.fun}")
+679            logger.info(f"Time (s): {time.time() - t0}\n")
+680
+681
+682
+683        logger.info(f"Best function value: {best_fun_value}")
+684        logger.info(f"Best hyperparameters: {best_hyperparams}\n")
+685
+686        with open(os.path.join(output_dir, f'tuning_results.yaml'), 'w') as file:
+687            yaml.dump(tuning_results, file, sort_keys=False)
+688
+689
+690    # start MLE optimization runs
+691    for i in tqdm(range(n_trials)):
+692        logger.info(f"starting optimization run {i}")
+693        t0 = time.time()
+694        initial_guess = uf.get_p0(p_bounds,1)[0]
+695        logger.info(f"initial guess: {initial_guess}")
+696       
+697        if mle_method == "differential_evolution":
+698            if tuning:
+699                res = sp.optimize.differential_evolution(optimization_func, p_bounds, args=optimization_func_args,  **best_hyperparams)
+700            else:
+701                res = sp.optimize.differential_evolution(optimization_func, p_bounds, args=optimization_func_args)
+702        elif mle_method == "basinhopping":
+703            if tuning:
+704                minimizer_kwargs = {"args": (rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub), "bounds": p_bounds}
+705                res = sp.optimize.basinhopping(optimization_func, initial_guess, minimizer_kwargs=minimizer_kwargs, **best_hyperparams)
+706            else:
+707                minimizer_kwargs = {"args": (rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub), "bounds": p_bounds}
+708                res = sp.optimize.basinhopping(optimization_func, initial_guess, minimizer_kwargs=minimizer_kwargs)
+709        elif mle_method == "dual_annealing":
+710            if tuning:
+711                res = sp.optimize.dual_annealing(optimization_func, p_bounds, args=optimization_func_args,  **best_hyperparams)
+712            else:
+713                res = sp.optimize.dual_annealing(optimization_func, p_bounds, args=optimization_func_args)
+714        elif mle_method == "shgo":
+715            if tuning:
+716                res = sp.optimize.shgo(optimization_func, p_bounds, args=optimization_func_args,  **best_hyperparams)
+717            else:
+718                res = sp.optimize.shgo(optimization_func, p_bounds, args=optimization_func_args)
+719        elif mle_method == "Nelder-Mead":
+720            if tuning:
+721                res = sp.optimize.minimize(optimization_func, initial_guess, args=(rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub), bounds=p_bounds, method="Nelder-Mead",  options=best_hyperparams)
+722            else:
+723                res = sp.optimize.minimize(optimization_func, initial_guess, args=(rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub), bounds=p_bounds, method="Nelder-Mead")
+724        elif mle_method == "Powell":
+725            if tuning:
+726                res = sp.optimize.minimize(optimization_func, initial_guess, args=(rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub),  bounds=p_bounds, method="Powell",  options=best_hyperparams)
+727            else:
+728                res = sp.optimize.minimize(optimization_func, initial_guess, args=(rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub),  bounds=p_bounds, method="Powell")
+729        elif mle_method == "CG":
+730            if tuning:
+731                res = sp.optimize.minimize(optimization_func, initial_guess, args=(rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub),  method="CG",  options=best_hyperparams)
+732            else:
+733                res = sp.optimize.minimize(optimization_func, initial_guess, args=(rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub),  method="CG", ) 
+734        elif mle_method == "L-BFGS-B":
+735            if tuning:
+736                res = sp.optimize.minimize(optimization_func, initial_guess, args=(rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub),  bounds=p_bounds, method="L-BFGS-B",   options=best_hyperparams) 
+737            else:
+738                res = sp.optimize.minimize(optimization_func, initial_guess, args=(rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub),  bounds=p_bounds, method="L-BFGS-B") 
+739        elif mle_method == "COBYLA":
+740            if tuning:
+741                res = sp.optimize.minimize(optimization_func, initial_guess, args=(rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub),  method="COBYLA",  options=best_hyperparams)
+742            else:
+743                res = sp.optimize.minimize(optimization_func, initial_guess, args=(rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub),  method="COBYLA")
+744        elif mle_method == "direct":
+745            if tuning:
+746                res = sp.optimize.direct(optimization_func, p_bounds, args=(rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub), **best_hyperparams)
+747            else:
+748                res = sp.optimize.direct(optimization_func, p_bounds, args=(rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub))
+749        elif mle_method == "SLSQP":
+750            if tuning:
+751                res = sp.optimize.minimize(optimization_func, initial_guess, args=(rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub),  method="SLSQP", options=best_hyperparams)
+752            else:    
+753                res = sp.optimize.minimize(optimization_func, initial_guess, args=(rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub),  method="SLSQP")
+754        else:
+755            print('invalid MLE method.')
+756            assert(1==0)
+757
+758
+759        dt = time.time() - t0
+760        logger.info(f"method: {mle_method}")
+761        logger.info(f"x: {res.x}")
+762        logger.info(f"func: {res.fun}")
+763        logger.info(f"success: {res.success}")
+764        logger.info(f"nfev: {res.nfev}")
+765        logger.info(f"time (s): {dt}\n")
+766        x.append(res.x)
+767        y.append(res.fun)
+768        s.append(res.success)
+769        nf.append(res.nfev)
+770        t.append(dt)
+771    
+772    ##### Save and plot the data #####
+773    results_list=[x,y,s,nf,t]
+774
+775    y_data_list = [y,nf,t]
+776    fig, axes = plt.subplots(len(y_data_list), 1, figsize=(10, 12), sharex=True)
+777    trial_idx = [int(i+1) for i in range(n_trials)]
+778    y_labels = ['log-likelihood', 'n likelihood evaluations', 'runtime (s)']
+779    plt.suptitle('Maximum likelihood optimization runs')
+780
+781    for i in range(len(y_data_list)):
+782        ax = axes[i]
+783        ax.set_ylabel(y_labels[i])
+784        ax.yaxis.set_label_coords(-0.1, 0.5)
+785        if y_labels[i] == 'log-likelihood':
+786            ax.axhline(log_like_nom, color='red')
+787            ax.plot(trial_idx, [-1*j for j in y_data_list[i]], '-o', )
+788        else:
+789            ax.plot(trial_idx, y_data_list[i], '-o',)
+790    axes[-1].set_xlabel("replica number")
+791    plt.tight_layout()
+792    plt.savefig(os.path.join(output_dir, f'optimization_plot.png'))
+793
+794    # Convert each NumPy array in results_list to a standard Python list
+795    results_dict = {
+796        'run': output_fname,
+797        'method': mle_method,
+798        'x': [arr.tolist() if isinstance(arr, np.ndarray) else arr for arr in results_list[0]],
+799        'fun': [float(i) for i in results_list[1]],
+800        'success': [bool(i) for i in results_list[2]],
+801        'nfev': results_list[3],
+802        't': results_list[4]
+803    }
+804
+805    with open(os.path.join(output_dir, f'optimization_results.yaml'), 'w') as file:
+806        yaml.dump(results_dict, file, sort_keys=False)
+807
+808
+809if __name__ == '__main__':
+810
+811    ##### Adjust this if needed ##### 
+812    config_fname = "./example/antiporter_1_1_12D_cycle1_config.yaml"
+813    run_optimizer(config_fname)
+
+ + +
+
+ +
+ + def + run_optimizer(config_fname): + + + +
+ +
 20def run_optimizer(config_fname):
+ 21    """
+ 22    Run maximum likelihood estimation based on a configuration file.
+ 23    
+ 24    This function will do both (random) hyperparameter tuning and replica maximum likelihood runs for an algorithm specified in the configuration file.
+ 25    Currently the "differential_evolution", "basinhopping", "dual_annealing", "shgo", "Nelder-Mead", "Powell", "CG", "L-BFGS-B", "COBYLA", "direct" and "SLSQP"
+ 26    methods are supported.
+ 27    
+ 28    Args:
+ 29        config_fname (str): Path to the configuration YAML file. This file should contain all 
+ 30                            necessary settings and parameters for the run, including model file, 
+ 31                            data file, solver arguments, Bayesian inference settings, optimization 
+ 32                            settings, and random seed.
+ 33                            
+ 34    Outputs:
+ 35        - Creates a new directory for the run, containing:
+ 36            - Copied model, data, and configuration files.
+ 37            - Log file with run details.
+ 38            - Flux trace plot.
+ 39            - Optimization plot.
+ 40            - Tuning results.
+ 41            - Optimization results.
+ 42            
+ 43    Notes:
+ 44        - The configuration file should be structured appropriately with all necessary fields (see example folder).
+ 45        - The function uses the `scipy` library for maximum likelihood estimation.
+ 46        - The function assumes the use of the RoadRunner model for simulations.
+ 47        - The function handles both standard and extended models based on the 'extended' flag in the configuration.
+ 48    """
+ 49
+ 50    ##### Setup #####
+ 51    # Load the config.yaml file
+ 52    with open(config_fname, "r") as f:
+ 53        config = yaml.safe_load(f)
+ 54
+ 55    # get run configuration settings
+ 56    run_name = config["run_name"]
+ 57    model_file = config['model_file']
+ 58    data_file = config['data_file']
+ 59    simulation_kwargs = config['solver_arguments']
+ 60    inference_settings = config['bayesian_inference']
+ 61    optimization_settings = config['optimization']
+ 62    mle_method = optimization_settings['method']
+ 63    seed = config['random_seed']
+ 64    np.random.seed(seed)
+ 65
+ 66    # get parameter names, values, and boundaries
+ 67    k_names = config['solver_arguments']['rate_constant_names']
+ 68    log10_p_nom = [d['nominal'] for d in config['bayesian_inference']['parameters']]
+ 69    p_names = [d['name'] for d in config['bayesian_inference']['parameters']]
+ 70    p_lb = [d['bounds'][0] for d in config['bayesian_inference']['parameters']]
+ 71    p_ub = [d['bounds'][1] for d in config['bayesian_inference']['parameters']]
+ 72    p_bounds = list(zip(p_lb,p_ub))
+ 73    p_nom = [10**d['nominal'] for d in config['bayesian_inference']['parameters']]
+ 74    k_nom = p_nom[:-1]
+ 75    sigma_nom = p_nom[-1]
+ 76
+ 77    # get assay initial conditions
+ 78    initial_conditions = config['solver_arguments']['species_initial_concentrations']
+ 79    initial_conditions_scale = config['solver_arguments']['species_initial_concentrations_scale']
+ 80    buffer_concentration_scale = config['solver_arguments']['buffer_concentration_scale']
+ 81
+ 82    # create new directory for runs
+ 83    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
+ 84    output_fname = f'run_{run_name}_r{seed}'
+ 85    output_dir = output_fname + '_' + timestamp
+ 86    os.mkdir(output_dir)
+ 87    shutil.copy(model_file, output_dir)
+ 88    shutil.copy(data_file, output_dir)
+ 89    shutil.copy(config_fname, output_dir)
+ 90
+ 91    # create logging file 
+ 92    logger = logging.getLogger('log')
+ 93    logger.setLevel(logging.INFO)
+ 94    file_handler = logging.FileHandler(os.path.join(output_dir, f'log_file.log'))
+ 95    file_handler.setLevel(logging.INFO)
+ 96    log_format = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+ 97    file_handler.setFormatter(log_format)
+ 98    logger.addHandler(file_handler)
+ 99    logger.info(f"output_dir: {output_dir}")
+100    logger.info(f"seed: {seed}")
+101
+102    # load roadrunner model and simulate assay at nominal values
+103    rr_model = te.loadSBMLModel(model_file)
+104    data_nom = ssme.simulate_assay(rr_model, k_nom, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs)
+105    y_nom = data_nom[1]  # (time, output)
+106
+107    # Load data, plot trace, and calculate log-likelihood reference
+108    y_obs = np.loadtxt(data_file, delimiter=',')
+109    plt.figure(figsize=(8,6))
+110    plt.title('Net ion influx (M/s) vs simulation step')
+111    plt.plot(y_nom, label='y_nom')
+112    plt.plot(y_obs, 'o', label='y_obs', alpha=0.3)
+113    plt.legend()
+114    plt.savefig(os.path.join(output_dir, 'net_ion_influx_trace_nom.png'))
+115    log_like_nom = uf.log_like(log10_p_nom, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs)
+116    logger.info(f"log_like_nom: {log_like_nom}")
+117
+118    ##### Run MLE optimization #####
+119    tuning = config['optimization']['tuning']
+120    extended = config['bayesian_inference']['extended']
+121    n_dim = len(p_names)
+122    logger.info(f"n_dim: {n_dim}")
+123    if extended:
+124        logger.info(f"using extended nll")
+125        optimization_func = uf.negative_log_likelihood_wrapper_extended
+126    else:
+127        optimization_func = uf.negative_log_likelihood_wrapper
+128
+129    optimization_func_args = [rr_model, 
+130                                y_obs, 
+131                                initial_conditions, 
+132                                initial_conditions_scale, 
+133                                buffer_concentration_scale, 
+134                                simulation_kwargs, 
+135                                p_lb, 
+136                                p_ub
+137                            ]
+138    x = []
+139    y = []
+140    s = []
+141    nf = []
+142    t = []
+143    n_trials = optimization_settings['n_replicas']
+144    n_tuning_steps = config['optimization']['n_tuning_steps']
+145
+146    ### Fix this! too repetitive 
+147    # run hyperparameter tuning
+148    if tuning: 
+149        logger.info(f"Starting optimization hyperparameter tuning")
+150
+151        initial_guess = uf.get_p0(p_bounds, 1)[0]  # use the same start point for each algorithm
+152        logger.info(f"Initial guess: {initial_guess}")
+153
+154        best_fun_value = np.inf
+155        best_hyperparams = None
+156        best_result = None
+157        tuning_results = []
+158
+159        for i in tqdm(range(n_tuning_steps)):
+160            logger.info(f"Starting optimization hyperparameter tuning run {i}")
+161            t0 = time.time()
+162
+163
+164            if mle_method == "differential_evolution":
+165                # Define possible hyperparameter ranges
+166                strategy_choices = ['best1bin', 'best1exp', 'rand1exp', 'randtobest1exp', 'currenttobest1exp', 'best2exp', 'rand2exp', 'randtobest1bin', 'currenttobest1bin', 'best2bin', 'rand2bin', 'rand1bin']
+167                popsize_range = (25, 50)
+168                mutation_range = (0, 1.9)
+169                recombination_range = (0, 1)
+170                tol_range = (1e-5, 1e-3)
+171                maxiter_range = (100, 1000)  # Define a range for maxiter
+172
+173                if i == 0:
+174                    # Use default hyperparameters for the first iteration
+175                    signature = inspect.signature(sp.optimize.differential_evolution)
+176                    parameters = signature.parameters
+177                    strategy = parameters["strategy"].default
+178                    popsize = parameters["popsize"].default
+179                    mutation = parameters["mutation"].default
+180                    recombination = parameters["recombination"].default
+181                    tol = parameters["tol"].default
+182                    maxiter = parameters["maxiter"].default  # Get default maxiter
+183                else:
+184                    # Sample random hyperparameters
+185                    strategy = np.random.choice(strategy_choices)
+186                    popsize = np.random.randint(*popsize_range)
+187                    mutation = np.random.uniform(*mutation_range)
+188                    recombination = np.random.uniform(*recombination_range)
+189                    tol = np.random.uniform(*tol_range)
+190                    maxiter = np.random.randint(*maxiter_range)  # Sample maxiter
+191
+192                # Run optimization with hyperparameters
+193                tune_res = sp.optimize.differential_evolution(optimization_func, p_bounds, args=(rr_model, y_obs, initial_conditions, 
+194                                                                                initial_conditions_scale, buffer_concentration_scale, 
+195                                                                                simulation_kwargs, p_lb, p_ub), 
+196                                                                                strategy=strategy, popsize=popsize, mutation=mutation, 
+197                                                                                recombination=recombination, tol=tol, maxiter=maxiter)
+198                
+199
+200                if isinstance(mutation, tuple):
+201                    mutation_to_store = list(mutation)
+202                else:
+203                    mutation_to_store = mutation
+204                # Store results
+205                current_hyperparams = {
+206                    "strategy": str(strategy),
+207                    "popsize": popsize,
+208                    "mutation": mutation_to_store,
+209                    "recombination": recombination,
+210                    "tol": tol,
+211                    "maxiter": maxiter
+212                }
+213
+214            elif mle_method == "basinhopping":
+215                # Define possible hyperparameter ranges for basinhopping
+216                niter_range = (50, 500)  # Number of basin hopping iterations
+217                T_range = (0.5, 5.0)  # Temperature for acceptance criterion
+218                stepsize_range = (0.1, 2.0)  # Step size for random displacement
+219                interval_range = (10, 100)  # Interval for updating the step size
+220                niter_success_range = (5, 50)  # Number of iterations for success check
+221
+222                if i == 0:
+223                    # Use default hyperparameters for the first iteration
+224                    signature = inspect.signature(sp.optimize.basinhopping)
+225                    parameters = signature.parameters
+226                    niter = parameters["niter"].default
+227                    T = parameters["T"].default
+228                    stepsize = parameters["stepsize"].default
+229                    interval = parameters["interval"].default
+230                    niter_success = parameters["niter_success"].default
+231                else:
+232                    # Sample random hyperparameters for basinhopping
+233                    niter = np.random.randint(*niter_range)
+234                    T = np.random.uniform(*T_range)
+235                    stepsize = np.random.uniform(*stepsize_range)
+236                    interval = np.random.randint(*interval_range)
+237                    niter_success = np.random.randint(*niter_success_range)
+238
+239                # Run optimization with hyperparameters
+240                minimizer_kwargs = {"args": (rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub), "bounds": p_bounds}
+241                tune_res = sp.optimize.basinhopping(optimization_func, initial_guess, minimizer_kwargs=minimizer_kwargs, niter=niter, T=T, stepsize=stepsize, interval=interval, niter_success=niter_success)
+242
+243                # Store results
+244                current_hyperparams = {
+245                    "niter": niter,
+246                    "T": T,
+247                    "stepsize": stepsize,
+248                    "interval": interval,
+249                    "niter_success": niter_success
+250                }
+251
+252            elif mle_method == "dual_annealing":
+253                # Define possible hyperparameter ranges
+254                initial_temp_range = (0.01, 50000)
+255                restart_temp_ratio_range = (0, 1)
+256                visit_range = (1, 3)
+257                accept_range = (-1e4, -5)
+258                maxiter_range = (100, 1000)
+259                maxfun_range = (1000, 1e7)
+260                no_local_search_choices = [True, False]
+261
+262                if i == 0:
+263                    # Use default hyperparameters for the first iteration
+264                    signature = inspect.signature(sp.optimize.dual_annealing)
+265                    parameters = signature.parameters
+266                    initial_temp = parameters["initial_temp"].default
+267                    restart_temp_ratio = parameters["restart_temp_ratio"].default
+268                    visit = parameters["visit"].default
+269                    accept = parameters["accept"].default
+270                    maxiter = parameters["maxiter"].default
+271                    maxfun = parameters["maxfun"].default
+272                    no_local_search = parameters["no_local_search"].default
+273                else:
+274                    # Sample random hyperparameters
+275                    initial_temp = np.random.uniform(*initial_temp_range)
+276                    restart_temp_ratio = np.random.uniform(*restart_temp_ratio_range)
+277                    visit = np.random.uniform(*visit_range)
+278                    accept = np.random.uniform(*accept_range)
+279                    maxiter = np.random.randint(*maxiter_range)
+280                    maxfun = np.random.randint(*maxfun_range)
+281                    no_local_search = np.random.choice(no_local_search_choices)
+282
+283                # Run optimization with hyperparameters
+284                tune_res = sp.optimize.dual_annealing(optimization_func, bounds=p_bounds, 
+285                                                    args=(rr_model, y_obs, initial_conditions, 
+286                                                            initial_conditions_scale, buffer_concentration_scale, 
+287                                                            simulation_kwargs, p_lb, p_ub), 
+288                                                    initial_temp=initial_temp, restart_temp_ratio=restart_temp_ratio, 
+289                                                    visit=visit, accept=accept, maxiter=maxiter, maxfun=maxfun, 
+290                                                    no_local_search=no_local_search)
+291                # Store results
+292                current_hyperparams = {
+293                    "initial_temp": initial_temp,
+294                    "restart_temp_ratio": restart_temp_ratio,
+295                    "visit": visit,
+296                    "accept": accept,
+297                    "maxiter": maxiter,
+298                    "maxfun": maxfun,
+299                    "no_local_search": bool(no_local_search)
+300                }
+301
+302            elif mle_method == "shgo":
+303                # Define possible hyperparameter ranges
+304                n_range = (25, 500)  
+305                iters_range = (1, 10)  
+306                sampling_methods = ["simplicial", "halton", "sobol"]
+307                maxiter_range = (100, 1000)
+308                maxev_range = (100, 10000)  
+309                maxtime_range = (0.1, 10)  
+310                minimize_every_iter_choices = [True, False]
+311                local_iter_range = (10, 100)  
+312                infty_constraints_choices = [True, False]
+313                            
+314                if i == 0:
+315                    # Use default hyperparameters for the first iteration
+316                    signature = inspect.signature(sp.optimize.shgo)
+317                    parameters = signature.parameters
+318                    n = parameters["n"].default
+319                    iters = parameters["iters"].default
+320                    sampling_method = parameters["sampling_method"].default
+321                    
+322                    options_defaults = parameters["options"].default
+323                    if options_defaults is None:
+324                        options_defaults = {}
+325                    
+326                    maxiter = options_defaults.get("maxiter", maxiter_range[0])
+327                    maxev = options_defaults.get("maxev", maxev_range[0])
+328                    maxtime = options_defaults.get("maxtime", maxtime_range[0])
+329                    minimize_every_iter = options_defaults.get("minimize_every_iter", True)
+330                    local_iter = options_defaults.get("local_iter", local_iter_range[0])
+331                    infty_constraints = options_defaults.get("infty_constraints", True)
+332                else:
+333                    # Sample random hyperparameters
+334                    n = np.random.randint(*n_range)
+335                    iters = np.random.randint(*iters_range)
+336                    sampling_method = np.random.choice(sampling_methods)
+337                    maxiter = np.random.randint(*maxiter_range)
+338                    maxev = np.random.randint(*maxev_range)
+339                    maxtime = np.random.uniform(*maxtime_range)
+340                    minimize_every_iter = np.random.choice(minimize_every_iter_choices)
+341                    local_iter = np.random.randint(*local_iter_range)
+342                    infty_constraints = np.random.choice(infty_constraints_choices)
+343
+344                # Run optimization with hyperparameters
+345                tune_res = sp.optimize.shgo(optimization_func, bounds=p_bounds, 
+346                                            args=(rr_model, y_obs, initial_conditions, 
+347                                                initial_conditions_scale, buffer_concentration_scale, 
+348                                                simulation_kwargs, p_lb, p_ub), 
+349                                            n=n, iters=iters, sampling_method=sampling_method,
+350                                            options={"maxiter": maxiter, "maxev": maxev, "maxtime": maxtime,
+351                                                    "minimize_every_iter": minimize_every_iter, "local_iter": local_iter,
+352                                                    "infty_constraints": infty_constraints})
+353                            
+354                # Store results
+355                current_hyperparams = {
+356                    "n": n,
+357                    "iters": iters,
+358                    "sampling_method": sampling_method,
+359                    "maxiter": maxiter,
+360                    "maxev": maxev,
+361                    "maxtime": maxtime,
+362                    "minimize_every_iter": bool(minimize_every_iter),
+363                    "local_iter": local_iter,
+364                    "infty_constraints": bool(infty_constraints)
+365                }
+366
+367            elif mle_method == "Nelder-Mead":
+368                # Define possible hyperparameter ranges
+369                maxiter_range = (1000, 10000)
+370                maxfev_range = (1000, 10000)
+371                xatol_range = (1e-8, 1e-4)
+372                fatol_range = (1e-8, 1e-4)
+373                adaptive_choices = [True, False]
+374
+375                if i == 0:  # Use default hyperparameters for the first iteration
+376                    # Hard-coded default values based on scipy documentation / common values
+377                    maxiter = len(p_bounds) * 200  # N * 200, where N is the number of variables
+378                    maxfev = len(p_bounds) * 200  # Same as maxiter by default
+379                    xatol = 0.0001  
+380                    fatol = 0.0001  
+381                    adaptive = True
+382                else:
+383                    # Sample random hyperparameters
+384                    maxiter = np.random.randint(*maxiter_range)
+385                    maxfev = np.random.randint(*maxfev_range)
+386                    xatol = np.random.uniform(*xatol_range)
+387                    fatol = np.random.uniform(*fatol_range)
+388                    adaptive = np.random.choice(adaptive_choices)
+389
+390                # Run optimization with hyperparameters
+391                tune_res = sp.optimize.minimize(optimization_func, x0=initial_guess, args=(rr_model, y_obs, initial_conditions, 
+392                                                                              initial_conditions_scale, buffer_concentration_scale, 
+393                                                                              simulation_kwargs, p_lb, p_ub),
+394                                                method='Nelder-Mead', 
+395                                                options={"maxiter": maxiter, "maxfev": maxfev, "xatol": xatol, "fatol": fatol, "adaptive": adaptive})
+396
+397
+398                # Store results
+399                current_hyperparams = {
+400                    "maxiter": maxiter,
+401                    "maxfev": maxfev,
+402                    "xatol": xatol,
+403                    "fatol": fatol,
+404                    "adaptive": bool(adaptive)
+405                }
+406
+407            elif mle_method == "Powell":
+408                # Define possible hyperparameter ranges
+409                maxiter_range = (len(p_bounds) * 1000, len(p_bounds) * 10000)
+410                maxfev_range = (len(p_bounds) * 1000, len(p_bounds) * 10000)
+411                xtol_range = (1e-8, 1e-4)
+412                ftol_range = (1e-8, 1e-4)
+413
+414                if i == 0:  # Use default hyperparameters for the first iteration
+415                    # Hard-coded default values based on scipy documentation / common values
+416                    maxiter = len(p_bounds) * 1000  # N * 1000, where N is the number of variables
+417                    maxfev = len(p_bounds) * 1000  # Same as maxiter by default
+418                    xtol = 0.0001  # Placeholder value based on common settings
+419                    ftol = 0.0001  # Placeholder value based on common settings
+420                else:
+421                    # Sample random hyperparameters
+422                    maxiter = np.random.randint(*maxiter_range)
+423                    maxfev = np.random.randint(*maxfev_range)
+424                    xtol = np.random.uniform(*xtol_range)
+425                    ftol = np.random.uniform(*ftol_range)
+426
+427                # Run optimization with hyperparameters
+428                tune_res = sp.optimize.minimize(optimization_func, x0=initial_guess, args=(rr_model, y_obs, initial_conditions, 
+429                                                                                        initial_conditions_scale, buffer_concentration_scale, 
+430                                                                                        simulation_kwargs, p_lb, p_ub),
+431                                                method='Powell', 
+432                                                options={"maxiter": maxiter, "maxfev": maxfev, "xtol": xtol, "ftol": ftol})
+433
+434                # Store results
+435                current_hyperparams = {
+436                    "maxiter": maxiter,
+437                    "maxfev": maxfev,
+438                    "xtol": xtol,
+439                    "ftol": ftol
+440                }
+441
+442
+443            elif mle_method == "CG":
+444                # Define possible hyperparameter ranges
+445                maxiter_range = (len(p_bounds) * 1000, len(p_bounds) * 10000)
+446                gtol_range = (1e-8, 1e-4)
+447                norm_choices = [np.inf, -np.inf, 1, 2]  
+448                eps_range = (1e-8, 1e-4)  
+449
+450                if i == 0:  # Use default hyperparameters for the first iteration
+451                    # Hard-coded default values
+452                    maxiter = len(p_bounds) * 1000  
+453                    gtol = 0.0001  
+454                    norm = np.inf 
+455                    eps = 0.0001  
+456                else:
+457                    # Sample random hyperparameters
+458                    maxiter = np.random.randint(*maxiter_range)
+459                    gtol = np.random.uniform(*gtol_range)
+460                    norm = np.random.choice(norm_choices)
+461                    eps = np.random.uniform(*eps_range)
+462
+463                # Run optimization with hyperparameters
+464                tune_res = sp.optimize.minimize(optimization_func, x0=initial_guess, args=(rr_model, y_obs, initial_conditions, 
+465                                                                                    initial_conditions_scale, buffer_concentration_scale, 
+466                                                                                    simulation_kwargs, p_lb, p_ub),
+467                                                method='CG', 
+468                                                options={"maxiter": maxiter, "gtol": gtol, "norm": norm, "eps": eps})
+469
+470                if norm == np.inf:
+471                    norm_to_save = float('inf')
+472                elif norm == -np.inf:
+473                    norm_to_save = float('-inf')
+474                else:
+475                    norm_to_save = float(norm)
+476
+477                # Store results
+478                current_hyperparams = {
+479                    "maxiter": maxiter,
+480                    "gtol": gtol,
+481                    "norm": norm_to_save,
+482                    "eps": eps
+483                }
+484
+485            elif mle_method == "L-BFGS-B":
+486                # Define possible hyperparameter ranges
+487                maxcor_range = (5, 20) 
+488                maxfun_range = (len(p_bounds) * 1000, len(p_bounds) * 10000)
+489                maxiter_range = (len(p_bounds) * 1000, len(p_bounds) * 10000)
+490                ftol_range = (1e-10, 1e-4)
+491                gtol_range = (1e-10, 1e-4)
+492                eps_range = (1e-10, 1e-4)
+493                maxls_range = (10, 100) 
+494
+495                if i == 0:  # Use default hyperparameters for the first iteration
+496                    # Hard-coded default values
+497                    maxcor = 10  
+498                    maxfun = len(p_bounds) * 1000
+499                    maxiter = len(p_bounds) * 1000
+500                    ftol = 2.2e-9  # ftol = factr * numpy.finfo(float).eps where factr = 1e7.
+501                    gtol = 1e-5  
+502                    eps = 1e-8  
+503                    maxls = 20  
+504                else:
+505                    # Sample random hyperparameters
+506                    maxcor = np.random.randint(*maxcor_range)
+507                    maxfun = np.random.randint(*maxfun_range)
+508                    maxiter = np.random.randint(*maxiter_range)
+509                    ftol = np.random.uniform(*ftol_range)
+510                    gtol = np.random.uniform(*gtol_range)
+511                    eps = np.random.uniform(*eps_range)
+512                    maxls = np.random.randint(*maxls_range)
+513
+514                # Run optimization with hyperparameters
+515                tune_res = sp.optimize.minimize(optimization_func, x0=initial_guess, args=(rr_model, y_obs, initial_conditions, 
+516                                                                                        initial_conditions_scale, buffer_concentration_scale, 
+517                                                                                        simulation_kwargs, p_lb, p_ub),
+518                                                method='L-BFGS-B', 
+519                                                bounds=p_bounds,  # Necessary for L-BFGS-B
+520                                                options={"maxcor": maxcor, "maxfun": maxfun, "maxiter": maxiter, "ftol": ftol, 
+521                                                        "gtol": gtol, "eps": eps, "maxls": maxls})
+522
+523                # Store results
+524                current_hyperparams = {
+525                    "maxcor": maxcor,
+526                    "maxfun": maxfun,
+527                    "maxiter": maxiter,
+528                    "ftol": ftol,
+529                    "gtol": gtol,
+530                    "eps": eps,
+531                    "maxls": maxls
+532                }
+533
+534            elif mle_method == "COBYLA":
+535                # Define possible hyperparameter ranges
+536                rhobeg_range = (0.01, 1.0)  
+537                tol_range = (1e-10, 1e-4)
+538                maxiter_range = (len(p_bounds) * 1000, len(p_bounds) * 10000)
+539                catol_range = (1e-10, 1e-4)  
+540
+541                if i == 0:  # Use default hyperparameters for the first iteration
+542                    # Hard-coded default values
+543                    rhobeg = 0.1  
+544                    tol = 1e-6  
+545                    maxiter = len(p_bounds) * 1000
+546                    catol = 1e-6  
+547                else:
+548                    # Sample random hyperparameters
+549                    rhobeg = np.random.uniform(*rhobeg_range)
+550                    tol = np.random.uniform(*tol_range)
+551                    maxiter = np.random.randint(*maxiter_range)
+552                    catol = np.random.uniform(*catol_range)
+553
+554                # Run optimization with hyperparameters
+555                tune_res = sp.optimize.minimize(optimization_func, x0=initial_guess, args=(rr_model, y_obs, initial_conditions, 
+556                                                                                            initial_conditions_scale, buffer_concentration_scale, 
+557                                                                                            simulation_kwargs, p_lb, p_ub),
+558                                                method='COBYLA', 
+559                                                options={"rhobeg": rhobeg, "tol": tol, "maxiter": maxiter, "catol": catol, "disp": False})  # disp set to True for convergence messages
+560
+561                # Store results
+562                current_hyperparams = {
+563                    "rhobeg": rhobeg,
+564                    "tol": tol,
+565                    "maxiter": maxiter,
+566                    "catol": catol
+567                }
+568
+569
+570            elif mle_method == "direct":
+571                # Define possible hyperparameter ranges
+572                eps_range = (1e-6, 1e-2)
+573                maxfun_range = (10000, 100000)
+574                maxiter_range = (500, 2000)
+575                locally_biased_choices = [True, False]
+576                f_min_rtol_range = (1e-6, 1e-2)
+577                vol_tol_range = (1e-20, 1e-10)
+578                len_tol_range = (1e-8, 1e-4)
+579
+580                if i == 0:  # Use default hyperparameters for the first iteration
+581                    # Get the default values from the sp.optimize.direct function
+582                    signature = inspect.signature(sp.optimize.direct)
+583                    parameters = signature.parameters
+584
+585                    default_eps = parameters["eps"].default
+586                    default_maxfun = parameters["maxfun"].default
+587                    default_maxiter = parameters["maxiter"].default
+588                    default_locally_biased = parameters["locally_biased"].default
+589                    default_f_min_rtol = parameters["f_min_rtol"].default
+590                    default_vol_tol = parameters["vol_tol"].default
+591                    default_len_tol = parameters["len_tol"].default
+592                    eps = default_eps
+593                    maxfun = default_maxfun
+594                    maxiter = default_maxiter
+595                    locally_biased = default_locally_biased
+596                    f_min_rtol = default_f_min_rtol
+597                    vol_tol = default_vol_tol
+598                    len_tol = default_len_tol
+599                else:
+600                    # Sample random hyperparameters
+601                    eps = np.random.uniform(*eps_range)
+602                    maxfun = np.random.randint(*maxfun_range)
+603                    maxiter = np.random.randint(*maxiter_range)
+604                    locally_biased = np.random.choice(locally_biased_choices)
+605                    f_min_rtol = np.random.uniform(*f_min_rtol_range)
+606                    vol_tol = np.random.uniform(*vol_tol_range)
+607                    len_tol = np.random.uniform(*len_tol_range)
+608
+609                # Run optimization with hyperparameters
+610                tune_res = sp.optimize.direct(optimization_func, p_bounds, args=(rr_model, y_obs, initial_conditions, 
+611                                                                                initial_conditions_scale, buffer_concentration_scale, 
+612                                                                                simulation_kwargs, p_lb, p_ub),
+613                                            eps=eps, maxfun=maxfun, maxiter=maxiter, locally_biased=bool(locally_biased),
+614                                            f_min_rtol=f_min_rtol, vol_tol=vol_tol, len_tol=len_tol)
+615
+616                # Store results
+617                current_hyperparams = {
+618                    "eps": eps,
+619                    "maxfun": maxfun,
+620                    "maxiter": maxiter,
+621                    "locally_biased": bool(locally_biased),
+622                    "f_min_rtol": f_min_rtol,
+623                    "vol_tol": vol_tol,
+624                    "len_tol": len_tol
+625                }
+626
+627            elif mle_method == "SLSQP":
+628                # Define possible hyperparameter ranges
+629                ftol_range = (1e-10, 1e-4)
+630                eps_range = (1e-10, 1e-4)
+631                maxiter_range = (len(p_bounds) * 1000, len(p_bounds) * 10000)
+632
+633                if i == 0:  # Use default hyperparameters for the first iteration
+634                    # Hard-coded default values
+635                    ftol = 1e-6
+636                    eps = 1e-8
+637                    maxiter = len(p_bounds) * 1000
+638          
+639                else:
+640                    # Sample random hyperparameters
+641                    ftol = np.random.uniform(*ftol_range)
+642                    eps = np.random.uniform(*eps_range)
+643                    maxiter = np.random.randint(*maxiter_range)
+644
+645                # Run optimization with hyperparameters
+646                tune_res = sp.optimize.minimize(optimization_func, x0=initial_guess, args=(rr_model, y_obs, initial_conditions, 
+647                                                                                            initial_conditions_scale, buffer_concentration_scale, 
+648                                                                                            simulation_kwargs, p_lb, p_ub),
+649                                                method='SLSQP', 
+650                                                bounds=p_bounds,  
+651                                                options={"ftol": ftol, "eps": eps, "maxiter": maxiter, "disp": False,})
+652
+653                # Store results
+654                current_hyperparams = {
+655                    "ftol": ftol,
+656                    "eps": eps,
+657                    "maxiter": maxiter,
+658                }
+659
+660            else:
+661                print('error: invalid MLE algorithm.')
+662                assert(1==0)
+663
+664            tuning_results.append({
+665                "hyperparameters": current_hyperparams,
+666                "function_value": float(tune_res.fun),  # convert numpy float to python float if needed
+667                "x_values": [i.item() if isinstance(i, np.generic) else i for i in tune_res.x], # convert numpy array to python list
+668                "success": bool(tune_res.success),
+669                "func_calls": int(tune_res.nfev),       # convert numpy int to python int if needed
+670                "duration": time.time() - t0
+671            })
+672
+673            if tune_res.fun < best_fun_value:
+674                best_fun_value = tune_res.fun
+675                best_hyperparams = current_hyperparams
+676                best_result = tune_res
+677
+678            logger.info(f"Hyperparameters: {current_hyperparams}")
+679            logger.info(f"Func value: {tune_res.fun}")
+680            logger.info(f"Time (s): {time.time() - t0}\n")
+681
+682
+683
+684        logger.info(f"Best function value: {best_fun_value}")
+685        logger.info(f"Best hyperparameters: {best_hyperparams}\n")
+686
+687        with open(os.path.join(output_dir, f'tuning_results.yaml'), 'w') as file:
+688            yaml.dump(tuning_results, file, sort_keys=False)
+689
+690
+691    # start MLE optimization runs
+692    for i in tqdm(range(n_trials)):
+693        logger.info(f"starting optimization run {i}")
+694        t0 = time.time()
+695        initial_guess = uf.get_p0(p_bounds,1)[0]
+696        logger.info(f"initial guess: {initial_guess}")
+697       
+698        if mle_method == "differential_evolution":
+699            if tuning:
+700                res = sp.optimize.differential_evolution(optimization_func, p_bounds, args=optimization_func_args,  **best_hyperparams)
+701            else:
+702                res = sp.optimize.differential_evolution(optimization_func, p_bounds, args=optimization_func_args)
+703        elif mle_method == "basinhopping":
+704            if tuning:
+705                minimizer_kwargs = {"args": (rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub), "bounds": p_bounds}
+706                res = sp.optimize.basinhopping(optimization_func, initial_guess, minimizer_kwargs=minimizer_kwargs, **best_hyperparams)
+707            else:
+708                minimizer_kwargs = {"args": (rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub), "bounds": p_bounds}
+709                res = sp.optimize.basinhopping(optimization_func, initial_guess, minimizer_kwargs=minimizer_kwargs)
+710        elif mle_method == "dual_annealing":
+711            if tuning:
+712                res = sp.optimize.dual_annealing(optimization_func, p_bounds, args=optimization_func_args,  **best_hyperparams)
+713            else:
+714                res = sp.optimize.dual_annealing(optimization_func, p_bounds, args=optimization_func_args)
+715        elif mle_method == "shgo":
+716            if tuning:
+717                res = sp.optimize.shgo(optimization_func, p_bounds, args=optimization_func_args,  **best_hyperparams)
+718            else:
+719                res = sp.optimize.shgo(optimization_func, p_bounds, args=optimization_func_args)
+720        elif mle_method == "Nelder-Mead":
+721            if tuning:
+722                res = sp.optimize.minimize(optimization_func, initial_guess, args=(rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub), bounds=p_bounds, method="Nelder-Mead",  options=best_hyperparams)
+723            else:
+724                res = sp.optimize.minimize(optimization_func, initial_guess, args=(rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub), bounds=p_bounds, method="Nelder-Mead")
+725        elif mle_method == "Powell":
+726            if tuning:
+727                res = sp.optimize.minimize(optimization_func, initial_guess, args=(rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub),  bounds=p_bounds, method="Powell",  options=best_hyperparams)
+728            else:
+729                res = sp.optimize.minimize(optimization_func, initial_guess, args=(rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub),  bounds=p_bounds, method="Powell")
+730        elif mle_method == "CG":
+731            if tuning:
+732                res = sp.optimize.minimize(optimization_func, initial_guess, args=(rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub),  method="CG",  options=best_hyperparams)
+733            else:
+734                res = sp.optimize.minimize(optimization_func, initial_guess, args=(rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub),  method="CG", ) 
+735        elif mle_method == "L-BFGS-B":
+736            if tuning:
+737                res = sp.optimize.minimize(optimization_func, initial_guess, args=(rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub),  bounds=p_bounds, method="L-BFGS-B",   options=best_hyperparams) 
+738            else:
+739                res = sp.optimize.minimize(optimization_func, initial_guess, args=(rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub),  bounds=p_bounds, method="L-BFGS-B") 
+740        elif mle_method == "COBYLA":
+741            if tuning:
+742                res = sp.optimize.minimize(optimization_func, initial_guess, args=(rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub),  method="COBYLA",  options=best_hyperparams)
+743            else:
+744                res = sp.optimize.minimize(optimization_func, initial_guess, args=(rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub),  method="COBYLA")
+745        elif mle_method == "direct":
+746            if tuning:
+747                res = sp.optimize.direct(optimization_func, p_bounds, args=(rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub), **best_hyperparams)
+748            else:
+749                res = sp.optimize.direct(optimization_func, p_bounds, args=(rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub))
+750        elif mle_method == "SLSQP":
+751            if tuning:
+752                res = sp.optimize.minimize(optimization_func, initial_guess, args=(rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub),  method="SLSQP", options=best_hyperparams)
+753            else:    
+754                res = sp.optimize.minimize(optimization_func, initial_guess, args=(rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs, p_lb, p_ub),  method="SLSQP")
+755        else:
+756            print('invalid MLE method.')
+757            assert(1==0)
+758
+759
+760        dt = time.time() - t0
+761        logger.info(f"method: {mle_method}")
+762        logger.info(f"x: {res.x}")
+763        logger.info(f"func: {res.fun}")
+764        logger.info(f"success: {res.success}")
+765        logger.info(f"nfev: {res.nfev}")
+766        logger.info(f"time (s): {dt}\n")
+767        x.append(res.x)
+768        y.append(res.fun)
+769        s.append(res.success)
+770        nf.append(res.nfev)
+771        t.append(dt)
+772    
+773    ##### Save and plot the data #####
+774    results_list=[x,y,s,nf,t]
+775
+776    y_data_list = [y,nf,t]
+777    fig, axes = plt.subplots(len(y_data_list), 1, figsize=(10, 12), sharex=True)
+778    trial_idx = [int(i+1) for i in range(n_trials)]
+779    y_labels = ['log-likelihood', 'n likelihood evaluations', 'runtime (s)']
+780    plt.suptitle('Maximum likelihood optimization runs')
+781
+782    for i in range(len(y_data_list)):
+783        ax = axes[i]
+784        ax.set_ylabel(y_labels[i])
+785        ax.yaxis.set_label_coords(-0.1, 0.5)
+786        if y_labels[i] == 'log-likelihood':
+787            ax.axhline(log_like_nom, color='red')
+788            ax.plot(trial_idx, [-1*j for j in y_data_list[i]], '-o', )
+789        else:
+790            ax.plot(trial_idx, y_data_list[i], '-o',)
+791    axes[-1].set_xlabel("replica number")
+792    plt.tight_layout()
+793    plt.savefig(os.path.join(output_dir, f'optimization_plot.png'))
+794
+795    # Convert each NumPy array in results_list to a standard Python list
+796    results_dict = {
+797        'run': output_fname,
+798        'method': mle_method,
+799        'x': [arr.tolist() if isinstance(arr, np.ndarray) else arr for arr in results_list[0]],
+800        'fun': [float(i) for i in results_list[1]],
+801        'success': [bool(i) for i in results_list[2]],
+802        'nfev': results_list[3],
+803        't': results_list[4]
+804    }
+805
+806    with open(os.path.join(output_dir, f'optimization_results.yaml'), 'w') as file:
+807        yaml.dump(results_dict, file, sort_keys=False)
+
+ + +

Run maximum likelihood estimation based on a configuration file.

+ +

This function will do both (random) hyperparameter tuning and replica maximum likelihood runs for an algorithm specified in the configuration file. +Currently the "differential_evolution", "basinhopping", "dual_annealing", "shgo", "Nelder-Mead", "Powell", "CG", "L-BFGS-B", "COBYLA", "direct" and "SLSQP" +methods are supported.

+ +
Arguments:
+ +
    +
  • config_fname (str): Path to the configuration YAML file. This file should contain all +necessary settings and parameters for the run, including model file, +data file, solver arguments, Bayesian inference settings, optimization +settings, and random seed.
  • +
+ +
Outputs:
+ +
+
    +
  • Creates a new directory for the run, containing: +
      +
    • Copied model, data, and configuration files.
    • +
    • Log file with run details.
    • +
    • Flux trace plot.
    • +
    • Optimization plot.
    • +
    • Tuning results.
    • +
    • Optimization results.
    • +
  • +
+
+ +
Notes:
+ +
+
    +
  • The configuration file should be structured appropriately with all necessary fields (see example folder).
  • +
  • The function uses the scipy library for maximum likelihood estimation.
  • +
  • The function assumes the use of the RoadRunner model for simulations.
  • +
  • The function handles both standard and extended models based on the 'extended' flag in the configuration.
  • +
+
+
+ + +
+
+ + \ No newline at end of file diff --git a/docs/run_pocomc.html b/docs/run_pocomc.html new file mode 100644 index 0000000..74eb1ab --- /dev/null +++ b/docs/run_pocomc.html @@ -0,0 +1,789 @@ + + + + + + + run_pocomc API documentation + + + + + + + + + +
+
+

+run_pocomc

+ + + + + + +
  1import matplotlib.pyplot as plt 
+  2import numpy as np 
+  3import scipy as sp
+  4import pocomc as pmc 
+  5import yaml
+  6import ssme_function as ssme
+  7import tellurium as te
+  8import os 
+  9import shutil
+ 10import datetime
+ 11import logging
+ 12import corner
+ 13import arviz as az
+ 14import os 
+ 15import utility_functions as uf
+ 16
+ 17
+ 18os.environ['KMP_DUPLICATE_LIB_OK']='True'  # may be needed depending on installation of math library
+ 19
+ 20
+ 21def run_sampler(config_fname):
+ 22    """
+ 23    Run a Bayesian sampler (pocoMC) based on a configuration file to infer model parameters.
+ 24    
+ 25    This function sets up and runs a Bayesian sampler (MCMC) based on the provided configuration file.
+ 26    The sampler is used to infer model parameters from observed data. The function also provides
+ 27    preliminary data visualization of the MCMC results, including trace plots, corner plots, and 
+ 28    flux prediction plots. Additionally, it calculates the log evidence using both Sequential Monte Carlo 
+ 29    and Bridge Sampling methods.
+ 30    
+ 31    Args:
+ 32        config_fname (str): Path to the configuration YAML file. This file should contain all 
+ 33                            necessary settings and parameters for the run, including model file, 
+ 34                            data file, solver arguments, Bayesian inference settings, optimization 
+ 35                            settings, and random seed.
+ 36                            
+ 37    Outputs:
+ 38        - Creates a new directory for the run, containing:
+ 39            - Copied model, data, and configuration files.
+ 40            - Log file with run details.
+ 41            - MCMC run and trace plot.
+ 42            - MCMC corner plot.
+ 43            - Flux prediction plot.
+ 44            - Flat samples and log likelihoods as CSV files.
+ 45            
+ 46    Notes:
+ 47        - The configuration file should be structured appropriately with all necessary fields.
+ 48        - The function uses the `pmc` library for MCMC sampling.
+ 49        - The function uses the `pmc` library for corner plots.
+ 50        - The function assumes the use of the RoadRunner model for simulations.
+ 51        - The function handles both standard and extended models based on the 'extended' flag in the configuration.
+ 52    """
+ 53
+ 54    ##### Setup #####
+ 55    # Load the config.yaml file
+ 56    with open(config_fname, "r") as f:
+ 57        config = yaml.safe_load(f)
+ 58
+ 59    # get run configuration settings
+ 60    run_name = config["run_name"]
+ 61    model_file = config['model_file']
+ 62    data_file = config['data_file']
+ 63    simulation_kwargs = config['solver_arguments']
+ 64    inference_settings = config['bayesian_inference']
+ 65    optimization_settings = config['optimization']
+ 66    seed = config['random_seed']
+ 67    np.random.seed(seed)
+ 68    extended = config['bayesian_inference']['extended']
+ 69
+ 70    if extended == False:
+ 71        # get parameter names, values, and boundaries
+ 72        k_names = config['solver_arguments']['rate_constant_names']
+ 73        log10_p_nom = [d['nominal'] for d in config['bayesian_inference']['parameters']]
+ 74        p_names = [d['name'] for d in config['bayesian_inference']['parameters']]
+ 75        p_lb = [d['bounds'][0] for d in config['bayesian_inference']['parameters']]
+ 76        p_ub = [d['bounds'][1] for d in config['bayesian_inference']['parameters']]
+ 77        p_bounds = list(zip(p_lb,p_ub))
+ 78        p_nom = [10**d['nominal'] for d in config['bayesian_inference']['parameters']]
+ 79        k_nom = p_nom[:-1]
+ 80        sigma_nom = p_nom[-1]
+ 81
+ 82        # get assay initial conditions
+ 83        initial_conditions = config['solver_arguments']['species_initial_concentrations']
+ 84        initial_conditions_scale = config['solver_arguments']['species_initial_concentrations_scale']
+ 85        buffer_concentration_scale = config['solver_arguments']['buffer_concentration_scale']
+ 86    else:
+ 87        # get parameter names, values, and boundaries
+ 88        k_names = config['solver_arguments']['rate_constant_names']
+ 89        log10_p_nom = [d['nominal'] for d in config['bayesian_inference']['parameters']]
+ 90        p_names = [d['name'] for d in config['bayesian_inference']['parameters']]
+ 91        p_lb = [d['bounds'][0] for d in config['bayesian_inference']['parameters']]
+ 92        p_ub = [d['bounds'][1] for d in config['bayesian_inference']['parameters']]
+ 93        p_bounds = list(zip(p_lb,p_ub))
+ 94        p_nom = [10**d['nominal'] for d in config['bayesian_inference']['parameters']]
+ 95        k_nom = p_nom[:-6]
+ 96        sigma_nom = p_nom[-1]
+ 97
+ 98        # get assay initial conditions
+ 99        initial_conditions = config['solver_arguments']['species_initial_concentrations']
+100        initial_conditions_scale = config['solver_arguments']['species_initial_concentrations_scale']
+101        buffer_concentration_scale = config['solver_arguments']['buffer_concentration_scale']
+102
+103    # create new directory for runs
+104    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
+105    output_fname = f'run_{run_name}_r{seed}'
+106    output_dir = output_fname + '_' + timestamp
+107    os.mkdir(output_dir)
+108    shutil.copy(model_file, output_dir)
+109    shutil.copy(data_file, output_dir)
+110    shutil.copy(config_fname, output_dir)
+111
+112    # create logging file 
+113    logger = logging.getLogger('log')
+114    logger.setLevel(logging.INFO)
+115    file_handler = logging.FileHandler(os.path.join(output_dir, f'log_file.log'))
+116    file_handler.setLevel(logging.INFO)
+117    log_format = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+118    file_handler.setFormatter(log_format)
+119    logger.addHandler(file_handler)
+120    logger.info(f"output_dir: {output_dir}")
+121    logger.info(f"seed: {seed}")
+122
+123    # load roadrunner model and simulate assay at nominal values
+124    rr_model = te.loadSBMLModel(model_file)
+125    data_nom = ssme.simulate_assay(rr_model, k_nom, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs)
+126  
+127    y_nom = data_nom[1]  # (time, output)
+128
+129
+130    # Load data, plot trace, and calculate log-likelihood reference
+131    y_obs = np.loadtxt(data_file, delimiter=',')
+132    plt.figure(figsize=(8,6))
+133    plt.title('Current (A) vs simulation step')
+134    plt.plot(y_nom, label='y_nom')
+135    plt.plot(y_obs, 'o', label='y_obs', alpha=0.3)
+136    plt.legend()
+137    plt.savefig(os.path.join(output_dir, 'current_trace_nom.png'))
+138    log_like_nom = uf.log_like(log10_p_nom, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs)
+139    logger.info(f"log_like_nom: {log_like_nom}")
+140    
+141    # set sampler parameters 
+142    n_walkers = int(config['bayesian_inference']['mcmc_algorithm_arguments']['n_walkers'])
+143    ess = config['bayesian_inference']['mcmc_algorithm_arguments']['ess']
+144    gamma = config['bayesian_inference']['mcmc_algorithm_arguments']['gamma']
+145    additional_samples = config['bayesian_inference']['mcmc_algorithm_arguments']['additional_samples']
+146    n_max = config['bayesian_inference']['mcmc_algorithm_arguments']['n_max']
+147    n_dim = len(p_names)
+148    thin = int(config['bayesian_inference']['mcmc_algorithm_arguments']['thin'])
+149    burn_in = int(config['bayesian_inference']['mcmc_algorithm_arguments']['burn_in'])
+150    logger.info(f"n_walkers: {n_walkers}")
+151    logger.info(f"ess: {ess}")
+152    logger.info(f"gamma: {gamma}")
+153    logger.info(f"additional samples: {additional_samples}")
+154    logger.info(f"n_max: {n_max}")
+155    logger.info(f"n_dim: {n_dim}")
+156    logger.info(f"thin: {thin}")
+157    logger.info(f"burn_in: {burn_in}")
+158  
+159    # get sampler starting points
+160    p_0 = uf.get_p0(p_bounds, n_walkers)
+161
+162
+163    ##### Run Sampler #####
+164
+165    if extended:
+166        sampler = pmc.Sampler(
+167            n_walkers,
+168            n_dim,
+169            log_likelihood=uf.log_like_extended,
+170            log_prior=uf.log_prior,
+171            log_likelihood_args= [rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs],
+172            log_prior_args= [p_lb, p_ub],
+173            infer_vectorization=False,
+174            bounds=np.array(p_bounds),
+175            random_state=seed,
+176            diagonal = True,
+177        )
+178    else:
+179        sampler = pmc.Sampler(
+180            n_walkers,
+181            n_dim,
+182            log_likelihood=uf.log_like,
+183            log_prior=uf.log_prior,
+184            log_likelihood_args= [rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs],
+185            log_prior_args= [p_lb, p_ub],
+186            infer_vectorization=False,
+187            bounds=np.array(p_bounds),
+188            random_state=seed,
+189            diagonal = True, 
+190        )
+191    logger.info(f"starting MCMC")
+192    sampler.run(prior_samples = p_0,
+193            ess = ess,
+194            gamma = gamma,
+195            n_max = n_max
+196           )
+197    logger.info(f"finished MCMC")
+198    sampler.add_samples(additional_samples)
+199    logger.info(f"finished additional MCMC")
+200
+201    ##### Store data #####
+202    results = sampler.results
+203    logger.info(f"total n steps: {np.sum(results['steps'])}")
+204
+205    np.savetxt(os.path.join(output_dir, f'log_evidence.csv'), results['logz'], delimiter=',')
+206    np.savetxt(os.path.join(output_dir, f'samples.csv'), results['samples'], delimiter=',')
+207    np.savetxt(os.path.join(output_dir, f'log_likelihood.csv'), results['loglikelihood'], delimiter=',')
+208    logger.info(f"saved data")
+209
+210    ##### bridge sampling for log evidence calculation #####
+211    logz_bs, logz_bs_error = sampler.bridge_sampling()
+212    logger.info(f"logZ estimated using SMC: {results['logz'][-1]}")
+213    logger.info(f"logZ estimated using Bridge Sampling: {logz_bs} +- {logz_bs_error}")
+214
+215
+216    ##### Preliminary data viz #####
+217    # run plot 
+218    fig = pmc.plotting.run(results)
+219    plt.savefig(os.path.join(output_dir, f'run_plot.png'))
+220    plt.close()
+221
+222    # trace plot
+223    fig = pmc.plotting.trace(results)
+224    plt.savefig(os.path.join(output_dir, f'trace_plot.png'))
+225    plt.close()
+226
+227    # corner plot 
+228    fig = pmc.plotting.corner(results,labels=p_names, truths=log10_p_nom, range=p_bounds)
+229    plt.savefig(os.path.join(output_dir, f'mcmc_corner.png'))
+230
+231    # flux prediction plot
+232    plt.figure(figsize=(10,8))
+233    inds = np.random.randint(len(results['samples']), size=100)
+234    for ind in inds:
+235        p_sample = results['samples'][ind]
+236        k_tmp = [10**i for i in p_sample[:-1]]
+237        res_tmp = ssme.simulate_assay(rr_model, k_tmp, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs)
+238        y_tmp = res_tmp[1]
+239        plt.plot(y_tmp, color='blue', alpha=0.1)
+240    plt.plot(y_tmp, color='blue', alpha=0.1, label='y_pred')
+241    plt.plot(y_obs, 'o', label='y_obs', alpha=0.3,color='orange')
+242    plt.legend(fontsize=14)
+243    plt.xlabel("time step")
+244    plt.ylabel("net ion influx (M/s)");
+245    plt.savefig(os.path.join(output_dir, f'net_ion_influx_pred.png'))
+246    logger.info(f"plotted analysis")
+247
+248
+249
+250if __name__ == '__main__':
+251
+252    ##### Adjust this if needed ##### 
+253    config_fname = "./example/antiporter_1_1_12D_cycle1_config.yaml"
+254    run_sampler(config_fname)
+255    
+
+ + +
+
+ +
+ + def + run_sampler(config_fname): + + + +
+ +
 22def run_sampler(config_fname):
+ 23    """
+ 24    Run a Bayesian sampler (pocoMC) based on a configuration file to infer model parameters.
+ 25    
+ 26    This function sets up and runs a Bayesian sampler (MCMC) based on the provided configuration file.
+ 27    The sampler is used to infer model parameters from observed data. The function also provides
+ 28    preliminary data visualization of the MCMC results, including trace plots, corner plots, and 
+ 29    flux prediction plots. Additionally, it calculates the log evidence using both Sequential Monte Carlo 
+ 30    and Bridge Sampling methods.
+ 31    
+ 32    Args:
+ 33        config_fname (str): Path to the configuration YAML file. This file should contain all 
+ 34                            necessary settings and parameters for the run, including model file, 
+ 35                            data file, solver arguments, Bayesian inference settings, optimization 
+ 36                            settings, and random seed.
+ 37                            
+ 38    Outputs:
+ 39        - Creates a new directory for the run, containing:
+ 40            - Copied model, data, and configuration files.
+ 41            - Log file with run details.
+ 42            - MCMC run and trace plot.
+ 43            - MCMC corner plot.
+ 44            - Flux prediction plot.
+ 45            - Flat samples and log likelihoods as CSV files.
+ 46            
+ 47    Notes:
+ 48        - The configuration file should be structured appropriately with all necessary fields.
+ 49        - The function uses the `pmc` library for MCMC sampling.
+ 50        - The function uses the `pmc` library for corner plots.
+ 51        - The function assumes the use of the RoadRunner model for simulations.
+ 52        - The function handles both standard and extended models based on the 'extended' flag in the configuration.
+ 53    """
+ 54
+ 55    ##### Setup #####
+ 56    # Load the config.yaml file
+ 57    with open(config_fname, "r") as f:
+ 58        config = yaml.safe_load(f)
+ 59
+ 60    # get run configuration settings
+ 61    run_name = config["run_name"]
+ 62    model_file = config['model_file']
+ 63    data_file = config['data_file']
+ 64    simulation_kwargs = config['solver_arguments']
+ 65    inference_settings = config['bayesian_inference']
+ 66    optimization_settings = config['optimization']
+ 67    seed = config['random_seed']
+ 68    np.random.seed(seed)
+ 69    extended = config['bayesian_inference']['extended']
+ 70
+ 71    if extended == False:
+ 72        # get parameter names, values, and boundaries
+ 73        k_names = config['solver_arguments']['rate_constant_names']
+ 74        log10_p_nom = [d['nominal'] for d in config['bayesian_inference']['parameters']]
+ 75        p_names = [d['name'] for d in config['bayesian_inference']['parameters']]
+ 76        p_lb = [d['bounds'][0] for d in config['bayesian_inference']['parameters']]
+ 77        p_ub = [d['bounds'][1] for d in config['bayesian_inference']['parameters']]
+ 78        p_bounds = list(zip(p_lb,p_ub))
+ 79        p_nom = [10**d['nominal'] for d in config['bayesian_inference']['parameters']]
+ 80        k_nom = p_nom[:-1]
+ 81        sigma_nom = p_nom[-1]
+ 82
+ 83        # get assay initial conditions
+ 84        initial_conditions = config['solver_arguments']['species_initial_concentrations']
+ 85        initial_conditions_scale = config['solver_arguments']['species_initial_concentrations_scale']
+ 86        buffer_concentration_scale = config['solver_arguments']['buffer_concentration_scale']
+ 87    else:
+ 88        # get parameter names, values, and boundaries
+ 89        k_names = config['solver_arguments']['rate_constant_names']
+ 90        log10_p_nom = [d['nominal'] for d in config['bayesian_inference']['parameters']]
+ 91        p_names = [d['name'] for d in config['bayesian_inference']['parameters']]
+ 92        p_lb = [d['bounds'][0] for d in config['bayesian_inference']['parameters']]
+ 93        p_ub = [d['bounds'][1] for d in config['bayesian_inference']['parameters']]
+ 94        p_bounds = list(zip(p_lb,p_ub))
+ 95        p_nom = [10**d['nominal'] for d in config['bayesian_inference']['parameters']]
+ 96        k_nom = p_nom[:-6]
+ 97        sigma_nom = p_nom[-1]
+ 98
+ 99        # get assay initial conditions
+100        initial_conditions = config['solver_arguments']['species_initial_concentrations']
+101        initial_conditions_scale = config['solver_arguments']['species_initial_concentrations_scale']
+102        buffer_concentration_scale = config['solver_arguments']['buffer_concentration_scale']
+103
+104    # create new directory for runs
+105    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
+106    output_fname = f'run_{run_name}_r{seed}'
+107    output_dir = output_fname + '_' + timestamp
+108    os.mkdir(output_dir)
+109    shutil.copy(model_file, output_dir)
+110    shutil.copy(data_file, output_dir)
+111    shutil.copy(config_fname, output_dir)
+112
+113    # create logging file 
+114    logger = logging.getLogger('log')
+115    logger.setLevel(logging.INFO)
+116    file_handler = logging.FileHandler(os.path.join(output_dir, f'log_file.log'))
+117    file_handler.setLevel(logging.INFO)
+118    log_format = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+119    file_handler.setFormatter(log_format)
+120    logger.addHandler(file_handler)
+121    logger.info(f"output_dir: {output_dir}")
+122    logger.info(f"seed: {seed}")
+123
+124    # load roadrunner model and simulate assay at nominal values
+125    rr_model = te.loadSBMLModel(model_file)
+126    data_nom = ssme.simulate_assay(rr_model, k_nom, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs)
+127  
+128    y_nom = data_nom[1]  # (time, output)
+129
+130
+131    # Load data, plot trace, and calculate log-likelihood reference
+132    y_obs = np.loadtxt(data_file, delimiter=',')
+133    plt.figure(figsize=(8,6))
+134    plt.title('Current (A) vs simulation step')
+135    plt.plot(y_nom, label='y_nom')
+136    plt.plot(y_obs, 'o', label='y_obs', alpha=0.3)
+137    plt.legend()
+138    plt.savefig(os.path.join(output_dir, 'current_trace_nom.png'))
+139    log_like_nom = uf.log_like(log10_p_nom, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs)
+140    logger.info(f"log_like_nom: {log_like_nom}")
+141    
+142    # set sampler parameters 
+143    n_walkers = int(config['bayesian_inference']['mcmc_algorithm_arguments']['n_walkers'])
+144    ess = config['bayesian_inference']['mcmc_algorithm_arguments']['ess']
+145    gamma = config['bayesian_inference']['mcmc_algorithm_arguments']['gamma']
+146    additional_samples = config['bayesian_inference']['mcmc_algorithm_arguments']['additional_samples']
+147    n_max = config['bayesian_inference']['mcmc_algorithm_arguments']['n_max']
+148    n_dim = len(p_names)
+149    thin = int(config['bayesian_inference']['mcmc_algorithm_arguments']['thin'])
+150    burn_in = int(config['bayesian_inference']['mcmc_algorithm_arguments']['burn_in'])
+151    logger.info(f"n_walkers: {n_walkers}")
+152    logger.info(f"ess: {ess}")
+153    logger.info(f"gamma: {gamma}")
+154    logger.info(f"additional samples: {additional_samples}")
+155    logger.info(f"n_max: {n_max}")
+156    logger.info(f"n_dim: {n_dim}")
+157    logger.info(f"thin: {thin}")
+158    logger.info(f"burn_in: {burn_in}")
+159  
+160    # get sampler starting points
+161    p_0 = uf.get_p0(p_bounds, n_walkers)
+162
+163
+164    ##### Run Sampler #####
+165
+166    if extended:
+167        sampler = pmc.Sampler(
+168            n_walkers,
+169            n_dim,
+170            log_likelihood=uf.log_like_extended,
+171            log_prior=uf.log_prior,
+172            log_likelihood_args= [rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs],
+173            log_prior_args= [p_lb, p_ub],
+174            infer_vectorization=False,
+175            bounds=np.array(p_bounds),
+176            random_state=seed,
+177            diagonal = True,
+178        )
+179    else:
+180        sampler = pmc.Sampler(
+181            n_walkers,
+182            n_dim,
+183            log_likelihood=uf.log_like,
+184            log_prior=uf.log_prior,
+185            log_likelihood_args= [rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs],
+186            log_prior_args= [p_lb, p_ub],
+187            infer_vectorization=False,
+188            bounds=np.array(p_bounds),
+189            random_state=seed,
+190            diagonal = True, 
+191        )
+192    logger.info(f"starting MCMC")
+193    sampler.run(prior_samples = p_0,
+194            ess = ess,
+195            gamma = gamma,
+196            n_max = n_max
+197           )
+198    logger.info(f"finished MCMC")
+199    sampler.add_samples(additional_samples)
+200    logger.info(f"finished additional MCMC")
+201
+202    ##### Store data #####
+203    results = sampler.results
+204    logger.info(f"total n steps: {np.sum(results['steps'])}")
+205
+206    np.savetxt(os.path.join(output_dir, f'log_evidence.csv'), results['logz'], delimiter=',')
+207    np.savetxt(os.path.join(output_dir, f'samples.csv'), results['samples'], delimiter=',')
+208    np.savetxt(os.path.join(output_dir, f'log_likelihood.csv'), results['loglikelihood'], delimiter=',')
+209    logger.info(f"saved data")
+210
+211    ##### bridge sampling for log evidence calculation #####
+212    logz_bs, logz_bs_error = sampler.bridge_sampling()
+213    logger.info(f"logZ estimated using SMC: {results['logz'][-1]}")
+214    logger.info(f"logZ estimated using Bridge Sampling: {logz_bs} +- {logz_bs_error}")
+215
+216
+217    ##### Preliminary data viz #####
+218    # run plot 
+219    fig = pmc.plotting.run(results)
+220    plt.savefig(os.path.join(output_dir, f'run_plot.png'))
+221    plt.close()
+222
+223    # trace plot
+224    fig = pmc.plotting.trace(results)
+225    plt.savefig(os.path.join(output_dir, f'trace_plot.png'))
+226    plt.close()
+227
+228    # corner plot 
+229    fig = pmc.plotting.corner(results,labels=p_names, truths=log10_p_nom, range=p_bounds)
+230    plt.savefig(os.path.join(output_dir, f'mcmc_corner.png'))
+231
+232    # flux prediction plot
+233    plt.figure(figsize=(10,8))
+234    inds = np.random.randint(len(results['samples']), size=100)
+235    for ind in inds:
+236        p_sample = results['samples'][ind]
+237        k_tmp = [10**i for i in p_sample[:-1]]
+238        res_tmp = ssme.simulate_assay(rr_model, k_tmp, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs)
+239        y_tmp = res_tmp[1]
+240        plt.plot(y_tmp, color='blue', alpha=0.1)
+241    plt.plot(y_tmp, color='blue', alpha=0.1, label='y_pred')
+242    plt.plot(y_obs, 'o', label='y_obs', alpha=0.3,color='orange')
+243    plt.legend(fontsize=14)
+244    plt.xlabel("time step")
+245    plt.ylabel("net ion influx (M/s)");
+246    plt.savefig(os.path.join(output_dir, f'net_ion_influx_pred.png'))
+247    logger.info(f"plotted analysis")
+
+ + +

Run a Bayesian sampler (pocoMC) based on a configuration file to infer model parameters.

+ +

This function sets up and runs a Bayesian sampler (MCMC) based on the provided configuration file. +The sampler is used to infer model parameters from observed data. The function also provides +preliminary data visualization of the MCMC results, including trace plots, corner plots, and +flux prediction plots. Additionally, it calculates the log evidence using both Sequential Monte Carlo +and Bridge Sampling methods.

+ +
Arguments:
+ +
    +
  • config_fname (str): Path to the configuration YAML file. This file should contain all +necessary settings and parameters for the run, including model file, +data file, solver arguments, Bayesian inference settings, optimization +settings, and random seed.
  • +
+ +
Outputs:
+ +
+
    +
  • Creates a new directory for the run, containing: +
      +
    • Copied model, data, and configuration files.
    • +
    • Log file with run details.
    • +
    • MCMC run and trace plot.
    • +
    • MCMC corner plot.
    • +
    • Flux prediction plot.
    • +
    • Flat samples and log likelihoods as CSV files.
    • +
  • +
+
+ +
Notes:
+ +
+
    +
  • The configuration file should be structured appropriately with all necessary fields.
  • +
  • The function uses the pmc library for MCMC sampling.
  • +
  • The function uses the pmc library for corner plots.
  • +
  • The function assumes the use of the RoadRunner model for simulations.
  • +
  • The function handles both standard and extended models based on the 'extended' flag in the configuration.
  • +
+
+
+ + +
+
+ + \ No newline at end of file diff --git a/docs/search.js b/docs/search.js new file mode 100644 index 0000000..ad819e1 --- /dev/null +++ b/docs/search.js @@ -0,0 +1,46 @@ +window.pdocSearch = (function(){ +/** elasticlunr - http://weixsong.github.io * Copyright (C) 2017 Oliver Nightingale * Copyright (C) 2017 Wei Song * MIT Licensed */!function(){function e(e){if(null===e||"object"!=typeof e)return e;var t=e.constructor();for(var n in e)e.hasOwnProperty(n)&&(t[n]=e[n]);return t}var t=function(e){var n=new t.Index;return n.pipeline.add(t.trimmer,t.stopWordFilter,t.stemmer),e&&e.call(n,n),n};t.version="0.9.5",lunr=t,t.utils={},t.utils.warn=function(e){return function(t){e.console&&console.warn&&console.warn(t)}}(this),t.utils.toString=function(e){return void 0===e||null===e?"":e.toString()},t.EventEmitter=function(){this.events={}},t.EventEmitter.prototype.addListener=function(){var e=Array.prototype.slice.call(arguments),t=e.pop(),n=e;if("function"!=typeof t)throw new TypeError("last argument must be a function");n.forEach(function(e){this.hasHandler(e)||(this.events[e]=[]),this.events[e].push(t)},this)},t.EventEmitter.prototype.removeListener=function(e,t){if(this.hasHandler(e)){var n=this.events[e].indexOf(t);-1!==n&&(this.events[e].splice(n,1),0==this.events[e].length&&delete this.events[e])}},t.EventEmitter.prototype.emit=function(e){if(this.hasHandler(e)){var t=Array.prototype.slice.call(arguments,1);this.events[e].forEach(function(e){e.apply(void 0,t)},this)}},t.EventEmitter.prototype.hasHandler=function(e){return e in this.events},t.tokenizer=function(e){if(!arguments.length||null===e||void 0===e)return[];if(Array.isArray(e)){var n=e.filter(function(e){return null===e||void 0===e?!1:!0});n=n.map(function(e){return t.utils.toString(e).toLowerCase()});var i=[];return n.forEach(function(e){var n=e.split(t.tokenizer.seperator);i=i.concat(n)},this),i}return 
e.toString().trim().toLowerCase().split(t.tokenizer.seperator)},t.tokenizer.defaultSeperator=/[\s\-]+/,t.tokenizer.seperator=t.tokenizer.defaultSeperator,t.tokenizer.setSeperator=function(e){null!==e&&void 0!==e&&"object"==typeof e&&(t.tokenizer.seperator=e)},t.tokenizer.resetSeperator=function(){t.tokenizer.seperator=t.tokenizer.defaultSeperator},t.tokenizer.getSeperator=function(){return t.tokenizer.seperator},t.Pipeline=function(){this._queue=[]},t.Pipeline.registeredFunctions={},t.Pipeline.registerFunction=function(e,n){n in t.Pipeline.registeredFunctions&&t.utils.warn("Overwriting existing registered function: "+n),e.label=n,t.Pipeline.registeredFunctions[n]=e},t.Pipeline.getRegisteredFunction=function(e){return e in t.Pipeline.registeredFunctions!=!0?null:t.Pipeline.registeredFunctions[e]},t.Pipeline.warnIfFunctionNotRegistered=function(e){var n=e.label&&e.label in this.registeredFunctions;n||t.utils.warn("Function is not registered with pipeline. This may cause problems when serialising the index.\n",e)},t.Pipeline.load=function(e){var n=new t.Pipeline;return e.forEach(function(e){var i=t.Pipeline.getRegisteredFunction(e);if(!i)throw new Error("Cannot load un-registered function: "+e);n.add(i)}),n},t.Pipeline.prototype.add=function(){var e=Array.prototype.slice.call(arguments);e.forEach(function(e){t.Pipeline.warnIfFunctionNotRegistered(e),this._queue.push(e)},this)},t.Pipeline.prototype.after=function(e,n){t.Pipeline.warnIfFunctionNotRegistered(n);var i=this._queue.indexOf(e);if(-1===i)throw new Error("Cannot find existingFn");this._queue.splice(i+1,0,n)},t.Pipeline.prototype.before=function(e,n){t.Pipeline.warnIfFunctionNotRegistered(n);var i=this._queue.indexOf(e);if(-1===i)throw new Error("Cannot find existingFn");this._queue.splice(i,0,n)},t.Pipeline.prototype.remove=function(e){var t=this._queue.indexOf(e);-1!==t&&this._queue.splice(t,1)},t.Pipeline.prototype.run=function(e){for(var t=[],n=e.length,i=this._queue.length,o=0;n>o;o++){for(var 
r=e[o],s=0;i>s&&(r=this._queue[s](r,o,e),void 0!==r&&null!==r);s++);void 0!==r&&null!==r&&t.push(r)}return t},t.Pipeline.prototype.reset=function(){this._queue=[]},t.Pipeline.prototype.get=function(){return this._queue},t.Pipeline.prototype.toJSON=function(){return this._queue.map(function(e){return t.Pipeline.warnIfFunctionNotRegistered(e),e.label})},t.Index=function(){this._fields=[],this._ref="id",this.pipeline=new t.Pipeline,this.documentStore=new t.DocumentStore,this.index={},this.eventEmitter=new t.EventEmitter,this._idfCache={},this.on("add","remove","update",function(){this._idfCache={}}.bind(this))},t.Index.prototype.on=function(){var e=Array.prototype.slice.call(arguments);return this.eventEmitter.addListener.apply(this.eventEmitter,e)},t.Index.prototype.off=function(e,t){return this.eventEmitter.removeListener(e,t)},t.Index.load=function(e){e.version!==t.version&&t.utils.warn("version mismatch: current "+t.version+" importing "+e.version);var n=new this;n._fields=e.fields,n._ref=e.ref,n.documentStore=t.DocumentStore.load(e.documentStore),n.pipeline=t.Pipeline.load(e.pipeline),n.index={};for(var i in e.index)n.index[i]=t.InvertedIndex.load(e.index[i]);return n},t.Index.prototype.addField=function(e){return this._fields.push(e),this.index[e]=new t.InvertedIndex,this},t.Index.prototype.setRef=function(e){return this._ref=e,this},t.Index.prototype.saveDocument=function(e){return this.documentStore=new t.DocumentStore(e),this},t.Index.prototype.addDoc=function(e,n){if(e){var n=void 0===n?!0:n,i=e[this._ref];this.documentStore.addDoc(i,e),this._fields.forEach(function(n){var o=this.pipeline.run(t.tokenizer(e[n]));this.documentStore.addFieldLength(i,n,o.length);var r={};o.forEach(function(e){e in r?r[e]+=1:r[e]=1},this);for(var s in r){var 
u=r[s];u=Math.sqrt(u),this.index[n].addToken(s,{ref:i,tf:u})}},this),n&&this.eventEmitter.emit("add",e,this)}},t.Index.prototype.removeDocByRef=function(e){if(e&&this.documentStore.isDocStored()!==!1&&this.documentStore.hasDoc(e)){var t=this.documentStore.getDoc(e);this.removeDoc(t,!1)}},t.Index.prototype.removeDoc=function(e,n){if(e){var n=void 0===n?!0:n,i=e[this._ref];this.documentStore.hasDoc(i)&&(this.documentStore.removeDoc(i),this._fields.forEach(function(n){var o=this.pipeline.run(t.tokenizer(e[n]));o.forEach(function(e){this.index[n].removeToken(e,i)},this)},this),n&&this.eventEmitter.emit("remove",e,this))}},t.Index.prototype.updateDoc=function(e,t){var t=void 0===t?!0:t;this.removeDocByRef(e[this._ref],!1),this.addDoc(e,!1),t&&this.eventEmitter.emit("update",e,this)},t.Index.prototype.idf=function(e,t){var n="@"+t+"/"+e;if(Object.prototype.hasOwnProperty.call(this._idfCache,n))return this._idfCache[n];var i=this.index[t].getDocFreq(e),o=1+Math.log(this.documentStore.length/(i+1));return this._idfCache[n]=o,o},t.Index.prototype.getFields=function(){return this._fields.slice()},t.Index.prototype.search=function(e,n){if(!e)return[];e="string"==typeof e?{any:e}:JSON.parse(JSON.stringify(e));var i=null;null!=n&&(i=JSON.stringify(n));for(var o=new t.Configuration(i,this.getFields()).get(),r={},s=Object.keys(e),u=0;u0&&t.push(e);for(var i in n)"docs"!==i&&"df"!==i&&this.expandToken(e+i,t,n[i]);return t},t.InvertedIndex.prototype.toJSON=function(){return{root:this.root}},t.Configuration=function(e,n){var e=e||"";if(void 0==n||null==n)throw new Error("fields should not be null");this.config={};var i;try{i=JSON.parse(e),this.buildUserConfig(i,n)}catch(o){t.utils.warn("user configuration parse failed, will use default configuration"),this.buildDefaultConfig(n)}},t.Configuration.prototype.buildDefaultConfig=function(e){this.reset(),e.forEach(function(e){this.config[e]={boost:1,bool:"OR",expand:!1}},this)},t.Configuration.prototype.buildUserConfig=function(e,n){var 
i="OR",o=!1;if(this.reset(),"bool"in e&&(i=e.bool||i),"expand"in e&&(o=e.expand||o),"fields"in e)for(var r in e.fields)if(n.indexOf(r)>-1){var s=e.fields[r],u=o;void 0!=s.expand&&(u=s.expand),this.config[r]={boost:s.boost||0===s.boost?s.boost:1,bool:s.bool||i,expand:u}}else t.utils.warn("field name in user configuration not found in index instance fields");else this.addAllFields2UserConfig(i,o,n)},t.Configuration.prototype.addAllFields2UserConfig=function(e,t,n){n.forEach(function(n){this.config[n]={boost:1,bool:e,expand:t}},this)},t.Configuration.prototype.get=function(){return this.config},t.Configuration.prototype.reset=function(){this.config={}},lunr.SortedSet=function(){this.length=0,this.elements=[]},lunr.SortedSet.load=function(e){var t=new this;return t.elements=e,t.length=e.length,t},lunr.SortedSet.prototype.add=function(){var e,t;for(e=0;e1;){if(r===e)return o;e>r&&(t=o),r>e&&(n=o),i=n-t,o=t+Math.floor(i/2),r=this.elements[o]}return r===e?o:-1},lunr.SortedSet.prototype.locationFor=function(e){for(var t=0,n=this.elements.length,i=n-t,o=t+Math.floor(i/2),r=this.elements[o];i>1;)e>r&&(t=o),r>e&&(n=o),i=n-t,o=t+Math.floor(i/2),r=this.elements[o];return r>e?o:e>r?o+1:void 0},lunr.SortedSet.prototype.intersect=function(e){for(var t=new lunr.SortedSet,n=0,i=0,o=this.length,r=e.length,s=this.elements,u=e.elements;;){if(n>o-1||i>r-1)break;s[n]!==u[i]?s[n]u[i]&&i++:(t.add(s[n]),n++,i++)}return t},lunr.SortedSet.prototype.clone=function(){var e=new lunr.SortedSet;return e.elements=this.toArray(),e.length=e.elements.length,e},lunr.SortedSet.prototype.union=function(e){var t,n,i;this.length>=e.length?(t=this,n=e):(t=e,n=this),i=t.clone();for(var o=0,r=n.toArray();o

\n"}, {"fullname": "run_emcee.run_sampler", "modulename": "run_emcee", "qualname": "run_sampler", "kind": "function", "doc": "

Run a Bayesian sampler (emcee) based on a configuration file to infer model parameters.

\n\n

This function sets up and runs a Bayesian sampler (MCMC) based on the provided configuration file.\nThe sampler is used to infer model parameters from observed data. The function also provides\npreliminary data visualization of the MCMC results, including trace plots, corner plots, and \nflux prediction plots.

\n\n
Arguments:
\n\n
    \n
  • config_fname (str): Path to the configuration YAML file. This file should contain all \nnecessary settings and parameters for the run, including model file, \ndata file, solver arguments, Bayesian inference settings, optimization \nsettings, and random seed.
  • \n
\n\n
Outputs:
\n\n
\n
    \n
  • Creates a new directory for the run, containing:\n
      \n
    • Copied model, data, and configuration files.
    • \n
    • Log file with run details.
    • \n
    • MCMC trace plot.
    • \n
    • MCMC corner plot.
    • \n
    • Flux prediction plot.
    • \n
    • Flat samples and log likelihoods as CSV files.
    • \n
  • \n
\n
\n\n
Notes:
\n\n
\n
    \n
  • The configuration file should be structured appropriately with all necessary fields.
  • \n
  • The function uses the emcee library for MCMC sampling.
  • \n
  • The function uses the corner library for corner plots.
  • \n
  • The function assumes the use of the RoadRunner model for simulations.
  • \n
  • The function handles both standard and extended models based on the 'extended' flag in the configuration.
  • \n
\n
\n", "signature": "(config_fname):", "funcdef": "def"}, {"fullname": "run_optimizer", "modulename": "run_optimizer", "kind": "module", "doc": "

\n"}, {"fullname": "run_optimizer.run_optimizer", "modulename": "run_optimizer", "qualname": "run_optimizer", "kind": "function", "doc": "

Run maximum likelihood estimation based on a configuration file.

\n\n

This function will do both (random) hyperparameter tuning and replica maximum likelihood runs for an algorithm specified in the configuration file.\nCurrently the \"differential_evolution\", \"basinhopping\", \"dual_annealing\", \"shgo\", \"Nelder-Mead\", \"Powell\", \"CG\", \"L-BFGS-B\", \"COBYLA\", \"direct\" and \"SLSQP\"\nmethods are supported.

\n\n
Arguments:
\n\n
    \n
  • config_fname (str): Path to the configuration YAML file. This file should contain all \nnecessary settings and parameters for the run, including model file, \ndata file, solver arguments, Bayesian inference settings, optimization \nsettings, and random seed.
  • \n
\n\n
Outputs:
\n\n
\n
    \n
  • Creates a new directory for the run, containing:\n
      \n
    • Copied model, data, and configuration files.
    • \n
    • Log file with run details.
    • \n
    • Flux trace plot.
    • \n
    • Optimization plot.
    • \n
    • Tuning results.
    • \n
    • Optimization results.
    • \n
  • \n
\n
\n\n
Notes:
\n\n
\n
    \n
  • The configuration file should be structured appropriately with all necessary fields (see example folder).
  • \n
  • The function uses the scipy library for maximum likelihood estimation.
  • \n
  • The function assumes the use of the RoadRunner model for simulations.
  • \n
  • The function handles both standard and extended models based on the 'extended' flag in the configuration.
  • \n
\n
\n", "signature": "(config_fname):", "funcdef": "def"}, {"fullname": "run_pocomc", "modulename": "run_pocomc", "kind": "module", "doc": "

\n"}, {"fullname": "run_pocomc.run_sampler", "modulename": "run_pocomc", "qualname": "run_sampler", "kind": "function", "doc": "

Run a Bayesian sampler (pocoMC) based on a configuration file to infer model parameters.

\n\n

This function sets up and runs a Bayesian sampler (MCMC) based on the provided configuration file.\nThe sampler is used to infer model parameters from observed data. The function also provides\npreliminary data visualization of the MCMC results, including trace plots, corner plots, and \nflux prediction plots. Additionally, it calculates the log evidence using both Sequential Monte Carlo \nand Bridge Sampling methods.

\n\n
Arguments:
\n\n
    \n
  • config_fname (str): Path to the configuration YAML file. This file should contain all \nnecessary settings and parameters for the run, including model file, \ndata file, solver arguments, Bayesian inference settings, optimization \nsettings, and random seed.
  • \n
\n\n
Outputs:
\n\n
\n
    \n
  • Creates a new directory for the run, containing:\n
      \n
    • Copied model, data, and configuration files.
    • \n
    • Log file with run details.
    • \n
    • MCMC run and trace plot.
    • \n
    • MCMC corner plot.
    • \n
    • Flux prediction plot.
    • \n
    • Flat samples and log likelihoods as CSV files.
    • \n
  • \n
\n
\n\n
Notes:
\n\n
\n
    \n
  • The configuration file should be structured appropriately with all necessary fields.
  • \n
  • The function uses the pmc library for MCMC sampling.
  • \n
  • The function uses the pmc library for corner plots.
  • \n
  • The function assumes the use of the RoadRunner model for simulations.
  • \n
  • The function handles both standard and extended models based on the 'extended' flag in the configuration.
  • \n
\n
\n", "signature": "(config_fname):", "funcdef": "def"}, {"fullname": "ssme_function", "modulename": "ssme_function", "kind": "module", "doc": "

\n"}, {"fullname": "ssme_function.simulate_assay", "modulename": "ssme_function", "qualname": "simulate_assay", "kind": "function", "doc": "

Simulate the SSME assay using the provided rate constants, initial conditions, and solver arguments.

\n\n
Arguments:
\n\n
    \n
  • rr (object): RoadRunner object for simulating biochemical network models (e.g. transporter cycle).
  • \n
  • rate_constants (list): List of rate constant values. Must be in the same order as rate constant names in solver arguments.
  • \n
  • initial_conditions (list): List of initial species concentrations.
  • \n
  • initial_conditions_scale (list): List of scaling factors for the initial species concentrations.
  • \n
  • buffer_concentration_scale (list): List of scaling factors for the buffer concentrations.
  • \n
  • solver_arguments (dict): Dictionary containing the following keys:\n
      \n
    • roadrunner_solver_output_selections (list): Species or parameters to be included in the simulation output.
    • \n
    • buffer_concentration_sequence (list of lists): Sequence of buffer concentrations for each assay stage.
    • \n
    • time_per_stage (float): Duration of each assay stage.
    • \n
    • n_points_per_stage (int): Number of data points per assay stage.
    • \n
    • buffer_species_names (list): Names of buffer species.
    • \n
    • solver_absolute_tolerance (float): Absolute tolerance for the solver.
    • \n
    • solver_relative_tolerance (float): Relative tolerance for the solver.
    • \n
    • remove_first_n (int): Number of initial data points to exclude from the results for each stage.
    • \n
    • remove_after_n (int): Index after which to stop including data points in the results for each stage.
    • \n
    • species_labels (list): Labels of species for which initial conditions are provided.
    • \n
    • rate_constant_names (list): Names of rate constants in the model.
    • \n
  • \n
\n\n
Returns:
\n\n
\n

results (ndarray): Array of simulation results.

\n
\n", "signature": "(\trr,\trate_constants,\tinitial_conditions,\tinitial_conditions_scale,\tbuffer_concentration_scale,\tsolver_arguments):", "funcdef": "def"}, {"fullname": "utility_functions", "modulename": "utility_functions", "kind": "module", "doc": "

\n"}, {"fullname": "utility_functions.get_p0", "modulename": "utility_functions", "qualname": "get_p0", "kind": "function", "doc": "

Generate initial samples uniformly distributed within given boundaries.

\n\n
Arguments:
\n\n
    \n
  • b_list (list of lists): Each sublist contains [lower bound, upper bound] for a parameter.
  • \n
  • n (int): Number of samples to generate.
  • \n
\n\n
Returns:
\n\n
\n

ndarray: Array of initial samples.

\n
\n", "signature": "(b_list, n):", "funcdef": "def"}, {"fullname": "utility_functions.calc_normal_log_likelihood", "modulename": "utility_functions", "qualname": "calc_normal_log_likelihood", "kind": "function", "doc": "

Calculate the log likelihood of a Normal distribution.

\n\n
Arguments:
\n\n
    \n
  • y_obs (ndarray): Observed values.
  • \n
  • y_pred (ndarray): Predicted values.
  • \n
  • sigma (float): Standard deviation.
  • \n
\n\n
Returns:
\n\n
\n

float: Log likelihood value.

\n
\n", "signature": "(y_obs, y_pred, sigma):", "funcdef": "def"}, {"fullname": "utility_functions.log_like", "modulename": "utility_functions", "qualname": "log_like", "kind": "function", "doc": "

Calculate the log likelihood for given parameters and model.

\n\n
Arguments:
\n\n
    \n
  • params (list): List of parameters.\n
      \n
    • rate constant parameters are first
    • \n
    • sigma is -1 index
    • \n
  • \n
  • rr_model (object): RoadRunner model object.
  • \n
  • y_obs (ndarray): Observed data.
  • \n
  • initial_conditions (list): List of initial species concentrations.
  • \n
  • initial_conditions_scale (list): List of scaling factors for the initial species concentrations.
  • \n
  • buffer_concentration_scale (list): List of scaling factors for the buffer concentrations.
  • \n
  • solver_arguments (dict): Dictionary containing the following keys:\n
      \n
    • roadrunner_solver_output_selections (list): Species or parameters to be included in the simulation output.
    • \n
    • buffer_concentration_sequence (list of lists): Sequence of buffer concentrations for each assay stage.
    • \n
    • time_per_stage (float): Duration of each assay stage.
    • \n
    • n_points_per_stage (int): Number of data points per assay stage.
    • \n
    • buffer_species_names (list): Names of buffer species.
    • \n
    • solver_absolute_tolerance (float): Absolute tolerance for the solver.
    • \n
    • solver_relative_tolerance (float): Relative tolerance for the solver.
    • \n
    • remove_first_n (int): Number of initial data points to exclude from the results for each stage.
    • \n
    • remove_after_n (int): Index after which to stop including data points in the results for each stage.
    • \n
    • species_labels (list): Labels of species for which initial conditions are provided.
    • \n
    • rate_constant_names (list): Names of rate constants in the model.
    • \n
  • \n
\n\n
Returns:
\n\n
\n

float: Log likelihood value.

\n
\n", "signature": "(\tparams,\trr_model,\ty_obs,\tinitial_conditions,\tinitial_conditions_scale,\tbuffer_concentration_scale,\tsolver_arguments):", "funcdef": "def"}, {"fullname": "utility_functions.log_like_extended", "modulename": "utility_functions", "qualname": "log_like_extended", "kind": "function", "doc": "

Calculate the extended log likelihood considering additional nuisance parameters - buffer solution and protein concentrations, bias.

\n\n
Arguments:
\n\n
    \n
  • params (list): List of parameters \n
      \n
    • rate constant parameters are first
    • \n
    • buffer concentration scale factors (H_out and S_out) are indices -5 and -4
    • \n
    • transporter concentration scale factor is -3 index,
    • \n
    • bias factor is -2 index,
    • \n
    • sigma is -1 index
    • \n
  • \n
  • rr_model (object): RoadRunner model object.
  • \n
  • y_obs (ndarray): Observed data.
  • \n
  • initial_conditions (list): List of initial species concentrations.
  • \n
  • initial_conditions_scale (list): List of scaling factors for the initial species concentrations.
  • \n
  • buffer_concentration_scale (list): List of scaling factors for the buffer concentrations.
  • \n
  • solver_arguments (dict): Dictionary containing the following keys:\n
      \n
    • roadrunner_solver_output_selections (list): Species or parameters to be included in the simulation output.
    • \n
    • buffer_concentration_sequence (list of lists): Sequence of buffer concentrations for each assay stage.
    • \n
    • time_per_stage (float): Duration of each assay stage.
    • \n
    • n_points_per_stage (int): Number of data points per assay stage.
    • \n
    • buffer_species_names (list): Names of buffer species.
    • \n
    • solver_absolute_tolerance (float): Absolute tolerance for the solver.
    • \n
    • solver_relative_tolerance (float): Relative tolerance for the solver.
    • \n
    • remove_first_n (int): Number of initial data points to exclude from the results for each stage.
    • \n
    • remove_after_n (int): Index after which to stop including data points in the results for each stage.
    • \n
    • species_labels (list): Labels of species for which initial conditions are provided.
    • \n
    • rate_constant_names (list): Names of rate constants in the model.
    • \n
  • \n
\n\n
Returns:
\n\n
\n

float: Log likelihood value.

\n
\n", "signature": "(\tparams,\trr_model,\ty_obs,\tinitial_conditions,\tinitial_conditions_scale,\tbuffer_concentration_scale,\tsolver_arguments):", "funcdef": "def"}, {"fullname": "utility_functions.log_prior", "modulename": "utility_functions", "qualname": "log_prior", "kind": "function", "doc": "

Calculate the log prior for given parameters.

\n\n
Arguments:
\n\n
    \n
  • params (list): List of parameters.
  • \n
  • param_lb (list): Lower bounds for parameters.
  • \n
  • param_ub (list): Upper bounds for parameters.
  • \n
\n\n
Returns:
\n\n
\n

float: Log prior value.

\n
\n", "signature": "(params, param_lb, param_ub):", "funcdef": "def"}, {"fullname": "utility_functions.log_post_wrapper", "modulename": "utility_functions", "qualname": "log_post_wrapper", "kind": "function", "doc": "

Wrapper function to calculate the log posterior (log likelihood + log prior).

\n\n
Arguments:
\n\n
    \n
  • params (list): List of parameters.
  • \n
  • rr_model (object): RoadRunner model object.
  • \n
  • y_obs (ndarray): Observed values.
  • \n
  • initial_conditions (list): List of initial species concentrations.
  • \n
  • initial_conditions_scale (list): List of scaling factors for the initial species concentrations.
  • \n
  • buffer_concentration_scale (list): List of scaling factors for the buffer concentrations.
  • \n
  • solver_arguments (dict): Dictionary containing the following keys:\n
      \n
    • roadrunner_solver_output_selections (list): Species or parameters to be included in the simulation output.
    • \n
    • buffer_concentration_sequence (list of lists): Sequence of buffer concentrations for each assay stage.
    • \n
    • time_per_stage (float): Duration of each assay stage.
    • \n
    • n_points_per_stage (int): Number of data points per assay stage.
    • \n
    • buffer_species_names (list): Names of buffer species.
    • \n
    • solver_absolute_tolerance (float): Absolute tolerance for the solver.
    • \n
    • solver_relative_tolerance (float): Relative tolerance for the solver.
    • \n
    • remove_first_n (int): Number of initial data points to exclude from the results for each stage.
    • \n
    • remove_after_n (int): Index after which to stop including data points in the results for each stage.
    • \n
    • species_labels (list): Labels of species for which initial conditions are provided.
    • \n
    • rate_constant_names (list): Names of rate constants in the model.
    • \n
  • \n
  • param_lb (list): Lower bounds for parameters.
  • \n
  • param_ub (list): Upper bounds for parameters.
  • \n
\n\n
Returns:
\n\n
\n

float: Log posterior value.

\n
\n", "signature": "(\tparams,\trr_model,\ty_obs,\tinitial_conditions,\tinitial_conditions_scale,\tbuffer_concentration_scale,\tsolver_arguments,\tparam_lb,\tparam_ub):", "funcdef": "def"}, {"fullname": "utility_functions.log_post_wrapper_extended", "modulename": "utility_functions", "qualname": "log_post_wrapper_extended", "kind": "function", "doc": "

Wrapper function to calculate the log posterior (log likelihood + log prior) considering additional nuisance parameters - buffer solution and protein concentrations, bias.

\n\n
Arguments:
\n\n
    \n
  • params (list): List of parameters.
  • \n
  • rr_model (object): RoadRunner model object.
  • \n
  • y_obs (ndarray): Observed values.
  • \n
  • initial_conditions (list): List of initial species concentrations.
  • \n
  • initial_conditions_scale (list): List of scaling factors for the initial species concentrations.
  • \n
  • buffer_concentration_scale (list): List of scaling factors for the buffer concentrations.
  • \n
  • solver_arguments (dict): Dictionary containing the following keys:\n
      \n
    • roadrunner_solver_output_selections (list): Species or parameters to be included in the simulation output.
    • \n
    • buffer_concentration_sequence (list of lists): Sequence of buffer concentrations for each assay stage.
    • \n
    • time_per_stage (float): Duration of each assay stage.
    • \n
    • n_points_per_stage (int): Number of data points per assay stage.
    • \n
    • buffer_species_names (list): Names of buffer species.
    • \n
    • solver_absolute_tolerance (float): Absolute tolerance for the solver.
    • \n
    • solver_relative_tolerance (float): Relative tolerance for the solver.
    • \n
    • remove_first_n (int): Number of initial data points to exclude from the results for each stage.
    • \n
    • remove_after_n (int): Index after which to stop including data points in the results for each stage.
    • \n
    • species_labels (list): Labels of species for which initial conditions are provided.
    • \n
    • rate_constant_names (list): Names of rate constants in the model.
    • \n
  • \n
  • param_lb (list): Lower bounds for parameters.
  • \n
  • param_ub (list): Upper bounds for parameters.
  • \n
\n\n
Returns:
\n\n
\n

float: Log posterior value.

\n
\n", "signature": "(\tparams,\trr_model,\ty_obs,\tinitial_conditions,\tinitial_conditions_scale,\tbuffer_concentration_scale,\tsolver_arguments,\tparam_lb,\tparam_ub):", "funcdef": "def"}, {"fullname": "utility_functions.negative_log_likelihood_wrapper", "modulename": "utility_functions", "qualname": "negative_log_likelihood_wrapper", "kind": "function", "doc": "

Wrapper function to calculate the negative log likelihood.\nTechnically the negative log-posterior, but the prior is uninformative so it is the same as the log-likelihood with a boundary constraint.

\n\n
Arguments:
\n\n
    \n
  • params (list): List of parameters.
  • \n
  • rr_model (object): RoadRunner model object.
  • \n
  • y_obs (ndarray): Observed values.
  • \n
  • initial_conditions (list): List of initial species concentrations.
  • \n
  • initial_conditions_scale (list): List of scaling factors for the initial species concentrations.
  • \n
  • buffer_concentration_scale (list): List of scaling factors for the buffer concentrations.
  • \n
  • solver_arguments (dict): Dictionary containing the following keys:\n
      \n
    • roadrunner_solver_output_selections (list): Species or parameters to be included in the simulation output.
    • \n
    • buffer_concentration_sequence (list of lists): Sequence of buffer concentrations for each assay stage.
    • \n
    • time_per_stage (float): Duration of each assay stage.
    • \n
    • n_points_per_stage (int): Number of data points per assay stage.
    • \n
    • buffer_species_names (list): Names of buffer species.
    • \n
    • solver_absolute_tolerance (float): Absolute tolerance for the solver.
    • \n
    • solver_relative_tolerance (float): Relative tolerance for the solver.
    • \n
    • remove_first_n (int): Number of initial data points to exclude from the results for each stage.
    • \n
    • remove_after_n (int): Index after which to stop including data points in the results for each stage.
    • \n
    • species_labels (list): Labels of species for which initial conditions are provided.
    • \n
    • rate_constant_names (list): Names of rate constants in the model.
    • \n
  • \n
  • param_lb (list): Lower bounds for parameters.
  • \n
  • param_ub (list): Upper bounds for parameters.
  • \n
\n\n
Returns:
\n\n
\n

float: Negative log likelihood value.

\n
\n", "signature": "(\tparams,\trr_model,\ty_obs,\tinitial_conditions,\tinitial_conditions_scale,\tbuffer_concentration_scale,\tsolver_arguments,\tparam_lb,\tparam_ub):", "funcdef": "def"}, {"fullname": "utility_functions.negative_log_likelihood_wrapper_extended", "modulename": "utility_functions", "qualname": "negative_log_likelihood_wrapper_extended", "kind": "function", "doc": "

Wrapper function to calculate the extended negative log likelihood considering concentration uncertainty. \nTechnically the negative log-posterior, but the prior is uninformative so it is the same as the log-likelihood with a boundary constraint.

\n\n
Arguments:
\n\n
    \n
  • params (list): List of parameters including concentration uncertainty.
  • \n
  • rr_model (object): RoadRunner model object.
  • \n
  • y_obs (ndarray): Observed values.
  • \n
  • initial_conditions (list): List of initial species concentrations.
  • \n
  • initial_conditions_scale (list): List of scaling factors for the initial species concentrations.
  • \n
  • buffer_concentration_scale (list): List of scaling factors for the buffer concentrations.
  • \n
  • solver_arguments (dict): Dictionary containing the following keys:\n
      \n
    • roadrunner_solver_output_selections (list): Species or parameters to be included in the simulation output.
    • \n
    • buffer_concentration_sequence (list of lists): Sequence of buffer concentrations for each assay stage.
    • \n
    • time_per_stage (float): Duration of each assay stage.
    • \n
    • n_points_per_stage (int): Number of data points per assay stage.
    • \n
    • buffer_species_names (list): Names of buffer species.
    • \n
    • solver_absolute_tolerance (float): Absolute tolerance for the solver.
    • \n
    • solver_relative_tolerance (float): Relative tolerance for the solver.
    • \n
    • remove_first_n (int): Number of initial data points to exclude from the results for each stage.
    • \n
    • remove_after_n (int): Index after which to stop including data points in the results for each stage.
    • \n
    • species_labels (list): Labels of species for which initial conditions are provided.
    • \n
    • rate_constant_names (list): Names of rate constants in the model.
    • \n
  • \n
  • param_lb (list): Lower bounds for parameters.
  • \n
  • param_ub (list): Upper bounds for parameters.
  • \n
\n\n
Returns:
\n\n
\n

float: Negative log likelihood value.

\n
\n", "signature": "(\tparams,\trr_model,\ty_obs,\tinitial_conditions,\tinitial_conditions_scale,\tbuffer_concentration_scale,\tsolver_arguments,\tparam_lb,\tparam_ub):", "funcdef": "def"}]; + + // mirrored in build-search-index.js (part 1) + // Also split on html tags. this is a cheap heuristic, but good enough. + elasticlunr.tokenizer.setSeperator(/[\s\-.;&_'"=,()]+|<[^>]*>/); + + let searchIndex; + if (docs._isPrebuiltIndex) { + console.info("using precompiled search index"); + searchIndex = elasticlunr.Index.load(docs); + } else { + console.time("building search index"); + // mirrored in build-search-index.js (part 2) + searchIndex = elasticlunr(function () { + this.pipeline.remove(elasticlunr.stemmer); + this.pipeline.remove(elasticlunr.stopWordFilter); + this.addField("qualname"); + this.addField("fullname"); + this.addField("annotation"); + this.addField("default_value"); + this.addField("signature"); + this.addField("bases"); + this.addField("doc"); + this.setRef("fullname"); + }); + for (let doc of docs) { + searchIndex.addDoc(doc); + } + console.timeEnd("building search index"); + } + + return (term) => searchIndex.search(term, { + fields: { + qualname: {boost: 4}, + fullname: {boost: 2}, + annotation: {boost: 2}, + default_value: {boost: 2}, + signature: {boost: 2}, + bases: {boost: 2}, + doc: {boost: 1}, + }, + expand: true + }); +})(); \ No newline at end of file diff --git a/docs/ssme_function.html b/docs/ssme_function.html new file mode 100644 index 0000000..55ce90a --- /dev/null +++ b/docs/ssme_function.html @@ -0,0 +1,429 @@ + + + + + + + ssme_function API documentation + + + + + + + + + +
+
+

+ssme_function

+ + + + + + +
 1import roadrunner
+ 2import numpy as np
+ 3
+ 4
+ 5def simulate_assay(rr, rate_constants, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments):
+ 6    """
+ 7    Simulate the SSME assay using the provided rate constants, initial conditions, and solver arguments.
+ 8
+ 9    Args:
+10        rr (object): RoadRunner object for simulating biochemical network models (e.g. transporter cycle).
+11        rate_constants (list): List of rate constant values. Must be in the same order as rate constant names in solver arguments.
+12        initial_conditions (list): List of initial species concentrations.
+13        initial_conditions_scale (list): List of scaling factors for the initial species concentrations.
+14        buffer_concentration_scale (list): List of scaling factors for the buffer concentrations.
+15        solver_arguments (dict): Dictionary containing the following keys:
+16            - roadrunner_solver_output_selections (list): Species or parameters to be included in the simulation output.
+17            - buffer_concentration_sequence (list of lists): Sequence of buffer concentrations for each assay stage.
+18            - time_per_stage (float): Duration of each assay stage.
+19            - n_points_per_stage (int): Number of data points per assay stage.
+20            - buffer_species_names (list): Names of buffer species.
+21            - solver_absolute_tolerance (float): Absolute tolerance for the solver.
+22            - solver_relative_tolerance (float): Relative tolerance for the solver.
+23            - remove_first_n (int): Number of initial data points to exclude from the results for each stage.
+24            - remove_after_n (int): Index after which to stop including data points in the results for each stage.
+25            - species_labels (list): Labels of species for which initial conditions are provided.
+26            - rate_constant_names (list): Names of rate constants in the model.
+27
+28    Returns:
+29        results (ndarray): Array of simulation results.
+30    """
+31    # Load simulation settings
+32    roadrunner_solver_output_selections = solver_arguments['roadrunner_solver_output_selections']
+33    buffer_concentration_sequence = solver_arguments['buffer_concentration_sequence']
+34    time_per_stage = solver_arguments['time_per_stage']
+35    n_points_per_stage = solver_arguments['n_points_per_stage']
+36    buffer_species_names = solver_arguments['buffer_species_names']
+37    absolute_tolerance = solver_arguments['solver_absolute_tolerance']
+38    relative_tolerance = solver_arguments['solver_relative_tolerance']
+39    remove_first_n = solver_arguments['remove_first_n']
+40    remove_after_n = solver_arguments['remove_after_n']
+41    species_labels = solver_arguments['species_labels']
+42    rate_constant_names = solver_arguments['rate_constant_names']
+43
+44    # Reset roadrunner model and set integrator tolerances
+45    rr.resetToOrigin()
+46    rr.integrator.absolute_tolerance = absolute_tolerance
+47    rr.integrator.relative_tolerance = relative_tolerance
+48
+49    # Update model rate constants
+50    parameters = dict(zip(rate_constant_names, rate_constants))
+51    for name, value in parameters.items():
+52        rr[name] = value
+53
+54    # Run synthetic SSME assay
+55    ## iterate through buffer concentration sequence for assay
+56    results = []
+57    for i, solution in enumerate(buffer_concentration_sequence):  # Update buffer solution for each assay stage
+58        for j, label in enumerate(buffer_species_names):
+59            buffer_concentration_j = solution[j]*buffer_concentration_scale[j]  # scale buffer concentrations (obs = pred*scale_factor)
+60            setattr(rr, label, buffer_concentration_j)
+61        if i==0:  # Set initial state concentrations for stage 1 (equilibration)
+62            for j, label in enumerate(species_labels):
+63                initial_concentration_j = initial_conditions[j]*initial_conditions_scale[j]
+64                setattr(rr, label, initial_concentration_j)
+65            rr.simulate(i,i+time_per_stage,n_points_per_stage, selections=roadrunner_solver_output_selections)  # Don't store equilibration results
+66        else:
+67            tmp = rr.simulate(i,i+time_per_stage,n_points_per_stage, selections=roadrunner_solver_output_selections)
+68            results.append(tmp[remove_first_n:remove_after_n])
+69    return np.vstack(results).T
+
+ + +
+
+ +
+ + def + simulate_assay( rr, rate_constants, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments): + + + +
+ +
 6def simulate_assay(rr, rate_constants, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments):
+ 7    """
+ 8    Simulate the SSME assay using the provided rate constants, initial conditions, and solver arguments.
+ 9
+10    Args:
+11        rr (object): RoadRunner object for simulating biochemical network models (e.g. transporter cycle).
+12        rate_constants (list): List of rate constant values. Must be in the same order as rate constant names in solver arguments.
+13        initial_conditions (list): List of initial species concentrations.
+14        initial_conditions_scale (list): List of scaling factors for the initial species concentrations.
+15        buffer_concentration_scale (list): List of scaling factors for the buffer concentrations.
+16        solver_arguments (dict): Dictionary containing the following keys:
+17            - roadrunner_solver_output_selections (list): Species or parameters to be included in the simulation output.
+18            - buffer_concentration_sequence (list of lists): Sequence of buffer concentrations for each assay stage.
+19            - time_per_stage (float): Duration of each assay stage.
+20            - n_points_per_stage (int): Number of data points per assay stage.
+21            - buffer_species_names (list): Names of buffer species.
+22            - solver_absolute_tolerance (float): Absolute tolerance for the solver.
+23            - solver_relative_tolerance (float): Relative tolerance for the solver.
+24            - remove_first_n (int): Number of initial data points to exclude from the results for each stage.
+25            - remove_after_n (int): Index after which to stop including data points in the results for each stage.
+26            - species_labels (list): Labels of species for which initial conditions are provided.
+27            - rate_constant_names (list): Names of rate constants in the model.
+28
+29    Returns:
+30        results (ndarray): Array of simulation results.
+31    """
+32    # Load simulation settings
+33    roadrunner_solver_output_selections = solver_arguments['roadrunner_solver_output_selections']
+34    buffer_concentration_sequence = solver_arguments['buffer_concentration_sequence']
+35    time_per_stage = solver_arguments['time_per_stage']
+36    n_points_per_stage = solver_arguments['n_points_per_stage']
+37    buffer_species_names = solver_arguments['buffer_species_names']
+38    absolute_tolerance = solver_arguments['solver_absolute_tolerance']
+39    relative_tolerance = solver_arguments['solver_relative_tolerance']
+40    remove_first_n = solver_arguments['remove_first_n']
+41    remove_after_n = solver_arguments['remove_after_n']
+42    species_labels = solver_arguments['species_labels']
+43    rate_constant_names = solver_arguments['rate_constant_names']
+44
+45    # Reset roadrunner model and set integrator tolerances
+46    rr.resetToOrigin()
+47    rr.integrator.absolute_tolerance = absolute_tolerance
+48    rr.integrator.relative_tolerance = relative_tolerance
+49
+50    # Update model rate constants
+51    parameters = dict(zip(rate_constant_names, rate_constants))
+52    for name, value in parameters.items():
+53        rr[name] = value
+54
+55    # Run synthetic SSME assay
+56    ## iterate through buffer concentration sequence for assay
+57    results = []
+58    for i, solution in enumerate(buffer_concentration_sequence):  # Update buffer solution for each assay stage
+59        for j, label in enumerate(buffer_species_names):
+60            buffer_concentration_j = solution[j]*buffer_concentration_scale[j]  # scale buffer concentrations (obs = pred*scale_factor)
+61            setattr(rr, label, buffer_concentration_j)
+62        if i==0:  # Set initial state concentrations for stage 1 (equilibration)
+63            for j, label in enumerate(species_labels):
+64                initial_concentration_j = initial_conditions[j]*initial_conditions_scale[j]
+65                setattr(rr, label, initial_concentration_j)
+66            rr.simulate(i,i+time_per_stage,n_points_per_stage, selections=roadrunner_solver_output_selections)  # Don't store equilibration results
+67        else:
+68            tmp = rr.simulate(i,i+time_per_stage,n_points_per_stage, selections=roadrunner_solver_output_selections)
+69            results.append(tmp[remove_first_n:remove_after_n])
+70    return np.vstack(results).T
+
+ + +

Simulate the SSME assay using the provided rate constants, initial conditions, and solver arguments.

+ +
Arguments:
+ +
    +
  • rr (object): RoadRunner object for simulating biochemical network models (e.g. transporter cycle).
  • +
  • rate_constants (list): List of rate constant values. Must be in the same order as rate constant names in solver arguments.
  • +
  • initial_conditions (list): List of initial species concentrations.
  • +
  • initial_conditions_scale (list): List of scaling factors for the initial species concentrations.
  • +
  • buffer_concentration_scale (list): List of scaling factors for the buffer concentrations.
  • +
  • solver_arguments (dict): Dictionary containing the following keys: +
      +
    • roadrunner_solver_output_selections (list): Species or parameters to be included in the simulation output.
    • +
    • buffer_concentration_sequence (list of lists): Sequence of buffer concentrations for each assay stage.
    • +
    • time_per_stage (float): Duration of each assay stage.
    • +
    • n_points_per_stage (int): Number of data points per assay stage.
    • +
    • buffer_species_names (list): Names of buffer species.
    • +
    • solver_absolute_tolerance (float): Absolute tolerance for the solver.
    • +
    • solver_relative_tolerance (float): Relative tolerance for the solver.
    • +
    • remove_first_n (int): Number of initial data points to exclude from the results for each stage.
    • +
    • remove_after_n (int): Index after which to stop including data points in the results for each stage.
    • +
    • species_labels (list): Labels of species for which initial conditions are provided.
    • +
    • rate_constant_names (list): Names of rate constants in the model.
    • +
  • +
+ +
Returns:
+ +
+

results (ndarray): Array of simulation results.

+
+
+ + +
+
+ + \ No newline at end of file diff --git a/docs/utility_functions.html b/docs/utility_functions.html new file mode 100644 index 0000000..7978bb1 --- /dev/null +++ b/docs/utility_functions.html @@ -0,0 +1,1239 @@ + + + + + + + utility_functions API documentation + + + + + + + + + +
+
+

+utility_functions

+ + + + + + +
  1import numpy as np 
+  2import scipy as sp
+  3import ssme_function as ssme
+  4import time as time
+  5
+  6
+  7def get_p0(b_list, n):
+  8    """
+  9    Generate initial samples uniformly distributed within given boundaries.
+ 10    
+ 11    Args:
+ 12        b_list (list of lists): Each sublist contains [lower bound, upper bound] for a parameter.
+ 13        n (int): Number of samples to generate.
+ 14        
+ 15    Returns:
+ 16        ndarray: Array of initial samples.
+ 17    """
+ 18    p0_array = np.transpose(np.array([np.random.uniform(b[0],b[1],n) for b in b_list]))  # re-arrange array for sampler
+ 19    return p0_array
+ 20
+ 21
+ 22def calc_normal_log_likelihood(y_obs, y_pred, sigma):
+ 23    """
+ 24    Calculate the log likelihood of a Normal distribution.
+ 25    
+ 26    Args:
+ 27        y_obs (ndarray): Observed values.
+ 28        y_pred (ndarray): Predicted values.
+ 29        sigma (float): Standard deviation.
+ 30        
+ 31    Returns:
+ 32        float: Log likelihood value.
+ 33    """
+ 34    y = sp.stats.norm.logpdf(y_obs, y_pred, sigma).sum()
+ 35    return y
+ 36
+ 37
+ 38def log_like(params, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments):  
+ 39    """
+ 40    Calculate the log likelihood for given parameters and model.
+ 41    
+ 42    Args:
+ 43        params (list): List of parameters.
+ 44         - rate constant parameters are first
+ 45         - sigma is -1 index
+ 46        rr_model (object): RoadRunner model object.
+ 47        y_obs (ndarray): Observed data.
+ 48        initial_conditions (list): List of initial species concentrations.
+ 49        initial_conditions_scale (list): List of scaling factors for the initial species concentrations.
+ 50        buffer_concentration_scale (list): List of scaling factors for the buffer concentrations.
+ 51        solver_arguments (dict): Dictionary containing the following keys:
+ 52            - roadrunner_solver_output_selections (list): Species or parameters to be included in the simulation output.
+ 53            - buffer_concentration_sequence (list of lists): Sequence of buffer concentrations for each assay stage.
+ 54            - time_per_stage (float): Duration of each assay stage.
+ 55            - n_points_per_stage (int): Number of data points per assay stage.
+ 56            - buffer_species_names (list): Names of buffer species.
+ 57            - solver_absolute_tolerance (float): Absolute tolerance for the solver.
+ 58            - solver_relative_tolerance (float): Relative tolerance for the solver.
+ 59            - remove_first_n (int): Number of initial data points to exclude from the results for each stage.
+ 60            - remove_after_n (int): Index after which to stop including data points in the results for each stage.
+ 61            - species_labels (list): Labels of species for which initial conditions are provided.
+ 62            - rate_constant_names (list): Names of rate constants in the model.
+ 63        
+ 64    Returns:
+ 65        float: Log likelihood value.
+ 66    """ 
+ 67    k = [10**i for i in params[:-1]]
+ 68    sigma = 10**params[-1]
+ 69    try:
+ 70        res = ssme.simulate_assay(rr_model, k, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments)
+ 71        y_pred = res[1]
+ 72        log_like = calc_normal_log_likelihood(y_obs, y_pred, sigma)
+ 73        return log_like
+ 74    except:
+ 75        return -1e100  # arbitrarily large negative number --> 0 probability
+ 76
+ 77
+ 78def log_like_extended(params, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments):   
+ 79    """
+ 80    Calculate the extended log likelihood considering additional nuisance parameters - buffer solution and protein concentrations, bias.
+ 81    
+ 82    Args:
+ 83        params (list): List of parameters 
+ 84         - rate constant parameters are first
+ 85         - buffer concentration scale factors (H_out and S_out) are indices -5 and -4 
+ 86         - transporter concentration scale factor is -3  index, 
+ 87         - bias factor is -2 index, 
+ 88         - sigma is -1 index
+ 89        rr_model (object): RoadRunner model object.
+ 90        y_obs (ndarray): Observed data.
+ 91        initial_conditions (list): List of initial species concentrations.
+ 92        initial_conditions_scale (list): List of scaling factors for the initial species concentrations.
+ 93        buffer_concentration_scale (list): List of scaling factors for the buffer concentrations.
+ 94        solver_arguments (dict): Dictionary containing the following keys:
+ 95            - roadrunner_solver_output_selections (list): Species or parameters to be included in the simulation output.
+ 96            - buffer_concentration_sequence (list of lists): Sequence of buffer concentrations for each assay stage.
+ 97            - time_per_stage (float): Duration of each assay stage.
+ 98            - n_points_per_stage (int): Number of data points per assay stage.
+ 99            - buffer_species_names (list): Names of buffer species.
+100            - solver_absolute_tolerance (float): Absolute tolerance for the solver.
+101            - solver_relative_tolerance (float): Relative tolerance for the solver.
+102            - remove_first_n (int): Number of initial data points to exclude from the results for each stage.
+103            - remove_after_n (int): Index after which to stop including data points in the results for each stage.
+104            - species_labels (list): Labels of species for which initial conditions are provided.
+105            - rate_constant_names (list): Names of rate constants in the model.
+106        
+107    Returns:
+108        float: Log likelihood value.
+109    """
+110    k = [10**i for i in params[:-5]]
+111    sigma = 10**params[-1]
+112    bias = params[-2]
+113    
+114    # get concentration uncertainty
+115    H_out_buffer_scale = params[-5]
+116    S_out_buffer_scale = params[-4]
+117    initial_transporter_concentration_scale = params[-3]
+118
+119    # set concentration uncertainty
+120    buffer_concentration_scale[0] = H_out_buffer_scale
+121    buffer_concentration_scale[1] = S_out_buffer_scale
+122    initial_conditions_scale[0] = initial_transporter_concentration_scale
+123    try:
+124        res = ssme.simulate_assay(rr_model, k, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments)
+125        y_pred = bias*res[1]
+126        log_like = calc_normal_log_likelihood(y_obs, y_pred, sigma)
+127        return log_like
+128    except:
+129        return -1e100  # arbitrarily large negative number --> 0 probability
+130
+131
+132def log_prior(params, param_lb, param_ub):  
+133    """
+134    Calculate the log prior for given parameters.
+135    
+136    Args:
+137        params (list): List of parameters.
+138        param_lb (list): Lower bounds for parameters.
+139        param_ub (list): Upper bounds for parameters.
+140        
+141    Returns:
+142        float: Log prior value.
+143    """
+144    if ((param_lb < params) & (params < param_ub)).all():
+145        return 0
+146    else:
+147        return -1e100  # arbitrarily large negative number --> 0 probability
+148   
+149
+150def log_post_wrapper(params, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments, param_lb, param_ub):
+151    """
+152    Wrapper function to calculate the log posterior (log likelihood + log prior).
+153    
+154    Args:
+155        params (list): List of parameters.
+156        rr_model (object): RoadRunner model object.
+157        y_obs (ndarray): Observed values.
+158        initial_conditions (list): List of initial species concentrations.
+159        initial_conditions_scale (list): List of scaling factors for the initial species concentrations.
+160        buffer_concentration_scale (list): List of scaling factors for the buffer concentrations.
+161        solver_arguments (dict): Dictionary containing the following keys:
+162            - roadrunner_solver_output_selections (list): Species or parameters to be included in the simulation output.
+163            - buffer_concentration_sequence (list of lists): Sequence of buffer concentrations for each assay stage.
+164            - time_per_stage (float): Duration of each assay stage.
+165            - n_points_per_stage (int): Number of data points per assay stage.
+166            - buffer_species_names (list): Names of buffer species.
+167            - solver_absolute_tolerance (float): Absolute tolerance for the solver.
+168            - solver_relative_tolerance (float): Relative tolerance for the solver.
+169            - remove_first_n (int): Number of initial data points to exclude from the results for each stage.
+170            - remove_after_n (int): Index after which to stop including data points in the results for each stage.
+171            - species_labels (list): Labels of species for which initial conditions are provided.
+172            - rate_constant_names (list): Names of rate constants in the model.
+173        param_lb (list): Lower bounds for parameters.
+174        param_ub (list): Upper bounds for parameters.
+175        
+176    Returns:
+177        float: Log posterior value.
+178    """
+179    logl = log_like(params, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments)
+180    logpr = log_prior(params, param_lb, param_ub)
+181    return logl+logpr # arbitrarily large negative number --> 0 probability
+182
+183
+184def log_post_wrapper_extended(params, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments, param_lb, param_ub):
+185    """
+186    Wrapper function to calculate the log posterior (log likelihood + log prior) considering additional nuisance parameters - buffer solution and protein concentrations, bias.
+187    
+188    Args:
+189        params (list): List of parameters.
+190        rr_model (object): RoadRunner model object.
+191        y_obs (ndarray): Observed values.
+192        initial_conditions (list): List of initial species concentrations.
+193        initial_conditions_scale (list): List of scaling factors for the initial species concentrations.
+194        buffer_concentration_scale (list): List of scaling factors for the buffer concentrations.
+195        solver_arguments (dict): Dictionary containing the following keys:
+196            - roadrunner_solver_output_selections (list): Species or parameters to be included in the simulation output.
+197            - buffer_concentration_sequence (list of lists): Sequence of buffer concentrations for each assay stage.
+198            - time_per_stage (float): Duration of each assay stage.
+199            - n_points_per_stage (int): Number of data points per assay stage.
+200            - buffer_species_names (list): Names of buffer species.
+201            - solver_absolute_tolerance (float): Absolute tolerance for the solver.
+202            - solver_relative_tolerance (float): Relative tolerance for the solver.
+203            - remove_first_n (int): Number of initial data points to exclude from the results for each stage.
+204            - remove_after_n (int): Index after which to stop including data points in the results for each stage.
+205            - species_labels (list): Labels of species for which initial conditions are provided.
+206            - rate_constant_names (list): Names of rate constants in the model.
+207        param_lb (list): Lower bounds for parameters.
+208        param_ub (list): Upper bounds for parameters.
+209        
+210    Returns:
+211        float: Log posterior value.
+212    """
+213
+214    logl = log_like_extended(params, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments)
+215    logpr = log_prior(params, param_lb, param_ub)
+216    return logl+logpr
+217
+218
+219def negative_log_likelihood_wrapper(params, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments, param_lb, param_ub):
+220    """
+221    Wrapper function to calculate the negative log likelihood.
+222    Technically the negative log-posterior, but the prior is uninformative so it is the same as the log-likelihood with a boundary constraint.
+223    
+224    Args:
+225        params (list): List of parameters.
+226        rr_model (object): RoadRunner model object.
+227        y_obs (ndarray): Observed values.
+228        initial_conditions (list): List of initial species concentrations.
+229        initial_conditions_scale (list): List of scaling factors for the initial species concentrations.
+230        buffer_concentration_scale (list): List of scaling factors for the buffer concentrations.
+231        solver_arguments (dict): Dictionary containing the following keys:
+232            - roadrunner_solver_output_selections (list): Species or parameters to be included in the simulation output.
+233            - buffer_concentration_sequence (list of lists): Sequence of buffer concentrations for each assay stage.
+234            - time_per_stage (float): Duration of each assay stage.
+235            - n_points_per_stage (int): Number of data points per assay stage.
+236            - buffer_species_names (list): Names of buffer species.
+237            - solver_absolute_tolerance (float): Absolute tolerance for the solver.
+238            - solver_relative_tolerance (float): Relative tolerance for the solver.
+239            - remove_first_n (int): Number of initial data points to exclude from the results for each stage.
+240            - remove_after_n (int): Index after which to stop including data points in the results for each stage.
+241            - species_labels (list): Labels of species for which initial conditions are provided.
+242            - rate_constant_names (list): Names of rate constants in the model.
+243        param_lb (list): Lower bounds for parameters.
+244        param_ub (list): Upper bounds for parameters.
+245        
+246    Returns:
+247        float: Negative log likelihood value.
+248    """
+249    logl = log_like(params, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments)
+250    logpr = log_prior(params, param_lb, param_ub)
+251    return -1*(logl+logpr) 
+252
+253
+254def negative_log_likelihood_wrapper_extended(params, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments, param_lb, param_ub):
+255    """
+256    Wrapper function to calculate the extended negative log likelihood considering concentration uncertainty. 
+257    Technically the negative log-posterior, but the prior is uninformative so it is the same as the log-likelihood with a boundary constraint.
+258    
+259    Args:
+260        params (list): List of parameters including concentration uncertainty.
+261        rr_model (object): RoadRunner model object.
+262        y_obs (ndarray): Observed values.
+263        initial_conditions (list): List of initial species concentrations.
+264        initial_conditions_scale (list): List of scaling factors for the initial species concentrations.
+265        buffer_concentration_scale (list): List of scaling factors for the buffer concentrations.
+266        solver_arguments (dict): Dictionary containing the following keys:
+267            - roadrunner_solver_output_selections (list): Species or parameters to be included in the simulation output.
+268            - buffer_concentration_sequence (list of lists): Sequence of buffer concentrations for each assay stage.
+269            - time_per_stage (float): Duration of each assay stage.
+270            - n_points_per_stage (int): Number of data points per assay stage.
+271            - buffer_species_names (list): Names of buffer species.
+272            - solver_absolute_tolerance (float): Absolute tolerance for the solver.
+273            - solver_relative_tolerance (float): Relative tolerance for the solver.
+274            - remove_first_n (int): Number of initial data points to exclude from the results for each stage.
+275            - remove_after_n (int): Index after which to stop including data points in the results for each stage.
+276            - species_labels (list): Labels of species for which initial conditions are provided.
+277            - rate_constant_names (list): Names of rate constants in the model.
+278        param_lb (list): Lower bounds for parameters.
+279        param_ub (list): Upper bounds for parameters.
+280        
+281    Returns:
+282        float: Negative log likelihood value.
+283    """
+284    logl = log_like_extended(params, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments)
+285    logpr = log_prior(params, param_lb, param_ub)
+286    return -1*(logl+logpr)
+
+ + +
+
+ +
+ + def + get_p0(b_list, n): + + + +
+ +
 8def get_p0(b_list, n):
+ 9    """
+10    Generate initial samples uniformly distributed within given boundaries.
+11    
+12    Args:
+13        b_list (list of lists): Each sublist contains [lower bound, upper bound] for a parameter.
+14        n (int): Number of samples to generate.
+15        
+16    Returns:
+17        ndarray: Array of initial samples.
+18    """
+19    p0_array = np.transpose(np.array([np.random.uniform(b[0],b[1],n) for b in b_list]))  # re-arrange array for sampler
+20    return p0_array
+
+ + +

Generate initial samples uniformly distributed within given boundaries.

+ +
Arguments:
+ +
    +
  • b_list (list of lists): Each sublist contains [lower bound, upper bound] for a parameter.
  • +
  • n (int): Number of samples to generate.
  • +
+ +
Returns:
+ +
+

ndarray: Array of initial samples.

+
+
+ + +
+
+ +
+ + def + calc_normal_log_likelihood(y_obs, y_pred, sigma): + + + +
+ +
23def calc_normal_log_likelihood(y_obs, y_pred, sigma):
+24    """
+25    Calculate the log likelihood of a Normal distribution.
+26    
+27    Args:
+28        y_obs (ndarray): Observed values.
+29        y_pred (ndarray): Predicted values.
+30        sigma (float): Standard deviation.
+31        
+32    Returns:
+33        float: Log likelihood value.
+34    """
+35    y = sp.stats.norm.logpdf(y_obs, y_pred, sigma).sum()
+36    return y
+
+ + +

Calculate the log likelihood of a Normal distribution.

+ +
Arguments:
+ +
    +
  • y_obs (ndarray): Observed values.
  • +
  • y_pred (ndarray): Predicted values.
  • +
  • sigma (float): Standard deviation.
  • +
+ +
Returns:
+ +
+

float: Log likelihood value.

+
+
+ + +
+
+ +
+ + def + log_like( params, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments): + + + +
+ +
39def log_like(params, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments):  
+40    """
+41    Calculate the log likelihood for given parameters and model.
+42    
+43    Args:
+44        params (list): List of parameters.
+45         - rate constant parameters are first
+46         - sigma is -1 index
+47        rr_model (object): RoadRunner model object.
+48        y_obs (ndarray): Observed data.
+49        initial_conditions (list): List of initial species concentrations.
+50        initial_conditions_scale (list): List of scaling factors for the initial species concentrations.
+51        buffer_concentration_scale (list): List of scaling factors for the buffer concentrations.
+52        solver_arguments (dict): Dictionary containing the following keys:
+53            - roadrunner_solver_output_selections (list): Species or parameters to be included in the simulation output.
+54            - buffer_concentration_sequence (list of lists): Sequence of buffer concentrations for each assay stage.
+55            - time_per_stage (float): Duration of each assay stage.
+56            - n_points_per_stage (int): Number of data points per assay stage.
+57            - buffer_species_names (list): Names of buffer species.
+58            - solver_absolute_tolerance (float): Absolute tolerance for the solver.
+59            - solver_relative_tolerance (float): Relative tolerance for the solver.
+60            - remove_first_n (int): Number of initial data points to exclude from the results for each stage.
+61            - remove_after_n (int): Index after which to stop including data points in the results for each stage.
+62            - species_labels (list): Labels of species for which initial conditions are provided.
+63            - rate_constant_names (list): Names of rate constants in the model.
+64        
+65    Returns:
+66        float: Log likelihood value.
+67    """ 
+68    k = [10**i for i in params[:-1]]
+69    sigma = 10**params[-1]
+70    try:
+71        res = ssme.simulate_assay(rr_model, k, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments)
+72        y_pred = res[1]
+73        log_like = calc_normal_log_likelihood(y_obs, y_pred, sigma)
+74        return log_like
+75    except:
+76        return -1e100  # arbitrarily large negative number --> 0 probability
+
+ + +

Calculate the log likelihood for given parameters and model.

+ +
Arguments:
+ +
    +
  • params (list): List of parameters. +
      +
    • rate constant parameters are first
    • +
    • sigma is -1 index
    • +
  • +
  • rr_model (object): RoadRunner model object.
  • +
  • y_obs (ndarray): Observed data.
  • +
  • initial_conditions (list): List of initial species concentrations.
  • +
  • initial_conditions_scale (list): List of scaling factors for the initial species concentrations.
  • +
  • buffer_concentration_scale (list): List of scaling factors for the buffer concentrations.
  • +
  • solver_arguments (dict): Dictionary containing the following keys: +
      +
    • roadrunner_solver_output_selections (list): Species or parameters to be included in the simulation output.
    • +
    • buffer_concentration_sequence (list of lists): Sequence of buffer concentrations for each assay stage.
    • +
    • time_per_stage (float): Duration of each assay stage.
    • +
    • n_points_per_stage (int): Number of data points per assay stage.
    • +
    • buffer_species_names (list): Names of buffer species.
    • +
    • solver_absolute_tolerance (float): Absolute tolerance for the solver.
    • +
    • solver_relative_tolerance (float): Relative tolerance for the solver.
    • +
    • remove_first_n (int): Number of initial data points to exclude from the results for each stage.
    • +
    • remove_after_n (int): Index after which to stop including data points in the results for each stage.
    • +
    • species_labels (list): Labels of species for which initial conditions are provided.
    • +
    • rate_constant_names (list): Names of rate constants in the model.
    • +
  • +
+ +
Returns:
+ +
+

float: Log likelihood value.

+
+
+ + +
+
+ +
+ + def + log_like_extended( params, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments): + + + +
+ +
 79def log_like_extended(params, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments):   
+ 80    """
+ 81    Calculate the extended log likelihood considering additional nuisance parameters - buffer solution and protein concentrations, bias.
+ 82    
+ 83    Args:
+ 84        params (list): List of parameters 
+ 85         - rate constant parameters are first
+ 86         - buffer concentration scale factors (H_out and S_out) are indices -5 and -4 
+ 87         - transporter concentration scale factor is -3  index, 
+ 88         - bias factor is -2 index, 
+ 89         - sigma is -1 index
+ 90        rr_model (object): RoadRunner model object.
+ 91        y_obs (ndarray): Observed data.
+ 92        initial_conditions (list): List of initial species concentrations.
+ 93        initial_conditions_scale (list): List of scaling factors for the initial species concentrations.
+ 94        buffer_concentration_scale (list): List of scaling factors for the buffer concentrations.
+ 95        solver_arguments (dict): Dictionary containing the following keys:
+ 96            - roadrunner_solver_output_selections (list): Species or parameters to be included in the simulation output.
+ 97            - buffer_concentration_sequence (list of lists): Sequence of buffer concentrations for each assay stage.
+ 98            - time_per_stage (float): Duration of each assay stage.
+ 99            - n_points_per_stage (int): Number of data points per assay stage.
+100            - buffer_species_names (list): Names of buffer species.
+101            - solver_absolute_tolerance (float): Absolute tolerance for the solver.
+102            - solver_relative_tolerance (float): Relative tolerance for the solver.
+103            - remove_first_n (int): Number of initial data points to exclude from the results for each stage.
+104            - remove_after_n (int): Index after which to stop including data points in the results for each stage.
+105            - species_labels (list): Labels of species for which initial conditions are provided.
+106            - rate_constant_names (list): Names of rate constants in the model.
+107        
+108    Returns:
+109        float: Log likelihood value.
+110    """
+111    k = [10**i for i in params[:-5]]
+112    sigma = 10**params[-1]
+113    bias = params[-2]
+114    
+115    # get concentration uncertainty
+116    H_out_buffer_scale = params[-5]
+117    S_out_buffer_scale = params[-4]
+118    initial_transporter_concentration_scale = params[-3]
+119
+120    # set concentration uncertainty
+121    buffer_concentration_scale[0] = H_out_buffer_scale
+122    buffer_concentration_scale[1] = S_out_buffer_scale
+123    initial_conditions_scale[0] = initial_transporter_concentration_scale
+124    try:
+125        res = ssme.simulate_assay(rr_model, k, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments)
+126        y_pred = bias*res[1]
+127        log_like = calc_normal_log_likelihood(y_obs, y_pred, sigma)
+128        return log_like
+129    except:
+130        return -1e100  # arbitrarily large negative number --> 0 probability
+
+ + +

Calculate the extended log likelihood considering additional nuisance parameters - buffer solution and protein concentrations, bias.

+ +
Arguments:
+ +
    +
  • params (list): List of parameters +
      +
    • rate constant parameters are first
    • +
    • buffer concentration scale factors (H_out and S_out) are indices -5 and -4
    • +
    • transporter concentration scale factor is -3 index,
    • +
    • bias factor is -2 index,
    • +
    • sigma is -1 index
    • +
  • +
  • rr_model (object): RoadRunner model object.
  • +
  • y_obs (ndarray): Observed data.
  • +
  • initial_conditions (list): List of initial species concentrations.
  • +
  • initial_conditions_scale (list): List of scaling factors for the initial species concentrations.
  • +
  • buffer_concentration_scale (list): List of scaling factors for the buffer concentrations.
  • +
  • solver_arguments (dict): Dictionary containing the following keys: +
      +
    • roadrunner_solver_output_selections (list): Species or parameters to be included in the simulation output.
    • +
    • buffer_concentration_sequence (list of lists): Sequence of buffer concentrations for each assay stage.
    • +
    • time_per_stage (float): Duration of each assay stage.
    • +
    • n_points_per_stage (int): Number of data points per assay stage.
    • +
    • buffer_species_names (list): Names of buffer species.
    • +
    • solver_absolute_tolerance (float): Absolute tolerance for the solver.
    • +
    • solver_relative_tolerance (float): Relative tolerance for the solver.
    • +
    • remove_first_n (int): Number of initial data points to exclude from the results for each stage.
    • +
    • remove_after_n (int): Index after which to stop including data points in the results for each stage.
    • +
    • species_labels (list): Labels of species for which initial conditions are provided.
    • +
    • rate_constant_names (list): Names of rate constants in the model.
    • +
  • +
+ +
Returns:
+ +
+

float: Log likelihood value.

+
+
+ + +
+
+ +
+ + def + log_prior(params, param_lb, param_ub): + + + +
+ +
133def log_prior(params, param_lb, param_ub):  
+134    """
+135    Calculate the log prior for given parameters.
+136    
+137    Args:
+138        params (list): List of parameters.
+139        param_lb (list): Lower bounds for parameters.
+140        param_ub (list): Upper bounds for parameters.
+141        
+142    Returns:
+143        float: Log prior value.
+144    """
+145    if ((param_lb < params) & (params < param_ub)).all():
+146        return 0
+147    else:
+148        return -1e100  # arbitrarily large negative number --> 0 probability
+
+ + +

Calculate the log prior for given parameters.

+ +
Arguments:
+ +
    +
  • params (list): List of parameters.
  • +
  • param_lb (list): Lower bounds for parameters.
  • +
  • param_ub (list): Upper bounds for parameters.
  • +
+ +
Returns:
+ +
+

float: Log prior value.

+
+
+ + +
+
+ +
+ + def + log_post_wrapper( params, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments, param_lb, param_ub): + + + +
+ +
151def log_post_wrapper(params, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments, param_lb, param_ub):
+152    """
+153    Wrapper function to calculate the log posterior (log likelihood + log prior).
+154    
+155    Args:
+156        params (list): List of parameters.
+157        rr_model (object): RoadRunner model object.
+158        y_obs (ndarray): Observed values.
+159        initial_conditions (list): List of initial species concentrations.
+160        initial_conditions_scale (list): List of scaling factors for the initial species concentrations.
+161        buffer_concentration_scale (list): List of scaling factors for the buffer concentrations.
+162        solver_arguments (dict): Dictionary containing the following keys:
+163            - roadrunner_solver_output_selections (list): Species or parameters to be included in the simulation output.
+164            - buffer_concentration_sequence (list of lists): Sequence of buffer concentrations for each assay stage.
+165            - time_per_stage (float): Duration of each assay stage.
+166            - n_points_per_stage (int): Number of data points per assay stage.
+167            - buffer_species_names (list): Names of buffer species.
+168            - solver_absolute_tolerance (float): Absolute tolerance for the solver.
+169            - solver_relative_tolerance (float): Relative tolerance for the solver.
+170            - remove_first_n (int): Number of initial data points to exclude from the results for each stage.
+171            - remove_after_n (int): Index after which to stop including data points in the results for each stage.
+172            - species_labels (list): Labels of species for which initial conditions are provided.
+173            - rate_constant_names (list): Names of rate constants in the model.
+174        param_lb (list): Lower bounds for parameters.
+175        param_ub (list): Upper bounds for parameters.
+176        
+177    Returns:
+178        float: Log posterior value.
+179    """
+180    logl = log_like(params, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments)
+181    logpr = log_prior(params, param_lb, param_ub)
+182    return logl+logpr # arbitrarily large negative number --> 0 probability
+
+ + +

Wrapper function to calculate the log posterior (log likelihood + log prior).

+ +
Arguments:
+ +
    +
  • params (list): List of parameters.
  • +
  • rr_model (object): RoadRunner model object.
  • +
  • y_obs (ndarray): Observed values.
  • +
  • initial_conditions (list): List of initial species concentrations.
  • +
  • initial_conditions_scale (list): List of scaling factors for the initial species concentrations.
  • +
  • buffer_concentration_scale (list): List of scaling factors for the buffer concentrations.
  • +
  • solver_arguments (dict): Dictionary containing the following keys: +
      +
    • roadrunner_solver_output_selections (list): Species or parameters to be included in the simulation output.
    • +
    • buffer_concentration_sequence (list of lists): Sequence of buffer concentrations for each assay stage.
    • +
    • time_per_stage (float): Duration of each assay stage.
    • +
    • n_points_per_stage (int): Number of data points per assay stage.
    • +
    • buffer_species_names (list): Names of buffer species.
    • +
    • solver_absolute_tolerance (float): Absolute tolerance for the solver.
    • +
    • solver_relative_tolerance (float): Relative tolerance for the solver.
    • +
    • remove_first_n (int): Number of initial data points to exclude from the results for each stage.
    • +
    • remove_after_n (int): Index after which to stop including data points in the results for each stage.
    • +
    • species_labels (list): Labels of species for which initial conditions are provided.
    • +
    • rate_constant_names (list): Names of rate constants in the model.
    • +
  • +
  • param_lb (list): Lower bounds for parameters.
  • +
  • param_ub (list): Upper bounds for parameters.
  • +
+ +
Returns:
+ +
+

float: Log posterior value.

+
+
+ + +
+
+ +
+ + def + log_post_wrapper_extended( params, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments, param_lb, param_ub): + + + +
+ +
185def log_post_wrapper_extended(params, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments, param_lb, param_ub):
+186    """
+187    Wrapper function to calculate the log posterior (log likelihood + log prior) considering additional nuisance parameters - buffer solution and protein concentrations, bias.
+188    
+189    Args:
+190        params (list): List of parameters.
+191        rr_model (object): RoadRunner model object.
+192        y_obs (ndarray): Observed values.
+193        initial_conditions (list): List of initial species concentrations.
+194        initial_conditions_scale (list): List of scaling factors for the initial species concentrations.
+195        buffer_concentration_scale (list): List of scaling factors for the buffer concentrations.
+196        solver_arguments (dict): Dictionary containing the following keys:
+197            - roadrunner_solver_output_selections (list): Species or parameters to be included in the simulation output.
+198            - buffer_concentration_sequence (list of lists): Sequence of buffer concentrations for each assay stage.
+199            - time_per_stage (float): Duration of each assay stage.
+200            - n_points_per_stage (int): Number of data points per assay stage.
+201            - buffer_species_names (list): Names of buffer species.
+202            - solver_absolute_tolerance (float): Absolute tolerance for the solver.
+203            - solver_relative_tolerance (float): Relative tolerance for the solver.
+204            - remove_first_n (int): Number of initial data points to exclude from the results for each stage.
+205            - remove_after_n (int): Index after which to stop including data points in the results for each stage.
+206            - species_labels (list): Labels of species for which initial conditions are provided.
+207            - rate_constant_names (list): Names of rate constants in the model.
+208        param_lb (list): Lower bounds for parameters.
+209        param_ub (list): Upper bounds for parameters.
+210        
+211    Returns:
+212        float: Log posterior value.
+213    """
+214
+215    logl = log_like_extended(params, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments)
+216    logpr = log_prior(params, param_lb, param_ub)
+217    return logl+logpr
+
+ + +

Wrapper function to calculate the log posterior (log likelihood + log prior) considering additional nuisance parameters - buffer solution and protein concentrations, bias.

+ +
Arguments:
+ +
    +
  • params (list): List of parameters.
  • +
  • rr_model (object): RoadRunner model object.
  • +
  • y_obs (ndarray): Observed values.
  • +
  • initial_conditions (list): List of initial species concentrations.
  • +
  • initial_conditions_scale (list): List of scaling factors for the initial species concentrations.
  • +
  • buffer_concentration_scale (list): List of scaling factors for the buffer concentrations.
  • +
  • solver_arguments (dict): Dictionary containing the following keys: +
      +
    • roadrunner_solver_output_selections (list): Species or parameters to be included in the simulation output.
    • +
    • buffer_concentration_sequence (list of lists): Sequence of buffer concentrations for each assay stage.
    • +
    • time_per_stage (float): Duration of each assay stage.
    • +
    • n_points_per_stage (int): Number of data points per assay stage.
    • +
    • buffer_species_names (list): Names of buffer species.
    • +
    • solver_absolute_tolerance (float): Absolute tolerance for the solver.
    • +
    • solver_relative_tolerance (float): Relative tolerance for the solver.
    • +
    • remove_first_n (int): Number of initial data points to exclude from the results for each stage.
    • +
    • remove_after_n (int): Index after which to stop including data points in the results for each stage.
    • +
    • species_labels (list): Labels of species for which initial conditions are provided.
    • +
    • rate_constant_names (list): Names of rate constants in the model.
    • +
  • +
  • param_lb (list): Lower bounds for parameters.
  • +
  • param_ub (list): Upper bounds for parameters.
  • +
+ +
Returns:
+ +
+

float: Log posterior value.

+
+
+ + +
+
+ +
+ + def + negative_log_likelihood_wrapper( params, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments, param_lb, param_ub): + + + +
+ +
220def negative_log_likelihood_wrapper(params, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments, param_lb, param_ub):
+221    """
+222    Wrapper function to calculate the negative log likelihood.
+223    Technically the negative log-posterior, but the prior is uninformative so it is the same as the log-likelihood with a boundary constraint.
+224    
+225    Args:
+226        params (list): List of parameters.
+227        rr_model (object): RoadRunner model object.
+228        y_obs (ndarray): Observed values.
+229        initial_conditions (list): List of initial species concentrations.
+230        initial_conditions_scale (list): List of scaling factors for the initial species concentrations.
+231        buffer_concentration_scale (list): List of scaling factors for the buffer concentrations.
+232        solver_arguments (dict): Dictionary containing the following keys:
+233            - roadrunner_solver_output_selections (list): Species or parameters to be included in the simulation output.
+234            - buffer_concentration_sequence (list of lists): Sequence of buffer concentrations for each assay stage.
+235            - time_per_stage (float): Duration of each assay stage.
+236            - n_points_per_stage (int): Number of data points per assay stage.
+237            - buffer_species_names (list): Names of buffer species.
+238            - solver_absolute_tolerance (float): Absolute tolerance for the solver.
+239            - solver_relative_tolerance (float): Relative tolerance for the solver.
+240            - remove_first_n (int): Number of initial data points to exclude from the results for each stage.
+241            - remove_after_n (int): Index after which to stop including data points in the results for each stage.
+242            - species_labels (list): Labels of species for which initial conditions are provided.
+243            - rate_constant_names (list): Names of rate constants in the model.
+244        param_lb (list): Lower bounds for parameters.
+245        param_ub (list): Upper bounds for parameters.
+246        
+247    Returns:
+248        float: Negative log likelihood value.
+249    """
+250    logl = log_like(params, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments)
+251    logpr = log_prior(params, param_lb, param_ub)
+252    return -1*(logl+logpr) 
+
+ + +

Wrapper function to calculate the negative log likelihood. +Technically the negative log-posterior, but the prior is uninformative so it is the same as the log-likelihood with a boundary constraint.

+ +
Arguments:
+ +
    +
  • params (list): List of parameters.
  • +
  • rr_model (object): RoadRunner model object.
  • +
  • y_obs (ndarray): Observed values.
  • +
  • initial_conditions (list): List of initial species concentrations.
  • +
  • initial_conditions_scale (list): List of scaling factors for the initial species concentrations.
  • +
  • buffer_concentration_scale (list): List of scaling factors for the buffer concentrations.
  • +
  • solver_arguments (dict): Dictionary containing the following keys: +
      +
    • roadrunner_solver_output_selections (list): Species or parameters to be included in the simulation output.
    • +
    • buffer_concentration_sequence (list of lists): Sequence of buffer concentrations for each assay stage.
    • +
    • time_per_stage (float): Duration of each assay stage.
    • +
    • n_points_per_stage (int): Number of data points per assay stage.
    • +
    • buffer_species_names (list): Names of buffer species.
    • +
    • solver_absolute_tolerance (float): Absolute tolerance for the solver.
    • +
    • solver_relative_tolerance (float): Relative tolerance for the solver.
    • +
    • remove_first_n (int): Number of initial data points to exclude from the results for each stage.
    • +
    • remove_after_n (int): Index after which to stop including data points in the results for each stage.
    • +
    • species_labels (list): Labels of species for which initial conditions are provided.
    • +
    • rate_constant_names (list): Names of rate constants in the model.
    • +
  • +
  • param_lb (list): Lower bounds for parameters.
  • +
  • param_ub (list): Upper bounds for parameters.
  • +
+ +
Returns:
+ +
+

float: Negative log likelihood value.

+
+
+ + +
+
+ +
+ + def + negative_log_likelihood_wrapper_extended( params, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments, param_lb, param_ub): + + + +
+ +
255def negative_log_likelihood_wrapper_extended(params, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments, param_lb, param_ub):
+256    """
+257    Wrapper function to calculate the extended negative log likelihood considering concentration uncertainty. 
+258    Technically the negative log-posterior, but the prior is uninformative so it is the same as the log-likelihood with a boundary constraint.
+259    
+260    Args:
+261        params (list): List of parameters including concentration uncertainty.
+262        rr_model (object): RoadRunner model object.
+263        y_obs (ndarray): Observed values.
+264        initial_conditions (list): List of initial species concentrations.
+265        initial_conditions_scale (list): List of scaling factors for the initial species concentrations.
+266        buffer_concentration_scale (list): List of scaling factors for the buffer concentrations.
+267        solver_arguments (dict): Dictionary containing the following keys:
+268            - roadrunner_solver_output_selections (list): Species or parameters to be included in the simulation output.
+269            - buffer_concentration_sequence (list of lists): Sequence of buffer concentrations for each assay stage.
+270            - time_per_stage (float): Duration of each assay stage.
+271            - n_points_per_stage (int): Number of data points per assay stage.
+272            - buffer_species_names (list): Names of buffer species.
+273            - solver_absolute_tolerance (float): Absolute tolerance for the solver.
+274            - solver_relative_tolerance (float): Relative tolerance for the solver.
+275            - remove_first_n (int): Number of initial data points to exclude from the results for each stage.
+276            - remove_after_n (int): Index after which to stop including data points in the results for each stage.
+277            - species_labels (list): Labels of species for which initial conditions are provided.
+278            - rate_constant_names (list): Names of rate constants in the model.
+279        param_lb (list): Lower bounds for parameters.
+280        param_ub (list): Upper bounds for parameters.
+281        
+282    Returns:
+283        float: Negative log likelihood value.
+284    """
+285    logl = log_like_extended(params, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments)
+286    logpr = log_prior(params, param_lb, param_ub)
+287    return -1*(logl+logpr)
+
+ + +

Wrapper function to calculate the extended negative log likelihood considering concentration uncertainty. +Technically the negative log-posterior, but the prior is uninformative so it is the same as the log-likelihood with a boundary constraint.

+ +
Arguments:
+ +
    +
  • params (list): List of parameters including concentration uncertainty.
  • +
  • rr_model (object): RoadRunner model object.
  • +
  • y_obs (ndarray): Observed values.
  • +
  • initial_conditions (list): List of initial species concentrations.
  • +
  • initial_conditions_scale (list): List of scaling factors for the initial species concentrations.
  • +
  • buffer_concentration_scale (list): List of scaling factors for the buffer concentrations.
  • +
  • solver_arguments (dict): Dictionary containing the following keys: +
      +
    • roadrunner_solver_output_selections (list): Species or parameters to be included in the simulation output.
    • +
    • buffer_concentration_sequence (list of lists): Sequence of buffer concentrations for each assay stage.
    • +
    • time_per_stage (float): Duration of each assay stage.
    • +
    • n_points_per_stage (int): Number of data points per assay stage.
    • +
    • buffer_species_names (list): Names of buffer species.
    • +
    • solver_absolute_tolerance (float): Absolute tolerance for the solver.
    • +
    • solver_relative_tolerance (float): Relative tolerance for the solver.
    • +
    • remove_first_n (int): Number of initial data points to exclude from the results for each stage.
    • +
    • remove_after_n (int): Index after which to stop including data points in the results for each stage.
    • +
    • species_labels (list): Labels of species for which initial conditions are provided.
    • +
    • rate_constant_names (list): Names of rate constants in the model.
    • +
  • +
  • param_lb (list): Lower bounds for parameters.
  • +
  • param_ub (list): Upper bounds for parameters.
  • +
+ +
Returns:
+ +
+

float: Negative log likelihood value.

+
+
+ + +
+
+ + \ No newline at end of file