From 606bffe98f73327af5ca40eabecc4e07f81ad277 Mon Sep 17 00:00:00 2001
From: rakow
Date: Sat, 15 Jun 2024 20:51:13 +0200
Subject: [PATCH 1/2] add likelihood test

---
 matsim/calibration/run_simulations.py | 33 +++++++++++++++++++++------
 1 file changed, 26 insertions(+), 7 deletions(-)

diff --git a/matsim/calibration/run_simulations.py b/matsim/calibration/run_simulations.py
index 7a7607d..a8242ad 100644
--- a/matsim/calibration/run_simulations.py
+++ b/matsim/calibration/run_simulations.py
@@ -9,12 +9,21 @@
 from time import sleep
 from typing import Union, Callable
 
-import pandas as pd
 import numpy as np
+import pandas as pd
 
 METADATA = "run-simulations", "Utility to run multiple simulations at once."
 
 
+def likelihood_ratio(ll, ll_null):
+    return (2 * (max(ll, ll_null) - ll_null))
+
+
+def likelihood_ratio_test(ll, ll_null, dof=1):
+    from scipy.stats.distributions import chi2
+    return chi2.sf(likelihood_ratio(ll, ll_null), dof)
+
+
 def process_results(runs):
     """Process results of multiple simulations"""
     from sklearn.metrics import log_loss, accuracy_score
@@ -31,7 +40,8 @@ def process_results(runs):
         if dfs is None:
             dfs = df
         else:
-            dfs= dfs.merge(df, left_on=["person", "n", "true_mode"], right_on=["person", "n", "true_mode"], suffixes=("", "_%s" % run))
+            dfs = dfs.merge(df, left_on=["person", "n", "true_mode"], right_on=["person", "n", "true_mode"],
+                            suffixes=("", "_%s" % run))
 
     shares = dfs.groupby("true_mode").size() / len(dfs)
     modes = shares.index
@@ -48,8 +58,8 @@ def process_results(runs):
         for j, m in enumerate(modes):
             c = 0
             for col in pred_cols:
-               if getattr(p, col) == m:
-                   c += 1
+                if getattr(p, col) == m:
+                    c += 1
 
             y_pred[p.Index, j] = c / len(pred_cols)
 
@@ -57,14 +67,23 @@ def process_results(runs):
     accs_d = [accuracy_score(dfs.true_mode, dfs[col], sample_weight=dfs.weight * dists) for col in pred_cols]
 
     result = [
-        ("Log likelihood", -log_loss(y_true, y_pred, sample_weight=dfs.weight, normalize=False), -log_loss(y_true, y_pred, sample_weight=dfs.weight * dists, normalize=False)),
-        ("Log likelihood (normalized)", -log_loss(y_true, y_pred, sample_weight=dfs.weight, normalize=True), -log_loss(y_true, y_pred, sample_weight=dfs.weight * dists, normalize=True)),
+        ("Log likelihood", -log_loss(y_true, y_pred, sample_weight=dfs.weight, normalize=False),
+         -log_loss(y_true, y_pred, sample_weight=dfs.weight * dists, normalize=False)),
+        ("Log likelihood (normalized)", -log_loss(y_true, y_pred, sample_weight=dfs.weight, normalize=True),
+         -log_loss(y_true, y_pred, sample_weight=dfs.weight * dists, normalize=True)),
+        ("Log likelihood (null)", -log_loss(y_true, y_null, sample_weight=dfs.weight, normalize=False),
+         -log_loss(y_true, y_null, sample_weight=dfs.weight * dists, normalize=False)),
         ("Mean Accuracy", np.mean(accs), np.mean(accs_d)),
-        ("Log likelihood (null)", -log_loss(y_true, y_null, sample_weight=dfs.weight, normalize=False), -log_loss(y_true, y_null, sample_weight=dfs.weight * dists, normalize=False)),
         ("Samples", len(dfs), sum(dists)),
         ("Runs", len(pred_cols), len(pred_cols))
     ]
 
+    result.insert(4, ("McFadden R2", 1 - (result[0][1] / result[2][1]), 1 - (result[0][2] / result[2][2])))
+    result.insert(5, ("LL ratio", likelihood_ratio(result[0][1], result[2][1]),
+                      likelihood_ratio(result[0][2], result[2][2])))
+    result.insert(6, ("LL ratio test", likelihood_ratio_test(result[0][1], result[2][1]),
+                      likelihood_ratio_test(result[0][2], result[2][2])))
+
     df = pd.DataFrame(result, columns=["Metric", "Value", "Distance weighted"]).set_index("Metric")
 
     print(df)

From 61a6eb7d7f919968f120aa60ed78a4ff069ecf5e Mon Sep 17 00:00:00 2001
From: rakow
Date: Sun, 16 Jun 2024 10:34:15 +0200
Subject: [PATCH 2/2] update descriptions

---
 matsim/calibration/run_simulations.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/matsim/calibration/run_simulations.py b/matsim/calibration/run_simulations.py
index a8242ad..512a7af 100644
--- a/matsim/calibration/run_simulations.py
+++ b/matsim/calibration/run_simulations.py
@@ -16,7 +16,7 @@
 
 
 def likelihood_ratio(ll, ll_null):
-    return (2 * (max(ll, ll_null) - ll_null))
+    return (2 * (ll - ll_null))
 
 
 def likelihood_ratio_test(ll, ll_null, dof=1):
@@ -81,7 +81,7 @@ def process_results(runs):
     result.insert(4, ("McFadden R2", 1 - (result[0][1] / result[2][1]), 1 - (result[0][2] / result[2][2])))
     result.insert(5, ("LL ratio", likelihood_ratio(result[0][1], result[2][1]),
                       likelihood_ratio(result[0][2], result[2][2])))
-    result.insert(6, ("LL ratio test", likelihood_ratio_test(result[0][1], result[2][1]),
+    result.insert(6, ("LL ratio test (dof=1)", likelihood_ratio_test(result[0][1], result[2][1]),
                       likelihood_ratio_test(result[0][2], result[2][2])))
 
     df = pd.DataFrame(result, columns=["Metric", "Value", "Distance weighted"]).set_index("Metric")
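
Illustrative sketch (not part of the patches): a minimal standalone example of how the metrics added here fit together. The log-likelihood values below are invented for illustration only; the helper functions simply mirror likelihood_ratio, likelihood_ratio_test and the McFadden R2 row inserted into result in run_simulations.py.

# Minimal sketch; assumes scipy is installed and the numbers are made up.
from scipy.stats.distributions import chi2


def likelihood_ratio(ll, ll_null):
    # Likelihood ratio statistic as defined after PATCH 2/2: 2 * (LL_model - LL_null)
    return 2 * (ll - ll_null)


def likelihood_ratio_test(ll, ll_null, dof=1):
    # p-value of the statistic under a chi-squared distribution with `dof` degrees of freedom
    return chi2.sf(likelihood_ratio(ll, ll_null), dof)


ll = -1200.0       # hypothetical model log likelihood ("Log likelihood" row)
ll_null = -1500.0  # hypothetical null log likelihood ("Log likelihood (null)" row)

mcfadden_r2 = 1 - (ll / ll_null)              # mirrors result.insert(4, ("McFadden R2", ...))
lr = likelihood_ratio(ll, ll_null)            # mirrors result.insert(5, ("LL ratio", ...))
p_value = likelihood_ratio_test(ll, ll_null)  # mirrors result.insert(6, ("LL ratio test (dof=1)", ...))

print(f"McFadden R2: {mcfadden_r2:.3f}, LL ratio: {lr:.1f}, p-value: {p_value:.3g}")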