Merge branch 'refs/heads/master' into dist-calibration

matsim-vsp · Jul 15, 2024 · 5b9a9c8 · 5b9a9c8
2 parents 5a002ea + 5d78e70
commit 5b9a9c8
Show file tree

Hide file tree

Showing 2 changed files with 30 additions and 4 deletions.
diff --git a/matsim/calibration/run_simulations.py b/matsim/calibration/run_simulations.py
@@ -24,6 +24,23 @@ def likelihood_ratio_test(ll, ll_null, dof=1):
     return chi2.sf(likelihood_ratio(ll, ll_null), dof)
 
 
+def sample_y_null(shares: np.array, num_persons: int, num_samples: int):
+    """ Replicates a discrete sampling of the null model. For each person, `num_samples` mode indices are drawn from the distribution given by `shares`.
+    This is done to make the discrete sampling of the simulation comparable to the continuous probabilities of the given mode shares. Returns an array of shape (num_persons, len(shares)) holding, per person, the fraction of draws that fell on each mode index.
+    """
+    rng = np.random.default_rng(seed=4711)  # fixed seed: every call yields the same null sample
+
+    samples = rng.choice(len(shares), (num_persons, num_samples), p=shares)  # one row of drawn mode indices per person
+    y_null = np.zeros((num_persons, len(shares)))
+
+    for i, s in enumerate(samples):
+        for j in range(len(shares)):
+            c = np.sum(s == j)  # how often mode index j was drawn for person i
+            y_null[i, j] = c / num_samples  # convert the count into a relative frequency
+
+    return y_null
+
+
 def process_results(runs):
     """Process results of multiple simulations"""
     from sklearn.metrics import log_loss, accuracy_score
@@ -53,11 +70,13 @@ def process_results(runs):
 
     labels = LabelEncoder().fit(modes)
     y_true = labels.transform(dfs["true_mode"])
-    y_null = np.tile(shares.to_numpy(), reps=(len(y_true), 1))
-    y_pred = np.zeros((len(y_true), len(modes)))
-    dists = dfs.euclidean_distance.to_numpy() / 1000
 
+    dists = dfs.euclidean_distance.to_numpy() / 1000
     pred_cols = [c for c in dfs.columns if c.startswith("pred_mode")]
+
+    y_pred = np.zeros((len(y_true), len(modes)))
+    y_null = sample_y_null(shares.to_numpy(), len(dfs), len(pred_cols))
+
     for p in dfs[pred_cols].itertuples():
 
         for j, m in enumerate(modes):
@@ -68,6 +87,13 @@ def process_results(runs):
 
             y_pred[p.Index, j] = c / len(pred_cols)
 
+    choices = pd.DataFrame(data=y_pred, columns=modes)
+    choices.insert(0, "person", dfs.person)
+    choices.insert(1, "n", dfs.n)
+    choices.insert(2, "true_mode", dfs.true_mode)
+
+    choices.to_csv(os.path.join(runs, "choices.csv"), index=False)
+
     accs = [accuracy_score(dfs.true_mode, dfs[col], sample_weight=dfs.weight) for col in pred_cols]
     accs_d = [accuracy_score(dfs.true_mode, dfs[col], sample_weight=dfs.weight * dists) for col in pred_cols]
 

diff --git a/matsim/scenariogen/data/formats/mid.py b/matsim/scenariogen/data/formats/mid.py
@@ -101,7 +101,7 @@ def convert(data: tuple, regio=None):
 
         trip = Trip(
             t_id + "_" + str(n),
-            get(t, "W_GEW", "gew_wege"), p_id, get(t, "H_ID"),
+            get(t, "W_GEW", "gew_wege"), p_id, str(get_int_id(t, "H_ID")),
             n, int(t.ST_WOTAG), depature, int(t.wegmin), float(t.wegkm),
             Mid2017.main_mode(t), Mid2017.purpose(t), None,
             float(t.wegkm) < 9994 and int(t.wegmin) < 9994 and depature is not None