Skip to content

Commit

Permalink
sample null model for better comparability
Browse files Browse the repository at this point in the history
  • Loading branch information
rakow committed Jul 1, 2024
1 parent 9a76afa commit 2164349
Showing 1 changed file with 28 additions and 3 deletions.
31 changes: 28 additions & 3 deletions matsim/calibration/run_simulations.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,23 @@ def likelihood_ratio_test(ll, ll_null, dof=1):
return chi2.sf(likelihood_ratio(ll, ll_null), dof)


def sample_y_null(shares: np.ndarray, num_persons: int, num_samples: int, seed: int = 4711) -> np.ndarray:
    """Replicate a discrete sampling of the null model.

    For each person, ``num_samples`` modes are drawn from the distribution
    given by ``shares`` and turned into per-mode relative frequencies. This
    makes the discrete sampling of the simulation comparable to the
    continuous probabilities of the given mode shares.

    Args:
        shares: Probability of each mode; passed as ``p`` to the random
            choice, so it has to be a valid probability vector.
        num_persons: Number of rows (persons) to generate.
        num_samples: Number of draws per person.
        seed: Seed of the random generator. The default reproduces the
            previously hard-coded value, so existing callers get the same
            samples as before.

    Returns:
        Array of shape ``(num_persons, len(shares))`` where entry ``[i, j]``
        is the fraction of draws of person ``i`` that selected mode ``j``.
    """
    rng = np.random.default_rng(seed=seed)
    num_modes = len(shares)

    # One row per person, one column per draw; each entry is a mode index.
    samples = rng.choice(num_modes, (num_persons, num_samples), p=shares)

    y_null = np.zeros((num_persons, num_modes))

    # Count the draws per mode for all persons at once. Looping only over the
    # (few) modes avoids a Python-level loop over every person while keeping
    # memory usage at the size of the sample matrix.
    for j in range(num_modes):
        y_null[:, j] = np.sum(samples == j, axis=1) / num_samples

    return y_null


def process_results(runs):
"""Process results of multiple simulations"""
from sklearn.metrics import log_loss, accuracy_score
Expand Down Expand Up @@ -53,11 +70,13 @@ def process_results(runs):

labels = LabelEncoder().fit(modes)
y_true = labels.transform(dfs["true_mode"])
y_null = np.tile(shares.to_numpy(), reps=(len(y_true), 1))
y_pred = np.zeros((len(y_true), len(modes)))
dists = dfs.euclidean_distance.to_numpy() / 1000

dists = dfs.euclidean_distance.to_numpy() / 1000
pred_cols = [c for c in dfs.columns if c.startswith("pred_mode")]

y_pred = np.zeros((len(y_true), len(modes)))
y_null = sample_y_null(shares.to_numpy(), len(dfs), len(pred_cols))

for p in dfs[pred_cols].itertuples():

for j, m in enumerate(modes):
Expand All @@ -68,6 +87,12 @@ def process_results(runs):

y_pred[p.Index, j] = c / len(pred_cols)

choices = pd.DataFrame(data=y_pred, columns=modes)
choices.insert(0, "person", dfs.person)
choices.insert(1, "n", dfs.n)

choices.to_csv(os.path.join(runs, "choices.csv"), index=False)

accs = [accuracy_score(dfs.true_mode, dfs[col], sample_weight=dfs.weight) for col in pred_cols]
accs_d = [accuracy_score(dfs.true_mode, dfs[col], sample_weight=dfs.weight * dists) for col in pred_cols]

Expand Down

0 comments on commit 2164349

Please sign in to comment.