diff --git a/src/main/java/org/matsim/prepare/choices/ComputePlanChoices.java b/src/main/java/org/matsim/prepare/choices/ComputePlanChoices.java index 62e9a826..25f5e23c 100644 --- a/src/main/java/org/matsim/prepare/choices/ComputePlanChoices.java +++ b/src/main/java/org/matsim/prepare/choices/ComputePlanChoices.java @@ -96,6 +96,7 @@ public static void main(String[] args) { } @Override + @SuppressWarnings("JavaNCSS") public Integer call() throws Exception { if (!output.getFileName().toString().contains(".csv")) { @@ -217,6 +218,11 @@ public Integer call() throws Exception { header.add(String.format("plan_%d_%s_ride_hours", i, mode)); header.add(String.format("plan_%d_%s_n_switches", i, mode)); } + + for (int j = 0; j < maxPlanLength; j++) { + header.add("plan_%d_trip_%d_mode".formatted(i, j)); + } + header.add(String.format("plan_%d_act_util", i)); header.add(String.format("plan_%d_valid", i)); } @@ -335,15 +341,20 @@ private List convert(@Nullable Plan plan, PseudoScorer scorer) { for (String ignored : modes) { row.addAll(List.of(0, 0, 0, 0, 0)); } + + for (int j = 0; j < maxPlanLength; j++) { + row.add(-1); + } + row.add(0); return row; } - Map stats = collect(plan); + AggrModeInfo info = collect(plan); for (String mode : modes) { - ModeStats modeStats = stats.get(mode); + ModeStats modeStats = info.stats.get(mode); row.add(modeStats.usage); row.add(modeStats.travelDistance / 1000); row.add(modeStats.travelTime / 3600); @@ -351,6 +362,15 @@ private List convert(@Nullable Plan plan, PseudoScorer scorer) { row.add(modeStats.numSwitches); } + // Fill information of used modes + for (int j = 0; j < maxPlanLength; j++) { + if (j < info.modes.size()) { + row.add(info.modes.get(j)); + } else { + row.add(-1); + } + } + if (calcScores) row.add(scorer.score(plan).getDouble("score")); else @@ -362,10 +382,12 @@ private List convert(@Nullable Plan plan, PseudoScorer scorer) { /** * Collect aggregated mode stats. */ - private Map collect(Plan plan) { + private AggrModeInfo collect(Plan plan) { + List usedModes = new ArrayList<>(); Map stats = new HashMap<>(); + List trips = TripStructureUtils.getTrips(plan); for (String mode : modes) { int usage = 0; @@ -374,7 +396,7 @@ private Map collect(Plan plan) { double travelDistance = 0; long switches = 0; - for (TripStructureUtils.Trip trip : TripStructureUtils.getTrips(plan)) { + for (TripStructureUtils.Trip trip : trips) { List legs = trip.getLegsOnly(); String mainMode = mmi.identifyMainMode(legs); if (mode.equals(mainMode)) { @@ -392,7 +414,11 @@ private Map collect(Plan plan) { stats.put(mode, new ModeStats(usage, travelTime, travelDistance, rideTime, switches)); } - return stats; + for (TripStructureUtils.Trip trip : trips) { + usedModes.add(mmi.identifyMainMode(trip.getLegsOnly())); + } + + return new AggrModeInfo(usedModes, stats); } /** @@ -402,6 +428,9 @@ public enum PlanCandidates { bestK, diverse, random, carAlternative, subtour } + private record AggrModeInfo(List modes, Map stats) { + } + private record ModeStats(int usage, double travelTime, double travelDistance, double rideTime, long numSwitches) { } diff --git a/src/main/java/org/matsim/run/scoring/AdvancedScoringConfigGroup.java b/src/main/java/org/matsim/run/scoring/AdvancedScoringConfigGroup.java index 9e7fc6eb..a120f192 100644 --- a/src/main/java/org/matsim/run/scoring/AdvancedScoringConfigGroup.java +++ b/src/main/java/org/matsim/run/scoring/AdvancedScoringConfigGroup.java @@ -91,7 +91,11 @@ public enum LoadPreferences { * Variate values with random draw from specific distribution. */ public enum VariationType { - fixed, normal, truncatedNormal, gumbel + fixed, normal, truncatedNormal, gumbel, + /** + * Normal with mean at 0.5772 (Euler–Mascheroni constant). Can be used as coarse approximation to gumbel. + */ + eulerGammaNormal } /** diff --git a/src/main/java/org/matsim/run/scoring/PseudoRandomScorer.java b/src/main/java/org/matsim/run/scoring/PseudoRandomScorer.java index 2257e746..23635b99 100644 --- a/src/main/java/org/matsim/run/scoring/PseudoRandomScorer.java +++ b/src/main/java/org/matsim/run/scoring/PseudoRandomScorer.java @@ -63,6 +63,7 @@ public double scoreTrip(Id personId, String mainMode, TripStructureUtils return switch (distribution) { case gumbel -> sampleGumbel(rng, 0, scale); case normal -> sampleNormal(rng, 0, scale); + case eulerGammaNormal -> sampleNormal(rng, 0.5772156649015329, scale); default -> throw new IllegalStateException("Unsupported distribution: " + distribution); }; } diff --git a/src/main/python/choicemodels/biogeme.toml b/src/main/python/choicemodels/biogeme.toml index 97239f9b..a3c888d8 100644 --- a/src/main/python/choicemodels/biogeme.toml +++ b/src/main/python/choicemodels/biogeme.toml @@ -70,7 +70,7 @@ optimization_algorithm = "simple_bounds" # str: optimization algorithm to be use # 'simple_bounds_BFGS'] [MonteCarlo] -number_of_draws = 1500 # int: Number of draws for Monte-Carlo integration. +number_of_draws = 2000 # int: Number of draws for Monte-Carlo integration. seed = 42 # int: Seed used for the pseudo-random number generation. It is useful # only when each run should generate the exact same result. If 0, a new # seed is used at each run. diff --git a/src/main/python/choicemodels/estimate_biogeme_plan_choice.py b/src/main/python/choicemodels/estimate_biogeme_plan_choice.py index 2011908d..76b4593b 100644 --- a/src/main/python/choicemodels/estimate_biogeme_plan_choice.py +++ b/src/main/python/choicemodels/estimate_biogeme_plan_choice.py @@ -9,7 +9,7 @@ import biogeme.models as models from biogeme.expressions import Beta, bioDraws, log, MonteCarlo -from prepare import read_plan_choices, tn_generator +from prepare import read_plan_choices, tn_generator, gumbel_generator ESTIMATE = 0 FIXED = 1 @@ -50,7 +50,8 @@ v = database.variables database.setRandomNumberGenerators({ - "TN": (tn_generator, "truncated normal generator for mixed logit") + "TN": (tn_generator, "truncated normal generator for mixed logit"), + "GUMBEL": (gumbel_generator, "Gumbel generator for mixed logit") }) fixed_ascs = {x: float(y) for x, y in args.ascs} @@ -111,12 +112,18 @@ print("Estimating car asc, instead of daily utility") B_CAR = 0 + EC = {} if args.est_error_component: print("Estimating error component") - B_ERROR_S = Beta('B_ERROR_S', 0.5, None, None, ESTIMATE) - B_ERROR = B_ERROR_S * bioDraws('B_ERROR_RND', 'NORMAL') - else: - B_ERROR = 0 + + # Draw modes x trips random terms + for m in ds.modes: + EC[m] = [] + + for j in range(7): + EC[m].append(bioDraws(f"ec_{m}_{j}", "NORMAL_ANTI")) + + EC_S = Beta("ec_s", 0.5, 0, None, ESTIMATE) print("Using MXL modes", args.mxl_modes) U = {} @@ -124,7 +131,6 @@ for i in range(1, ds.k + 1): # Price is already negative - perceived_price = (BETA_CAR_PRICE_PERCEPTION * v[f"plan_{i}_car_price"] + BETA_PT_PRICE_PERCEPTION * v[f"plan_{i}_pt_price"] + v[f"plan_{i}_other_price"]) @@ -138,7 +144,15 @@ u += v[f"plan_{i}_car_used"] * B_CAR - U[i] = u + B_ERROR * v["n_trips"] + if args.est_error_component: + errs = 0 + for mode in ds.modes: + for j in range(7): + errs += v[f"plan_{i}_trip_{j}_mode_{mode}"] * EC[mode][j] + + u += EC_S * errs + + U[i] = u AV[i] = v[f"plan_{i}_valid"] if args.no_mxl: @@ -166,6 +180,8 @@ modelName += "_price_perception_car" if args.est_price_perception_pt: modelName += "_price_perception_pt" + if args.est_error_component: + modelName += "_ec" biogeme.modelName = modelName biogeme.weight = v["weight"] diff --git a/src/main/python/choicemodels/prepare.py b/src/main/python/choicemodels/prepare.py index dbba9227..e2bd3960 100644 --- a/src/main/python/choicemodels/prepare.py +++ b/src/main/python/choicemodels/prepare.py @@ -5,7 +5,7 @@ import numpy as np import pandas as pd -from scipy.stats import truncnorm +from scipy.stats import truncnorm, gumbel_r # Cost parameter from current berlin model daily_costs = defaultdict(lambda: 0.0, car=-14.30, pt=-3) @@ -52,6 +52,23 @@ def read_plan_choices(input_file: str, sample: float = 1, seed: int = 42) -> Pla print("Number of choices:", len(df_wide)) + delete = set() + + # Generate boolean indicator variables for mode usage on each trip + for i in range(1, k + 1): + # Max number of trips per plan, + for j in range(7): + for mode in modes: + df_wide[f"plan_{i}_trip_{j}_mode_{mode}"] = df_wide[f"plan_{i}_trip_{j}_mode"] == mode + + delete.add(f"plan_{i}_trip_{j}_mode") + + # Defragment df + df_wide = df_wide.copy() + + # Only keep the indicator columns + df_wide = df_wide.drop(columns=delete) + df_wide['custom_id'] = np.arange(len(df_wide)) # Add unique identifier df_wide['choice'] = df_wide['choice'].map({1: "plan_1"}) @@ -69,6 +86,11 @@ def tn_generator(sample_size: int, number_of_draws: int) -> np.ndarray: """ return TN.rvs((sample_size, number_of_draws)) +def gumbel_generator(sample_size: int, number_of_draws: int) -> np.ndarray: + """ + User-defined random number generator for gumbel distribution + """ + return gumbel_r.rvs(size=(sample_size, number_of_draws)) def calc_plan_variables(df, k, modes, use_util_money=False, add_util_performing=True): """ Calculate utility and costs variables for all alternatives in the dataframe""" @@ -153,4 +175,6 @@ def read_trip_choices(input_file: str) -> TripChoice: dist_weight = df.beelineDist / dists.loc[df.person].dist.to_numpy() df["dist_weight"] = dist_weight + df.dist_weight = df.dist_weight.fillna(1) + return TripChoice(df, modes, varying, read_global_income(input_file))