updates for estimation

matsim-scenarios · Jan 2, 2025 · fbd242e · fbd242e
1 parent 55c925a
commit fbd242e
Show file tree

Hide file tree

Showing 6 changed files with 90 additions and 16 deletions.
diff --git a/src/main/java/org/matsim/prepare/choices/ComputePlanChoices.java b/src/main/java/org/matsim/prepare/choices/ComputePlanChoices.java
@@ -96,6 +96,7 @@ public static void main(String[] args) {
 	}
 
 	@Override
+	@SuppressWarnings("JavaNCSS")
 	public Integer call() throws Exception {
 
 		if (!output.getFileName().toString().contains(".csv")) {
@@ -217,6 +218,11 @@ public Integer call() throws Exception {
 					header.add(String.format("plan_%d_%s_ride_hours", i, mode));
 					header.add(String.format("plan_%d_%s_n_switches", i, mode));
 				}
+
+				for (int j = 0; j < maxPlanLength; j++) {
+					header.add("plan_%d_trip_%d_mode".formatted(i, j));
+				}
+
 				header.add(String.format("plan_%d_act_util", i));
 				header.add(String.format("plan_%d_valid", i));
 			}
@@ -335,22 +341,36 @@ private List<Object> convert(@Nullable Plan plan, PseudoScorer scorer) {
 			for (String ignored : modes) {
 				row.addAll(List.of(0, 0, 0, 0, 0));
 			}
+
+			for (int j = 0; j < maxPlanLength; j++) {
+				row.add(-1);
+			}
+
 			row.add(0);
 
 			return row;
 		}
 
-		Map<String, ModeStats> stats = collect(plan);
+		AggrModeInfo info = collect(plan);
 
 		for (String mode : modes) {
-			ModeStats modeStats = stats.get(mode);
+			ModeStats modeStats = info.stats.get(mode);
 			row.add(modeStats.usage);
 			row.add(modeStats.travelDistance / 1000);
 			row.add(modeStats.travelTime / 3600);
 			row.add(modeStats.rideTime / 3600);
 			row.add(modeStats.numSwitches);
 		}
 
+		// Fill information of used modes
+		for (int j = 0; j < maxPlanLength; j++) {
+			if (j < info.modes.size()) {
+				row.add(info.modes.get(j));
+			} else {
+				row.add(-1);
+			}
+		}
+
 		if (calcScores)
 			row.add(scorer.score(plan).getDouble("score"));
 		else
@@ -362,10 +382,12 @@ private List<Object> convert(@Nullable Plan plan, PseudoScorer scorer) {
 	/**
 	 * Collect aggregated mode stats.
 	 */
-	private Map<String, ModeStats> collect(Plan plan) {
+	private AggrModeInfo collect(Plan plan) {
 
+		List<String> usedModes = new ArrayList<>();
 		Map<String, ModeStats> stats = new HashMap<>();
 
+		List<TripStructureUtils.Trip> trips = TripStructureUtils.getTrips(plan);
 		for (String mode : modes) {
 
 			int usage = 0;
@@ -374,7 +396,7 @@ private Map<String, ModeStats> collect(Plan plan) {
 			double travelDistance = 0;
 			long switches = 0;
 
-			for (TripStructureUtils.Trip trip : TripStructureUtils.getTrips(plan)) {
+			for (TripStructureUtils.Trip trip : trips) {
 				List<Leg> legs = trip.getLegsOnly();
 				String mainMode = mmi.identifyMainMode(legs);
 				if (mode.equals(mainMode)) {
@@ -392,7 +414,11 @@ private Map<String, ModeStats> collect(Plan plan) {
 			stats.put(mode, new ModeStats(usage, travelTime, travelDistance, rideTime, switches));
 		}
 
-		return stats;
+		for (TripStructureUtils.Trip trip : trips) {
+			usedModes.add(mmi.identifyMainMode(trip.getLegsOnly()));
+		}
+
+		return new AggrModeInfo(usedModes, stats);
 	}
 
 	/**
@@ -402,6 +428,9 @@ public enum PlanCandidates {
 		bestK, diverse, random, carAlternative, subtour
 	}
 
+	private record AggrModeInfo(List<String> modes, Map<String, ModeStats> stats) {
+	}
+
 	private record ModeStats(int usage, double travelTime, double travelDistance, double rideTime, long numSwitches) {
 	}
 

diff --git a/src/main/java/org/matsim/run/scoring/AdvancedScoringConfigGroup.java b/src/main/java/org/matsim/run/scoring/AdvancedScoringConfigGroup.java
@@ -91,7 +91,11 @@ public enum LoadPreferences {
 	 * Variate values with random draw from specific distribution.
 	 */
 	public enum VariationType {
-		fixed, normal, truncatedNormal, gumbel
+		fixed, normal, truncatedNormal, gumbel,
+		/**
+		 * Normal with mean at 0.5772 (Euler–Mascheroni constant). Can be used as coarse approximation to gumbel.
+		 */
+		eulerGammaNormal
 	}
 
 	/**

diff --git a/src/main/java/org/matsim/run/scoring/PseudoRandomScorer.java b/src/main/java/org/matsim/run/scoring/PseudoRandomScorer.java
@@ -63,6 +63,7 @@ public double scoreTrip(Id<Person> personId, String mainMode, TripStructureUtils
 		return switch (distribution) {
 			case gumbel -> sampleGumbel(rng, 0, scale);
 			case normal -> sampleNormal(rng, 0, scale);
+			case eulerGammaNormal -> sampleNormal(rng, 0.5772156649015329, scale);
 			default -> throw new IllegalStateException("Unsupported distribution: " + distribution);
 		};
 	}

diff --git a/src/main/python/choicemodels/biogeme.toml b/src/main/python/choicemodels/biogeme.toml
@@ -70,7 +70,7 @@ optimization_algorithm = "simple_bounds" # str: optimization algorithm to be use
                                          # 'simple_bounds_BFGS']
 
 [MonteCarlo]
-number_of_draws = 1500 # int: Number of draws for Monte-Carlo integration.
+number_of_draws = 2000 # int: Number of draws for Monte-Carlo integration.
 seed = 42 # int: Seed used for the pseudo-random number generation. It is useful
          # only when each run should generate the exact same result. If 0, a new
          # seed is used at each run.

diff --git a/src/main/python/choicemodels/estimate_biogeme_plan_choice.py b/src/main/python/choicemodels/estimate_biogeme_plan_choice.py
@@ -9,7 +9,7 @@
 import biogeme.models as models
 from biogeme.expressions import Beta, bioDraws, log, MonteCarlo
 
-from prepare import read_plan_choices, tn_generator
+from prepare import read_plan_choices, tn_generator, gumbel_generator
 
 ESTIMATE = 0
 FIXED = 1
@@ -50,7 +50,8 @@
     v = database.variables
 
     database.setRandomNumberGenerators({
-        "TN": (tn_generator, "truncated normal generator for mixed logit")
+        "TN": (tn_generator, "truncated normal generator for mixed logit"),
+        "GUMBEL": (gumbel_generator, "Gumbel generator for mixed logit")
     })
 
     fixed_ascs = {x: float(y) for x, y in args.ascs}
@@ -111,20 +112,25 @@
         print("Estimating car asc, instead of daily utility")
         B_CAR = 0
 
+    EC = {}
     if args.est_error_component:
         print("Estimating error component")
-        B_ERROR_S = Beta('B_ERROR_S', 0.5, None, None, ESTIMATE)
-        B_ERROR = B_ERROR_S * bioDraws('B_ERROR_RND', 'NORMAL')
-    else:
-        B_ERROR = 0
+
+        # Draw modes x trips random terms
+        for m in ds.modes:
+            EC[m] = []
+
+            for j in range(7):
+                EC[m].append(bioDraws(f"ec_{m}_{j}", "NORMAL_ANTI"))
+
+        EC_S = Beta("ec_s", 0.5, 0, None, ESTIMATE)
 
     print("Using MXL modes", args.mxl_modes)
     U = {}
     AV = {}
 
     for i in range(1, ds.k + 1):
         # Price is already negative
-
         perceived_price = (BETA_CAR_PRICE_PERCEPTION * v[f"plan_{i}_car_price"] +
                            BETA_PT_PRICE_PERCEPTION * v[f"plan_{i}_pt_price"] +
                            v[f"plan_{i}_other_price"])
@@ -138,7 +144,15 @@
 
         u += v[f"plan_{i}_car_used"] * B_CAR
 
-        U[i] = u + B_ERROR * v["n_trips"]
+        if args.est_error_component:
+            errs = 0
+            for mode in ds.modes:
+                for j in range(7):
+                    errs += v[f"plan_{i}_trip_{j}_mode_{mode}"] * EC[mode][j]
+
+            u += EC_S * errs
+
+        U[i] = u
         AV[i] = v[f"plan_{i}_valid"]
 
     if args.no_mxl:
@@ -166,6 +180,8 @@
         modelName += "_price_perception_car"
     if args.est_price_perception_pt:
         modelName += "_price_perception_pt"
+    if args.est_error_component:
+        modelName += "_ec"
 
     biogeme.modelName = modelName
     biogeme.weight = v["weight"]

diff --git a/src/main/python/choicemodels/prepare.py b/src/main/python/choicemodels/prepare.py
@@ -5,7 +5,7 @@
 
 import numpy as np
 import pandas as pd
-from scipy.stats import truncnorm
+from scipy.stats import truncnorm, gumbel_r
 
 # Cost parameter from current berlin model
 daily_costs = defaultdict(lambda: 0.0, car=-14.30, pt=-3)
@@ -52,6 +52,23 @@ def read_plan_choices(input_file: str, sample: float = 1, seed: int = 42) -> Pla
 
     print("Number of choices:", len(df_wide))
 
+    delete = set()
+
+    # Generate boolean indicator variables for mode usage on each trip
+    for i in range(1, k + 1):
+        # Max number of trips per plan,
+        for j in range(7):
+            for mode in modes:
+                df_wide[f"plan_{i}_trip_{j}_mode_{mode}"] = df_wide[f"plan_{i}_trip_{j}_mode"] == mode
+
+            delete.add(f"plan_{i}_trip_{j}_mode")
+
+        # Defragment df
+        df_wide = df_wide.copy()
+
+    # Only keep the indicator columns
+    df_wide = df_wide.drop(columns=delete)
+
     df_wide['custom_id'] = np.arange(len(df_wide))  # Add unique identifier
     df_wide['choice'] = df_wide['choice'].map({1: "plan_1"})
 
@@ -69,6 +86,11 @@ def tn_generator(sample_size: int, number_of_draws: int) -> np.ndarray:
     """
     return TN.rvs((sample_size, number_of_draws))
 
+def gumbel_generator(sample_size: int, number_of_draws: int) -> np.ndarray:
+    """
+    User-defined random number generator for gumbel distribution
+    """
+    return gumbel_r.rvs(size=(sample_size, number_of_draws))
 
 def calc_plan_variables(df, k, modes, use_util_money=False, add_util_performing=True):
     """ Calculate utility and costs variables for all alternatives in the dataframe"""
@@ -153,4 +175,6 @@ def read_trip_choices(input_file: str) -> TripChoice:
     dist_weight = df.beelineDist / dists.loc[df.person].dist.to_numpy()
     df["dist_weight"] = dist_weight
 
+    df.dist_weight = df.dist_weight.fillna(1)
+
     return TripChoice(df, modes, varying, read_global_income(input_file))