diff --git a/src/main/java/org/matsim/prepare/population/ComputePlanChoices.java b/src/main/java/org/matsim/prepare/population/ComputePlanChoices.java index d9ed8f97..c80ff9bb 100644 --- a/src/main/java/org/matsim/prepare/population/ComputePlanChoices.java +++ b/src/main/java/org/matsim/prepare/population/ComputePlanChoices.java @@ -25,10 +25,11 @@ import org.matsim.core.router.*; import org.matsim.core.utils.timing.TimeInterpretation; import org.matsim.modechoice.*; +import org.matsim.modechoice.constraints.RelaxedMassConservationConstraint; import org.matsim.modechoice.estimators.DefaultLegScoreEstimator; +import org.matsim.modechoice.estimators.FixedCostsEstimator; import org.matsim.modechoice.search.TopKChoicesGenerator; import picocli.CommandLine; -import scala.util.parsing.combinator.testing.Str; import javax.annotation.Nullable; import java.nio.file.Files; @@ -93,14 +94,18 @@ public Integer call() throws Exception { Controler controler = this.scenario.createControler(); controler.addOverridingModule(InformedModeChoiceModule.newBuilder() + .withFixedCosts(FixedCostsEstimator.DailyConstant.class, "car") .withLegEstimator(DefaultLegScoreEstimator.class, ModeOptions.ConsiderIfCarAvailable.class, "car") .withLegEstimator(DefaultLegScoreEstimator.class, ModeOptions.AlwaysAvailable.class, "bike", "walk", "pt", "ride") + .withConstraint(RelaxedMassConservationConstraint.class) .build()); InformedModeChoiceConfigGroup imc = ConfigUtils.addOrGetModule(config, InformedModeChoiceConfigGroup.class); imc.setTopK(topK); imc.setModes(modes); + controler.run(); + Injector injector = controler.getInjector(); PlanBuilder.addVehiclesToScenario(injector.getInstance(Scenario.class)); diff --git a/src/main/java/org/matsim/prepare/population/ComputeTripChoices.java b/src/main/java/org/matsim/prepare/population/ComputeTripChoices.java index f3e2c56c..8cbe7f47 100644 --- a/src/main/java/org/matsim/prepare/population/ComputeTripChoices.java +++ b/src/main/java/org/matsim/prepare/population/ComputeTripChoices.java @@ -94,7 +94,7 @@ public Integer call() throws Exception { Controler controler = this.scenario.createControler(); // Run for one iteration to collect travel times -// controler.run(); + controler.run(); Injector injector = controler.getInjector(); diff --git a/src/main/java/org/matsim/run/OpenBerlinScenario.java b/src/main/java/org/matsim/run/OpenBerlinScenario.java index 1007b8d1..d30cb9f1 100644 --- a/src/main/java/org/matsim/run/OpenBerlinScenario.java +++ b/src/main/java/org/matsim/run/OpenBerlinScenario.java @@ -19,6 +19,8 @@ import org.matsim.core.scoring.functions.ScoringParametersForPerson; import org.matsim.prepare.RunOpenBerlinCalibration; import org.matsim.prepare.population.AssignIncome; +import org.matsim.run.scoring.VspScoringConfigGroup; +import org.matsim.run.scoring.VspScoringModule; import org.matsim.simwrapper.SimWrapperConfigGroup; import org.matsim.simwrapper.SimWrapperModule; import picocli.CommandLine; @@ -116,8 +118,9 @@ protected void prepareControler(Controler controler) { controler.addOverridingModule(new TravelTimeBinding()); -// controler.addOverridingModule(new VspScoringModule()); - + if (ConfigUtils.hasModule(controler.getConfig(), VspScoringConfigGroup.class)) { + controler.addOverridingModule(new VspScoringModule()); + } } /** diff --git a/src/main/python/estimate_mixed_trip_choice.py b/src/main/python/estimate_mixed_trip_choice.py new file mode 100644 index 00000000..53002d13 --- /dev/null +++ b/src/main/python/estimate_mixed_trip_choice.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from collections import defaultdict + +import argparse + +from xlogit import MixedLogit +from xlogit.utils import wide_to_long + + +import numpy as np +import pandas as pd +from scipy.special import softmax + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Estimate the trip choice model") + parser.add_argument("--input", help="Path to the input file", type=str, default="../../../trip-choices.csv") + + args = parser.parse_args() + + df_wide = pd.read_csv(args.input) + + #df_wide = df_wide.sample(frac=0.1) + + modes = list(df_wide.columns.str.extract(r"([a-zA-z]+)_valid", expand=False).dropna().unique()) + print("Modes:", modes) + print("Number of choices:", len(df_wide)) + + df_wide['custom_id'] = np.arange(len(df_wide)) # Add unique identifier + df_wide['choice'] = df_wide['choice'].map(dict(enumerate(modes, 1))) + + km_costs = defaultdict(lambda: 0.0, car=-0.149, ride=-0.149) + + for mode in modes: + # Base asc + df_wide[f"{mode}_costs"] = df_wide[f"{mode}_km"] * km_costs[mode] + + # Add the time costs + if mode == "ride": + df_wide[f"{mode}_costs"] += -6.88 * df_wide[f"{mode}_hours"] + + df_wide = df_wide.drop(columns=[f"{mode}_walk_km"]) + + df_wide["p_id"] = df_wide["p_id"].str.replace(r"_\d+$", "", regex=True) + df_wide["p_id"] = df_wide["p_id"].astype('category').cat.codes + + varying = list(df_wide.columns.str.extract(r"walk_([a-zA-z]+)", expand=False).dropna().unique()) + + print("Varying:", varying) + + df = wide_to_long(df_wide, id_col='custom_id', alt_name='alt', sep='_', + alt_list=modes, empty_val=0, + varying=varying, alt_is_prefix=True) + + for mode in modes: + df[f'asc_{mode}'] = np.ones(len(df))*(df['alt'] == mode) + + MixedLogit.check_if_gpu_available() + + model = MixedLogit() + + varnames=['asc_car', 'asc_pt', 'asc_bike', 'asc_ride'] + + model.fit(X=df[varnames], y=df['choice'], varnames=varnames, + alts=df['alt'], ids=df['custom_id'], avail=df['valid'], + panels=df["p_id"], randvars={'asc_car': 'n'}, n_draws=1500, + optim_method='L-BFGS-B') + + model.summary() \ No newline at end of file diff --git a/src/main/python/estimate_plan_choice.py b/src/main/python/estimate_plan_choice.py index fd753a30..475cfd1d 100644 --- a/src/main/python/estimate_plan_choice.py +++ b/src/main/python/estimate_plan_choice.py @@ -21,7 +21,7 @@ df.drop(columns=["person"], inplace=True) # TODO: for testing only - df = df.sample(frac=0.05) + df = df.sample(frac=0.1) # Convert all the columns to numeric df = df * 1 @@ -43,9 +43,8 @@ asc = Beta(f"ASC_{mode}", 0, None, None, 1 if mode == "walk" else 0) # Pt does not have its own random parameter - if mode == "walk" or mode == "pt": + if True or mode == "walk" or mode == "pt": ASC[mode] = asc - else: # The random parameter asc_s = Beta(f"ASC_{mode}_s", 1, None, None, 0) @@ -54,7 +53,7 @@ # TODO: distance specific utility parameters # B_TIME = Beta('B_TIME', 0, None, None, 0) - B_COST = Beta('B_COST', 0.5, 0, 1, 0) + B_COST = Beta('B_COST', 0.5, 0, 0, 0) U = {} AV = {} @@ -63,11 +62,11 @@ # fixed_costs = defaultdict(lambda: 0.0, car=-14.13, pt=-3) fixed_costs = defaultdict(lambda: 0.0, car=-3, pt=-3) - km_costs = defaultdict(lambda: 0.0, car=-0.149) + km_costs = defaultdict(lambda: 0.0, car=-0.149, ride=-0.149) time_cost = -6.88 - for i in range(1, k): + for i in range(1, k + 1): u = 0 for mode in modes: # Is 1 if a mode was used once @@ -78,18 +77,24 @@ u += ASC[mode] * v[f"plan_{i}_{mode}_usage"] u += time_cost * v[f"plan_{i}_{mode}_hours"] - u += FIXED_COSTS * B_COST * bioDraws('B_TIME_RND', 'UNIFORM_ANTI') + # u += FIXED_COSTS * B_COST * bioDraws('B_TIME_RND', 'NORMAL_ANTI') + u += FIXED_COSTS u += KM_COSTS if mode == "ride": u += time_cost * v[f"plan_{i}_{mode}_hours"] + if mode != "walk": + slope = Beta(f"B_{mode}_DIST", 0, None, None, 0) + u += slope * v[f"plan_{i}_{mode}_km"] + U[i] = u AV[i] = v[f"plan_{i}_valid"] - prob = models.logit(U, AV, v["choice"]) + # prob = models.logit(U, AV, v["choice"]) + # logprob = log(MonteCarlo(prob)) - logprob = log(MonteCarlo(prob)) + logprob = models.loglogit(U, AV, v["choice"]) biogeme = bio.BIOGEME(database, logprob) diff --git a/src/main/python/estimate_trip_choice.py b/src/main/python/estimate_trip_choice.py index 6932d4cc..7e4e9253 100644 --- a/src/main/python/estimate_trip_choice.py +++ b/src/main/python/estimate_trip_choice.py @@ -1,11 +1,14 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +from collections import defaultdict + import argparse import biogeme.biogeme as bio import biogeme.database as db -import biogeme.expressions as exp import biogeme.models as models +from biogeme.expressions import Beta + import numpy as np import pandas as pd from scipy.special import softmax @@ -13,8 +16,6 @@ if __name__ == "__main__": parser = argparse.ArgumentParser(description="Estimate the trip choice model") parser.add_argument("--input", help="Path to the input file", type=str, default="../../../trip-choices.csv") - parser.add_argument("--modes", nargs="+", help="Modes to include in the model", type=str, - default=["walk", "pt", "car", "bike"]) args = parser.parse_args() @@ -25,38 +26,43 @@ # Convert all the columns to numeric df = df * 1 + modes = list(df.columns.str.extract(r"([a-zA-z]+)_valid", expand=False).dropna().unique()) + print("Modes: ", modes) + print("Number of choices: ", len(df)) + database = db.Database("data/choices", df) v = database.variables database.remove(v["choice"] == 0) - # Variables - WALK_TT_SCALED = database.DefineVariable('WALK_TT_SCALED', v["walk_travelTime"] / 1000) - WALK_COST_SCALED = database.DefineVariable('WALK_COST_SCALED', v["walk_walkDist"] / 1000) - PT_TT_SCALED = database.DefineVariable('PT_TT_SCALED', v["pt_travelTime"] / 1000) - PT_COST_SCALED = database.DefineVariable('PT_COST_SCALED', v["pt_travelTime"] / 1000) - CAR_TT_SCALED = database.DefineVariable('CAR_TT_SCALED', v["car_travelTime"] / 1000) - CAR_CO_SCALED = database.DefineVariable('CAR_CO_SCALED', v["car_travelTime"] / 1000) - BIKE_TT_SCALED = database.DefineVariable('BIKE_TT_SCALED', v["bike_travelTime"] / 1000) - BIKE_CO_SCALED = database.DefineVariable('BIKE_CO_SCALED', v["bike_travelTime"] / 1000) + fixed_costs = defaultdict(lambda: 0.0) + km_costs = defaultdict(lambda: 0.0, car=-0.149, ride=-0.149) + + ASC = {} + for mode in modes: + # Base asc + ASC[mode] = Beta(f"ASC_{mode}", 0, None, None, 1 if mode == "walk" else 0) + + U = {} + AV = {} + + # B_TIME = Beta('B_TIME', 0, None, None, 0) + B_TIME = -6.88 + + # TODO: use person id for mixed logit - # Coefficients - ASC_PT = exp.Beta('ASC_PT', 0, None, None, 0) - ASC_CAR = exp.Beta('ASC_CAR', 0, None, None, 0) - ASC_BIKE = exp.Beta('ASC_BIKE', 0, None, None, 0) + for i, mode in enumerate(modes, 1): - B_TIME = exp.Beta('B_TIME', 0, None, None, 0) - # B_COST = exp.Beta('B_COST', 0, None, None, 0) + u = ASC[mode] + B_TIME * v[f"{mode}_hours"] + (fixed_costs[mode] + km_costs[mode] * v[f"{mode}_km"]) - V_WALK = B_TIME * WALK_TT_SCALED - V_PT = ASC_PT + B_TIME * PT_TT_SCALED - V_CAR = ASC_CAR + B_TIME * CAR_TT_SCALED - V_BIKE = ASC_BIKE + B_TIME * BIKE_TT_SCALED + if mode != "walk": + slope = Beta(f"B_{mode}_DIST", 0, None, None, 0) + u += slope * v[f"{mode}_km"] - V = {1: V_WALK, 2: V_PT, 3: V_CAR, 4: V_BIKE} - av = {1: v["walk_valid"], 2: v["pt_valid"], 3: v["car_valid"], 4: v["bike_valid"]} + U[i] = u + AV[i] = v[f"{mode}_valid"] - logprob = models.loglogit(V, av, v["choice"]) + logprob = models.loglogit(U, AV, v["choice"]) biogeme = bio.BIOGEME(database, logprob) @@ -68,27 +74,17 @@ pandas_results = results.getEstimatedParameters() print(pandas_results) - simulate = { - # 'prob': logprob, - 'Utility walk': V_WALK, - 'Utility PT': V_PT, - 'Utility car': V_CAR, - 'Utility bike': V_BIKE, - } - - sim = bio.BIOGEME(database, simulate) - - sim_results = sim.simulate(results.getBetaValues()) - - print(sim_results) - - print("Given shares") - - # TODO: might not respect the availability of modes - - dist = df.groupby("choice").count()["seq"] - print(dist / dist.sum()) - - print("Simulated shares") - choices = softmax(sim_results, axis=1) - print(np.mean(choices, axis=0)) + sim_results = biogeme.simulate(results.getBetaValues()) + + # print(sim_results) + # + # print("Given shares") + # + # # TODO: might not respect the availability of modes + # + # dist = df.groupby("choice").count()["seq"] + # print(dist / dist.sum()) + # + # print("Simulated shares") + # choices = softmax(sim_results, axis=1) + # print(np.mean(choices, axis=0))