Skip to content

Commit

Permalink
improvements related to choice computation
Browse files Browse the repository at this point in the history
  • Loading branch information
rakow committed Feb 27, 2024
1 parent 6991318 commit e0cd228
Show file tree
Hide file tree
Showing 6 changed files with 141 additions and 62 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,11 @@
import org.matsim.core.router.*;
import org.matsim.core.utils.timing.TimeInterpretation;
import org.matsim.modechoice.*;
import org.matsim.modechoice.constraints.RelaxedMassConservationConstraint;
import org.matsim.modechoice.estimators.DefaultLegScoreEstimator;
import org.matsim.modechoice.estimators.FixedCostsEstimator;
import org.matsim.modechoice.search.TopKChoicesGenerator;
import picocli.CommandLine;
import scala.util.parsing.combinator.testing.Str;

import javax.annotation.Nullable;
import java.nio.file.Files;
Expand Down Expand Up @@ -93,14 +94,18 @@ public Integer call() throws Exception {
Controler controler = this.scenario.createControler();

controler.addOverridingModule(InformedModeChoiceModule.newBuilder()
.withFixedCosts(FixedCostsEstimator.DailyConstant.class, "car")
.withLegEstimator(DefaultLegScoreEstimator.class, ModeOptions.ConsiderIfCarAvailable.class, "car")
.withLegEstimator(DefaultLegScoreEstimator.class, ModeOptions.AlwaysAvailable.class, "bike", "walk", "pt", "ride")
.withConstraint(RelaxedMassConservationConstraint.class)
.build());

InformedModeChoiceConfigGroup imc = ConfigUtils.addOrGetModule(config, InformedModeChoiceConfigGroup.class);
imc.setTopK(topK);
imc.setModes(modes);

controler.run();

Injector injector = controler.getInjector();

PlanBuilder.addVehiclesToScenario(injector.getInstance(Scenario.class));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ public Integer call() throws Exception {
Controler controler = this.scenario.createControler();

// Run for one iteration to collect travel times
// controler.run();
controler.run();

Injector injector = controler.getInjector();

Expand Down
7 changes: 5 additions & 2 deletions src/main/java/org/matsim/run/OpenBerlinScenario.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
import org.matsim.core.scoring.functions.ScoringParametersForPerson;
import org.matsim.prepare.RunOpenBerlinCalibration;
import org.matsim.prepare.population.AssignIncome;
import org.matsim.run.scoring.VspScoringConfigGroup;
import org.matsim.run.scoring.VspScoringModule;
import org.matsim.simwrapper.SimWrapperConfigGroup;
import org.matsim.simwrapper.SimWrapperModule;
import picocli.CommandLine;
Expand Down Expand Up @@ -116,8 +118,9 @@ protected void prepareControler(Controler controler) {

controler.addOverridingModule(new TravelTimeBinding());

// controler.addOverridingModule(new VspScoringModule());

if (ConfigUtils.hasModule(controler.getConfig(), VspScoringConfigGroup.class)) {
controler.addOverridingModule(new VspScoringModule());
}
}

/**
Expand Down
70 changes: 70 additions & 0 deletions src/main/python/estimate_mixed_trip_choice.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from collections import defaultdict

import argparse

from xlogit import MixedLogit
from xlogit.utils import wide_to_long


import numpy as np
import pandas as pd
from scipy.special import softmax

if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Estimate the trip choice model")
parser.add_argument("--input", help="Path to the input file", type=str, default="../../../trip-choices.csv")

args = parser.parse_args()

df_wide = pd.read_csv(args.input)

#df_wide = df_wide.sample(frac=0.1)

modes = list(df_wide.columns.str.extract(r"([a-zA-z]+)_valid", expand=False).dropna().unique())
print("Modes:", modes)
print("Number of choices:", len(df_wide))

df_wide['custom_id'] = np.arange(len(df_wide)) # Add unique identifier
df_wide['choice'] = df_wide['choice'].map(dict(enumerate(modes, 1)))

km_costs = defaultdict(lambda: 0.0, car=-0.149, ride=-0.149)

for mode in modes:
# Base asc
df_wide[f"{mode}_costs"] = df_wide[f"{mode}_km"] * km_costs[mode]

# Add the time costs
if mode == "ride":
df_wide[f"{mode}_costs"] += -6.88 * df_wide[f"{mode}_hours"]

df_wide = df_wide.drop(columns=[f"{mode}_walk_km"])

df_wide["p_id"] = df_wide["p_id"].str.replace(r"_\d+$", "", regex=True)
df_wide["p_id"] = df_wide["p_id"].astype('category').cat.codes

varying = list(df_wide.columns.str.extract(r"walk_([a-zA-z]+)", expand=False).dropna().unique())

print("Varying:", varying)

df = wide_to_long(df_wide, id_col='custom_id', alt_name='alt', sep='_',
alt_list=modes, empty_val=0,
varying=varying, alt_is_prefix=True)

for mode in modes:
df[f'asc_{mode}'] = np.ones(len(df))*(df['alt'] == mode)

MixedLogit.check_if_gpu_available()

model = MixedLogit()

varnames=['asc_car', 'asc_pt', 'asc_bike', 'asc_ride']

model.fit(X=df[varnames], y=df['choice'], varnames=varnames,
alts=df['alt'], ids=df['custom_id'], avail=df['valid'],
panels=df["p_id"], randvars={'asc_car': 'n'}, n_draws=1500,
optim_method='L-BFGS-B')

model.summary()
23 changes: 14 additions & 9 deletions src/main/python/estimate_plan_choice.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
df.drop(columns=["person"], inplace=True)

# TODO: for testing only
df = df.sample(frac=0.05)
df = df.sample(frac=0.1)

# Convert all the columns to numeric
df = df * 1
Expand All @@ -43,9 +43,8 @@
asc = Beta(f"ASC_{mode}", 0, None, None, 1 if mode == "walk" else 0)

# Pt does not have its own random parameter
if mode == "walk" or mode == "pt":
if True or mode == "walk" or mode == "pt":
ASC[mode] = asc

else:
# The random parameter
asc_s = Beta(f"ASC_{mode}_s", 1, None, None, 0)
Expand All @@ -54,7 +53,7 @@
# TODO: distance specific utility parameters

# B_TIME = Beta('B_TIME', 0, None, None, 0)
B_COST = Beta('B_COST', 0.5, 0, 1, 0)
B_COST = Beta('B_COST', 0.5, 0, 0, 0)

U = {}
AV = {}
Expand All @@ -63,11 +62,11 @@

# fixed_costs = defaultdict(lambda: 0.0, car=-14.13, pt=-3)
fixed_costs = defaultdict(lambda: 0.0, car=-3, pt=-3)
km_costs = defaultdict(lambda: 0.0, car=-0.149)
km_costs = defaultdict(lambda: 0.0, car=-0.149, ride=-0.149)

time_cost = -6.88

for i in range(1, k):
for i in range(1, k + 1):
u = 0
for mode in modes:
# Is 1 if a mode was used once
Expand All @@ -78,18 +77,24 @@
u += ASC[mode] * v[f"plan_{i}_{mode}_usage"]
u += time_cost * v[f"plan_{i}_{mode}_hours"]

u += FIXED_COSTS * B_COST * bioDraws('B_TIME_RND', 'UNIFORM_ANTI')
# u += FIXED_COSTS * B_COST * bioDraws('B_TIME_RND', 'NORMAL_ANTI')
u += FIXED_COSTS
u += KM_COSTS

if mode == "ride":
u += time_cost * v[f"plan_{i}_{mode}_hours"]

if mode != "walk":
slope = Beta(f"B_{mode}_DIST", 0, None, None, 0)
u += slope * v[f"plan_{i}_{mode}_km"]

U[i] = u
AV[i] = v[f"plan_{i}_valid"]

prob = models.logit(U, AV, v["choice"])
# prob = models.logit(U, AV, v["choice"])
# logprob = log(MonteCarlo(prob))

logprob = log(MonteCarlo(prob))
logprob = models.loglogit(U, AV, v["choice"])

biogeme = bio.BIOGEME(database, logprob)

Expand Down
94 changes: 45 additions & 49 deletions src/main/python/estimate_trip_choice.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,21 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from collections import defaultdict

import argparse
import biogeme.biogeme as bio
import biogeme.database as db
import biogeme.expressions as exp
import biogeme.models as models
from biogeme.expressions import Beta

import numpy as np
import pandas as pd
from scipy.special import softmax

if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Estimate the trip choice model")
parser.add_argument("--input", help="Path to the input file", type=str, default="../../../trip-choices.csv")
parser.add_argument("--modes", nargs="+", help="Modes to include in the model", type=str,
default=["walk", "pt", "car", "bike"])

args = parser.parse_args()

Expand All @@ -25,38 +26,43 @@
# Convert all the columns to numeric
df = df * 1

modes = list(df.columns.str.extract(r"([a-zA-z]+)_valid", expand=False).dropna().unique())
print("Modes: ", modes)
print("Number of choices: ", len(df))

database = db.Database("data/choices", df)
v = database.variables

database.remove(v["choice"] == 0)

# Variables
WALK_TT_SCALED = database.DefineVariable('WALK_TT_SCALED', v["walk_travelTime"] / 1000)
WALK_COST_SCALED = database.DefineVariable('WALK_COST_SCALED', v["walk_walkDist"] / 1000)
PT_TT_SCALED = database.DefineVariable('PT_TT_SCALED', v["pt_travelTime"] / 1000)
PT_COST_SCALED = database.DefineVariable('PT_COST_SCALED', v["pt_travelTime"] / 1000)
CAR_TT_SCALED = database.DefineVariable('CAR_TT_SCALED', v["car_travelTime"] / 1000)
CAR_CO_SCALED = database.DefineVariable('CAR_CO_SCALED', v["car_travelTime"] / 1000)
BIKE_TT_SCALED = database.DefineVariable('BIKE_TT_SCALED', v["bike_travelTime"] / 1000)
BIKE_CO_SCALED = database.DefineVariable('BIKE_CO_SCALED', v["bike_travelTime"] / 1000)
fixed_costs = defaultdict(lambda: 0.0)
km_costs = defaultdict(lambda: 0.0, car=-0.149, ride=-0.149)

ASC = {}
for mode in modes:
# Base asc
ASC[mode] = Beta(f"ASC_{mode}", 0, None, None, 1 if mode == "walk" else 0)

U = {}
AV = {}

# B_TIME = Beta('B_TIME', 0, None, None, 0)
B_TIME = -6.88

# TODO: use person id for mixed logit

# Coefficients
ASC_PT = exp.Beta('ASC_PT', 0, None, None, 0)
ASC_CAR = exp.Beta('ASC_CAR', 0, None, None, 0)
ASC_BIKE = exp.Beta('ASC_BIKE', 0, None, None, 0)
for i, mode in enumerate(modes, 1):

B_TIME = exp.Beta('B_TIME', 0, None, None, 0)
# B_COST = exp.Beta('B_COST', 0, None, None, 0)
u = ASC[mode] + B_TIME * v[f"{mode}_hours"] + (fixed_costs[mode] + km_costs[mode] * v[f"{mode}_km"])

V_WALK = B_TIME * WALK_TT_SCALED
V_PT = ASC_PT + B_TIME * PT_TT_SCALED
V_CAR = ASC_CAR + B_TIME * CAR_TT_SCALED
V_BIKE = ASC_BIKE + B_TIME * BIKE_TT_SCALED
if mode != "walk":
slope = Beta(f"B_{mode}_DIST", 0, None, None, 0)
u += slope * v[f"{mode}_km"]

V = {1: V_WALK, 2: V_PT, 3: V_CAR, 4: V_BIKE}
av = {1: v["walk_valid"], 2: v["pt_valid"], 3: v["car_valid"], 4: v["bike_valid"]}
U[i] = u
AV[i] = v[f"{mode}_valid"]

logprob = models.loglogit(V, av, v["choice"])
logprob = models.loglogit(U, AV, v["choice"])

biogeme = bio.BIOGEME(database, logprob)

Expand All @@ -68,27 +74,17 @@
pandas_results = results.getEstimatedParameters()
print(pandas_results)

simulate = {
# 'prob': logprob,
'Utility walk': V_WALK,
'Utility PT': V_PT,
'Utility car': V_CAR,
'Utility bike': V_BIKE,
}

sim = bio.BIOGEME(database, simulate)

sim_results = sim.simulate(results.getBetaValues())

print(sim_results)

print("Given shares")

# TODO: might not respect the availability of modes

dist = df.groupby("choice").count()["seq"]
print(dist / dist.sum())

print("Simulated shares")
choices = softmax(sim_results, axis=1)
print(np.mean(choices, axis=0))
sim_results = biogeme.simulate(results.getBetaValues())

# print(sim_results)
#
# print("Given shares")
#
# # TODO: might not respect the availability of modes
#
# dist = df.groupby("choice").count()["seq"]
# print(dist / dist.sum())
#
# print("Simulated shares")
# choices = softmax(sim_results, axis=1)
# print(np.mean(choices, axis=0))

0 comments on commit e0cd228

Please sign in to comment.