Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use estimated travel distances instead of euclidian distance #80

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions config/base.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,13 @@ optional_columns = [
]
n_matches = 10 # What is the maximum number of NTS matches we want for each SPC household?

[feasible_assignment]
# detour factor when converting euclidian distance to actual travel distance
detour_factor = 1.56
# decay rate when converting euclidian to travel distance (0.0001 is a good value)
# actual_distance = distance * (1 + ((detour_factor - 1) * np.exp(-decay_rate * distance)))
decay_rate = 0.0001

[work_assignment]
commute_level = "OA"
use_percentages = true # if true, optimization problem will try to minimize percentage difference at OD level (not absolute numbers). Recommended to set it to true
Expand Down
78 changes: 78 additions & 0 deletions config/base_all_msoa.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
[parameters]
seed = 0
# this is used to query poi data from osm and to load in SPC data
region = "leeds"
# how many people from the SPC do we want to run the model for? Comment out if you want to run the analysis on the entire SPC populaiton
number_of_households = 25000
# "OA21CD": OA level, "MSOA11CD": MSOA level
zone_id = "MSOA21CD"
# Only set to true if you have travel time matrix at the level specified in boundary_geography
travel_times = false
boundary_geography = "MSOA"
# NTS years to use
nts_years = [2019, 2021, 2022]
# NTS regions to use
nts_regions = [
'Yorkshire and the Humber',
'North West',
'North East',
'East Midlands',
'West Midlands',
'East of England',
'South East',
'South West']
# nts day of the week to use
# 1: Monday, 2: Tuesday, 3: Wednesday, 4: Thursday, 5: Friday, 6: Saturday, 7: Sunday
nts_day_of_week = 3
# what crs do we want the output to be in? (just add the number, e.g. 3857)
output_crs = 3857

[matching]
# for optional and required columns, see the [iterative_match_categorical](https://github.com/Urban-Analytics-Technology-Platform/acbm/blob/ca181c54d7484ebe44706ff4b43c26286b22aceb/src/acbm/matching.py#L110) function
# Do not add any column not listed below. You can only move a column from optional to require (or vise versa)
required_columns = [
"number_adults",
"number_children",
"num_pension_age",
]
optional_columns = [
"number_cars",
"rural_urban_2_categories",
"employment_status",
"tenure_status",
]
# What is the maximum number of NTS matches we want for each SPC household?
n_matches = 10

[feasible_assignment]
# detour factor when converting euclidian distance to actual travel distance
detour_factor = 1.56
# decay rate when converting euclidian to travel distance (0.0001 is a good value)
# actual_distance = distance * (1 + ((detour_factor - 1) * np.exp(-decay_rate * distance)))
decay_rate = 0.0001

[work_assignment]
commute_level = "MSOA"
# if true, optimization problem will try to minimize percentage difference at OD level (not absolute numbers). Recommended to set it to true
use_percentages = true
# weights to add for each objective in the optimization problem
weight_max_dev = 0.2
weight_total_dev = 0.8
# maximum number of feasible zones to include in the optimization problem (less zones makes problem smaller - so faster, but at the cost of a better solution)
max_zones = 10

[postprocessing]
pam_jitter = 30
pam_min_duration = 10
# for get_pt_subscription: everyone above this age has a subscription (pensioners get free travel)
# TODO: more sophisticated approach
pt_subscription_age = 66
# to define if a person is a student:
# eveyone below this age is a student
student_age_base = 16
# everyone below this age that has at least one "education" activity is a student
student_age_upper = 30
# eveyone who uses one of the modes below is classified as a passenger (isPassenger = True)
modes_passenger = ['car_passenger', 'taxi']
# yearly state pension: for getting hhlIncome of pensioners
state_pension = 11502
14 changes: 11 additions & 3 deletions scripts/3.1_assign_primary_feasible_zones.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,11 @@ def main(config_file):
logger.info("Creating estimated travel times matrix")
# Create a new travel time matrix based on distances between zones
travel_time_estimates = zones_to_time_matrix(
zones=boundaries, id_col=config.zone_id, time_units="m"
zones=boundaries,
id_col=config.zone_id,
time_units="m",
detour_factor=config.feasible_assignment.detour_factor,
decay_rate=config.feasible_assignment.decay_rate,
)
logger.info("Travel time estimates created")

Expand Down Expand Up @@ -222,7 +226,9 @@ def main(config_file):
zone_id=config.zone_id,
filter_by_activity=True,
activity_col="education_type",
time_tolerance=0.3,
time_tolerance=0.2,
detour_factor=config.feasible_assignment.detour_factor,
decay_rate=config.feasible_assignment.decay_rate,
)

logger.info("Saving feasible zones for education activities")
Expand All @@ -249,7 +255,9 @@ def main(config_file):
zone_id=config.zone_id,
filter_by_activity=True,
activity_col="dact",
time_tolerance=0.3,
time_tolerance=0.2,
detour_factor=config.feasible_assignment.detour_factor,
decay_rate=config.feasible_assignment.decay_rate,
)

logger.info("Saving feasible zones for work activities")
Expand Down
12 changes: 9 additions & 3 deletions scripts/3.3_assign_facility_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,10 @@ def main(config_file):
x_col="TripDisIncSW",
y_col="length",
x_label="Reported Travel Distance (km)",
y_label="Actual Distance - Euclidian (km)",
y_label="Actual Distance - Estimated (km)",
detour_factor=config.feasible_assignment.detour_factor,
decay_rate=config.feasible_assignment.decay_rate,
x_axis_max=50,
crs=f"EPSG:{config.output_crs}",
title_prefix=f"Scatter plot of TripDisIncSW vs. Length for {activity_type}",
save_dir=acbm.root_path / "data/processed/plots/assigning/",
Expand All @@ -344,8 +347,11 @@ def main(config_file):
activity_type_col="destination activity",
x_col="TripTotalTime",
y_col="length",
x_label="Reported Travel TIme (min)",
y_label="Actual Distance - Euclidian (km)",
x_label="Reported Travel Time (min)",
y_label="Actual Distance - Estimated (km)",
detour_factor=config.feasible_assignment.detour_factor,
decay_rate=config.feasible_assignment.decay_rate,
x_axis_max=180,
crs=f"EPSG:{config.output_crs}",
title_prefix="Scatter plot of TripTotalTime vs. Length",
save_dir=acbm.root_path / "data/processed/plots/assigning/",
Expand Down
2 changes: 2 additions & 0 deletions scripts/4_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,8 @@ def main(config_file):
start_wkt_col="start_location_geometry_wkt",
end_wkt_col="end_location_geometry_wkt",
crs_epsg=config.output_crs,
detour_factor=1.56,
decay_rate=0.0001,
)

# Plot: Aggregate
Expand Down
17 changes: 15 additions & 2 deletions src/acbm/assigning/feasible_zones_primary.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ def get_possible_zones(
travel_times: Optional[pd.DataFrame] = None,
filter_by_activity: bool = False,
time_tolerance: float = 0.2,
detour_factor: float = 1.56,
decay_rate: float = 0.0001,
) -> dict:
"""
Get possible zones for all activity chains in the dataset. This function loops over the travel_times dataframe and filters by mode, time of day and weekday/weekend.
Expand Down Expand Up @@ -104,6 +106,13 @@ def get_possible_zones(
activity chain's travel time (which is stored in "TripTotalTime"). Allowable travel_times are those that fall in the range of:
travel_time_reported * (1 - time_tolerance) <= travel_time_reported <= travel_time_reported * (1 + time_tolerance)
Default = 0.2
detour_factor: float
The detour factor used to calculate travel distances between zones from euclidian distances.
Default = 1.56
decay_rate: float
The decay rate used to calculate travel times between zones from travel distances. Detours make up a smaller portion of
longer distance trips. Default = 0.0001


Returns
-------
Expand Down Expand Up @@ -135,7 +144,11 @@ def get_possible_zones(
if travel_times is None:
logger.info("Travel time matrix not provided: Creating travel times estimates")
travel_times = zones_to_time_matrix(
zones=boundaries, id_col=zone_id, time_units="m"
zones=boundaries,
id_col=zone_id,
time_units="m",
detour_factor=detour_factor,
decay_rate=decay_rate,
)

list_of_modes = activity_chains["mode"].unique()
Expand Down Expand Up @@ -224,7 +237,7 @@ def get_possible_zones(
travel_times_filtered_mode_time_day = travel_times_filtered_mode
# if mode = car_passenger, we compare to the travel time for car (we don't
# have travel times for car_passenger)
if mode == "car_passenger":
if mode in ("car_passenger", "taxi"):
activity_chains_filtered = activity_chains[
(activity_chains["mode"] == "car")
]
Expand Down
19 changes: 19 additions & 0 deletions src/acbm/assigning/plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
from matplotlib import patches as mpatches
from shapely.geometry import LineString

from acbm.assigning.utils import _adjust_distance


def plot_workzone_assignment_line(
assignment_results: pd.DataFrame,
Expand Down Expand Up @@ -387,7 +389,10 @@ def plot_scatter_actual_reported(
title_prefix: str,
activity_type: str,
activity_type_col: str,
detour_factor: float,
decay_rate: float,
crs: str,
x_axis_max: float,
save_dir: str | Path | None = None,
display: bool = False,
):
Expand All @@ -404,7 +409,10 @@ def plot_scatter_actual_reported(
- title_prefix: Prefix for the plot titles.
- activity_type: Type of activity to plot.
- activity_type_col: Column name for the activity type.
- detour_factor: Detour factor for the distance adjustment. (see `_adjust_distance`)
- decay_rate: Decay rate for the distance adjustment.
- crs: Coordinate reference system (CRS) of the activities data.
- x_axis_max: Maximum value for the x-axis. (use to remove very high reported distances / times)
- save_dir: Directory to save the plots. If None, plots are not saved.
- display: Whether to display plots by calling `plt.show()`.
"""
Expand Down Expand Up @@ -433,6 +441,13 @@ def plot_scatter_actual_reported(
# calculate the length of the line_geometry in meters
activity_chains_plot["length"] = activity_chains_plot["line_geometry"].length

# Create a column for estimated distance
activity_chains_plot["length"] = activity_chains_plot["length"].apply(
lambda d: _adjust_distance(
d, detour_factor=detour_factor, decay_rate=decay_rate
)
)

# Calculate the number of rows and columns for the subplots. It is a function of the number of modes
nrows = math.ceil(len(activity_chains_plot["mode"].unique()) / 2)
ncols = 2
Expand All @@ -459,6 +474,10 @@ def plot_scatter_actual_reported(
p = np.poly1d(z)
ax.plot(subset_mode[x_col], p(subset_mode[x_col]), "r--")

# Set x-axis limit
if x_axis_max:
ax.set_xlim(0, x_axis_max)

ax.set_title(f"{title_prefix} for mode: {mode}")
ax.set_xlabel(x_label)
ax.set_ylabel(y_label)
Expand Down
39 changes: 38 additions & 1 deletion src/acbm/assigning/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,10 +284,39 @@ def _get_activities_per_zone_df(activities_per_zone: dict) -> pd.DataFrame:
return pd.concat(activities_per_zone.values())


def _adjust_distance(
distance: float,
detour_factor: float,
decay_rate: float,
) -> float:
"""
Adjusts euclidian distances by adding a detour factor. We use minkowski distance
and a decay rate, as longer detour makes up a smaller proportion of the total
distance as the distance increases.

Parameters
----------
distance : float
The original distance.
detour_factor : float
The detour factor to be applied.
decay_rate : float
The decay rate to be applied.

Returns
-------
float
The adjusted distance.
"""
return distance * (1 + ((detour_factor - 1) * np.exp(-decay_rate * distance)))


def zones_to_time_matrix(
zones: gpd.GeoDataFrame,
time_units: str,
id_col: Optional[str] = None,
detour_factor: float = 1.56,
decay_rate: float = 0.0001,
) -> pd.DataFrame:
"""
Calculates the distance matrix between the centroids of the given zones and returns it as a DataFrame. The matrix also adds
Expand All @@ -305,6 +334,8 @@ def zones_to_time_matrix(
The name of the column in the zones GeoDataFrame to use as the ID. If None, the index values are used. Default is None.
time_units: str, optional
The units to use for the travel time. Options are 's' for seconds and 'm' for minutes.
detour_factor: float, optional
The detour factor to apply to the distance. Default is 1.56.
Returns
-------
pd.DataFrame
Expand All @@ -321,6 +352,11 @@ def zones_to_time_matrix(
distances = distances.stack().reset_index()
distances.columns = [f"{id_col}_from", f"{id_col}_to", "distance"]

# adjust distance column by adding a detour factor
distances["distance"] = distances["distance"].apply(
lambda d: _adjust_distance(d, detour_factor, decay_rate)
)

# replace the index values with the id_col values if id_col is specified
if id_col is not None:
# create a mapping from index to id_col values
Expand Down Expand Up @@ -447,7 +483,8 @@ def intrazone_time(zones: gpd.GeoDataFrame, key_column: str) -> dict:
# Calculate the area of each zone
zones["area"] = zones["geometry"].area
# Calculate the average distance within each zone
zones["average_dist"] = np.sqrt(zones["area"]) / 2
# sqrt(area) / 2 would be radius
zones["average_dist"] = np.sqrt(zones["area"]) / 1.5

# Mode speeds in m/s
mode_speeds_mps = {
Expand Down
11 changes: 10 additions & 1 deletion src/acbm/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@ class MatchingParams(BaseModel):
chunk_size: int = 50_000


@dataclass(frozen=True)
class FeasibleAssignmentParams(BaseModel):
detour_factor: float
decay_rate: float


@dataclass(frozen=True)
class WorkAssignmentParams(BaseModel):
use_percentages: bool
Expand All @@ -51,10 +57,13 @@ class Postprocessing(BaseModel):

class Config(BaseModel):
parameters: Parameters = Field(description="Config: parameters.")
matching: MatchingParams = Field(description="Config: parameters for matching.")
feasible_assignment: FeasibleAssignmentParams = Field(
description="Config: parameters for assignment of feasible zones."
)
work_assignment: WorkAssignmentParams = Field(
description="Config: parameters for work assignment."
)
matching: MatchingParams = Field(description="Config: parameters for matching.")
postprocessing: Postprocessing = Field(
description="Config: parameters for postprocessing."
)
Expand Down
Loading
Loading