-
Notifications
You must be signed in to change notification settings - Fork 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Revise travel day #82
base: 53-paths-with-79
Are you sure you want to change the base?
Changes from 16 commits
6164a28
0cea61f
88f4911
e0fc2f9
ed45b92
ce5a261
ced648d
5f17c56
4b6db8c
1f5e0b9
4ce4b1a
46946d1
4301493
be2eb6b
748260a
5105dab
32dfe63
6ebd5be
1dd281b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,7 +7,7 @@ | |
from acbm.assigning.utils import cols_for_assignment_all | ||
from acbm.cli import acbm_cli | ||
from acbm.config import load_and_setup_config | ||
from acbm.matching import MatcherExact, match_individuals | ||
from acbm.matching import MatcherExact, match_individuals, match_remaining_individuals | ||
from acbm.preprocessing import ( | ||
count_per_group, | ||
nts_filter_by_region, | ||
|
@@ -16,6 +16,10 @@ | |
transform_by_group, | ||
truncate_values, | ||
) | ||
from acbm.utils import ( | ||
households_with_common_travel_days, | ||
households_with_travel_days_in_nts_weeks, | ||
) | ||
|
||
|
||
@acbm_cli | ||
|
@@ -222,23 +226,48 @@ def get_interim_path( | |
|
||
logger.info("Filtering NTS data by specified year(s)") | ||
|
||
logger.info(f"Total NTS households: {nts_households.shape[0]:,.0f}") | ||
years = config.parameters.nts_years | ||
|
||
nts_individuals = nts_filter_by_year(nts_individuals, psu, years) | ||
nts_households = nts_filter_by_year(nts_households, psu, years) | ||
nts_trips = nts_filter_by_year(nts_trips, psu, years) | ||
|
||
logger.info( | ||
f"Total NTS households (after year filtering): {nts_households.shape[0]:,.0f}" | ||
) | ||
# #### Filter by geography | ||
# | ||
|
||
regions = config.parameters.nts_regions | ||
|
||
nts_individuals = nts_filter_by_region(nts_individuals, psu, regions) | ||
nts_households = nts_filter_by_region(nts_households, psu, regions) | ||
nts_trips = nts_filter_by_region(nts_trips, psu, regions) | ||
|
||
# Create dictionaries of key value pairs | ||
logger.info( | ||
f"Total NTS households (after region filtering): {nts_households.shape[0]:,.0f}" | ||
) | ||
|
||
# Ensure that the households have at least one day in `nts_days_of_week` that | ||
# all household members have trips for | ||
if config.parameters.common_household_day: | ||
hids = households_with_common_travel_days( | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Gets the subset of households where all individuals have a common travel day (`TravDay`) among the configured `nts_days_of_week`. |
||
nts_trips, config.parameters.nts_days_of_week | ||
) | ||
else: | ||
hids = households_with_travel_days_in_nts_weeks( | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Gets the subset of households where all individuals have any travel day (`TravDay`) among the configured `nts_days_of_week`. |
||
nts_trips, config.parameters.nts_days_of_week | ||
) | ||
|
||
# Subset individuals and households given filtering of trips | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Subset to the households selected above before matching, to ensure matches have the required travel days (`TravDay`). |
||
nts_trips = nts_trips[ | ||
nts_trips["HouseholdID"].isin(hids) | ||
& nts_trips["TravDay"].isin(config.parameters.nts_days_of_week) | ||
] | ||
nts_individuals = nts_individuals[nts_individuals["HouseholdID"].isin(hids)] | ||
nts_households = nts_households[nts_households["HouseholdID"].isin(hids)] | ||
|
||
# Create dictionaries of key value pairs | ||
""" | ||
guide to the dictionaries: | ||
|
||
|
@@ -924,6 +953,19 @@ def get_interim_path( | |
show_progress=True, | ||
) | ||
|
||
# match remaining individuals | ||
remaining_ids = spc_edited.loc[ | ||
~spc_edited.index.isin(matches_ind.keys()), "id" | ||
].to_list() | ||
matches_remaining_ind = match_remaining_individuals( | ||
df1=spc_edited, | ||
df2=nts_individuals, | ||
matching_columns=["age_group", "sex"], | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could update the matching_columns here to enable more precision when not using households: e.g. for employment status and urban rural classification. |
||
remaining_ids=remaining_ids, | ||
show_progress=True, | ||
) | ||
matches_ind.update(matches_remaining_ind) | ||
|
||
Comment on lines
+956
to
+968
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add matching for any remaining individuals that were part of unmatched households. It might be worth considering if this should be more configurable. |
||
# save random sample | ||
with open( | ||
get_interim_path("matches_ind_level_categorical_random_sample.pkl"), "wb" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,7 @@ | |
from acbm.assigning.utils import ( | ||
activity_chains_for_assignment, | ||
get_activities_per_zone, | ||
get_chosen_day, | ||
intrazone_time, | ||
replace_intrazonal_travel_time, | ||
zones_to_time_matrix, | ||
|
@@ -28,11 +29,15 @@ def main(config_file): | |
activity_chains = activity_chains_for_assignment(config) | ||
logger.info("Activity chains loaded") | ||
|
||
# Filter to a specific day of the week | ||
logger.info("Filtering activity chains to a specific day of the week") | ||
activity_chains = activity_chains[ | ||
activity_chains["TravDay"] == config.parameters.nts_day_of_week | ||
] | ||
|
||
# Generate random sample of days by household | ||
get_chosen_day(config).to_parquet( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Get a chosen day for each individual to represent a "sample" day given the configured days of the week and whether the household is configured to share a common day. |
||
config.output_path / "interim" / "assigning" / "chosen_trav_day.parquet" | ||
) | ||
|
||
# Filter to chosen day | ||
activity_chains = activity_chains_for_assignment(config, subset_to_chosen_day=True) | ||
|
||
# --- Study area boundaries | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,6 +3,7 @@ | |
import geopandas as gpd | ||
import numpy as np | ||
import pandas as pd | ||
import polars as pl | ||
|
||
from acbm.config import Config | ||
|
||
|
@@ -11,25 +12,26 @@ def cols_for_assignment_all() -> list[str]: | |
"""Gets activity chains with subset of columns required for assignment.""" | ||
return [ | ||
*cols_for_assignment_edu(), | ||
"household", | ||
"oact", | ||
"nts_ind_id", | ||
"nts_hh_id", | ||
"age_years", | ||
"TripDisIncSW", | ||
"tet", | ||
"DayID", | ||
] | ||
|
||
|
||
def cols_for_assignment_edu() -> list[str]: | ||
"""Gets activity chains with subset of columns required for assignment.""" | ||
return [ | ||
"id", | ||
"household", | ||
"TravDay", | ||
"OA11CD", | ||
"dact", | ||
"mode", | ||
"tst", | ||
"id", | ||
"seq", | ||
"TripTotalTime", | ||
"education_type", | ||
|
@@ -42,16 +44,26 @@ def cols_for_assignment_work() -> list[str]: | |
|
||
|
||
def activity_chains_for_assignment(
    config: Config, columns: list[str] | None = None, subset_to_chosen_day: bool = False
) -> pd.DataFrame:
    """Load the activity chains with the columns required for assignment.

    Args:
        config: Run configuration; `spc_with_nts_trips_filepath` and
            `output_path` are read from it.
        columns: Columns to read from the parquet file. When None, the list
            returned by `cols_for_assignment_all()` is used.
        subset_to_chosen_day: When True, keep only the rows whose
            ("id", "TravDay") pair is present in the interim
            `chosen_trav_day.parquet` file under `config.output_path`.

    Returns:
        The activity chains as a pandas DataFrame, optionally restricted to
        the chosen travel day rows.
    """
    selected_columns = cols_for_assignment_all() if columns is None else columns
    chains = pd.read_parquet(
        config.spc_with_nts_trips_filepath,
        columns=selected_columns,
    )

    if subset_to_chosen_day:
        chosen_day_path = (
            config.output_path / "interim" / "assigning" / "chosen_trav_day.parquet"
        )
        # Inner join: rows without a matching (id, TravDay) pair are dropped.
        return chains.merge(
            pd.read_parquet(chosen_day_path),
            on=["id", "TravDay"],
            how="inner",
        )

    return chains
|
||
|
||
def _map_time_to_day_part( | ||
|
@@ -562,3 +574,115 @@ def replace_intrazonal_travel_time( | |
|
||
# Return the modified DataFrame | ||
return travel_times_copy | ||
|
||
|
||
def get_chosen_day(config: Config) -> pd.DataFrame:
    """Pick one travel day per individual from the activity chains.

    When `config.parameters.common_household_day` is set, one `TravDay` is
    sampled per household and only individuals with rows on that day are
    kept. Otherwise a day is chosen per individual, guided by their work
    status (`pwkstat`) read from `config.spc_combined_filepath`.

    Args:
        config: Run configuration used to load the activity chains and the
            combined SPC data.

    Returns:
        A pandas DataFrame with the columns "id" and "TravDay", sorted by "id".
    """

    def _sampled_day_per_person(rows: pl.DataFrame, alias: str) -> pl.DataFrame:
        # One sampled non-null TravDay per "id", exposed under `alias`.
        return (
            rows.group_by("id")
            .agg(pl.col("TravDay").unique())
            .select(["id", pl.col("TravDay").list.drop_nulls().list.sample(n=1)])
            .explode("TravDay")
            .rename({"TravDay": alias})
        )

    chains = pl.DataFrame(activity_chains_for_assignment(config))

    if config.parameters.common_household_day:
        # Sample a single day per household, then keep the individuals that
        # have rows on their household's sampled day.
        household_day = (
            chains.group_by("household")
            .agg(pl.col("TravDay").unique().sample(1, with_replacement=True))
            .explode("TravDay")
        )
        on_household_day = chains.join(
            household_day,
            on=["household", "TravDay"],
            how="inner",
        )
        return (
            on_household_day.select(["id", "TravDay"]).unique().sort("id").to_pandas()
        )

    # No common household day required: sample candidate days per individual.
    is_work_row = pl.col("dact").eq("work")
    work_days = _sampled_day_per_person(chains.filter(is_work_row), "TravDayWork")
    non_work_days = _sampled_day_per_person(
        chains.filter(~is_work_row), "TravDayNonWork"
    )

    any_days = (
        chains.group_by("id")
        .agg(pl.col("TravDay").unique())
        .select(["id", pl.col("TravDay").list.drop_nulls()])
        .select(
            [
                "id",
                pl.when(pl.col("TravDay").list.len() > 0)
                # Note: this has to be set to with_replacement despite non-empty check
                .then(pl.col("TravDay").list.sample(n=1, with_replacement=True))
                .otherwise(None),
            ]
        )
        .explode("TravDay")
        .rename({"TravDay": "TravDayAny"})
    ).sort("id")

    # Attach the candidate days and each individual's work status.
    work_status = (
        pl.scan_parquet(config.spc_combined_filepath)
        .select(["id", "pwkstat"])
        .collect()
    )
    combined = (
        chains.join(work_days, on="id", how="left", coalesce=True)
        .join(non_work_days, on="id", how="left", coalesce=True)
        .join(any_days, on="id", how="left", coalesce=True)
        .join(work_status, on="id")
    )

    # Samples a chosen day given an individual's pwkstat:
    # pwkstat = 1 is full time, pwkstat = 2 is part time.
    full_time = pl.col("pwkstat").eq(1)
    part_time = pl.col("pwkstat").eq(2)
    has_work_day = pl.col("TravDayWork").is_not_null()
    lacks_work_day = pl.col("TravDayWork").is_null()
    has_non_work_day = pl.col("TravDayNonWork").is_not_null()

    # Sample either TravDayWork or TravDayNonWork stochastically given config.
    # TODO: update from config
    # NOTE(review): np.random.random() draws from [0, 1), so `< 1` always holds
    # and TravDayWork is always selected; the draw also happens once for the
    # whole frame rather than once per individual.
    if np.random.random() < 1:
        part_time_day = pl.col("TravDayWork")
    else:
        part_time_day = pl.col("TravDayNonWork")

    chosen_day = (
        # Full time with a work travel day available.
        pl.when(full_time & has_work_day)
        .then(pl.col("TravDayWork"))
        # Full time without a work travel day available.
        .when(full_time & lacks_work_day)
        .then(pl.col("TravDayAny"))
        # Part time with both a work and a non-work travel day available.
        .when(part_time & has_work_day & has_non_work_day)
        .then(part_time_day)
        # Everyone else falls back to any available day.
        .otherwise(pl.col("TravDayAny"))
        .alias("ChosenTravDay")
    )
    combined = combined.with_columns([chosen_day])

    chosen = combined.select(["id", "ChosenTravDay"]).unique()
    return chosen.rename({"ChosenTravDay": "TravDay"}).sort("id").to_pandas()
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The new boolean parameter `common_household_day` determines whether all individuals of a household need to have a `TravDay` in common.