Skip to content

Commit

Permalink
write grouped shares in create ref data
Browse files Browse the repository at this point in the history
  • Loading branch information
rakow committed Dec 28, 2023
1 parent 1b35bb7 commit e0af1fe
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 17 deletions.
8 changes: 4 additions & 4 deletions matsim/scenariogen/data/preparation.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,17 @@ def prepare_persons(hh, pp, tt, augment=5, max_hh_size=5, core_weekday=False, re

# set car avail
df.loc[df.age < 17, "driving_license"] = Availability.NO
_fill(df, "driving_license", Availability.UNKNOWN)
fill(df, "driving_license", Availability.UNKNOWN)

df["car_avail"] = (df.n_cars > 0) & (df.driving_license == Availability.YES)
df["bike_avail"] = (df.n_bikes > 0) | (df.bike_avail == Availability.YES)

# Small children don't have a public transport (pt) subscription ("abo")
df.loc[df.age < 6, "pt_abo_avail"] = Availability.NO
_fill(df, "pt_abo_avail", Availability.UNKNOWN)
fill(df, "pt_abo_avail", Availability.UNKNOWN)

# Replace unknown income group
_fill(df, "economic_status", EconomicStatus.UNKNOWN)
fill(df, "economic_status", EconomicStatus.UNKNOWN)

# Large households are underrepresented and capped
df.n_persons = np.minimum(df.n_persons, max_hh_size)
Expand Down Expand Up @@ -121,7 +121,7 @@ def prepare_trips(pp, trips, core_weekday=True):
return df[df.columns[::-1]]


def _fill(df, col, val=None):
def fill(df, col, val=None):
""" Fill null values with the distribution of the remaining values; if val is given, entries equal to val are first set to null so they are replaced as well"""
if val is not None:
df.loc[df[col] == val, col] = None
Expand Down
33 changes: 20 additions & 13 deletions matsim/scenariogen/data/run_create_ref_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def default_person_filter(df):
def create(survey_dirs, transform_persons, transform_trips,
invalid_trip_handling: InvalidHandling = InvalidHandling.REMOVE_TRIPS,
dist_groups=[0, 1000, 2000, 5000, 10000, 20000, np.inf],
ref_groups: List[Union[str, Tuple[str]]] = None,
ref_groups: List[str] = None,
output_prefix="") -> AggregationResult:
""" Create reference data from survey data.
:param survey_dirs: Directories with survey data
Expand Down Expand Up @@ -161,31 +161,38 @@ def create(survey_dirs, transform_persons, transform_trips,
groups = None
if ref_groups:

groups = []
overall = share.groupby("main_mode").sum().reset_index()

for g in ref_groups:
groups = [overall]

if type(g) is str:
g = [g]
for g in ref_groups:

for x in g:
if x not in persons.columns:
raise ValueError("Column %s not found in persons" % x)
if g not in persons.columns:
raise ValueError("Column %s not found in persons" % g)

aggr = trips.groupby(g + ["main_mode"]).apply(weighted)
aggr = trips.groupby([g] + ["main_mode"]).apply(weighted)
aggr["share"] = aggr.n / aggr.n.sum()
aggr["share"].fillna(0, inplace=True)
aggr.drop(columns=["n"], inplace=True)

# TODO: only works with one subgroup level
# Normalize per group
for group in aggr.index.get_level_values(0).categories:
for group in set(aggr.index.get_level_values(0)):
sub = aggr.loc[group, :]
sub.share /= sub.share.sum()

groups.append(aggr)
groups.append(aggr.reset_index())

groups = pd.concat(groups, sort=False)

# Reorder columns
groups = groups[ref_groups + ["main_mode", "share"]]

groups.to_csv(output_prefix + "mode_share_per_group_ref.csv", index=False)

# TODO: long format, which might be easier to plot

# TODO: also group by distance group

groups = pd.concat(groups, axis=1)

return AggregationResult(persons, trips, share.groupby("main_mode").sum(), groups=groups)

Expand Down

0 comments on commit e0af1fe

Please sign in to comment.