From e0af1fef96425f952618ba8d5507c6f9ceee245b Mon Sep 17 00:00:00 2001 From: rakow Date: Thu, 28 Dec 2023 11:35:03 +0100 Subject: [PATCH] write grouped shares in create ref data --- matsim/scenariogen/data/preparation.py | 8 ++--- .../scenariogen/data/run_create_ref_data.py | 33 +++++++++++-------- 2 files changed, 24 insertions(+), 17 deletions(-) diff --git a/matsim/scenariogen/data/preparation.py b/matsim/scenariogen/data/preparation.py index a2ac3b2..52fc328 100644 --- a/matsim/scenariogen/data/preparation.py +++ b/matsim/scenariogen/data/preparation.py @@ -22,17 +22,17 @@ def prepare_persons(hh, pp, tt, augment=5, max_hh_size=5, core_weekday=False, re # set car avail df.loc[df.age < 17, "driving_license"] = Availability.NO - _fill(df, "driving_license", Availability.UNKNOWN) + fill(df, "driving_license", Availability.UNKNOWN) df["car_avail"] = (df.n_cars > 0) & (df.driving_license == Availability.YES) df["bike_avail"] = (df.n_bikes > 0) | (df.bike_avail == Availability.YES) # small children don't have pt abo df.loc[df.age < 6, "pt_abo_avail"] = Availability.NO - _fill(df, "pt_abo_avail", Availability.UNKNOWN) + fill(df, "pt_abo_avail", Availability.UNKNOWN) # Replace unknown income group - _fill(df, "economic_status", EconomicStatus.UNKNOWN) + fill(df, "economic_status", EconomicStatus.UNKNOWN) # Large households are underrepresented and capped df.n_persons = np.minimum(df.n_persons, max_hh_size) @@ -121,7 +121,7 @@ def prepare_trips(pp, trips, core_weekday=True): return df[df.columns[::-1]] -def _fill(df, col, val=None): +def fill(df, col, val=None): """ Fill null values with dist of the rest (or replace val)""" if val is not None: df.loc[df[col] == val, col] = None diff --git a/matsim/scenariogen/data/run_create_ref_data.py b/matsim/scenariogen/data/run_create_ref_data.py index aefc0ff..66c8046 100644 --- a/matsim/scenariogen/data/run_create_ref_data.py +++ b/matsim/scenariogen/data/run_create_ref_data.py @@ -98,7 +98,7 @@ def default_person_filter(df): def create(survey_dirs, transform_persons, transform_trips, invalid_trip_handling: InvalidHandling = InvalidHandling.REMOVE_TRIPS, dist_groups=[0, 1000, 2000, 5000, 10000, 20000, np.inf], - ref_groups: List[Union[str, Tuple[str]]] = None, + ref_groups: List[str] = None, output_prefix="") -> AggregationResult: """ Create reference data from survey data. :param survey_dirs: Directories with survey data @@ -161,31 +161,38 @@ def create(survey_dirs, transform_persons, transform_trips, groups = None if ref_groups: - groups = [] + overall = share.groupby("main_mode").sum().reset_index() - for g in ref_groups: + groups = [overall] - if type(g) is str: - g = [g] + for g in ref_groups: - for x in g: - if x not in persons.columns: - raise ValueError("Column %s not found in persons" % x) + if g not in persons.columns: + raise ValueError("Column %s not found in persons" % g) - aggr = trips.groupby(g + ["main_mode"]).apply(weighted) + aggr = trips.groupby([g] + ["main_mode"]).apply(weighted) aggr["share"] = aggr.n / aggr.n.sum() aggr["share"].fillna(0, inplace=True) aggr.drop(columns=["n"], inplace=True) - # todo only works with one subgroup level # Normalize per group - for group in aggr.index.get_level_values(0).categories: + for group in set(aggr.index.get_level_values(0)): sub = aggr.loc[group, :] sub.share /= sub.share.sum() - groups.append(aggr) + groups.append(aggr.reset_index()) + + groups = pd.concat(groups, sort=False) + + # Reorder columns + groups = groups[ref_groups + ["main_mode", "share"]] + + groups.to_csv(output_prefix + "mode_share_per_group_ref.csv", index=False) + + # TODO: long format, which might be easier to plot + + # TODO groups also by distance group - groups = pd.concat(groups, axis=1) return AggregationResult(persons, trips, share.groupby("main_mode").sum(), groups=groups)