From 5da9b72a451d8ab765dd5ba31494feddadddf0a2 Mon Sep 17 00:00:00 2001 From: rakow Date: Tue, 26 Mar 2024 17:47:47 +0100 Subject: [PATCH] use income column in prepare persons --- matsim/scenariogen/data/formats/srv.py | 2 +- matsim/scenariogen/data/preparation.py | 104 +++++++++++++++++++++++-- 2 files changed, 99 insertions(+), 7 deletions(-) diff --git a/matsim/scenariogen/data/formats/srv.py b/matsim/scenariogen/data/formats/srv.py index 42dde72..8151049 100644 --- a/matsim/scenariogen/data/formats/srv.py +++ b/matsim/scenariogen/data/formats/srv.py @@ -446,7 +446,7 @@ def parse_zone(h, prefix=""): ob = parse_int_str(getattr(h, prefix + "OBERBEZIRK")) zone = pd.NA - if ob: + if ob and hasattr(h, prefix + "UNTERBEZIRK"): zone = ob ub = parse_int_str(getattr(h, prefix + "UNTERBEZIRK")) if ub: diff --git a/matsim/scenariogen/data/preparation.py b/matsim/scenariogen/data/preparation.py index ebb118d..2029a75 100644 --- a/matsim/scenariogen/data/preparation.py +++ b/matsim/scenariogen/data/preparation.py @@ -15,6 +15,15 @@ def prepare_persons(hh, pp, tt, augment=5, max_hh_size=5, core_weekday=False, re """ Cleans common data errors and fill missing values """ df = pp.join(hh, on="hh_id", lsuffix="hh_") + # Replace unknown income group + fill(df, "income", -1) + + # Replace unknown economic status + df["economic_status"] = df.apply( + lambda x: income_to_economic_status(x.income, df[df.hh_id == x.hh_id]) + if x.economic_status == EconomicStatus.UNKNOWN else x.economic_status, axis=1 + ) + # Augment data using p_weight if augment > 1: df = augment_persons(df, augment) @@ -31,9 +40,6 @@ def prepare_persons(hh, pp, tt, augment=5, max_hh_size=5, core_weekday=False, re df.loc[df.age < 6, "pt_abo_avail"] = Availability.NO fill(df, "pt_abo_avail", Availability.UNKNOWN) - # Replace unknown income group - fill(df, "economic_status", EconomicStatus.UNKNOWN) - # Large households are underrepresented and capped df.n_persons = np.minimum(df.n_persons, max_hh_size) @@ -64,19 +70,22 @@ def prepare_persons(hh, pp, tt, augment=5, max_hh_size=5, core_weekday=False, re return df + def bins_to_labels(bins): """ Convert bins to labels """ - res = ["%.0f - %.0f" % (bins[i], bins[i + 1]) for i in range(len(bins) - 1)] + res = ["%.0f - %.0f" % (bins[i], bins[i + 1]) for i in range(len(bins) - 1)] if bins[-1] == np.inf: res[-1] = "%.0f+" % bins[-2] return res + def cut(x, bins): """ Cut x into bind and return labels """ return pd.cut(x, bins, labels=bins_to_labels(bins), right=False) + def augment_persons(pp, factor=1, permute_age=0.5): """ Augment persons using p weight @@ -101,6 +110,88 @@ def augment_persons(pp, factor=1, permute_age=0.5): return duplicated[check_age_employment(None, duplicated)] +def income_to_economic_status(income, persons): + """ Convert income to economic status + + :param income: income in Euro + :param persons: persons table + """ + + if income < 0: + return EconomicStatus.UNKNOWN + + # Calculated according to Srv 2018 + # https://tu-dresden.de/bu/verkehr/ivs/srv/ressourcen/dateien/SrV2018_Tabellenbericht_Oberzentren_500TEW-_flach.pdf?lang=de + + children = (persons.age < 14).sum() + rest = len(persons) - children - 1 + + w = 0.3 * children + 1 + 0.5 * rest + + if income < 1500: + if w < 1.3: + return EconomicStatus.LOW + + return EconomicStatus.VERY_LOW + + elif income < 2000: + if w < 1.3: + return EconomicStatus.MEDIUM + elif w < 1.6: + return EconomicStatus.LOW + + return EconomicStatus.VERY_LOW + + elif income < 2600: + if w < 1.6: + return EconomicStatus.MEDIUM + elif w < 2.3: + return EconomicStatus.LOW + + return EconomicStatus.VERY_LOW + + elif income < 3000: + if w < 1.3: + return EconomicStatus.HIGH + elif w < 2.3: + return EconomicStatus.MEDIUM + elif w < 3.0: + return EconomicStatus.LOW + + return EconomicStatus.VERY_LOW + + elif income < 3600: + if w < 1.6: + return EconomicStatus.HIGH + elif w < 2.3: + return EconomicStatus.MEDIUM + elif w < 3.5: + return EconomicStatus.LOW + + return EconomicStatus.VERY_LOW + + elif income < 4600: + if w < 2.1: + return EconomicStatus.HIGH + elif w < 3.0: + return EconomicStatus.MEDIUM + + return EconomicStatus.LOW + + elif income < 5600: + if w < 1.3: + return EconomicStatus.VERY_HIGH + if w < 2.8: + return EconomicStatus.HIGH + return EconomicStatus.MEDIUM + + else: + if w < 2.5: + return EconomicStatus.VERY_HIGH + + return EconomicStatus.HIGH + + def prepare_trips(pp, trips, core_weekday=True): """ Create trip data frame """ @@ -286,7 +377,8 @@ def calc_commute(pp, tt): edu.groupby("p_id").agg(commute_dist=("gis_length", "mean"), weight=("t_weight", "max")) -def calc_needed_short_distance_trips(ref_trips: pd.DataFrame, sim_trips: pd.DataFrame, max_dist=1000) -> Tuple[float, int]: +def calc_needed_short_distance_trips(ref_trips: pd.DataFrame, sim_trips: pd.DataFrame, max_dist=1000) -> Tuple[ + float, int]: """ Calculate number of short distance trips needed to add to match required share """ target_share = float(ref_trips[ref_trips.gis_length < (max_dist / 1000)].t_weight.sum() / ref_trips.t_weight.sum()) @@ -296,4 +388,4 @@ def calc_needed_short_distance_trips(ref_trips: pd.DataFrame, sim_trips: pd.Data current_share = len(short_trips) / len(sim_trips) num_trips = (len(short_trips) - len(sim_trips) * target_share) / (target_share - 1) - return target_share, num_trips \ No newline at end of file + return target_share, num_trips