From 8132db9101f465e462c057830d670b2ce607b00a Mon Sep 17 00:00:00 2001 From: rakow Date: Mon, 16 Dec 2024 22:05:00 +0100 Subject: [PATCH] add household equivalent_size and corresponding calculations --- matsim/scenariogen/data/__init__.py | 6 ++++ matsim/scenariogen/data/formats/srv.py | 35 ++++++++++++---------- matsim/scenariogen/data/preparation.py | 41 ++++++++++++++++---------- 3 files changed, 51 insertions(+), 31 deletions(-) diff --git a/matsim/scenariogen/data/__init__.py b/matsim/scenariogen/data/__init__.py index 5c6229e..c04c272 100644 --- a/matsim/scenariogen/data/__init__.py +++ b/matsim/scenariogen/data/__init__.py @@ -355,6 +355,12 @@ class Household: location: str zone: str = pd.NA """ A detailed zone, which can be more accurate than location. """ + equivalent_size: float = pd.NA + """ Weighted household size, considering the number of persons aged 14 years or older. + May be used to calculate equivalized income for each person. + + See https://de.wikipedia.org/wiki/%C3%84quivalenzeinkommen + """ income: float = pd.NA geom: object = pd.NA diff --git a/matsim/scenariogen/data/formats/srv.py b/matsim/scenariogen/data/formats/srv.py index 8151049..a0c8378 100644 --- a/matsim/scenariogen/data/formats/srv.py +++ b/matsim/scenariogen/data/formats/srv.py @@ -6,6 +6,7 @@ import pandas as pd from .. 
import * +from ..preparation import equivalent_household_size # Has households, persons and trips INPUT_FILES = 3 @@ -79,6 +80,8 @@ def convert(data: tuple, regio=None): continue hh_id = str(int(h.HHNR)) + hh_persons = ps[ps.hh_id == hh_id] + hhs.append( Household( hh_id, @@ -89,11 +92,12 @@ def convert(data: tuple, regio=None): pint(h.V_ANZ_MOT125 + h.V_ANZ_MOPMOT + h.V_ANZ_SONST), SrV2018.parking_position(h.V_STELLPL1), SrV2018.economic_status(h.E_OEK_STATUS if "E_OEK_STATUS" in hh.keys() else -1, h.V_EINK, - ps[ps.hh_id == hh_id]), + hh_persons), SrV2018.household_type(h.E_HHTYP), SrV2018.region_type(h, regio, random_state), h.ST_CODE_NAME, zone=SrV2018.parse_zone(h), + equivalent_size=equivalent_household_size(hh_persons), income=SrV2018.income(h.V_EINK), ) ) @@ -205,10 +209,7 @@ def economic_status(status, eink, persons): if eink == 1 or eink == 2: return EconomicStatus.VERY_LOW - children = (persons.age < 14).sum() - rest = len(persons) - children - 1 - - w = 0.3 * children + 1 + 0.5 * rest + w = equivalent_household_size(persons) if eink == 3: if w < 1.3: @@ -418,26 +419,30 @@ def region_type(d, regio, random_state): @staticmethod def income(x): + # Original groups are + # 0, 500, 900, 1500, 2000, 2600, 3000, 3600, 4600, 5600 + + # Return the mean between each group if x == 1: - return 0 + return 250 elif x == 2: - return 500 + return 700 elif x == 3: - return 900 + return 1200 elif x == 4: - return 1500 + return 1750 elif x == 5: - return 2000 + return 2300 elif x == 6: - return 2600 + return 2800 elif x == 7: - return 3000 + return 3300 elif x == 8: - return 3600 + return 4100 elif x == 9: - return 4600 + return 5100 elif x == 10: - return 5600 + return 6100 return -1 diff --git a/matsim/scenariogen/data/preparation.py b/matsim/scenariogen/data/preparation.py index eb3c5a0..0788171 100644 --- a/matsim/scenariogen/data/preparation.py +++ b/matsim/scenariogen/data/preparation.py @@ -16,15 +16,6 @@ def prepare_persons(hh, pp, tt, augment=5, max_hh_size=5, 
core_weekday=False, re """ df = pp.join(hh, on="hh_id", lsuffix="hh_") - # Replace unknown income group - fill(df, "income", -1) - - # Replace unknown economic status - df["economic_status"] = df.apply( - lambda x: income_to_economic_status(x.income, df[df.hh_id == x.hh_id]) - if x.economic_status == EconomicStatus.UNKNOWN else x.economic_status, axis=1 - ) - # Augment data using p_weight if augment > 1: df = augment_persons(df, augment) @@ -143,12 +134,35 @@ def augment_persons(pp, factor=1, permute_age=0.5): # Filter invalid options return duplicated[check_age_employment(None, duplicated)] +def equivalent_household_size(persons): + """ Calculate equivalent household size, see https://stat.fi/meta/kas/ekvivalentti_tu_en.html + + :param persons: persons of one household + """ + children = (persons.age < 14).sum() + rest = len(persons) - children - 1 + + w = 0.3 * children + 1 + 0.5 * rest + + return w -def income_to_economic_status(income, persons): +def compute_economic_status(df): + """ Compute missing economic status value. + + :param df: joined table of persons and households + """ + + # Replace unknown economic status + df["economic_status"] = df.apply( + lambda x: income_to_economic_status(x.income, x.equivalent_size) + if x.economic_status == EconomicStatus.UNKNOWN else x.economic_status, axis=1 + ) + +def income_to_economic_status(income, w): """ Convert income to economic status :param income: income in Euro - :param persons: persons table + :param w: equivalent_size """ if income < 0: @@ -157,11 +171,6 @@ def income_to_economic_status(income, persons): # Calculated according to Srv 2018 # https://tu-dresden.de/bu/verkehr/ivs/srv/ressourcen/dateien/SrV2018_Tabellenbericht_Oberzentren_500TEW-_flach.pdf?lang=de - children = (persons.age < 14).sum() - rest = len(persons) - children - 1 - - w = 0.3 * children + 1 + 0.5 * rest - if income < 1500: if w < 1.3: return EconomicStatus.LOW