From 461c9a273e05d1751affdd99a561c1edb297cb80 Mon Sep 17 00:00:00 2001 From: rakow Date: Mon, 9 Oct 2023 13:45:51 +0200 Subject: [PATCH] methods for create ref data and some fixes --- matsim/scenariogen/data/__init__.py | 5 ++-- matsim/scenariogen/data/formats/srv.py | 3 +++ .../scenariogen/data/run_create_ref_data.py | 26 ++++++++++++------- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/matsim/scenariogen/data/__init__.py b/matsim/scenariogen/data/__init__.py index 38c2dfb..15da0be 100644 --- a/matsim/scenariogen/data/__init__.py +++ b/matsim/scenariogen/data/__init__.py @@ -42,13 +42,12 @@ def read_all(dirs: Union[str, List[str]], regio=None) -> Tuple[pd.DataFrame]: for d in dirs: - files = [] - for format in (srv, mid): + files = [] + # Collect all files for each format for f in os.scandir(d): - fp = f.name if not f.is_file(): continue if format.is_format(f): diff --git a/matsim/scenariogen/data/formats/srv.py b/matsim/scenariogen/data/formats/srv.py index 3ed4d99..9c5bf25 100644 --- a/matsim/scenariogen/data/formats/srv.py +++ b/matsim/scenariogen/data/formats/srv.py @@ -2,6 +2,9 @@ import os +import pandas as pd +import numpy as np + from .. import * # Has households, persons and trips diff --git a/matsim/scenariogen/data/run_create_ref_data.py b/matsim/scenariogen/data/run_create_ref_data.py index 2769f54..6043f12 100644 --- a/matsim/scenariogen/data/run_create_ref_data.py +++ b/matsim/scenariogen/data/run_create_ref_data.py @@ -65,21 +65,21 @@ def summarize_mode_usage(x, trips): def setup(parser: argparse.ArgumentParser): parser.add_argument("dirs", nargs="+", help="Directories with survey data") -def create(hh, persons, trips): - pass - # TODO +def default_person_filter(df): + """ Default person filter for reference data. """ + return df[df.present_on_day & (df.reporting_day <= 4)] -def main(args): - all_hh, all_persons, all_trips = read_all(args.dirs) + +def create(survey_dirs, transform_persons): + """ Create reference data from survey data. """ + + all_hh, all_persons, all_trips = read_all(survey_dirs) # Filter person ad trips for area df = all_persons.join(all_hh, on="hh_id") - # TODO: configurable filter - persons = df[df.present_on_day & - (df.reporting_day <= 4) & - (df.region_type == 1)] + persons = transform_persons(df) if transform_persons is not None else df # TODO: configurable attributes persons["age_group"] = pd.cut(persons.age, [0, 18, 66, np.inf], labels=["0 - 17", "18 - 65", "65+"], right=False) @@ -103,7 +103,8 @@ def main(args): aggr["share"] = aggr.n / aggr.n.sum() aggr["share"].fillna(0, inplace=True) - aggr = aggr.drop(columns=["n"]) + share = aggr.drop(columns=["n"]) + aggr = share.copy() # TODO: configurable output @@ -124,3 +125,8 @@ def main(args): aggr.to_csv("mode_users_ref.csv") # TODO: ref data per attribute ? + return persons, trips, share.groupby("main_mode").sum().drop(columns=["mean_dist"]) + + +def main(args): + create(args.dirs, default_person_filter)