From 89209d0796c8d1c7c8e411627ac3968e5b998f42 Mon Sep 17 00:00:00 2001 From: simei94 Date: Wed, 6 Dec 2023 11:53:03 -0600 Subject: [PATCH 01/16] mexico city activity + persons sampling from survey data --- matsim/scenariogen/__main__.py | 28 +- matsim/scenariogen/data/__init__.py | 18 +- matsim/scenariogen/data/formats/eodmx.py | 415 ++++++++++++++++++ matsim/scenariogen/data/preparation.py | 19 +- .../data/run_extract_activities.py | 3 +- requirements.txt | 8 +- 6 files changed, 463 insertions(+), 28 deletions(-) create mode 100644 matsim/scenariogen/data/formats/eodmx.py diff --git a/matsim/scenariogen/__main__.py b/matsim/scenariogen/__main__.py index 90ffdee..4de8c16 100644 --- a/matsim/scenariogen/__main__.py +++ b/matsim/scenariogen/__main__.py @@ -3,14 +3,14 @@ import argparse -from .data import run_create_ref_data +# from matsim.scenariogen.data import run_create_ref_data from .data import run_extract_activities -from .data import run_lookup_regiostar -from .network import run_collect_results -from .network import run_edges as sumo_edges -from .network import run_intersections as sumo_intersections -from .network import run_routes as sumo_routes -from .network import run_train_model +# from matsim.scenariogen.data import run_lookup_regiostar +# from matsim.scenariogen.network import run_collect_results +# from matsim.scenariogen.network import run_edges as sumo_edges +# from matsim.scenariogen.network import run_intersections as sumo_intersections +# from matsim.scenariogen.network import run_routes as sumo_routes +# from matsim.scenariogen.network import run_train_model def _add(subparsers, m): @@ -28,11 +28,11 @@ def main(): subparsers = parser.add_subparsers(title="Subcommands") - _add(subparsers, sumo_edges) - _add(subparsers, sumo_routes) - _add(subparsers, sumo_intersections) - _add(subparsers, run_train_model) - _add(subparsers, run_collect_results) + # _add(subparsers, sumo_edges) + # _add(subparsers, sumo_routes) + # _add(subparsers, sumo_intersections) + # _add(subparsers, run_train_model) + # _add(subparsers, run_collect_results) try: from .network import run_opt_freespeed @@ -41,8 +41,8 @@ def main(): print("Opt freespeed not available", e) _add(subparsers, run_extract_activities) - _add(subparsers, run_create_ref_data) - _add(subparsers, run_lookup_regiostar) + # _add(subparsers, run_create_ref_data) + # _add(subparsers, run_lookup_regiostar) args = parser.parse_args() diff --git a/matsim/scenariogen/data/__init__.py b/matsim/scenariogen/data/__init__.py index cd58298..b4b94c0 100644 --- a/matsim/scenariogen/data/__init__.py +++ b/matsim/scenariogen/data/__init__.py @@ -30,7 +30,7 @@ def _batch(iterable: list, max_batch_size: int): def read_all(dirs: Union[str, List[str]], regio=None) -> Tuple[pd.DataFrame]: """ Scan directories and read everything into one dataframe """ - from .formats import srv, mid + from .formats import srv, mid, eodmx hh = [] pp = [] @@ -42,7 +42,7 @@ def read_all(dirs: Union[str, List[str]], regio=None) -> Tuple[pd.DataFrame]: for d in dirs: - for format in (srv, mid): + for format in (srv, mid, eodmx): files = [] @@ -112,14 +112,16 @@ class HouseholdType(AutoNameLowerStrEnum): MULTI_W_CHILDREN = auto() MULTI_WO_CHILDREN = auto() SINGLE = auto() + UNKNOWN = auto() class EconomicStatus(AutoNameLowerStrEnum): - VERY_LOW = auto() + # VERY_LOW = auto() LOW = auto() - MEDIUM = auto() + MEDIUMLOW= auto() + MEDIUMHIGH= auto() HIGH = auto() - VERY_HIGH = auto() + # VERY_HIGH = auto() UNKNOWN = auto() @@ -262,6 +264,7 @@ class SourceDestinationGroup(AutoNameLowerStrEnum): OTHER_WORK = auto() WORK_OTHER = auto() OTHER_OTHER = auto() + VISIT_OTHER = auto() UNKNOWN = auto() @@ -327,6 +330,8 @@ class Trip: purpose: Purpose sd_group: SourceDestinationGroup valid: bool + dep_district: str = "" + arr_district: str = "" @dataclass @@ -340,3 +345,6 @@ class Activity: leg_dist: float leg_duration: float leg_mode: TripMode + dep_district: str = "" + arr_district: str = "" + departure: int = 0 diff --git a/matsim/scenariogen/data/formats/eodmx.py b/matsim/scenariogen/data/formats/eodmx.py new file mode 100644 index 0000000..4dd1e0e --- /dev/null +++ b/matsim/scenariogen/data/formats/eodmx.py @@ -0,0 +1,415 @@ +# -*- coding: utf-8 -*- +import math +import os +import random +import sys + +import pandas as pd +import numpy as np + +from .. import * + +# Has households (hogares), persons, trips and viviendas +INPUT_FILES = 4 + + +def is_format(f: os.DirEntry): + fp = f.name + if not f.path.endswith(".csv"): + return False + if "MiD" in f.name or "SrV" in f.name: + return False + + return "thogar" in fp or "tsdem" in fp or "tviaje" or "tvivienda" in fp + + +def read_raw(household_file, person_file, trip_file, vivienda_file): + """ Read the input files into format used by conversion """ + + hh = pd.read_csv(household_file, encoding="utf-8", delimiter=",", decimal=".", low_memory=False, quoting=2) + + p = pd.read_csv(person_file, encoding="utf-8", delimiter=",", decimal=".", low_memory=False, quoting=2) + + t = pd.read_csv(trip_file, encoding="utf-8", delimiter=",", decimal=".", low_memory=False, quoting=2) + + vv = pd.read_csv(vivienda_file, encoding="utf-8", delimiter=",", decimal=".", low_memory=False, quoting=2) + + return hh, p, t, vv + + +def convert(data: tuple, regio=None): + """ Convert eod data to standardized survey format """ + + (hh, pp, tt, vv) = data + + ps = [] + for p in pp.itertuples(): + ps.append( + Person( + p_id=str(int(p.id_soc)), + p_weight=p.factor, + hh_id=str(int(p.id_hog)), + age=int(p.edad), + gender=EOD2017.gender(int(p.sexo)), + employment=EOD2017.employment(None if math.isnan(p.p3_7) else int(p.p3_7), int(p.edad)), + # no data on if person is able to be mobile or not, so always mobile + restricted_mobility=False, + driving_license=EOD2017.driv_lic_avail(p.edad), + car_avail=EOD2017.veh_avail(pint(hh[hh.id_hog == p.id_hog].p2_1_1)), + bike_avail=EOD2017.veh_avail(pint(hh[hh.id_hog == p.id_hog].p2_1_3)), + # pt abo always available as there are no abos in cdmx / zmvm + pt_abo_avail=Availability.YES, + # always true as mobile persons without trips will be filtered out anyway (later) + mobile_on_day= True if p.p4_2 == 1 else False, + # same as mobile_on_day as there is no specific question on this + present_on_day= True if p.p4_2 == 1 else False, + # there is no data on which weekday it is, so we just assume it always is a tuesday = valid + reporting_day= 2, + n_trips= 0 if math.isnan(p.p5_4) else pint(p.p5_4) + ) + ) + + ps = pd.DataFrame(ps).set_index("p_id") + + hhs = [] + for h in hh.itertuples(): + + hh_id = str(pint(h.id_hog)) + hhs.append( + Household( + hh_id=hh_id, + h_weight=h.factor, + n_persons=pint(vv[vv.id_viv == h.id_viv].p1_1), + n_cars=pint(h.p2_1_1), + n_bikes=pint(h.p2_1_3), + n_other_vehicles=pint(h.p2_1_2), + # no data on parking position -> all NA + car_parking=ParkingPosition.NA, + economic_status=EOD2017.economic_status(pint(h.estrato)), + # no info on hh type given in data + type=HouseholdType.UNKNOWN, + region_type=EOD2017.region_type(str(pint(h.ent)).zfill(3)), + location="ZMVM", + ) + ) + + ts = [] + for t in tt.itertuples(): + + # only trips mo-thu are relevant + if t.p5_3 != 1: + continue + + hh_id = str(pint(ps[ps.index == str(pint(t.id_soc))].hh_id)) + # hh_id = "10" + departure = EOD2017.calc_minutes(t.p5_9_1, t.p5_9_2) + duration = EOD2017.calc_minutes(t.p5_10_1, t.p5_10_2) - departure + ts.append( + Trip( + t_id=hh_id + "_" + str(pint(t.id_soc)) + "_" + str(pint(t.id_via)), + t_weight=t.factor, + p_id=str(pint(t.id_soc)), + hh_id=hh_id, + n=pint(t.n_via), + # map "during the week" as tue, everything else to sat + day_of_week=2 if t.p5_3 == 1 else 6, + departure=departure, + duration=duration, + # this is a dummy trip length. It will be calculated later on, as it is not given as a survey var + gis_length=float(0), + main_mode=EOD2017.trip_mode(t), + purpose=EOD2017.trip_purpose(t.p5_13), + sd_group=EOD2017.determine_sdGroup(int(t.p5_6)), + # Trip is valid if length and duration are present + valid=(str(pint(t.dto_origen)).zfill(3) != "999" and str(pint(t.dto_dest)).zfill(3) != "999") and duration > 0, + dep_district=str(int(t.dto_origen)).zfill(3), + arr_district=str(int(t.dto_dest)).zfill(3) + ) + ) + + return pd.DataFrame(hhs).set_index("hh_id"), ps, pd.DataFrame(ts).set_index("t_id") + + +def pint(x): + """ Convert to positive integer""" + return max(0, int(x)) + + +class EOD2017: + """ Maps EOD data to standard format""" + + # Modal split CDMX hard coded + # https://semovi.cdmx.gob.mx/storage/app/media/diagnostico-tecnico-de-movilidad-pim.pdf p 41 + MODALSPLIT = { + # car + 'p5_14_01': 0.2199, + # colectivo / microbus + 'p5_14_02': 0.3678, + # taxi internet + 'p5_14_03': 0.0062, + #taxi street + 'p5_14_04': 0.0482, + #metro + 'p5_14_05': 0.2175, + # autobus rtp + 'p5_14_06': 0.0202, + # bike + 'p5_14_07': 0.0129, + #autobus + 'p5_14_08': 0.0321, + # motorbike + 'p5_14_09': 0.0087, + #trolebus + 'p5_14_10': 0.0074, + #metrobus + 'p5_14_11': 0.0409, + #train ligero + 'p5_14_12': 0.0057, + #suburban train + 'p5_14_13': 0.0080, + #walk + 'p5_14_14': 0.2324, + #mexicable + 'p5_14_15' : 0.00036, + # bike taxi + 'p5_14_16': 0.0021, + # mototaxi + 'p5_14_17': 0.0055, + # school transport + 'p5_14_18': 0.0076, + # transporte personal + 'p5_14_19': 0.00036, + # other: 0.0011 split into other, mexicable and transporte personal + 'p5_14_20': 0.00036, + } + + # @staticmethod + # def parking_position(x): + # + # if x == 1: + # return ParkingPosition.PRIVATE + # elif x == 2: + # return ParkingPosition.PUBLIC + # elif x == 3: + # return ParkingPosition.DIFFERENT + # + # return ParkingPosition.NA + + @staticmethod + def economic_status(status): + + if status == 1: + return EconomicStatus.LOW + elif status == 2: + return EconomicStatus.MEDIUMLOW + elif status == 3: + return EconomicStatus.MEDIUMHIGH + elif status == 4: + return EconomicStatus.HIGH + + return EconomicStatus.UNKNOWN + + @staticmethod + def gender(x): + if x == 1: + return Gender.M + elif x == 2: + return Gender.F + + return Gender.OTHER + + @staticmethod + def employment(x, age): + # no data available on part_time_jobs and trainees + if x == 1 or x == 2: + return Employment.JOB_FULL_TIME + elif x == 3: + return Employment.UNEMPLOYED + elif x == 4 and age < 18: + return Employment.SCHOOL + elif x == 4 and age >= 18: + return Employment.STUDENT + elif x == 5: + return Employment.HOMEMAKER + elif x == 6: + return Employment.RETIREE + elif x == 7: + return Employment.UNEMPLOYED + elif x == 8: + return Employment.UNEMPLOYED + # the dataset appears to fill all children until 11 years old with employment == "" or None + # the same goes for people with age 99 + # we do the distribution ourselves according to mexican law + elif (x is None) and age < 3: + return Employment.CHILD + elif (x is None) and 3 < age <= 11: + return Employment.SCHOOL + elif (x is None) and age == 99: + return Employment.RETIREE + + return Employment.OTHER + + @staticmethod + def calc_minutes(hours, minutes): + # time values of travel begin / end are required in minutes + + time = int(hours) * 60 + int(minutes) + + return time + + @staticmethod + def driv_lic_avail(age): + if age < 18: + return Availability.NO + elif age >= 18: + return Availability.YES + + return Availability.UNKNOWN + + @staticmethod + def veh_avail(x): + # 9 is "I do not know" for some questions + if 9 > x > 0: + return Availability.YES + elif x == 0: + return Availability.NO + + return Availability.UNKNOWN + + @staticmethod + def trip_mode(trip): + + modes = [] + modesCount = [] + + # the numerics in range() are the indexes of variables for transport mode usage of the survey + for i in range(18, 58): + if i % 2 == 0: + modes.append(pint(trip[i])) + if i % 2 != 0: + modesCount.append(0 if math.isnan(trip[i]) else pint(trip[i])) + + if modes.count(1) == 1: + # only one mode was used + return EOD2017.mode_from_var_name("p5_14_" + str(modes.index(1) + 1).zfill(2)) + + elif modes.count(1) > 1 and modesCount.count(max(modesCount)) == 1: + # set mode with heaviest usage as main mode + index = modesCount.index(max(modesCount)) + + if modes[index] != 1: + sys.exit("Error in dataset. The transport mode seems to have been used (p5_15_x = 1), but the pair variable (p5_14_x = 2) says it has not been used!") + + varName = "p5_14_" + str(index + 1).zfill(2) + + return EOD2017.mode_from_var_name(varName) + + elif modes.count(1) > 1 and modesCount.count(max(modesCount)) > 1: + # several modes are used with the same number of legs -> use general modal share of cdmx + varNames = [] + + indexWalk = 13 + + # if e.g. walk has 1 leg + metro has one leg: walk is access / egress mode -> walk must not be the main mode + if modes[indexWalk] == 1: + del modesCount[indexWalk] + modesCount.insert(indexWalk,0) + + j = 0 + maxCount = modesCount.count(max(modesCount)) + + for item in modesCount: + if item == max(modesCount): + varNames.append("p5_14_" + str(modesCount.index(item) + 1).zfill(2)) + modesCount.insert(modesCount.index(item),0) + modesCount.remove(item) + j = j+1 + + if j == maxCount: + break + + weights = [] + + for var in varNames: + weights.append(EOD2017.MODALSPLIT[var]) + + # ramdom.choices() appears to produce a list with 1 single element. hence the [0] + randomVarName = random.choices(varNames, weights)[0] + + return EOD2017.mode_from_var_name(randomVarName) + + + @staticmethod + def mode_from_var_name(varName): + if varName == "p5_14_01": + return TripMode.CAR + elif (varName == "p5_14_02" or varName == "p5_14_03" or varName == "p5_14_04" or varName == "p5_14_05" or varName == "p5_14_06" or + varName == "p5_14_08" or varName == "p5_14_10" or varName == "p5_14_11" or varName == "p5_14_12" or varName == "p5_14_13" or + varName == "p5_14_15" or varName == "p5_14_16" or varName == "p5_14_17"): + return TripMode.PT + elif varName == "p5_14_07": + return TripMode.BIKE + elif varName == "p5_14_09": + return TripMode.MOTORCYCLE + elif varName == "p5_14_14": + return TripMode.WALK + elif varName == "p5_14_18" or varName == "p5_14_19": + return TripMode.RIDE + return TripMode.OTHER + + @staticmethod + def trip_purpose(x): + + if x == 1: + return Purpose.HOME + elif x == 2: + return Purpose.WORK + elif x == 3: + return Purpose.EDU + elif x == 4: + return Purpose.SHOPPING + elif x == 5: + return Purpose.LEISURE + elif x == 6: + return Purpose.TRANSPORT + elif x == 7 or x == 8: + return Purpose.PERSONAL_BUSINESS + elif x == 9: + return Purpose.LEISURE + elif x == 10: + Purpose.OTHER + + return Purpose.OTHER + + @staticmethod + def region_type(ent): + if ent == "009": + # cdmx + return 1 + elif ent == "013" or ent == "015": + # outside of cdmx = hidalgo or edomex + return 3 + + return 0 + + @staticmethod + def determine_sdGroup(x): + # only destination will be assigned as it is needed for the first act of the day + # the following is based on assumptions, as e.g. not all trips from / to a shopping center do have the purpose "shopping" + # but also could be "work" or other purposes + if x == 1: + return SourceDestinationGroup.HOME_OTHER + if x == 2: + return SourceDestinationGroup.EDU_HOME + if x == 3 or x == 4 or x == 12: + return SourceDestinationGroup.WORK_OTHER + if x == 5: + return SourceDestinationGroup.SHOP_HOME + if x == 6 or x == 9 or x == 10 or x == 14: + return SourceDestinationGroup.LEISURE_HOME + if x == 7: + return SourceDestinationGroup.VISIT_OTHER + + return SourceDestinationGroup.OTHER_OTHER + + diff --git a/matsim/scenariogen/data/preparation.py b/matsim/scenariogen/data/preparation.py index 2caa432..a7c6c6f 100644 --- a/matsim/scenariogen/data/preparation.py +++ b/matsim/scenariogen/data/preparation.py @@ -17,11 +17,13 @@ def prepare_persons(hh, pp, tt, augment=5, max_hh_size=5, core_weekday=False, re # Augment data using p_weight if augment > 1: - df = augment_persons(df, augment) + # in the cdmx case we dont need to do p_weight * augment = 5 (see method augment_persons) + # the sum of all person weights already sums up to the total (more or less) no of inhab. of ZMVM (~21 mio) + df = augment_persons(df) df = shuffle(df, random_state=0).reset_index(drop=True) # set car avail - df.loc[df.age < 17, "driving_license"] = Availability.NO + df.loc[df.age < 18, "driving_license"] = Availability.NO _fill(df, "driving_license", Availability.UNKNOWN) df["car_avail"] = (df.n_cars > 0) & (df.driving_license == Availability.YES) @@ -57,6 +59,7 @@ def prepare_persons(hh, pp, tt, augment=5, max_hh_size=5, core_weekday=False, re # Regions other than 1 and 3 are massively under-represented # Regions are reduced to 1 (Berlin) and 3 (Outside Berlin) + # for the mexico city metropolitan area dataset this is already done in previous steps: 1 mexico city, 3 outside of mexico city df.loc[df.region_type != 1, "region_type"] = 3 # Maximum age is 99, also to be able to encode age with two tokens @@ -161,11 +164,11 @@ def a_id(t_i): return "%s_%d" % (p.Index, t_i) if len(trips) == 0: - acts.append(Activity(a_id(0), p.Index, 0, Purpose.HOME, 1440, 0, 0, TripMode.OTHER)) + acts.append(Activity(a_id(0), p.Index, 0, Purpose.HOME, 1440, 0, 0, TripMode.OTHER, "999", "999", 0)) else: acts.append( Activity(a_id(0), p.Index, 0, trips.iloc[0].sd_group.source(), trips.iloc[0].departure, 0, 0, - TripMode.OTHER)) + TripMode.OTHER, trips.iloc[0].dep_district, trips.iloc[0].arr_district, trips.iloc[0].departure)) for i in range(len(trips) - 1): t0 = trips.iloc[i] @@ -177,7 +180,7 @@ def a_id(t_i): valid = False acts.append(Activity(a_id(i + 1), p.Index, i + 1, t0.purpose, - duration, t0.gis_length, t0.duration, t0.main_mode)) + duration, t0.gis_length, t0.duration, t0.main_mode, t0.dep_district, t0.arr_district, t0.departure)) if len(trips) > 1: i += 1 @@ -189,15 +192,17 @@ def a_id(t_i): # Duration is set to rest of day acts.append( - Activity(a_id(i + 1), p.Index, i + 1, tl.purpose, 1440, tl.gis_length, tl.duration, tl.main_mode)) + Activity(a_id(i + 1), p.Index, i + 1, tl.purpose + , 1440, tl.gis_length, tl.duration, tl.main_mode, tl.dep_district, tl.arr_district, tl.departure)) if valid: res.extend(acts) return res - with mp.Pool(8) as pool: + with mp.Pool(16) as pool: docs = pool.map(convert, np.array_split(all_persons, 16)) + # docs = pool.map(convert, np.array_split(all_persons, 64)) result = functools.reduce(lambda a, b: a + b, docs) activities = pd.DataFrame(result).set_index("a_id") diff --git a/matsim/scenariogen/data/run_extract_activities.py b/matsim/scenariogen/data/run_extract_activities.py index ee2f646..4ac2c7a 100644 --- a/matsim/scenariogen/data/run_extract_activities.py +++ b/matsim/scenariogen/data/run_extract_activities.py @@ -25,10 +25,11 @@ def main(args): print("Written survey csvs") - df = prepare_persons(hh, persons, trips, augment=5, core_weekday=True, remove_with_invalid_trips=True) + df = prepare_persons(hh, persons, trips, augment=5, max_hh_size=7, core_weekday=True, remove_with_invalid_trips=False) df.to_csv(args.output + "-persons.csv", index_label="idx") print("Created %d synthetics persons" % len(df)) activities = create_activities(df, trips, include_person_context=False, cut_groups=False) + print("About to write %d activities to csv, this might take a while." % len(activities)) activities.to_csv(args.output + "-activities.csv", index=False) diff --git a/requirements.txt b/requirements.txt index 9cb49ac..02acd3b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,4 +3,10 @@ geopandas>=0.6.0 shapely>=1.8.0 xopen>=1.7.0 protobuf>=3.20.0 -optuna>=3.3.0 \ No newline at end of file +optuna>=3.3.0 +numpy~=1.26.2 +pyproj~=3.6.1 +scikit-learn~=1.3.2 +tqdm~=4.66.1 +PyYAML~=6.0 +setuptools~=63.2.0 \ No newline at end of file From f1856fe55af1a1e2a26417396729ac90ba48d613 Mon Sep 17 00:00:00 2001 From: simei94 Date: Wed, 6 Dec 2023 12:24:38 -0600 Subject: [PATCH 02/16] add mexico city metropolitan area format --- matsim/scenariogen/data/formats/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/matsim/scenariogen/data/formats/__init__.py b/matsim/scenariogen/data/formats/__init__.py index 9d826bc..5c4b392 100644 --- a/matsim/scenariogen/data/formats/__init__.py +++ b/matsim/scenariogen/data/formats/__init__.py @@ -1,3 +1,3 @@ # -*- coding: utf-8 -*- -__all__ = ["srv", "mid"] \ No newline at end of file +__all__ = ["srv", "mid", "eodmx"] \ No newline at end of file From 271a7df0b4314036ab20d8200c0a9e15305ad36e Mon Sep 17 00:00:00 2001 From: simei94 Date: Wed, 6 Dec 2023 13:45:31 -0600 Subject: [PATCH 03/16] hotfix --- matsim/scenariogen/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/matsim/scenariogen/__main__.py b/matsim/scenariogen/__main__.py index 4de8c16..f42aa31 100644 --- a/matsim/scenariogen/__main__.py +++ b/matsim/scenariogen/__main__.py @@ -4,7 +4,7 @@ import argparse # from matsim.scenariogen.data import run_create_ref_data -from .data import run_extract_activities +from data import run_extract_activities # from matsim.scenariogen.data import run_lookup_regiostar # from matsim.scenariogen.network import run_collect_results # from matsim.scenariogen.network import run_edges as sumo_edges From c2a02bd8a1fe380eaf59711435434a3b99af9c17 Mon Sep 17 00:00:00 2001 From: simei94 Date: Thu, 7 Dec 2023 15:40:25 -0600 Subject: [PATCH 04/16] delete non-necessary code segment --- matsim/scenariogen/data/preparation.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/matsim/scenariogen/data/preparation.py b/matsim/scenariogen/data/preparation.py index a7c6c6f..884bd3b 100644 --- a/matsim/scenariogen/data/preparation.py +++ b/matsim/scenariogen/data/preparation.py @@ -17,7 +17,7 @@ def prepare_persons(hh, pp, tt, augment=5, max_hh_size=5, core_weekday=False, re # Augment data using p_weight if augment > 1: - # in the cdmx case we dont need to do p_weight * augment = 5 (see method augment_persons) + # in the cdmx case we do not need to do p_weight * augment = 5 (see method augment_persons) # the sum of all person weights already sums up to the total (more or less) no of inhab. of ZMVM (~21 mio) df = augment_persons(df) df = shuffle(df, random_state=0).reset_index(drop=True) @@ -202,7 +202,6 @@ def a_id(t_i): with mp.Pool(16) as pool: docs = pool.map(convert, np.array_split(all_persons, 16)) - # docs = pool.map(convert, np.array_split(all_persons, 64)) result = functools.reduce(lambda a, b: a + b, docs) activities = pd.DataFrame(result).set_index("a_id") From b6c8ebe23879be37cd51d6c1e63824e2e1a4d588 Mon Sep 17 00:00:00 2001 From: simei94 Date: Mon, 11 Dec 2023 19:15:11 -0600 Subject: [PATCH 05/16] some added data + more detailed trip validation --- matsim/scenariogen/data/__init__.py | 2 ++ matsim/scenariogen/data/formats/eodmx.py | 35 ++++++++++++++---------- matsim/scenariogen/data/preparation.py | 15 +++++----- 3 files changed, 30 insertions(+), 22 deletions(-) diff --git a/matsim/scenariogen/data/__init__.py b/matsim/scenariogen/data/__init__.py index b4b94c0..6c45d95 100644 --- a/matsim/scenariogen/data/__init__.py +++ b/matsim/scenariogen/data/__init__.py @@ -332,6 +332,7 @@ class Trip: valid: bool dep_district: str = "" arr_district: str = "" + arrival: int = 0 @dataclass @@ -348,3 +349,4 @@ class Activity: dep_district: str = "" arr_district: str = "" departure: int = 0 + start_time: int = 0 diff --git a/matsim/scenariogen/data/formats/eodmx.py b/matsim/scenariogen/data/formats/eodmx.py index 4dd1e0e..a329f43 100644 --- a/matsim/scenariogen/data/formats/eodmx.py +++ b/matsim/scenariogen/data/formats/eodmx.py @@ -103,7 +103,8 @@ def convert(data: tuple, regio=None): hh_id = str(pint(ps[ps.index == str(pint(t.id_soc))].hh_id)) # hh_id = "10" departure = EOD2017.calc_minutes(t.p5_9_1, t.p5_9_2) - duration = EOD2017.calc_minutes(t.p5_10_1, t.p5_10_2) - departure + arrival = EOD2017.calc_minutes(t.p5_10_1, t.p5_10_2) + duration = arrival - departure ts.append( Trip( t_id=hh_id + "_" + str(pint(t.id_soc)) + "_" + str(pint(t.id_via)), @@ -121,9 +122,10 @@ def convert(data: tuple, regio=None): purpose=EOD2017.trip_purpose(t.p5_13), sd_group=EOD2017.determine_sdGroup(int(t.p5_6)), # Trip is valid if length and duration are present - valid=(str(pint(t.dto_origen)).zfill(3) != "999" and str(pint(t.dto_dest)).zfill(3) != "999") and duration > 0, + valid=EOD2017.trip_valid(str(pint(t.dto_origen)).zfill(3), str(pint(t.dto_dest)).zfill(3), duration), dep_district=str(int(t.dto_origen)).zfill(3), - arr_district=str(int(t.dto_dest)).zfill(3) + arr_district=str(int(t.dto_dest)).zfill(3), + arrival=arrival ) ) @@ -183,18 +185,6 @@ class EOD2017: 'p5_14_20': 0.00036, } - # @staticmethod - # def parking_position(x): - # - # if x == 1: - # return ParkingPosition.PRIVATE - # elif x == 2: - # return ParkingPosition.PUBLIC - # elif x == 3: - # return ParkingPosition.DIFFERENT - # - # return ParkingPosition.NA - @staticmethod def economic_status(status): @@ -412,4 +402,19 @@ def determine_sdGroup(x): return SourceDestinationGroup.OTHER_OTHER + @staticmethod + def trip_valid(dep_district, arr_district, duration): + # validation: trip must not be from or to unknown district, nor have a duration of 0 or lower (invalid) + if dep_district == "999": + return False + if arr_district == "999": + return False + if duration <= 0: + return False + # it is assumed that trips to another district take at least 10 minutes. This filters out a share of about 0.4% (205314 / 49144928) of trips. + if dep_district != arr_district and duration < 10: + return False + + return True + diff --git a/matsim/scenariogen/data/preparation.py b/matsim/scenariogen/data/preparation.py index 884bd3b..44199d5 100644 --- a/matsim/scenariogen/data/preparation.py +++ b/matsim/scenariogen/data/preparation.py @@ -164,11 +164,12 @@ def a_id(t_i): return "%s_%d" % (p.Index, t_i) if len(trips) == 0: - acts.append(Activity(a_id(0), p.Index, 0, Purpose.HOME, 1440, 0, 0, TripMode.OTHER, "999", "999", 0)) + acts.append(Activity(a_id=a_id(0), p_id=p.Index, n=0, type=Purpose.HOME, duration=1440, leg_dist=0, + leg_duration=0, leg_mode=TripMode.OTHER, dep_district="999", arr_district="999", departure=0, start_time=0)) else: acts.append( - Activity(a_id(0), p.Index, 0, trips.iloc[0].sd_group.source(), trips.iloc[0].departure, 0, 0, - TripMode.OTHER, trips.iloc[0].dep_district, trips.iloc[0].arr_district, trips.iloc[0].departure)) + Activity(a_id=a_id(0), p_id=p.Index, n=0, type=trips.iloc[0].sd_group.source(), duration=trips.iloc[0].departure, leg_dist=0, leg_duration=0, + leg_mode=TripMode.OTHER, dep_district=trips.iloc[0].dep_district, arr_district=trips.iloc[0].arr_district, departure=trips.iloc[0].departure, start_time=0)) for i in range(len(trips) - 1): t0 = trips.iloc[i] @@ -179,8 +180,8 @@ def a_id(t_i): if duration < 0 or t0.gis_length < 0: valid = False - acts.append(Activity(a_id(i + 1), p.Index, i + 1, t0.purpose, - duration, t0.gis_length, t0.duration, t0.main_mode, t0.dep_district, t0.arr_district, t0.departure)) + acts.append(Activity(a_id=a_id(i + 1), p_id=p.Index, n=i + 1, type=t0.purpose, duration=duration, leg_dist=t0.gis_length, + leg_duration=t0.duration, leg_mode=t0.main_mode, dep_district=t0.dep_district, arr_district=t0.arr_district, departure=t0.departure, start_time=t0.arrival)) if len(trips) > 1: i += 1 @@ -192,8 +193,8 @@ def a_id(t_i): # Duration is set to rest of day acts.append( - Activity(a_id(i + 1), p.Index, i + 1, tl.purpose - , 1440, tl.gis_length, tl.duration, tl.main_mode, tl.dep_district, tl.arr_district, tl.departure)) + Activity(a_id=a_id(i + 1), p_id=p.Index, n=i + 1, type=tl.purpose, duration=1440 - tl.arrival, leg_dist=tl.gis_length, + leg_duration=tl.duration, leg_mode=tl.main_mode, dep_district=tl.dep_district, arr_district=tl.arr_district, departure=tl.departure, start_time=tl.arrival, end_time=1440)) if valid: res.extend(acts) From b6c15750eca16c923811ca8405b3b7547b2c7675 Mon Sep 17 00:00:00 2001 From: simei94 Date: Tue, 12 Dec 2023 10:30:53 -0600 Subject: [PATCH 06/16] delete end_time --- matsim/scenariogen/data/preparation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/matsim/scenariogen/data/preparation.py b/matsim/scenariogen/data/preparation.py index 44199d5..ecaf953 100644 --- a/matsim/scenariogen/data/preparation.py +++ b/matsim/scenariogen/data/preparation.py @@ -194,7 +194,7 @@ def a_id(t_i): # Duration is set to rest of day acts.append( Activity(a_id=a_id(i + 1), p_id=p.Index, n=i + 1, type=tl.purpose, duration=1440 - tl.arrival, leg_dist=tl.gis_length, - leg_duration=tl.duration, leg_mode=tl.main_mode, dep_district=tl.dep_district, arr_district=tl.arr_district, departure=tl.departure, start_time=tl.arrival, end_time=1440)) + leg_duration=tl.duration, leg_mode=tl.main_mode, dep_district=tl.dep_district, arr_district=tl.arr_district, departure=tl.departure, start_time=tl.arrival)) if valid: res.extend(acts) From 38c95e6a8f60acb46e89a50f790a20042a906d42 Mon Sep 17 00:00:00 2001 From: simei94 Date: Fri, 15 Dec 2023 15:23:19 -0600 Subject: [PATCH 07/16] correction of id assignment --- matsim/scenariogen/data/preparation.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/matsim/scenariogen/data/preparation.py b/matsim/scenariogen/data/preparation.py index ecaf953..25c2e48 100644 --- a/matsim/scenariogen/data/preparation.py +++ b/matsim/scenariogen/data/preparation.py @@ -142,7 +142,8 @@ def convert(persons): if "present_on_day" in persons.keys() and not p.present_on_day: continue - p_id = p.p_id if "p_id" in persons.keys() else p.Index + # p_id = p.p_id if "p_id" in persons.keys() else p.Index + p_id = p.p_id try: # trips = tt[tt.p_id == p_id] @@ -164,11 +165,11 @@ def a_id(t_i): return "%s_%d" % (p.Index, t_i) if len(trips) == 0: - acts.append(Activity(a_id=a_id(0), p_id=p.Index, n=0, type=Purpose.HOME, duration=1440, leg_dist=0, + acts.append(Activity(a_id=a_id(0), p_id=p_id, n=0, type=Purpose.HOME, duration=1440, leg_dist=0, leg_duration=0, leg_mode=TripMode.OTHER, dep_district="999", arr_district="999", departure=0, start_time=0)) else: acts.append( - Activity(a_id=a_id(0), p_id=p.Index, n=0, type=trips.iloc[0].sd_group.source(), duration=trips.iloc[0].departure, leg_dist=0, leg_duration=0, + Activity(a_id=a_id(0), p_id=p_id, n=0, type=trips.iloc[0].sd_group.source(), duration=trips.iloc[0].departure, leg_dist=0, leg_duration=0, leg_mode=TripMode.OTHER, dep_district=trips.iloc[0].dep_district, arr_district=trips.iloc[0].arr_district, departure=trips.iloc[0].departure, start_time=0)) for i in range(len(trips) - 1): @@ -180,7 +181,7 @@ def a_id(t_i): if duration < 0 or t0.gis_length < 0: valid = False - acts.append(Activity(a_id=a_id(i + 1), p_id=p.Index, n=i + 1, type=t0.purpose, duration=duration, leg_dist=t0.gis_length, + acts.append(Activity(a_id=a_id(i + 1), p_id=p_id, n=i + 1, type=t0.purpose, duration=duration, leg_dist=t0.gis_length, leg_duration=t0.duration, leg_mode=t0.main_mode, dep_district=t0.dep_district, arr_district=t0.arr_district, departure=t0.departure, start_time=t0.arrival)) if len(trips) > 1: @@ -193,7 +194,7 @@ def a_id(t_i): # Duration is set to rest of day acts.append( - Activity(a_id=a_id(i + 1), p_id=p.Index, n=i + 1, type=tl.purpose, duration=1440 - tl.arrival, leg_dist=tl.gis_length, + Activity(a_id=a_id(i + 1), p_id=p_id, n=i + 1, type=tl.purpose, duration=1440 - tl.arrival, leg_dist=tl.gis_length, leg_duration=tl.duration, leg_mode=tl.main_mode, dep_district=tl.dep_district, arr_district=tl.arr_district, departure=tl.departure, start_time=tl.arrival)) if valid: From 7db47b0d62d72e4923dc4f00f02d1721f6008949 Mon Sep 17 00:00:00 2001 From: simei94 Date: Mon, 18 Dec 2023 16:10:23 -0600 Subject: [PATCH 08/16] comments + corrections of data handling --- matsim/scenariogen/data/__init__.py | 9 +++++---- matsim/scenariogen/data/formats/eodmx.py | 5 ++--- matsim/scenariogen/data/preparation.py | 16 ++++++++-------- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/matsim/scenariogen/data/__init__.py b/matsim/scenariogen/data/__init__.py index 6c45d95..93dc1d1 100644 --- a/matsim/scenariogen/data/__init__.py +++ b/matsim/scenariogen/data/__init__.py @@ -338,15 +338,16 @@ class Trip: @dataclass class Activity: """ Activity information (including leg) """ + # all leg information relates to the leg leading to the activity a_id: str - p_id: str + p_index: str n: int type: Purpose duration: int leg_dist: float leg_duration: float leg_mode: TripMode - dep_district: str = "" - arr_district: str = "" - departure: int = 0 + leg_dep_district: str = "" + leg_arr_district: str = "" + leg_departure: int = 0 start_time: int = 0 diff --git a/matsim/scenariogen/data/formats/eodmx.py b/matsim/scenariogen/data/formats/eodmx.py index a329f43..9209052 100644 --- a/matsim/scenariogen/data/formats/eodmx.py +++ b/matsim/scenariogen/data/formats/eodmx.py @@ -101,13 +101,12 @@ def convert(data: tuple, regio=None): continue hh_id = str(pint(ps[ps.index == str(pint(t.id_soc))].hh_id)) - # hh_id = "10" departure = EOD2017.calc_minutes(t.p5_9_1, t.p5_9_2) arrival = EOD2017.calc_minutes(t.p5_10_1, t.p5_10_2) duration = arrival - departure ts.append( Trip( - t_id=hh_id + "_" + str(pint(t.id_soc)) + "_" + str(pint(t.id_via)), + t_id=str(pint(t.id_via)), t_weight=t.factor, p_id=str(pint(t.id_soc)), hh_id=hh_id, @@ -384,7 +383,7 @@ def region_type(ent): @staticmethod def determine_sdGroup(x): - # only destination will be assigned as it is needed for the first act of the day + # here, only source will be assigned as it is needed for the first act of the day # the following is based on assumptions, as e.g. not all trips from / to a shopping center do have the purpose "shopping" # but also could be "work" or other purposes if x == 1: diff --git a/matsim/scenariogen/data/preparation.py b/matsim/scenariogen/data/preparation.py index 25c2e48..77a289e 100644 --- a/matsim/scenariogen/data/preparation.py +++ b/matsim/scenariogen/data/preparation.py @@ -165,12 +165,12 @@ def a_id(t_i): return "%s_%d" % (p.Index, t_i) if len(trips) == 0: - acts.append(Activity(a_id=a_id(0), p_id=p_id, n=0, type=Purpose.HOME, duration=1440, leg_dist=0, - leg_duration=0, leg_mode=TripMode.OTHER, dep_district="999", arr_district="999", departure=0, start_time=0)) + acts.append(Activity(a_id=a_id(0), p_index=p.Index, n=0, type=Purpose.HOME, duration=1440, leg_dist=0, + leg_duration=0, leg_mode=TripMode.OTHER, leg_dep_district="999", leg_arr_district="999", leg_departure=0, start_time=0)) else: acts.append( - Activity(a_id=a_id(0), p_id=p_id, n=0, type=trips.iloc[0].sd_group.source(), duration=trips.iloc[0].departure, leg_dist=0, leg_duration=0, - leg_mode=TripMode.OTHER, dep_district=trips.iloc[0].dep_district, arr_district=trips.iloc[0].arr_district, departure=trips.iloc[0].departure, start_time=0)) + Activity(a_id=a_id(0), p_index=p.Index, n=0, type=trips.iloc[0].sd_group.source(), duration=trips.iloc[0].departure, leg_dist=0, leg_duration=0, + leg_mode=TripMode.OTHER, leg_dep_district=trips.iloc[0].dep_district, leg_arr_district=trips.iloc[0].dep_district, leg_departure=0, start_time=0)) for i in range(len(trips) - 1): t0 = trips.iloc[i] @@ -181,8 +181,8 @@ def a_id(t_i): if duration < 0 or t0.gis_length < 0: valid = False - acts.append(Activity(a_id=a_id(i + 1), p_id=p_id, n=i + 1, type=t0.purpose, duration=duration, leg_dist=t0.gis_length, - leg_duration=t0.duration, leg_mode=t0.main_mode, dep_district=t0.dep_district, arr_district=t0.arr_district, departure=t0.departure, start_time=t0.arrival)) + acts.append(Activity(a_id=a_id(i + 1), p_index=p.Index, n=i + 1, type=t0.purpose, duration=duration, leg_dist=t0.gis_length, + leg_duration=t0.duration, leg_mode=t0.main_mode, leg_dep_district=t0.dep_district, leg_arr_district=t0.arr_district, leg_departure=t0.departure, start_time=t0.arrival)) if len(trips) > 1: i += 1 @@ -194,8 +194,8 @@ def a_id(t_i): # Duration is set to rest of day acts.append( - Activity(a_id=a_id(i + 1), p_id=p_id, n=i + 1, type=tl.purpose, duration=1440 - tl.arrival, leg_dist=tl.gis_length, - leg_duration=tl.duration, leg_mode=tl.main_mode, dep_district=tl.dep_district, arr_district=tl.arr_district, departure=tl.departure, start_time=tl.arrival)) + Activity(a_id=a_id(i + 1), p_index=p.Index, n=i + 1, type=tl.purpose, duration=1440 - tl.arrival, leg_dist=tl.gis_length, + leg_duration=tl.duration, leg_mode=tl.main_mode, leg_dep_district=tl.dep_district, leg_arr_district=tl.arr_district, leg_departure=tl.departure, start_time=tl.arrival)) if valid: res.extend(acts) From a513794b20e8b735a8da5e6d442705d51bbe9466 Mon Sep 17 00:00:00 2001 From: simei94 Date: Mon, 18 Dec 2023 17:47:21 -0600 Subject: [PATCH 09/16] add column --- matsim/scenariogen/data/__init__.py | 1 + matsim/scenariogen/data/formats/eodmx.py | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/matsim/scenariogen/data/__init__.py b/matsim/scenariogen/data/__init__.py index 93dc1d1..0839371 100644 --- a/matsim/scenariogen/data/__init__.py +++ b/matsim/scenariogen/data/__init__.py @@ -312,6 +312,7 @@ class Person: present_on_day: bool reporting_day: int n_trips: int + home_district: str = "" @dataclass diff --git a/matsim/scenariogen/data/formats/eodmx.py b/matsim/scenariogen/data/formats/eodmx.py index 9209052..eaba8cf 100644 --- a/matsim/scenariogen/data/formats/eodmx.py +++ b/matsim/scenariogen/data/formats/eodmx.py @@ -64,8 +64,9 @@ def convert(data: tuple, regio=None): # same as mobile_on_day as there is no specific question on this present_on_day= True if p.p4_2 == 1 else False, # there is no data on which weekday it is, so we just assume it always is a tuesday = valid - reporting_day= 2, - n_trips= 0 if math.isnan(p.p5_4) else pint(p.p5_4) + reporting_day=2, + n_trips=0 if math.isnan(p.p5_4) else pint(p.p5_4), + home_district=str(int(p.distrito)).zfill(3) ) ) From 617de189c3ca20a2cae78da9fbc3c22586b88b5c Mon Sep 17 00:00:00 2001 From: simei94 Date: Tue, 19 Dec 2023 17:22:32 -0600 Subject: [PATCH 10/16] do not save persons which have only invalid trips --- matsim/scenariogen/data/preparation.py | 24 ++++++++++++------- .../data/run_extract_activities.py | 10 ++++---- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/matsim/scenariogen/data/preparation.py b/matsim/scenariogen/data/preparation.py index 77a289e..74da243 100644 --- a/matsim/scenariogen/data/preparation.py +++ b/matsim/scenariogen/data/preparation.py @@ -153,6 +153,7 @@ def convert(persons): trips = tt.iloc[:0, :].copy() if (~trips.valid).any(): + persons.drop(p.Index, inplace=True) continue if core_weekday: @@ -162,14 +163,14 @@ def convert(persons): # id generator def a_id(t_i): - return "%s_%d" % (p.Index, t_i) + return "%s_%d" % (p.idx, t_i) if len(trips) == 0: - acts.append(Activity(a_id=a_id(0), p_index=p.Index, n=0, type=Purpose.HOME, duration=1440, leg_dist=0, + acts.append(Activity(a_id=a_id(0), p_index=p.idx, n=0, type=Purpose.HOME, duration=1440, leg_dist=0, leg_duration=0, leg_mode=TripMode.OTHER, leg_dep_district="999", leg_arr_district="999", leg_departure=0, start_time=0)) else: acts.append( - Activity(a_id=a_id(0), p_index=p.Index, n=0, type=trips.iloc[0].sd_group.source(), duration=trips.iloc[0].departure, leg_dist=0, leg_duration=0, + Activity(a_id=a_id(0), p_index=p.idx, n=0, type=trips.iloc[0].sd_group.source(), duration=trips.iloc[0].departure, leg_dist=0, leg_duration=0, leg_mode=TripMode.OTHER, leg_dep_district=trips.iloc[0].dep_district, leg_arr_district=trips.iloc[0].dep_district, leg_departure=0, start_time=0)) for i in range(len(trips) - 1): @@ -181,7 +182,7 @@ def a_id(t_i): if duration < 0 or t0.gis_length < 0: valid = False - acts.append(Activity(a_id=a_id(i + 1), p_index=p.Index, n=i + 1, type=t0.purpose, duration=duration, leg_dist=t0.gis_length, + acts.append(Activity(a_id=a_id(i + 1), p_index=p.idx, n=i + 1, type=t0.purpose, duration=duration, leg_dist=t0.gis_length, leg_duration=t0.duration, leg_mode=t0.main_mode, leg_dep_district=t0.dep_district, leg_arr_district=t0.arr_district, leg_departure=t0.departure, start_time=t0.arrival)) if len(trips) > 1: @@ -194,17 +195,22 @@ def a_id(t_i): # Duration is set to rest of day acts.append( - Activity(a_id=a_id(i + 1), p_index=p.Index, n=i + 1, type=tl.purpose, duration=1440 - tl.arrival, leg_dist=tl.gis_length, + Activity(a_id=a_id(i + 1), p_index=p.idx, n=i + 1, type=tl.purpose, duration=1440 - tl.arrival, leg_dist=tl.gis_length, leg_duration=tl.duration, leg_mode=tl.main_mode, leg_dep_district=tl.dep_district, leg_arr_district=tl.arr_district, leg_departure=tl.departure, start_time=tl.arrival)) if valid: res.extend(acts) - return res + return res, persons + result = [] + cleaned = pd.DataFrame() with mp.Pool(16) as pool: docs = pool.map(convert, np.array_split(all_persons, 16)) - result = functools.reduce(lambda a, b: a + b, docs) + + for element in docs: + result.extend(element[0]) + cleaned = pd.concat([cleaned, element[1]], ignore_index=True) activities = pd.DataFrame(result).set_index("a_id") # Reverse columns because it will be reversed again at the end @@ -212,8 +218,10 @@ def a_id(t_i): persons = all_persons.iloc[:, ::-1].drop(columns=["p_id"], errors="ignore") if include_person_context: + persons = cleaned.iloc[:, ::-1].drop(columns=["p_id"], errors="ignore") df = activities.join(persons, on="p_id", rsuffix="_p") else: + persons = cleaned df = activities df = df.drop(columns=["mobile_on_day", "p_weight", "hh_id", "present_on_day"], errors="ignore") @@ -225,7 +233,7 @@ def a_id(t_i): df.leg_dist = DistanceGroup.cut(df.leg_dist) # reverse columns so activities are at the end - return df.iloc[:, ::-1] + return df.iloc[:, ::-1], persons def check_age_employment(column_names, df): diff --git a/matsim/scenariogen/data/run_extract_activities.py b/matsim/scenariogen/data/run_extract_activities.py index 4ac2c7a..bea5a77 100644 --- a/matsim/scenariogen/data/run_extract_activities.py +++ b/matsim/scenariogen/data/run_extract_activities.py @@ -26,10 +26,12 @@ def main(args): print("Written survey csvs") df = prepare_persons(hh, persons, trips, augment=5, max_hh_size=7, core_weekday=True, remove_with_invalid_trips=False) + df = df.reset_index(names=["idx"]) - df.to_csv(args.output + "-persons.csv", index_label="idx") - print("Created %d synthetics persons" % len(df)) - - activities = create_activities(df, trips, include_person_context=False, cut_groups=False) + activities, persons = create_activities(df, trips, include_person_context=False, cut_groups=False) print("About to write %d activities to csv, this might take a while." % len(activities)) activities.to_csv(args.output + "-activities.csv", index=False) + persons = persons.set_index("idx") + print("Created %d synthetics persons. About to write them to csv, this might take a while." % len(persons)) + persons.to_csv(args.output + "-persons.csv", index_label="idx") + From fcb0bf9917e8fcfb502c87ee37d01e11192f15da Mon Sep 17 00:00:00 2001 From: simei94 Date: Wed, 20 Dec 2023 19:05:15 -0600 Subject: [PATCH 11/16] colectivo handled separately from other pt modes --- matsim/scenariogen/data/__init__.py | 1 + matsim/scenariogen/data/formats/eodmx.py | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/matsim/scenariogen/data/__init__.py b/matsim/scenariogen/data/__init__.py index 0839371..e427365 100644 --- a/matsim/scenariogen/data/__init__.py +++ b/matsim/scenariogen/data/__init__.py @@ -191,6 +191,7 @@ class TripMode(AutoNameLowerStrEnum): PT = auto() MOTORCYCLE = auto() OTHER = auto() + COLECTIVO = auto() class DistanceGroup(AutoNameLowerStrEnum): diff --git a/matsim/scenariogen/data/formats/eodmx.py b/matsim/scenariogen/data/formats/eodmx.py index eaba8cf..ba5cdef 100644 --- a/matsim/scenariogen/data/formats/eodmx.py +++ b/matsim/scenariogen/data/formats/eodmx.py @@ -333,7 +333,10 @@ def trip_mode(trip): def mode_from_var_name(varName): if varName == "p5_14_01": return TripMode.CAR - elif (varName == "p5_14_02" or varName == "p5_14_03" or varName == "p5_14_04" or varName == "p5_14_05" or varName == "p5_14_06" or + # colectivo will be handled as single mode from now on, as it needs to be routed separately from "normal" pt -sme1223 + elif varName == "p5_14_02": + return TripMode.COLECTIVO + elif (varName == "p5_14_03" or varName == "p5_14_04" or varName == "p5_14_05" or varName == "p5_14_06" or varName == "p5_14_08" or varName == "p5_14_10" or varName == "p5_14_11" or varName == "p5_14_12" or varName == "p5_14_13" or varName == "p5_14_15" or varName == "p5_14_16" or varName == "p5_14_17"): return TripMode.PT From 17321069f3f43f6d2b8f53669f99cce44c30a513 Mon Sep 17 00:00:00 2001 From: simei94 Date: Thu, 18 Jan 2024 14:19:13 -0600 Subject: [PATCH 12/16] changes after comments on PR --- matsim/scenariogen/__main__.py | 28 ++++++++++++------------ matsim/scenariogen/data/__init__.py | 28 ++++++++++++------------ matsim/scenariogen/data/formats/eodmx.py | 2 +- matsim/scenariogen/data/preparation.py | 16 +++++++++----- 4 files changed, 39 insertions(+), 35 deletions(-) diff --git a/matsim/scenariogen/__main__.py b/matsim/scenariogen/__main__.py index f42aa31..3a56118 100644 --- a/matsim/scenariogen/__main__.py +++ b/matsim/scenariogen/__main__.py @@ -3,14 +3,14 @@ import argparse -# from matsim.scenariogen.data import run_create_ref_data +from matsim.scenariogen.data import run_create_ref_data from data import run_extract_activities -# from matsim.scenariogen.data import run_lookup_regiostar -# from matsim.scenariogen.network import run_collect_results -# from matsim.scenariogen.network import run_edges as sumo_edges -# from matsim.scenariogen.network import run_intersections as sumo_intersections -# from matsim.scenariogen.network import run_routes as sumo_routes -# from matsim.scenariogen.network import run_train_model +from matsim.scenariogen.data import run_lookup_regiostar +from matsim.scenariogen.network import run_collect_results +from matsim.scenariogen.network import run_edges as sumo_edges +from matsim.scenariogen.network import run_intersections as sumo_intersections +from matsim.scenariogen.network import run_routes as sumo_routes +from matsim.scenariogen.network import run_train_model def _add(subparsers, m): @@ -28,11 +28,11 @@ def main(): subparsers = parser.add_subparsers(title="Subcommands") - # _add(subparsers, sumo_edges) - # _add(subparsers, sumo_routes) - # _add(subparsers, sumo_intersections) - # _add(subparsers, run_train_model) - # _add(subparsers, run_collect_results) + _add(subparsers, sumo_edges) + _add(subparsers, sumo_routes) + _add(subparsers, sumo_intersections) + _add(subparsers, run_train_model) + _add(subparsers, run_collect_results) try: from .network import run_opt_freespeed @@ -41,8 +41,8 @@ def main(): print("Opt freespeed not available", e) _add(subparsers, run_extract_activities) - # _add(subparsers, run_create_ref_data) - # _add(subparsers, run_lookup_regiostar) + _add(subparsers, run_create_ref_data) + _add(subparsers, run_lookup_regiostar) args = parser.parse_args() diff --git a/matsim/scenariogen/data/__init__.py b/matsim/scenariogen/data/__init__.py index e427365..627c6ef 100644 --- a/matsim/scenariogen/data/__init__.py +++ b/matsim/scenariogen/data/__init__.py @@ -116,12 +116,12 @@ class HouseholdType(AutoNameLowerStrEnum): class EconomicStatus(AutoNameLowerStrEnum): - # VERY_LOW = auto() + VERY_LOW = auto() LOW = auto() - MEDIUMLOW= auto() - MEDIUMHIGH= auto() + MEDIUM_LOW = auto() + MEDIUM_HIGH = auto() HIGH = auto() - # VERY_HIGH = auto() + VERY_HIGH = auto() UNKNOWN = auto() @@ -191,7 +191,8 @@ class TripMode(AutoNameLowerStrEnum): PT = auto() MOTORCYCLE = auto() OTHER = auto() - COLECTIVO = auto() + # This transport mode represents what in english is known as shared taxi / taxibus / minibus / colectivo in spanish: + TAXIBUS = auto() class DistanceGroup(AutoNameLowerStrEnum): @@ -313,7 +314,7 @@ class Person: present_on_day: bool reporting_day: int n_trips: int - home_district: str = "" + home_district: str = None @dataclass @@ -332,24 +333,23 @@ class Trip: purpose: Purpose sd_group: SourceDestinationGroup valid: bool - dep_district: str = "" - arr_district: str = "" - arrival: int = 0 + dep_district: str = None + arr_district: str = None @dataclass class Activity: - """ Activity information (including leg) """ - # all leg information relates to the leg leading to the activity + """ Activity information (including leg) + all leg information relates to the leg leading to the activity """ a_id: str - p_index: str + p_id: str n: int type: Purpose duration: int leg_dist: float leg_duration: float leg_mode: TripMode - leg_dep_district: str = "" - leg_arr_district: str = "" + leg_dep_district: str = None + leg_arr_district: str = None leg_departure: int = 0 start_time: int = 0 diff --git a/matsim/scenariogen/data/formats/eodmx.py b/matsim/scenariogen/data/formats/eodmx.py index ba5cdef..719efd1 100644 --- a/matsim/scenariogen/data/formats/eodmx.py +++ b/matsim/scenariogen/data/formats/eodmx.py @@ -335,7 +335,7 @@ def mode_from_var_name(varName): return TripMode.CAR # colectivo will be handled as single mode from now on, as it needs to be routed separately from "normal" pt -sme1223 elif varName == "p5_14_02": - return TripMode.COLECTIVO + return TripMode.TAXIBUS elif (varName == "p5_14_03" or varName == "p5_14_04" or varName == "p5_14_05" or varName == "p5_14_06" or varName == "p5_14_08" or varName == "p5_14_10" or varName == "p5_14_11" or varName == "p5_14_12" or varName == "p5_14_13" or varName == "p5_14_15" or varName == "p5_14_16" or varName == "p5_14_17"): diff --git a/matsim/scenariogen/data/preparation.py b/matsim/scenariogen/data/preparation.py index 74da243..9309ef7 100644 --- a/matsim/scenariogen/data/preparation.py +++ b/matsim/scenariogen/data/preparation.py @@ -166,11 +166,11 @@ def a_id(t_i): return "%s_%d" % (p.idx, t_i) if len(trips) == 0: - acts.append(Activity(a_id=a_id(0), p_index=p.idx, n=0, type=Purpose.HOME, duration=1440, leg_dist=0, + acts.append(Activity(a_id=a_id(0), p_id=p.idx, n=0, type=Purpose.HOME, duration=1440, leg_dist=0, leg_duration=0, leg_mode=TripMode.OTHER, leg_dep_district="999", leg_arr_district="999", leg_departure=0, start_time=0)) else: acts.append( - Activity(a_id=a_id(0), p_index=p.idx, n=0, type=trips.iloc[0].sd_group.source(), duration=trips.iloc[0].departure, leg_dist=0, leg_duration=0, + Activity(a_id=a_id(0), p_id=p.idx, n=0, type=trips.iloc[0].sd_group.source(), duration=trips.iloc[0].departure, leg_dist=0, leg_duration=0, leg_mode=TripMode.OTHER, leg_dep_district=trips.iloc[0].dep_district, leg_arr_district=trips.iloc[0].dep_district, leg_departure=0, start_time=0)) for i in range(len(trips) - 1): @@ -182,21 +182,25 @@ def a_id(t_i): if duration < 0 or t0.gis_length < 0: valid = False - acts.append(Activity(a_id=a_id(i + 1), p_index=p.idx, n=i + 1, type=t0.purpose, duration=duration, leg_dist=t0.gis_length, - leg_duration=t0.duration, leg_mode=t0.main_mode, leg_dep_district=t0.dep_district, leg_arr_district=t0.arr_district, leg_departure=t0.departure, start_time=t0.arrival)) + t0_arrival = t0.departure + t0.duration + + acts.append(Activity(a_id=a_id(i + 1), p_id=p.idx, n=i + 1, type=t0.purpose, duration=duration, leg_dist=t0.gis_length, + leg_duration=t0.duration, leg_mode=t0.main_mode, leg_dep_district=t0.dep_district, leg_arr_district=t0.arr_district, leg_departure=t0.departure, start_time=t0_arrival)) if len(trips) > 1: i += 1 # last trip tl = trips.iloc[i] + tl_arrival = tl.departure + tl.duration + if tl.gis_length < 0: valid = False # Duration is set to rest of day acts.append( - Activity(a_id=a_id(i + 1), p_index=p.idx, n=i + 1, type=tl.purpose, duration=1440 - tl.arrival, leg_dist=tl.gis_length, - leg_duration=tl.duration, leg_mode=tl.main_mode, leg_dep_district=tl.dep_district, leg_arr_district=tl.arr_district, leg_departure=tl.departure, start_time=tl.arrival)) + Activity(a_id=a_id(i + 1), p_id=p.idx, n=i + 1, type=tl.purpose, duration=1440 - tl_arrival, leg_dist=tl.gis_length, + leg_duration=tl.duration, leg_mode=tl.main_mode, leg_dep_district=tl.dep_district, leg_arr_district=tl.arr_district, leg_departure=tl.departure, start_time=tl_arrival)) if valid: res.extend(acts) From 4174380b0f3483593303f73c7e987956be4b4914 Mon Sep 17 00:00:00 2001 From: simei94 Date: Thu, 1 Feb 2024 16:13:13 -0600 Subject: [PATCH 13/16] delete ride from trip mode determination --- matsim/scenariogen/data/formats/eodmx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/matsim/scenariogen/data/formats/eodmx.py b/matsim/scenariogen/data/formats/eodmx.py index 719efd1..2667253 100644 --- a/matsim/scenariogen/data/formats/eodmx.py +++ b/matsim/scenariogen/data/formats/eodmx.py @@ -347,7 +347,7 @@ def mode_from_var_name(varName): elif varName == "p5_14_14": return TripMode.WALK elif varName == "p5_14_18" or varName == "p5_14_19": - return TripMode.RIDE + return TripMode.OTHER return TripMode.OTHER @staticmethod From d093741cd23964e6368f6563fc1a557e1b8f4819 Mon Sep 17 00:00:00 2001 From: simei94 Date: Fri, 2 Feb 2024 12:22:35 -0600 Subject: [PATCH 14/16] bugfix --- matsim/scenariogen/data/formats/eodmx.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/matsim/scenariogen/data/formats/eodmx.py b/matsim/scenariogen/data/formats/eodmx.py index 2667253..b856289 100644 --- a/matsim/scenariogen/data/formats/eodmx.py +++ b/matsim/scenariogen/data/formats/eodmx.py @@ -191,9 +191,9 @@ def economic_status(status): if status == 1: return EconomicStatus.LOW elif status == 2: - return EconomicStatus.MEDIUMLOW + return EconomicStatus.MEDIUM_LOW elif status == 3: - return EconomicStatus.MEDIUMHIGH + return EconomicStatus.MEDIUM_HIGH elif status == 4: return EconomicStatus.HIGH From f2d1d52b82ccdce146a3ad5692118ebd8893227b Mon Sep 17 00:00:00 2001 From: simei94 Date: Fri, 2 Feb 2024 16:10:18 -0600 Subject: [PATCH 15/16] change trip purpose transport to accomp --- matsim/scenariogen/data/__init__.py | 1 + matsim/scenariogen/data/formats/eodmx.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/matsim/scenariogen/data/__init__.py b/matsim/scenariogen/data/__init__.py index 627c6ef..3f1b1ae 100644 --- a/matsim/scenariogen/data/__init__.py +++ b/matsim/scenariogen/data/__init__.py @@ -181,6 +181,7 @@ class Purpose(AutoNameLowerStrEnum): HOME = auto() WAYBACK = auto() OTHER = auto() + ACCOMP_OTHER = auto() class TripMode(AutoNameLowerStrEnum): diff --git a/matsim/scenariogen/data/formats/eodmx.py b/matsim/scenariogen/data/formats/eodmx.py index b856289..8356440 100644 --- a/matsim/scenariogen/data/formats/eodmx.py +++ b/matsim/scenariogen/data/formats/eodmx.py @@ -364,7 +364,7 @@ def trip_purpose(x): elif x == 5: return Purpose.LEISURE elif x == 6: - return Purpose.TRANSPORT + return Purpose.ACCOMP_OTHER elif x == 7 or x == 8: return Purpose.PERSONAL_BUSINESS elif x == 9: From 925950edcaea69dd2682f7ec515a362fac97368e Mon Sep 17 00:00:00 2001 From: simei94 Date: Fri, 9 Feb 2024 13:55:01 -0600 Subject: [PATCH 16/16] fix deleted arrival --- matsim/scenariogen/data/formats/eodmx.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/matsim/scenariogen/data/formats/eodmx.py b/matsim/scenariogen/data/formats/eodmx.py index 8356440..fd2a4f4 100644 --- a/matsim/scenariogen/data/formats/eodmx.py +++ b/matsim/scenariogen/data/formats/eodmx.py @@ -124,8 +124,7 @@ def convert(data: tuple, regio=None): # Trip is valid if length and duration are present valid=EOD2017.trip_valid(str(pint(t.dto_origen)).zfill(3), str(pint(t.dto_dest)).zfill(3), duration), dep_district=str(int(t.dto_origen)).zfill(3), - arr_district=str(int(t.dto_dest)).zfill(3), - arrival=arrival + arr_district=str(int(t.dto_dest)).zfill(3) ) )