Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Refactor ml code + Facility package in scenariogen #29

Merged
merged 13 commits into from
Jun 18, 2024
12 changes: 11 additions & 1 deletion matsim/scenariogen/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
__all__ = ["read_all", "ParkingPosition", "HouseholdType", "EconomicStatus", "Gender", "Employment", "Availability",
"Purpose",
"TripMode", "DistanceGroup", "DurationGroup", "SourceDestinationGroup",
"Household", "Person", "Trip", "Activity"]
"Household", "Person", "Trip", "Activity", "Visitations"]

import os
from dataclasses import dataclass
Expand Down Expand Up @@ -119,6 +119,7 @@ def sort_idx(cls, series):
v = list(cls)
return series.map(v.index)


def _df_to_categorical(df, clazz):
""" Convert columns to categorical types """

Expand Down Expand Up @@ -378,3 +379,12 @@ class Activity:
leg_mode: TripMode
location: str = pd.NA
zone: str = pd.NA


@dataclass
class Visitations:
    """ Aggregated visit count for one location """

    # Identifier of the visited location
    location: str
    # Number of visits that were counted
    n: int
    # Purpose attributed to the visits, NA if not specified
    purpose: Purpose = pd.NA
    # Time the entry refers to, NA if not specified
    time: pd.Timestamp = pd.NA
52 changes: 52 additions & 0 deletions matsim/scenariogen/data/formats/netcheck.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# -*- coding: utf-8 -*-

import os

import pandas as pd

from .. import *


def read_visitations(folder):
    """ Read all visits from folder and aggregate them per location and purpose.

    Every file in `folder` ending with ``.csv`` is read. The files need to provide the columns
    ``day``, ``timestamps``, ``device_id``, ``osm_id``, ``distance_to_home`` and ``distance_to_work``.
    The first column is parsed as date (presumably this is ``day``, to be confirmed against the data).

    :param folder: directory containing the csv files
    :return: DataFrame with the columns of `Visitations`, one row per combination of osm id and purpose.
        The frame is empty, but has the same columns, if the folder contains no csv file.
    """

    # Layout of the result, mirrors the fields of Visitations
    columns = ["location", "n", "purpose", "time"]

    visits = []

    for f in os.listdir(folder):

        if not f.endswith(".csv"):
            continue

        print("Reading", f)

        t = pd.read_csv(os.path.join(folder, f), parse_dates=[0])
        # Timestamps of one row are stored as one comma separated string
        t.timestamps = t.timestamps.str.split(",")
        # Keep the original row number, so exploded timestamps can be grouped back together
        t["idx"] = t.index

        t = t.explode("timestamps")
        t.timestamps = pd.to_datetime(t.timestamps, format="%H:%M:%S")
        # Only the time of day was parsed, convert it into an offset that can be added to the day
        delta = (t.timestamps.dt.hour * 60 + t.timestamps.dt.minute) * 60 + t.timestamps.dt.second
        delta = pd.to_timedelta(delta, unit="s")

        t["ts"] = t.day + delta

        # Collapse the exploded rows into one visit with start and end time
        t = t.groupby(["device_id", "osm_id", "idx"]).agg(start=("ts", "min"), end=("ts", "max"),
                                                          home=("distance_to_home", "mean"),
                                                          work=("distance_to_work", "mean")).reset_index()

        # A distance of zero marks the home or work location, work takes precedence if both are zero
        t["purpose"] = Purpose.OTHER
        t.loc[t.home == 0, "purpose"] = Purpose.HOME
        t.loc[t.work == 0, "purpose"] = Purpose.WORK

        visits.append(t)

    # pd.concat raises a ValueError for an empty list, return an empty result instead
    if not visits:
        return pd.DataFrame(columns=columns)

    df = pd.concat(visits)

    # The count is named "n", because "count" would shadow the tuple method on the rows of itertuples
    aggr = df.groupby(["osm_id", "purpose"]).agg(n=("idx", "count")).reset_index()

    total = [Visitations(v.osm_id, v.n, v.purpose) for v in aggr.itertuples()]

    return pd.DataFrame(total, columns=columns)
Empty file.
5 changes: 5 additions & 0 deletions matsim/scenariogen/ml/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# -*- coding: utf-8 -*-

__all__ = ['MLRegressor']

from .train import MLRegressor
Loading
Loading