Skip to content

Commit

Permalink
sort variables when grouping
Browse files Browse the repository at this point in the history
  • Loading branch information
rakow committed Mar 25, 2024
1 parent 8746ce0 commit ca79152
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 4 deletions.
32 changes: 30 additions & 2 deletions matsim/scenariogen/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import os
from dataclasses import dataclass
from enum import Enum, auto
from typing import List, Union, Tuple
from typing import List, Union, Tuple, get_type_hints

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -71,16 +71,25 @@ def read_all(dirs: Union[str, List[str]], regio=None) -> Tuple[pd.DataFrame, pd.

hh = pd.concat(hh, axis=0)
hh = hh[~hh.index.duplicated(keep='first')]
hh = hh.dropna(axis=1, how='all')
# _df_to_categorical(hh, Household)

print("Households: ", len(hh))

pp = pd.concat(pp, axis=0)
pp = pp[~pp.index.duplicated(keep='first')]
pp = pp.dropna(axis=1, how='all')
pp.sort_index(inplace=True)
# _df_to_categorical(pp, Person)

print("Persons: ", len(pp))

tt = pd.concat(tt, axis=0)
tt = tt[~tt.index.duplicated(keep='first')]
tt = tt.dropna(axis=1, how='all')
tt.sort_values(["p_id", "n"], inplace=True)
# _df_to_categorical(tt, Trip)

print("Trips: ", len(tt))

return hh, pp, tt
Expand All @@ -98,7 +107,26 @@ def _generate_next_value_(name, start, count, last_values):


class AutoNameLowerStrEnum(AutoNameLower):
    """Enum base whose members double as ordered pandas categories and sort keys."""

    @classmethod
    def dtype(cls):
        """Build an ordered pandas CategoricalDtype from this enum's members.

        The categories are the members themselves, in the order obtained by
        iterating over the class.
        """
        members = list(cls)
        return pd.api.types.CategoricalDtype(categories=members, ordered=True)

    @classmethod
    def sort_idx(cls, series):
        """Map every value of ``series`` to its position among the members.

        ``series`` must offer a pandas-style ``map`` method; the result can be
        used as a sort key that follows the member order instead of the
        alphabetical one. The lookup is ``list.index``, which raises
        ``ValueError`` for a value that equals no member.
        """
        return series.map(list(cls).index)

def _df_to_categorical(df, clazz):
""" Convert columns to categorical types """

for k, v in get_type_hints(clazz).items():
if hasattr(v, "dtype") and k in df.columns:
df[k] = df[k].astype(v.dtype())

return df


class ParkingPosition(AutoNameLowerStrEnum):
Expand Down
4 changes: 2 additions & 2 deletions matsim/scenariogen/data/preparation.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,13 +121,13 @@ def prepare_trips(pp, trips, core_weekday=True):
return df[df.columns[::-1]]


def fill(df, col, val=None, random_state=0):
    """Impute missing entries of ``df[col]`` by resampling the observed ones.

    When ``val`` is given, entries equal to ``val`` are first turned into
    nulls. Every null entry is then replaced by a value drawn with replacement
    from the non-null entries of the column, so the filled values follow the
    distribution of the rest. ``df`` is modified in place and nothing is
    returned.

    :param df: data frame to modify
    :param col: name of the column to fill
    :param val: optional placeholder value that is treated as missing
    :param random_state: seed forwarded to the sampling, for reproducibility
    """
    if val is not None:
        df.loc[df[col] == val, col] = None

    missing = df[col].isnull()
    n_missing = missing.sum()

    # Draw one replacement per missing entry from the observed values
    donors = df[col].dropna()
    drawn = donors.sample(n_missing, random_state=random_state, replace=True)
    df.loc[missing, col] = drawn.values


Expand Down
4 changes: 4 additions & 0 deletions matsim/scenariogen/data/run_create_ref_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,10 @@ def grouped_share(df, groups, normalize=True):
sub = aggr.loc[group, :]
sub.share /= sub.share.sum()

# Sort groups that use enum
if aggr.index.dtypes[0] == 'object' and hasattr(aggr.index[0][0], "sort_idx"):
aggr.sort_index(level=0, key=aggr.index[0][0].sort_idx, inplace=True)

return aggr


Expand Down

0 comments on commit ca79152

Please sign in to comment.