write grouped shares in create ref data

matsim-vsp · Dec 28, 2023 · e0af1fe · e0af1fe
1 parent 1b35bb7
commit e0af1fe
Show file tree

Hide file tree

Showing 2 changed files with 24 additions and 17 deletions.
diff --git a/matsim/scenariogen/data/preparation.py b/matsim/scenariogen/data/preparation.py
@@ -22,17 +22,17 @@ def prepare_persons(hh, pp, tt, augment=5, max_hh_size=5, core_weekday=False, re
 
     # set car avail
     df.loc[df.age < 17, "driving_license"] = Availability.NO
-    _fill(df, "driving_license", Availability.UNKNOWN)
+    fill(df, "driving_license", Availability.UNKNOWN)
 
     df["car_avail"] = (df.n_cars > 0) & (df.driving_license == Availability.YES)
     df["bike_avail"] = (df.n_bikes > 0) | (df.bike_avail == Availability.YES)
 
     # small children don't have pt abo
     df.loc[df.age < 6, "pt_abo_avail"] = Availability.NO
-    _fill(df, "pt_abo_avail", Availability.UNKNOWN)
+    fill(df, "pt_abo_avail", Availability.UNKNOWN)
 
     # Replace unknown income group
-    _fill(df, "economic_status", EconomicStatus.UNKNOWN)
+    fill(df, "economic_status", EconomicStatus.UNKNOWN)
 
     # Large households are underrepresented and capped
     df.n_persons = np.minimum(df.n_persons, max_hh_size)
@@ -121,7 +121,7 @@ def prepare_trips(pp, trips, core_weekday=True):
     return df[df.columns[::-1]]
 
 
-def _fill(df, col, val=None):
+def fill(df, col, val=None):
     """ Fill null values with dist of the rest (or replace val)"""
     if val is not None:
         df.loc[df[col] == val, col] = None

diff --git a/matsim/scenariogen/data/run_create_ref_data.py b/matsim/scenariogen/data/run_create_ref_data.py
@@ -98,7 +98,7 @@ def default_person_filter(df):
 def create(survey_dirs, transform_persons, transform_trips,
            invalid_trip_handling: InvalidHandling = InvalidHandling.REMOVE_TRIPS,
            dist_groups=[0, 1000, 2000, 5000, 10000, 20000, np.inf],
-           ref_groups: List[Union[str, Tuple[str]]] = None,
+           ref_groups: List[str] = None,
            output_prefix="") -> AggregationResult:
     """ Create reference data from survey data.
     :param survey_dirs: Directories with survey data
@@ -161,31 +161,38 @@ def create(survey_dirs, transform_persons, transform_trips,
     groups = None
     if ref_groups:
 
-        groups = []
+        overall = share.groupby("main_mode").sum().reset_index()
 
-        for g in ref_groups:
+        groups = [overall]
 
-            if type(g) is str:
-                g = [g]
+        for g in ref_groups:
 
-            for x in g:
-                if x not in persons.columns:
-                    raise ValueError("Column %s not found in persons" % x)
+            if g not in persons.columns:
+                raise ValueError("Column %s not found in persons" % g)
 
-            aggr = trips.groupby(g + ["main_mode"]).apply(weighted)
+            aggr = trips.groupby([g] + ["main_mode"]).apply(weighted)
             aggr["share"] = aggr.n / aggr.n.sum()
             aggr["share"].fillna(0, inplace=True)
             aggr.drop(columns=["n"], inplace=True)
 
-            # todo only works with one subgroup level
             # Normalize per group
-            for group in aggr.index.get_level_values(0).categories:
+            for group in set(aggr.index.get_level_values(0)):
                 sub = aggr.loc[group, :]
                 sub.share /= sub.share.sum()
 
-            groups.append(aggr)
+            groups.append(aggr.reset_index())
+
+        groups = pd.concat(groups, sort=False)
+
+        # Reorder columns
+        groups = groups[ref_groups + ["main_mode", "share"]]
+
+        groups.to_csv(output_prefix + "mode_share_per_group_ref.csv", index=False)
+
+        # TODO: long format, which might be easier to plot
+
+        # TODO groups also by distance group
 
-        groups = pd.concat(groups, axis=1)
 
     return AggregationResult(persons, trips, share.groupby("main_mode").sum(), groups=groups)