eqasim-org · sebhoerl · Nov 15, 2024
diff --git a/analysis/bootstrapping.py b/analysis/bootstrapping.py
@@ -2,10 +2,12 @@
 import copy
 import analysis.statistics
 
+
 def get_seeds(number_of_seeds):
     return np.arange(1, number_of_seeds + 1) * 1000
 
-def configure(context, stage, sample_size, parameters = {}, alias = None, ephemeral = True):
+
+def configure(context, stage, sample_size, parameters={}, alias=None, ephemeral=True):
     if alias is None:
         alias = stage
 
@@ -15,11 +17,18 @@ def configure(context, stage, sample_size, parameters = {}, alias = None, epheme
         sample_parameters = copy.copy(parameters)
         sample_parameters["random_seed"] = int(random_seed)
 
-        context.stage(stage, sample_parameters, alias = "bootstrap_%s_%d" % (alias, index), ephemeral = ephemeral)
+        context.stage(
+            stage,
+            sample_parameters,
+            alias="bootstrap_%s_%d" % (alias, index),
+            ephemeral=ephemeral,
+        )
+
 
 def get_stage(context, alias, index):
     return context.stage("bootstrap_%s_%d" % (alias, index))
 
+
 def get_stages(context, alias, sample_size):
     for index in range(sample_size):
         yield get_stage(context, alias, index)
diff --git a/analysis/chains.py b/analysis/chains.py
@@ -9,22 +9,30 @@
     ("chain", "sex"),
     ("chain_length_class", "age_class"),
     ("chain_length_class", "sex"),
-    ("chain",), ("chain_length_class",),
+    ("chain",),
+    ("chain_length_class",),
     ("age_range", "sex", "chain"),
-    ("age_range", "sex", "chain_length_class")
+    ("age_range", "sex", "chain_length_class"),
 ]
 
 PURPOSE_MAPPING = {
-    "home": "h", "work": "w", "education": "e",
-    "shop": "s", "leisure": "l", "other": "o"
+    "home": "h",
+    "work": "w",
+    "education": "e",
+    "shop": "s",
+    "leisure": "l",
+    "other": "o",
 }
 
+
 def aggregate_chains(df_chains):
     current_person_id = None
     current_chain = None
     records = []
 
-    for person_id, purpose in zip(df_chains["person_id"].values, df_chains["purpose"].values):
+    for person_id, purpose in zip(
+        df_chains["person_id"].values, df_chains["purpose"].values
+    ):
         if not person_id == current_person_id:
             if not current_person_id is None:
                 records.append((current_person_id, current_chain))
@@ -36,11 +44,11 @@ def aggregate_chains(df_chains):
 
     records.append((current_person_id, current_chain))
 
-    df_chains = pd.DataFrame.from_records(records, columns = ["person_id", "chain"])
+    df_chains = pd.DataFrame.from_records(records, columns=["person_id", "chain"])
 
-    #df_chains["chain"] = df_chains["chain"].apply(lambda x: re.sub(r"w+", "w", x))
-    #df_chains["chain"] = df_chains["chain"].apply(lambda x: re.sub(r"e+", "e", x))
-    #df_chains["chain"] = df_chains["chain"].apply(lambda x: re.sub(r"h+", "h", x))
+    # df_chains["chain"] = df_chains["chain"].apply(lambda x: re.sub(r"w+", "w", x))
+    # df_chains["chain"] = df_chains["chain"].apply(lambda x: re.sub(r"e+", "e", x))
+    # df_chains["chain"] = df_chains["chain"].apply(lambda x: re.sub(r"h+", "h", x))
 
     df_chains["chain_length"] = df_chains["chain"].str.len()
 

diff --git a/analysis/debug/sc.py b/analysis/debug/sc.py
@@ -1,11 +1,13 @@
 import numpy as np
 import pandas as pd
 
+
 def configure(context):
-    context.stage("data.census.filtered", alias = "census")
-    context.stage("data.hts.selected", alias = "hts")
+    context.stage("data.census.filtered", alias="census")
+    context.stage("data.hts.selected", alias="hts")
     context.config("output_path")
 
+
 def execute(context):
     df_census = context.stage("census")
     df_hts = context.stage("hts")[1]
@@ -19,14 +21,16 @@ def execute(context):
         f_census = df_census["socioprofessional_class"] == value
         f_hts = df_hts["socioprofessional_class"] == value
 
-        df_output.append({
-            "value": value,
-            "census_count": np.count_nonzero(f_census),
-            "hts_count": np.count_nonzero(f_hts),
-            "census_weight": df_census[f_census]["weight"].sum(),
-            "hts_weight": df_hts[f_hts]["person_weight"].sum()
-        })
+        df_output.append(
+            {
+                "value": value,
+                "census_count": np.count_nonzero(f_census),
+                "hts_count": np.count_nonzero(f_hts),
+                "census_weight": df_census[f_census]["weight"].sum(),
+                "hts_weight": df_hts[f_hts]["person_weight"].sum(),
+            }
+        )
 
     pd.DataFrame.from_records(df_output).to_csv(
-        "{}/debug_sc.csv".format(context.config("output_path")),
-        sep = ";", index = False)
+        "{}/debug_sc.csv".format(context.config("output_path")), sep=";", index=False
+    )