From c4eae16d6f49bad536315bff0601c3d325d39c88 Mon Sep 17 00:00:00 2001
From: lucasrodes <lucasrodes@users.noreply.github.com>
Date: Wed, 11 Dec 2024 21:11:43 +0100
Subject: [PATCH] fix

---
 .../data/garden/demography/2024-12-06/shared.py    | 14 ++++++++++----
 .../2024-12-06/wittgenstein_human_capital_proj.py  |  2 +-
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/etl/steps/data/garden/demography/2024-12-06/shared.py b/etl/steps/data/garden/demography/2024-12-06/shared.py
index d09d04337df..e3ec4505cde 100644
--- a/etl/steps/data/garden/demography/2024-12-06/shared.py
+++ b/etl/steps/data/garden/demography/2024-12-06/shared.py
@@ -135,15 +135,21 @@ def add_dim_some_education(tb):
 
 def add_dim_15plus(tb):
     # Pivot table to have two columns: "0-14" and "total"
-    tb_adults = tb.loc[tb["age"].isin(["0-14", "total"])]
+    tb_adults = tb.loc[tb["age"].isin(["0-4", "5-9", "10-14", "total"]) & (tb["education"] != "total")]
     cols_index = ["country", "scenario", "sex", "education", "year"]
     tb_adults = tb_adults.pivot(index=cols_index, columns="age", values="pop").reset_index()
-    # Fill with zero NAs of agr group "0-14". NAs mostly come from 'doesn't apply' (e.g. primary education for 0-14)
-    tb_adults["0-14"] = tb_adults["0-14"].fillna(0)
     # Only estimate values for adults when "total" is not NA
     tb_adults = tb_adults.dropna(subset=["total"])
     # Estimate adults as "0-14" - 15+
-    tb_adults["15+"] = tb_adults["total"] - tb_adults["0-14"].fillna(0)
+    # Fill with zero the NAs of the under-15 age groups ("0-4", "5-9", "10-14"). NAs mostly come from 'doesn't apply' (e.g. primary education for 0-14)
+    tb_adults["15+"] = (
+        tb_adults["total"] - tb_adults["0-4"].fillna(0) - tb_adults["5-9"].fillna(0) - tb_adults["10-14"].fillna(0)
+    )
+    # Drop columns
+    tb_adults = tb_adults.drop(columns=["0-4", "5-9", "10-14", "total"])
+    # Replace negative values with zero
+    flag = tb_adults["15+"] < 0
+    tb_adults.loc[flag, "15+"] = 0
     # Shape table
     tb_adults = tb_adults.melt(id_vars=cols_index, value_name="pop", var_name="age")
     # Concatenate with original table
diff --git a/etl/steps/data/garden/demography/2024-12-06/wittgenstein_human_capital_proj.py b/etl/steps/data/garden/demography/2024-12-06/wittgenstein_human_capital_proj.py
index f5f51a679ce..f94a5b6980b 100644
--- a/etl/steps/data/garden/demography/2024-12-06/wittgenstein_human_capital_proj.py
+++ b/etl/steps/data/garden/demography/2024-12-06/wittgenstein_human_capital_proj.py
@@ -2,7 +2,7 @@
 
 from etl.helpers import PathFinder, create_dataset
 
-from .shared import add_dim_15plus, add_dim_some_education, add_prop, make_table
+from shared import add_dim_15plus, add_dim_some_education, add_prop, make_table
 
 # Get paths and naming conventions for current step.
 paths = PathFinder(__file__)