fix

owid · May 28, 2024 · e50da5d · e50da5d
1 parent f51b077
commit e50da5d
Show file tree

Hide file tree

Showing 4 changed files with 70 additions and 7 deletions.
diff --git a/etl/steps/data/garden/ihme_gbd/2024-05-20/gbd_mental_health.meta.yml b/etl/steps/data/garden/ihme_gbd/2024-05-20/gbd_mental_health.meta.yml
@@ -4,7 +4,7 @@ definitions:
     presentation:
       topic_tags:
         - Mental Health
-    processing_level: minor
+    processing_level: major
   sex: |-
     <% if sex == "Both sexes" %> individuals <% elif sex == "Male" %> male <% elif sex == "Female" %> female <%- endif -%>
   value:

diff --git a/etl/steps/data/garden/ihme_gbd/2024-05-20/gbd_prevalence.meta.yml b/etl/steps/data/garden/ihme_gbd/2024-05-20/gbd_prevalence.meta.yml
@@ -4,14 +4,16 @@ definitions:
     presentation:
       topic_tags:
         - Global Health
-    processing_level: minor
+    processing_level: major
 
   value_prevalence:
     unit: |-
       <% if metric == "Number" %>
       cases
       <% elif metric == "Rate" %>
       cases per 100,000 people
+      <% elif metric == "Share" %>
+      cases per 100 people
       <%- endif -%>
     title: |-
       <% if metric == "Number"  and (age != 'Age-standardized' and age != 'All ages')%>
@@ -26,6 +28,10 @@ definitions:
       Age-standardized current cases of << cause.lower() >> per 100,000 people
       <% elif metric == "Rate" and age == 'All ages'%>
       Total current cases of << cause.lower() >> per 100,000 people
+      <% elif metric == "Share" and age == 'Age-standardized'%>
+      Age-standardized current cases of << cause.lower() >> per 100 people
+      <% elif metric == "Share" and age == 'All ages'%>
+      Total current cases of << cause.lower() >> per 100 people
       <%- endif -%>
     description_short: |-
       <% if metric == "Number" and (age != 'Age-standardized' and age != 'All ages')%>
@@ -40,13 +46,21 @@ definitions:
       The estimated number of age-standardized current cases of << cause.lower() >>, per 100,000 people.
       <% elif metric == "Rate" and age == 'All ages'%>
       The estimated number of current cases of << cause.lower() >>, per 100,000 people.
+      <% elif metric == "Share" and (age != 'Age-standardized' and age != 'All ages')%>
+      The estimated number of current cases of << cause.lower() >> in those aged << age >>, per 100 people.
+      <% elif metric == "Share" and age == 'Age-standardized'%>
+      The estimated number of age-standardized current cases of << cause.lower() >>, per 100 people.
+      <% elif metric == "Share" and age == 'All ages'%>
+      The estimated number of current cases of << cause.lower() >>, per 100 people.
       <%- endif -%>
   value_incidence:
     unit: |-
       <% if metric == "Number" %>
       new cases
       <% elif metric == "Rate" %>
       new cases per 100,000 people
+      <% elif metric == "Share" %>
+      new cases per 100 people
       <%- endif -%>
     title: |-
       <% if metric == "Number"  and (age != 'Age-standardized' and age != 'All ages')%>
@@ -61,6 +75,12 @@ definitions:
       Age-standardized new cases of << cause.lower() >> per 100,000 people
       <% elif metric == "Rate" and age == 'All ages'%>
       New cases of << cause.lower() >> per 100,000 people
+      <% elif metric == "Share" and (age != 'Age-standardized' and age != 'All ages')%>
+      New cases of << cause.lower() >>, among individuals aged << age >> per 100 people
+      <% elif metric == "Share" and age == 'Age-standardized'%>
+      Age-standardized new cases of << cause.lower() >> per 100 people
+      <% elif metric == "Share" and age == 'All ages'%>
+      New cases of << cause.lower() >> per 100 people
       <%- endif -%>
     description_short: |-
       <% if metric == "Number" and (age != 'Age-standardized' and age != 'All ages')%>
@@ -75,6 +95,12 @@ definitions:
       The estimated number of age-standardized new cases of << cause.lower() >>, per 100,000 people.
       <% elif metric == "Rate" and age == 'All ages'%>
       The estimated number of new cases of << cause.lower() >>, per 100,000 people.
+      <% elif metric == "Share" and (age != 'Age-standardized' and age != 'All ages')%>
+      The estimated number of new cases of << cause.lower() >> in those aged << age >>, per 100 people.
+      <% elif metric == "Share" and age == 'Age-standardized'%>
+      The estimated number of age-standardized new cases of << cause.lower() >>, per 100 people.
+      <% elif metric == "Share" and age == 'All ages'%>
+      The estimated number of new cases of << cause.lower() >>, per 100 people.
       <%- endif -%>
   footnote: |-
     <% if age == "Age-standardized" %>To allow for comparisons between countries and over time, this metric is [age-standardized](#dod:age_standardized).<%- endif -%>

diff --git a/etl/steps/data/garden/ihme_gbd/2024-05-20/gbd_prevalence.py b/etl/steps/data/garden/ihme_gbd/2024-05-20/gbd_prevalence.py
@@ -1,10 +1,23 @@
 """Load a meadow dataset and create a garden dataset."""
 
+from owid.catalog import Table
+from owid.catalog import processing as pr
+from shared import add_regional_aggregates
+
 from etl.data_helpers import geo
 from etl.helpers import PathFinder, create_dataset
 
 # Get paths and naming conventions for current step.
 paths = PathFinder(__file__)
+REGIONS = ["North America", "South America", "Europe", "Africa", "Asia", "Oceania"]
+AGE_GROUPS_RANGES = {
+    "All ages": [0, None],
+    "<5 years": [0, 4],
+    "5-14 years": [5, 14],
+    "15-49 years": [15, 49],
+    "50-69 years": [50, 69],
+    "70+ years": [70, None],
+}
 
 
 def run(dest_dir: str) -> None:
@@ -16,11 +29,25 @@ def run(dest_dir: str) -> None:
 
     # Read table from meadow dataset.
     tb = ds_meadow["gbd_prevalence"].reset_index()
+    # Load regions dataset.
+    ds_regions = paths.load_dataset("regions")
 
     #
     # Process data.
     #
     tb = geo.harmonize_countries(df=tb, countries_file=paths.country_mapping_path)
+
+    tb = add_regional_aggregates(
+        tb,
+        ds_regions,
+        index_cols=["country", "year", "metric", "measure", "cause", "age"],
+        regions=REGIONS,
+        age_group_mapping=AGE_GROUPS_RANGES,
+    )
+
+    # Add 'Share' rows (cases per 100 people), derived from the 'Rate' rows
+    tb = add_share_population(tb)
+
     # Split into two tables: one for prevalence, one for incidence
     tb_prevalence = tb[tb["measure"] == "Prevalence"].copy()
     tb_incidence = tb[tb["measure"] == "Incidence"].copy()
@@ -33,7 +60,6 @@ def run(dest_dir: str) -> None:
     tb_prevalence = tb_prevalence.format(["country", "year", "metric", "age", "cause"], short_name="gbd_prevalence")
     tb_incidence = tb_incidence.format(["country", "year", "metric", "age", "cause"], short_name="gbd_incidence")
 
-    #
     # Save outputs.
     #
     # Create a new garden dataset with the same metadata as the meadow dataset.
@@ -46,3 +72,16 @@ def run(dest_dir: str) -> None:
 
     # Save changes in the new garden dataset.
     ds_garden.save()
+
+
+def add_share_population(tb: Table) -> Table:
+    """
+    Add rows with metric 'Share' (cases per 100 people) to the table.
+    Rows with metric 'Rate' hold the number of cases per 100,000 people; dividing by 1,000 gives the equivalent per 100 people.
+    """
+    tb_share = tb[tb["metric"] == "Rate"].copy()
+    tb_share["metric"] = "Share"
+    tb_share["value"] = tb_share["value"] / 1000
+
+    tb = pr.concat([tb, tb_share], ignore_index=True)
+    return tb
diff --git a/etl/steps/data/garden/ihme_gbd/2024-05-20/shared.py b/etl/steps/data/garden/ihme_gbd/2024-05-20/shared.py
@@ -42,14 +42,12 @@ def add_regional_aggregates(
     ].copy()
 
     # Calculate rates per 100,000 for regions
-    tb_rate_regions["value"] = tb_number["value"] / tb_number["population"] * 100_000
+    tb_rate_regions["value"] = (tb_rate_regions["value"] / tb_rate_regions["population"]) * 100000
     tb_rate_regions["metric"] = "Rate"
-
     tb_rate = pr.concat([tb_rate, tb_rate_regions], ignore_index=True)
-    tb_rate = tb_rate.drop(columns="population")
-
     tb_out = pr.concat(
         [tb_number_percent, tb_rate],
         ignore_index=True,
     )
+    tb_out = tb_out.drop(columns="population")
     return tb_out