Skip to content

Commit

Permalink
fix
Browse files Browse the repository at this point in the history
  • Loading branch information
spoonerf committed May 28, 2024
1 parent f51b077 commit e50da5d
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ definitions:
presentation:
topic_tags:
- Mental Health
processing_level: minor
processing_level: major
sex: |-
<% if sex == "Both sexes" %> individuals <% elif sex == "Male" %> male <% elif sex == "Female" %> female <%- endif -%>
value:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,16 @@ definitions:
presentation:
topic_tags:
- Global Health
processing_level: minor
processing_level: major

value_prevalence:
unit: |-
<% if metric == "Number" %>
cases
<% elif metric == "Rate" %>
cases per 100,000 people
<% elif metric == "Share" %>
cases per 100 people
<%- endif -%>
title: |-
<% if metric == "Number" and (age != 'Age-standardized' and age != 'All ages')%>
Expand All @@ -26,6 +28,10 @@ definitions:
Age-standardized current cases of << cause.lower() >> per 100,000 people
<% elif metric == "Rate" and age == 'All ages'%>
Total current cases of << cause.lower() >> per 100,000 people
<% elif metric == "Share" and age == 'Age-standardized'%>
Age-standardized current cases of << cause.lower() >> per 100 people
<% elif metric == "Share" and age == 'All ages'%>
Total current cases of << cause.lower() >> per 100 people
<%- endif -%>
description_short: |-
<% if metric == "Number" and (age != 'Age-standardized' and age != 'All ages')%>
Expand All @@ -40,13 +46,21 @@ definitions:
The estimated number of age-standardized current cases of << cause.lower() >>, per 100,000 people.
<% elif metric == "Rate" and age == 'All ages'%>
The estimated number of current cases of << cause.lower() >>, per 100,000 people.
<% elif metric == "Share" and (age != 'Age-standardized' and age != 'All ages')%>
The estimated number of current cases of << cause.lower() >> in those aged << age >>, per 100 people.
<% elif metric == "Share" and age == 'Age-standardized'%>
The estimated number of age-standardized current cases of << cause.lower() >>, per 100 people.
<% elif metric == "Share" and age == 'All ages'%>
The estimated number of current cases of << cause.lower() >>, per 100 people.
<%- endif -%>
value_incidence:
unit: |-
<% if metric == "Number" %>
new cases
<% elif metric == "Rate" %>
new cases per 100,000 people
<% elif metric == "Share" %>
new cases per 100 people
<%- endif -%>
title: |-
<% if metric == "Number" and (age != 'Age-standardized' and age != 'All ages')%>
Expand All @@ -61,6 +75,12 @@ definitions:
Age-standardized new cases of << cause.lower() >> per 100,000 people
<% elif metric == "Rate" and age == 'All ages'%>
New cases of << cause.lower() >> per 100,000 people
<% elif metric == "Share" and (age != 'Age-standardized' and age != 'All ages')%>
New cases of << cause.lower() >>, among individuals aged << age >> per 100 people
<% elif metric == "Share" and age == 'Age-standardized'%>
Age-standardized new cases of << cause.lower() >> per 100 people
<% elif metric == "Share" and age == 'All ages'%>
New cases of << cause.lower() >> per 100 people
<%- endif -%>
description_short: |-
<% if metric == "Number" and (age != 'Age-standardized' and age != 'All ages')%>
Expand All @@ -75,6 +95,12 @@ definitions:
The estimated number of age-standardized new cases of << cause.lower() >>, per 100,000 people.
<% elif metric == "Rate" and age == 'All ages'%>
The estimated number of new cases of << cause.lower() >>, per 100,000 people.
<% elif metric == "Share" and (age != 'Age-standardized' and age != 'All ages')%>
The estimated number of new cases of << cause.lower() >> in those aged << age >>, per 100 people.
<% elif metric == "Share" and age == 'Age-standardized'%>
The estimated number of age-standardized new cases of << cause.lower() >>, per 100 people.
<% elif metric == "Share" and age == 'All ages'%>
The estimated number of new cases of << cause.lower() >>, per 100 people.
<%- endif -%>
footnote: |-
<% if age == "Age-standardized" %>To allow for comparisons between countries and over time, this metric is [age-standardized](#dod:age_standardized).<%- endif -%>
Expand Down
41 changes: 40 additions & 1 deletion etl/steps/data/garden/ihme_gbd/2024-05-20/gbd_prevalence.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,23 @@
"""Load a meadow dataset and create a garden dataset."""

from owid.catalog import Table
from owid.catalog import processing as pr
from shared import add_regional_aggregates

from etl.data_helpers import geo
from etl.helpers import PathFinder, create_dataset

# Get paths and naming conventions for current step.
paths = PathFinder(__file__)
# Region names passed as `regions=` to add_regional_aggregates() in run().
REGIONS = ["North America", "South America", "Europe", "Africa", "Asia", "Oceania"]
# Age-group labels mapped to two-element lists; passed as `age_group_mapping=`
# to add_regional_aggregates() in run().
# NOTE(review): the pairs look like [lower, upper] age bounds in years, with
# None meaning "no upper bound" -- confirm against add_regional_aggregates().
AGE_GROUPS_RANGES = {
"All ages": [0, None],
"<5 years": [0, 4],
"5-14 years": [5, 14],
"15-49 years": [15, 49],
"50-69 years": [50, 69],
"70+ years": [70, None],
}


def run(dest_dir: str) -> None:
Expand All @@ -16,11 +29,25 @@ def run(dest_dir: str) -> None:

# Read table from meadow dataset.
tb = ds_meadow["gbd_prevalence"].reset_index()
# Load regions dataset.
ds_regions = paths.load_dataset("regions")

#
# Process data.
#
tb = geo.harmonize_countries(df=tb, countries_file=paths.country_mapping_path)

tb = add_regional_aggregates(
tb,
ds_regions,
index_cols=["country", "year", "metric", "measure", "cause", "age"],
regions=REGIONS,
age_group_mapping=AGE_GROUPS_RANGES,
)

# Add a share of the population column
tb = add_share_population(tb)

# Split into two tables: one for prevalence, one for incidence
tb_prevalence = tb[tb["measure"] == "Prevalence"].copy()
tb_incidence = tb[tb["measure"] == "Incidence"].copy()
Expand All @@ -33,7 +60,6 @@ def run(dest_dir: str) -> None:
tb_prevalence = tb_prevalence.format(["country", "year", "metric", "age", "cause"], short_name="gbd_prevalence")
tb_incidence = tb_incidence.format(["country", "year", "metric", "age", "cause"], short_name="gbd_incidence")

#
# Save outputs.
#
# Create a new garden dataset with the same metadata as the meadow dataset.
Expand All @@ -46,3 +72,16 @@ def run(dest_dir: str) -> None:

# Save changes in the new garden dataset.
ds_garden.save()


def add_share_population(tb: Table) -> Table:
    """
    Append "Share" rows (cases per 100 people) derived from the "Rate" rows.

    Rows whose `metric` is "Rate" hold cases per 100,000 people. Each of them is
    duplicated with `metric` set to "Share" and `value` divided by 1000, which
    is the same quantity expressed per 100 people. The original rows are kept
    unchanged and the new rows are appended after them with a fresh index.
    """
    is_rate = tb["metric"] == "Rate"

    # Work on a copy so the rows of the incoming table are left untouched.
    share_rows = tb[is_rate].copy()
    # per 100,000 -> per 100 is a division by 1000.
    share_rows["value"] = share_rows["value"] / 1000
    share_rows["metric"] = "Share"

    return pr.concat([tb, share_rows], ignore_index=True)
6 changes: 2 additions & 4 deletions etl/steps/data/garden/ihme_gbd/2024-05-20/shared.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,12 @@ def add_regional_aggregates(
].copy()

# Calculate rates per 100,000 for regions
tb_rate_regions["value"] = tb_number["value"] / tb_number["population"] * 100_000
tb_rate_regions["value"] = (tb_rate_regions["value"] / tb_rate_regions["population"]) * 100000
tb_rate_regions["metric"] = "Rate"

tb_rate = pr.concat([tb_rate, tb_rate_regions], ignore_index=True)
tb_rate = tb_rate.drop(columns="population")

tb_out = pr.concat(
[tb_number_percent, tb_rate],
ignore_index=True,
)
tb_out = tb_out.drop(columns="population")
return tb_out

0 comments on commit e50da5d

Please sign in to comment.