From bbb673598727a5fc5ba56dbd4515c491ed6d1461 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lucas=20Rod=C3=A9s-Guirao?= Date: Tue, 3 Dec 2024 21:34:40 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=93=8A=20births=20per=20delivery=20(#3686?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 📊 births per delivery * add ratio children/delivery * add rate --- .../2024-11-26/multiple_births.meta.yml | 5 +++++ .../demography/2024-11-26/multiple_births.py | 20 +++++++++++++------ 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/etl/steps/data/garden/demography/2024-11-26/multiple_births.meta.yml b/etl/steps/data/garden/demography/2024-11-26/multiple_births.meta.yml index 0647cada0c6..207df2e06ee 100644 --- a/etl/steps/data/garden/demography/2024-11-26/multiple_births.meta.yml +++ b/etl/steps/data/garden/demography/2024-11-26/multiple_births.meta.yml @@ -89,6 +89,11 @@ tables: description_short: |- The rate of deliveries that are multiple deliveries, per 1,000 deliveries. children_delivery_ratio: + title: "Children per delivery" + unit: "children per 1,000 deliveries" + description_short: |- + The average number of children born per delivery. This is estimated by dividing the number of children born by the total number of deliveries, per 1,000 deliveries. 
+ children_multiple_delivery_ratio: title: "Children per multiple delivery" unit: "children per 1,000 multiple deliveries" description_short: |- diff --git a/etl/steps/data/garden/demography/2024-11-26/multiple_births.py b/etl/steps/data/garden/demography/2024-11-26/multiple_births.py index 107fcecedeb..eafd3129e47 100644 --- a/etl/steps/data/garden/demography/2024-11-26/multiple_births.py +++ b/etl/steps/data/garden/demography/2024-11-26/multiple_births.py @@ -101,17 +101,24 @@ def run(dest_dir: str) -> None: # Estimate singleton_rate tb["singleton_rate"] = (1_000 * tb["singletons"] / tb["total_deliveries"]).round(2) - # Estimate children_per_delivery - tb["children_delivery_ratio"] = (1_000 * tb["multiple_children"] / tb["multiple_deliveries"]).round(3) + # Estimate ratios + tb["children_delivery_ratio"] = ( + 1_000 * (tb["multiple_children"] + tb["singletons"]) / tb["total_deliveries"] + ).round(3) + tb["children_multiple_delivery_ratio"] = (1_000 * tb["multiple_children"] / tb["multiple_deliveries"]).round(3) tb["multiple_to_singleton_ratio"] = (1_000 * tb["multiple_deliveries"] / tb["singletons"]).round(3) # Remove outliers flag = (tb["country"] == "England and Wales") & (tb["year"] == 1938) - assert (tb.loc[flag, "children_delivery_ratio"] >= 4000).all(), "Unexpected outlier for England and Wales in 1938" - tb.loc[flag, ["multiple_children", "children_delivery_ratio"]] = pd.NA + assert ( + tb.loc[flag, "children_multiple_delivery_ratio"] >= 4000 + ).all(), "Unexpected outlier for England and Wales in 1938" + tb.loc[flag, ["multiple_children", "children_multiple_delivery_ratio", "children_delivery_ratio"]] = pd.NA flag = (tb["country"] == "England and Wales") & (tb["year"] == 1939) - assert (tb.loc[flag, "children_delivery_ratio"] <= 1500).all(), "Unexpected outlier for England and Wales in 1938" - tb.loc[flag, ["multiple_children", "children_delivery_ratio"]] = pd.NA + assert ( + tb.loc[flag, "children_multiple_delivery_ratio"] <= 1500 + ).all(), "Unexpected 
outlier for England and Wales in 1938" + tb.loc[flag, ["multiple_children", "children_multiple_delivery_ratio", "children_delivery_ratio"]] = pd.NA # Keep relevant columns tb = tb[ @@ -130,6 +137,7 @@ def run(dest_dir: str) -> None: "multiple_rate", # Ratios "children_delivery_ratio", + "children_multiple_delivery_ratio", "multiple_to_singleton_ratio", # Births "multiple_children",