diff --git a/etl/steps/data/garden/demography/2024-11-26/multiple_births.meta.yml b/etl/steps/data/garden/demography/2024-11-26/multiple_births.meta.yml
index 0647cada0c6..207df2e06ee 100644
--- a/etl/steps/data/garden/demography/2024-11-26/multiple_births.meta.yml
+++ b/etl/steps/data/garden/demography/2024-11-26/multiple_births.meta.yml
@@ -89,6 +89,11 @@ tables:
         description_short: |-
           The rate of deliveries that are multiple deliveries, per 1,000 deliveries.
       children_delivery_ratio:
+        title: "Children per delivery"
+        unit: "children per 1,000 deliveries"
+        description_short: |-
+          The average number of children born per 1,000 deliveries. This is estimated by dividing the number of children born by the total number of deliveries, and multiplying by 1,000.
+      children_multiple_delivery_ratio:
         title: "Children per multiple delivery"
         unit: "children per 1,000 multiple deliveries"
         description_short: |-
diff --git a/etl/steps/data/garden/demography/2024-11-26/multiple_births.py b/etl/steps/data/garden/demography/2024-11-26/multiple_births.py
index 107fcecedeb..f482ce66f46 100644
--- a/etl/steps/data/garden/demography/2024-11-26/multiple_births.py
+++ b/etl/steps/data/garden/demography/2024-11-26/multiple_births.py
@@ -101,17 +101,24 @@ def run(dest_dir: str) -> None:
     # Estimate singleton_rate
     tb["singleton_rate"] = (1_000 * tb["singletons"] / tb["total_deliveries"]).round(2)
 
-    # Estimate children_per_delivery
-    tb["children_delivery_ratio"] = (1_000 * tb["multiple_children"] / tb["multiple_deliveries"]).round(3)
+    # Estimate ratios (each one is expressed per 1,000 units of its denominator)
+    tb["children_delivery_ratio"] = (
+        1_000 * (tb["multiple_children"] + tb["singletons"]) / tb["total_deliveries"]
+    ).round(3)
+    tb["children_multiple_delivery_ratio"] = (1_000 * tb["multiple_children"] / tb["multiple_deliveries"]).round(3)
     tb["multiple_to_singleton_ratio"] = (1_000 * tb["multiple_deliveries"] / tb["singletons"]).round(3)
 
     # Remove outliers
     flag = (tb["country"] == "England and Wales") & (tb["year"] == 1938)
-    assert (tb.loc[flag, "children_delivery_ratio"] >= 4000).all(), "Unexpected outlier for England and Wales in 1938"
-    tb.loc[flag, ["multiple_children", "children_delivery_ratio"]] = pd.NA
+    assert (
+        tb.loc[flag, "children_multiple_delivery_ratio"] >= 4000
+    ).all(), "Unexpected outlier for England and Wales in 1938"
+    tb.loc[flag, ["multiple_children", "children_delivery_ratio", "children_multiple_delivery_ratio"]] = pd.NA
     flag = (tb["country"] == "England and Wales") & (tb["year"] == 1939)
-    assert (tb.loc[flag, "children_delivery_ratio"] <= 1500).all(), "Unexpected outlier for England and Wales in 1938"
-    tb.loc[flag, ["multiple_children", "children_delivery_ratio"]] = pd.NA
+    assert (
+        tb.loc[flag, "children_multiple_delivery_ratio"] <= 1500
+    ).all(), "Unexpected outlier for England and Wales in 1939"
+    tb.loc[flag, ["multiple_children", "children_delivery_ratio", "children_multiple_delivery_ratio"]] = pd.NA
 
     # Keep relevant columns
     tb = tb[
@@ -130,6 +137,7 @@ def run(dest_dir: str) -> None:
             "multiple_rate",
             # Ratios
             "children_delivery_ratio",
+            "children_multiple_delivery_ratio",
             "multiple_to_singleton_ratio",
             # Births
             "multiple_children",