Skip to content

Commit

Permalink
📊 births per delivery (#3686)
Browse files Browse the repository at this point in the history
* 📊 births per delivery

* add ratio children/delivery

* add rate
  • Loading branch information
lucasrodes authored Dec 3, 2024
1 parent a7ee8cf commit bbb6735
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,11 @@ tables:
description_short: |-
The rate of deliveries that are multiple deliveries, per 1,000 deliveries.
children_delivery_ratio:
title: "Children per delivery"
unit: "children per 1,000 deliveries"
description_short: |-
The average number of children born per 1,000 deliveries. This is estimated by dividing the number of children born by the total number of deliveries, and multiplying by 1,000.
children_multiple_delivery_ratio:
title: "Children per multiple delivery"
unit: "children per 1,000 multiple deliveries"
description_short: |-
Expand Down
20 changes: 14 additions & 6 deletions etl/steps/data/garden/demography/2024-11-26/multiple_births.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,17 +101,24 @@ def run(dest_dir: str) -> None:
# Estimate singleton_rate
tb["singleton_rate"] = (1_000 * tb["singletons"] / tb["total_deliveries"]).round(2)

# Estimate children_per_delivery
tb["children_delivery_ratio"] = (1_000 * tb["multiple_children"] / tb["multiple_deliveries"]).round(3)
# Estimate ratios
tb["children_delivery_ratio"] = (
1_000 * (tb["multiple_children"] + tb["singletons"]) / tb["total_deliveries"]
).round(3)
tb["children_multiple_delivery_ratio"] = (1_000 * tb["multiple_children"] / tb["multiple_deliveries"]).round(3)
tb["multiple_to_singleton_ratio"] = (1_000 * tb["multiple_deliveries"] / tb["singletons"]).round(3)

# Remove outliers
flag = (tb["country"] == "England and Wales") & (tb["year"] == 1938)
assert (tb.loc[flag, "children_delivery_ratio"] >= 4000).all(), "Unexpected outlier for England and Wales in 1938"
tb.loc[flag, ["multiple_children", "children_delivery_ratio"]] = pd.NA
assert (
tb.loc[flag, "children_multiple_delivery_ratio"] >= 4000
).all(), "Unexpected outlier for England and Wales in 1938"
tb.loc[flag, ["multiple_children", "children_multiple_delivery_ratio", "children_delivery_ratio"]] = pd.NA
flag = (tb["country"] == "England and Wales") & (tb["year"] == 1939)
assert (tb.loc[flag, "children_delivery_ratio"] <= 1500).all(), "Unexpected outlier for England and Wales in 1938"
tb.loc[flag, ["multiple_children", "children_delivery_ratio"]] = pd.NA
assert (
tb.loc[flag, "children_multiple_delivery_ratio"] <= 1500
).all(), "Unexpected outlier for England and Wales in 1938"
tb.loc[flag, ["multiple_children", "children_multiple_delivery_ratio", "children_delivery_ratio"]] = pd.NA

# Keep relevant columns
tb = tb[
Expand All @@ -130,6 +137,7 @@ def run(dest_dir: str) -> None:
"multiple_rate",
# Ratios
"children_delivery_ratio",
"children_multiple_delivery_ratio",
"multiple_to_singleton_ratio",
# Births
"multiple_children",
Expand Down

0 comments on commit bbb6735

Please sign in to comment.