Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

📊 marriages&divorces: add ONS data on people ever married by age #3869

Merged
merged 15 commits into from
Jan 24, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions dag/families.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,15 @@ steps:
- data://garden/ons/2025-01-12/divorces
data://grapher/ons/2025-01-12/divorces_by_year:
- data://garden/ons/2025-01-12/divorces
#
# ONS Marriages in England and Wales
#
data://meadow/ons/2025-01-21/marriages:
- snapshot://ons/2025-01-21/marriages.xlsx
data://garden/ons/2025-01-21/marriages:
- data://meadow/ons/2025-01-21/marriages
data://grapher/ons/2025-01-21/marriages:
- data://garden/ons/2025-01-21/marriages

#
# UN Women of reproductive age who are married or in a union
Expand Down
35 changes: 35 additions & 0 deletions etl/steps/data/garden/ons/2025-01-21/marriages.meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# NOTE: To learn more about the fields, hover over their names.
definitions:
common:
presentation:
topic_tags:
- Marriages & Divorces


# Learn more about the available fields:
# http://docs.owid.io/projects/etl/architecture/metadata/reference/
dataset:
update_period_days: 365


tables:
marriages:
variables:
cumulative_percentage_per_100:
title: Proportions of men or women who had ever married by a certain age for << birth_cohort >> birth cohort
unit: "%"
short_unit: "%"
description_short: |-
The percentage of men or women born in a specific year who have ever married by a certain age.
description_key:
- Ages are presented as "exact years", meaning "by age 30" includes marriages up to the day before the 30th birthday.
- Since 2014, data includes marriages with opposite- and same-sex partners. Civil partnerships converted into marriages are also counted as "ever married," assuming a prior status of "never married or civil partnered" when prior marital status is unknown.
- The table includes final figures for 2022. For instance, men born in 2002 were tracked for marriages up to their 21st birthday in 2022.
- Marital status estimates are derived from the Labour Force Survey (LFS).
processing_level: minor
presentation:
title_public: Share of men or women who had ever married by a certain age, << birth_cohort >> birth cohort
display:
numDecimalPlaces: 1
name: << birth_cohort >>

32 changes: 32 additions & 0 deletions etl/steps/data/garden/ons/2025-01-21/marriages.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""Load a meadow dataset and create a garden dataset."""

from etl.helpers import PathFinder, create_dataset

# Get paths and naming conventions for current step (resolved from this file's own path).
paths = PathFinder(__file__)


def run(dest_dir: str) -> None:
    """Create the garden `marriages` dataset from the meadow `marriages` dataset.

    The meadow column `cumulative_percentage_per_1000` is divided by 10 and stored as
    `cumulative_percentage_per_100`; the original column is then dropped.

    Args:
        dest_dir: Destination passed through to `create_dataset`.
    """
    index_columns = ["year", "age", "gender"]

    #
    # Load inputs.
    #
    meadow_dataset = paths.load_dataset("marriages")
    tb_marriages = meadow_dataset.read("marriages")

    #
    # Process data.
    #
    # Rescale the per-1,000 figures to per-100 and keep only the rescaled column.
    per_100 = tb_marriages["cumulative_percentage_per_1000"] / 10
    tb_marriages["cumulative_percentage_per_100"] = per_100
    tb_marriages = tb_marriages.drop(columns=["cumulative_percentage_per_1000"])

    tb_marriages = tb_marriages.format(index_columns)

    #
    # Save outputs.
    #
    # The meadow dataset's metadata is used as the default metadata of the garden dataset.
    garden_dataset = create_dataset(
        dest_dir,
        tables=[tb_marriages],
        check_variables_metadata=True,
        default_metadata=meadow_dataset.metadata,
    )
    garden_dataset.save()
35 changes: 35 additions & 0 deletions etl/steps/data/grapher/ons/2025-01-21/marriages.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
"""Load a garden dataset and create a grapher dataset."""

from etl.helpers import PathFinder, create_dataset

# Get paths and naming conventions for current step (resolved from this file's own path).
paths = PathFinder(__file__)


def run(dest_dir: str) -> None:
    """Create the grapher `marriages` dataset from the garden `marriages` dataset.

    Only birth years divisible by 10 are kept, and the columns are renamed so that
    birth year becomes `birth_cohort`, age becomes `year`, and gender becomes `country`.

    Args:
        dest_dir: Destination passed through to `create_dataset`.
    """
    #
    # Load inputs.
    #
    garden_dataset = paths.load_dataset("marriages")
    tb_cohorts = garden_dataset.read("marriages", reset_index=True)

    #
    # Process data.
    #
    # Keep one birth cohort per decade (1900, 1910, 1920, ...).
    is_decade_cohort = tb_cohorts["year"] % 10 == 0
    tb_cohorts = tb_cohorts[is_decade_cohort]

    # Age takes the place of "year" and gender the place of "country".
    grapher_names = {"year": "birth_cohort", "age": "year", "gender": "country"}
    tb_cohorts = tb_cohorts.rename(columns=grapher_names)

    tb_cohorts = tb_cohorts.format(["country", "birth_cohort", "year"])

    #
    # Save outputs.
    #
    # The garden dataset's metadata is used as the default metadata of the grapher dataset.
    grapher_dataset = create_dataset(
        dest_dir,
        tables=[tb_cohorts],
        check_variables_metadata=True,
        default_metadata=garden_dataset.metadata,
    )
    grapher_dataset.save()
63 changes: 63 additions & 0 deletions etl/steps/data/meadow/ons/2025-01-21/marriages.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
"""Load a snapshot and create a meadow dataset."""

import owid.catalog.processing as pr

from etl.helpers import PathFinder, create_dataset

# Get paths and naming conventions for current step (resolved from this file's own path).
paths = PathFinder(__file__)


def _tidy_cohort_sheet(tb, gender: str):
    """Turn one raw workbook sheet into a long table of birth year, age and value.

    Args:
        tb: Table read from one sheet of the snapshot. Its first column must contain a
            cell with the text "Year of birth", which marks the header row.
            NOTE(review): the header lookup mixes index labels with positional access,
            so this assumes the default 0..n-1 index of a freshly read sheet.
        gender: Label stored in the `gender` column of every row of the result.

    Returns:
        A table with columns `year` (year of birth), `age`,
        `cumulative_percentage_per_1000` and `gender`.

    Raises:
        ValueError: If no row of the first column contains "Year of birth".
    """
    # Find the row where the first column contains "Year of birth".
    is_header = tb.iloc[:, 0].str.contains("Year of birth", na=False)
    header_rows = tb[is_header].index
    if len(header_rows) == 0:
        # Fail with context instead of an opaque IndexError if the sheet layout changes.
        raise ValueError(f"Could not find a 'Year of birth' header row in the sheet for {gender}.")
    header_row = header_rows[0]

    # Use that row as the column names and drop it together with everything above it.
    tb.columns = tb.iloc[header_row]
    tb = tb.drop(index=range(header_row + 1)).reset_index(drop=True)

    # One row per (year of birth, age) pair instead of one column per age.
    tb = tb.melt(id_vars=["Year of birth"], var_name="age", value_name="cumulative_percentage_per_1000")

    # Keep only the number found in each age column label.
    tb["age"] = tb["age"].str.extract(r"(\d+)").astype(int)

    tb["gender"] = gender

    return tb.rename(columns={"Year of birth": "year"})


def run(dest_dir: str) -> None:
    """Create the meadow `marriages` dataset from the `marriages.xlsx` snapshot.

    Sheets "14a" (men) and "14b" (women) hold the proportions of men and women who had
    ever married by certain ages, for birth cohorts, England and Wales. Each sheet is
    reshaped to long format, labelled with its gender, and the two are concatenated.

    Args:
        dest_dir: Destination passed through to `create_dataset`.

    Raises:
        ValueError: If either sheet lacks a "Year of birth" header row.
    """
    #
    # Load inputs.
    #
    # Retrieve snapshot.
    snap = paths.load_snapshot("marriages.xlsx")

    # Load data from snapshot: men (14a) and women (14b).
    tb_men = snap.read(sheet_name="14a")
    tb_women = snap.read(sheet_name="14b")

    #
    # Process data.
    #
    tables = [_tidy_cohort_sheet(tb_sheet, gender) for tb_sheet, gender in ((tb_men, "Men"), (tb_women, "Women"))]
    tb = pr.concat(tables)

    # Ensure all columns are snake-case, set an appropriate index, and sort conveniently.
    tb = tb.format(["year", "age", "gender"])

    #
    # Save outputs.
    #
    # The snapshot's metadata is used as the default metadata of the meadow dataset.
    ds_meadow = create_dataset(
        dest_dir,
        tables=[tb],
        check_variables_metadata=True,
        default_metadata=snap.metadata,
    )

    # Save changes in the new meadow dataset.
    ds_meadow.save()
6 changes: 3 additions & 3 deletions snapshots/ons/2025-01-12/divorces.xlsx.dvc
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ meta:
date_published: "2024-02-22"

# Citation
producer: Office for National Statistics
producer: UK Office for National Statistics
citation_full: |-
Office for National Statistics (ONS), released22 February 2024, ONS website, dataset, Divorces in England and Wales, UK. Retrieved 12 January 2025.
attribution_short: ONS
Office for National Statistics (ONS), released 22 February 2024, ONS website, dataset, Divorces in England and Wales, UK. Retrieved 12 January 2025.
attribution_short: UK ONS

# Files
url_main: https://www.ons.gov.uk/peoplepopulationandcommunity/birthsdeathsandmarriages/divorce/datasets/divorcesinenglandandwales
Expand Down
24 changes: 24 additions & 0 deletions snapshots/ons/2025-01-21/marriages.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
"""Script to create a snapshot of dataset."""

from pathlib import Path

import click

from etl.snapshot import Snapshot

# Version for current snapshot dataset, taken from the name of the directory that contains this script.
SNAPSHOT_VERSION = Path(__file__).parent.name


@click.command()
@click.option("--upload/--skip-upload", default=True, type=bool, help="Upload dataset to Snapshot")
def main(upload: bool) -> None:
    # NOTE: documented with comments rather than a docstring so the CLI help text is unchanged.
    # `upload` comes from the --upload/--skip-upload flag and is forwarded to `create_snapshot`.

    # Create a new snapshot for this version's marriages workbook.
    snapshot_uri = f"ons/{SNAPSHOT_VERSION}/marriages.xlsx"
    snapshot = Snapshot(snapshot_uri)

    # Download data from source, add file to DVC and upload to S3.
    snapshot.create_snapshot(upload=upload)


# Run the click command only when this file is executed as a script.
if __name__ == "__main__":
main()
30 changes: 30 additions & 0 deletions snapshots/ons/2025-01-21/marriages.xlsx.dvc
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Learn more at:
# http://docs.owid.io/projects/etl/architecture/metadata/reference/
meta:
origin:
# Data product / Snapshot
title: Marriages in England and Wales
description: |-
Number of marriages that took place in England and Wales by age, sex, previous partnership status and civil or religious ceremony.

date_published: "2024-06-20"

# Citation
producer: UK Office for National Statistics
citation_full: |-
Office for National Statistics (ONS), released 20 June 2024, ONS website, dataset, Marriages in England and Wales, UK. Retrieved 21 January 2025.
attribution_short: UK ONS

# Files
url_main: https://www.ons.gov.uk/peoplepopulationandcommunity/birthsdeathsandmarriages/marriagecohabitationandcivilpartnerships/datasets/marriagesinenglandandwales2013
url_download: https://www.ons.gov.uk/file?uri=/peoplepopulationandcommunity/birthsdeathsandmarriages/marriagecohabitationandcivilpartnerships/datasets/marriagesinenglandandwales2013/2021and2022/marriagesworkbook20212022final.xlsx
date_accessed: 2025-01-21

# License
license:
name: Open Government Licence v3.0
url: https://www.ons.gov.uk/peoplepopulationandcommunity/birthsdeathsandmarriages/marriagecohabitationandcivilpartnerships/datasets/marriagesinenglandandwales2013
outs:
- md5: 575dae27814b9734c7e7e542ffd67a92
size: 698419
path: marriages.xlsx
Loading