owid · veronikasamborska1994 · Jan 24, 2025 · Jan 21, 2025 · Jan 21, 2025 · Jan 21, 2025
diff --git a/dag/families.yml b/dag/families.yml
@@ -20,6 +20,15 @@ steps:
     - data://garden/ons/2025-01-12/divorces
   data://grapher/ons/2025-01-12/divorces_by_year:
     - data://garden/ons/2025-01-12/divorces
+  #
+  # ONS Marriages in England and Wales
+  #
+  data://meadow/ons/2025-01-21/marriages:
+    - snapshot://ons/2025-01-21/marriages.xlsx
+  data://garden/ons/2025-01-21/marriages:
+    - data://meadow/ons/2025-01-21/marriages
+  data://grapher/ons/2025-01-21/marriages:
+    - data://garden/ons/2025-01-21/marriages
 
   #
   # UN Women of reproductive age who are married or in a union

diff --git a/etl/steps/data/garden/ons/2025-01-21/marriages.meta.yml b/etl/steps/data/garden/ons/2025-01-21/marriages.meta.yml
@@ -0,0 +1,35 @@
+# NOTE: To learn more about the fields, hover over their names.
+definitions:
+  common:
+    presentation:
+      topic_tags:
+        - Marriages & Divorces
+
+
+# Learn more about the available fields:
+# http://docs.owid.io/projects/etl/architecture/metadata/reference/
+dataset:
+  update_period_days: 365
+
+
+tables:
+  marriages:
+    variables:
+      cumulative_percentage_per_100:
+        title: Proportions of men or women who had ever married by a certain age for << birth_cohort >> birth cohort
+        unit: "%"
+        short_unit: "%"
+        description_short: |-
+          The percentage of men or women born in a specific year who have ever married by a certain age.
+        description_key:
+          - Ages are presented as "exact years", meaning "by age 30" includes marriages up to the day before the 30th birthday.
+          - Since 2014, data includes marriages with opposite- and same-sex partners. Civil partnerships converted into marriages are also counted as "ever married," assuming a prior status of "never married or civil partnered" when prior marital status is unknown.
+          - The table includes final figures for 2022. For instance, men born in 2002 were tracked for marriages up to their 21st birthday in 2022.
+          - Marital status estimates are derived from the Labour Force Survey (LFS).
+        processing_level: minor
+        presentation:
+          title_public: Share of men or women who had ever been married by a certain age for << birth_cohort >> birth cohort
+        display:
+          numDecimalPlaces: 1
+          name: << birth_cohort >>
+
diff --git a/etl/steps/data/garden/ons/2025-01-21/marriages.py b/etl/steps/data/garden/ons/2025-01-21/marriages.py
@@ -0,0 +1,32 @@
+"""Load a meadow dataset and create a garden dataset."""
+
+from etl.helpers import PathFinder, create_dataset
+
+# Get paths and naming conventions for current step.
+paths = PathFinder(__file__)
+
+
+def run(dest_dir: str) -> None:
+    #
+    # Load inputs.
+    #
+    # Load meadow dataset.
+    ds_meadow = paths.load_dataset("marriages")
+
+    # Read table from meadow dataset.
+    tb = ds_meadow.read("marriages")
+    tb["cumulative_percentage_per_100"] = tb["cumulative_percentage_per_1000"] / 10
+    tb = tb.drop(columns=["cumulative_percentage_per_1000"])
+
+    tb = tb.format(["year", "age", "gender"])
+
+    #
+    # Save outputs.
+    #
+    # Create a new garden dataset with the same metadata as the meadow dataset.
+    ds_garden = create_dataset(
+        dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=ds_meadow.metadata
+    )
+
+    # Save changes in the new garden dataset.
+    ds_garden.save()
diff --git a/etl/steps/data/grapher/ons/2025-01-21/marriages.py b/etl/steps/data/grapher/ons/2025-01-21/marriages.py
@@ -0,0 +1,35 @@
+"""Load a garden dataset and create a grapher dataset."""
+
+from etl.helpers import PathFinder, create_dataset
+
+# Get paths and naming conventions for current step.
+paths = PathFinder(__file__)
+
+
+def run(dest_dir: str) -> None:
+    #
+    # Load inputs.
+    #
+    # Load garden dataset.
+    ds_garden = paths.load_dataset("marriages")
+
+    # Read table from garden dataset.
+    tb = ds_garden.read("marriages", reset_index=True)
+
+    # Filter to keep only years 1900, 1910, 1920, etc.
+    tb = tb[tb["year"] % 10 == 0]
+
+    tb = tb.rename(columns={"year": "birth_cohort", "age": "year", "gender": "country"})
+
+    tb = tb.format(["country", "birth_cohort", "year"])
+
+    #
+    # Save outputs.
+    #
+    # Create a new grapher dataset with the same metadata as the garden dataset.
+    ds_grapher = create_dataset(
+        dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=ds_garden.metadata
+    )
+
+    # Save changes in the new grapher dataset.
+    ds_grapher.save()
diff --git a/etl/steps/data/meadow/ons/2025-01-21/marriages.py b/etl/steps/data/meadow/ons/2025-01-21/marriages.py
@@ -0,0 +1,63 @@
+"""Load a snapshot and create a meadow dataset."""
+
+import owid.catalog.processing as pr
+
+from etl.helpers import PathFinder, create_dataset
+
+# Get paths and naming conventions for current step.
+paths = PathFinder(__file__)
+
+
+def run(dest_dir: str) -> None:
+    #
+    # Load inputs.
+    #
+    # Retrieve snapshot.
+    snap = paths.load_snapshot("marriages.xlsx")
+
+    # Load data from snapshot.
+    # Load sheets on the proportions of men (14a) and women (14b) who had ever married by certain ages, for birth cohorts, England and Wales.
+    tb_men = snap.read(sheet_name="14a")
+    tb_women = snap.read(sheet_name="14b")
+
+    #
+    # Process data.
+    #
+    tables = []
+    for tb, gender in zip([tb_men, tb_women], ["Men", "Women"]):
+        # Find the row where the first column contains "Year of birth"
+        header_row = tb[tb.iloc[:, 0].str.contains("Year of birth", na=False)].index[0]
+
+        # Set the header row dynamically and drop rows before the header row
+        tb.columns = tb.iloc[header_row]
+        tb = tb.drop(index=range(header_row + 1)).reset_index(drop=True)
+
+        # Melt the Table to create a 'year_of_birth' column
+        tb = tb.melt(id_vars=["Year of birth"], var_name="age", value_name="cumulative_percentage_per_1000")
+
+        # Keep only numbers in the age column
+        tb["age"] = tb["age"].str.extract(r"(\d+)").astype(int)
+
+        # Add gender column
+        tb["gender"] = gender
+
+        tb = tb.rename(columns={"Year of birth": "year"})
+        tables.append(tb)
+    tb = pr.concat(tables)
+
+    # Ensure all columns are snake-case, set an appropriate index, and sort conveniently.
+    tb = tb.format(["year", "age", "gender"])
+
+    #
+    # Save outputs.
+    #
+    # Create a new meadow dataset with the same metadata as the snapshot.
+    ds_meadow = create_dataset(
+        dest_dir,
+        tables=[tb],
+        check_variables_metadata=True,
+        default_metadata=snap.metadata,
+    )
+
+    # Save changes in the new meadow dataset.
+    ds_meadow.save()
diff --git a/snapshots/ons/2025-01-12/divorces.xlsx.dvc b/snapshots/ons/2025-01-12/divorces.xlsx.dvc
@@ -10,10 +10,10 @@ meta:
     date_published: "2024-02-22"
 
     # Citation
-    producer: Office for National Statistics
+    producer: UK Office for National Statistics
     citation_full: |-
-      Office for National Statistics (ONS), released22 February 2024, ONS website, dataset, Divorces in England and Wales, UK. Retrieved 12 January 2025.
-    attribution_short: ONS
+      Office for National Statistics (ONS), released 22 February 2024, ONS website, dataset, Divorces in England and Wales, UK. Retrieved 12 January 2025.
+    attribution_short: UK ONS
 
     # Files
     url_main: https://www.ons.gov.uk/peoplepopulationandcommunity/birthsdeathsandmarriages/divorce/datasets/divorcesinenglandandwales

diff --git a/snapshots/ons/2025-01-21/marriages.py b/snapshots/ons/2025-01-21/marriages.py
@@ -0,0 +1,24 @@
+"""Script to create a snapshot of dataset."""
+
+from pathlib import Path
+
+import click
+
+from etl.snapshot import Snapshot
+
+# Version for current snapshot dataset.
+SNAPSHOT_VERSION = Path(__file__).parent.name
+
+
+@click.command()
+@click.option("--upload/--skip-upload", default=True, type=bool, help="Upload dataset to Snapshot")
+def main(upload: bool) -> None:
+    # Create a new snapshot.
+    snap = Snapshot(f"ons/{SNAPSHOT_VERSION}/marriages.xlsx")
+
+    # Download data from source, add file to DVC and upload to S3.
+    snap.create_snapshot(upload=upload)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/snapshots/ons/2025-01-21/marriages.xlsx.dvc b/snapshots/ons/2025-01-21/marriages.xlsx.dvc
@@ -0,0 +1,30 @@
+# Learn more at:
+# http://docs.owid.io/projects/etl/architecture/metadata/reference/
+meta:
+  origin:
+    # Data product / Snapshot
+    title: Marriages in England and Wales
+    description: |-
+      Number of marriages that took place in England and Wales by age, sex, previous partnership status and civil or religious ceremony.
+
+    date_published: "2024-06-20"
+
+    # Citation
+    producer: UK Office for National Statistics
+    citation_full: |-
+      Office for National Statistics (ONS), released 20 June 2024, ONS website, dataset, Marriages in England and Wales, UK. Retrieved 21 January 2025.
+    attribution_short: UK ONS
+
+    # Files
+    url_main: https://www.ons.gov.uk/peoplepopulationandcommunity/birthsdeathsandmarriages/marriagecohabitationandcivilpartnerships/datasets/marriagesinenglandandwales2013
+    url_download: https://www.ons.gov.uk/file?uri=/peoplepopulationandcommunity/birthsdeathsandmarriages/marriagecohabitationandcivilpartnerships/datasets/marriagesinenglandandwales2013/2021and2022/marriagesworkbook20212022final.xlsx
+    date_accessed: 2025-01-21
+
+    # License
+    license:
+      name: Open Government Licence v3.0
+      url: https://www.ons.gov.uk/peoplepopulationandcommunity/birthsdeathsandmarriages/marriagecohabitationandcivilpartnerships/datasets/marriagesinenglandandwales2013
+outs:
+  - md5: 575dae27814b9734c7e7e542ffd67a92
+    size: 698419
+    path: marriages.xlsx