From ea87b56aa763ec86cf0c6532ba60ac5e64858a04 Mon Sep 17 00:00:00 2001 From: Pablo Rosado Date: Mon, 27 May 2024 13:21:22 +0200 Subject: [PATCH] Add data from Jonsson 1998 --- dag/agriculture.yml | 6 +++ .../2024-05-23/daily_calories_per_person.py | 4 ++ .../agriculture/2024-05-23/jonsson_1998.py | 32 ++++++++++++ .../2024-05-23/harris_et_al_2015.csv.dvc | 3 +- .../2024-05-23/jonsson_1998.csv.dvc | 30 +++++++++++ .../agriculture/2024-05-23/jonsson_1998.py | 50 +++++++++++++++++++ 6 files changed, 124 insertions(+), 1 deletion(-) create mode 100644 etl/steps/data/meadow/agriculture/2024-05-23/jonsson_1998.py create mode 100644 snapshots/agriculture/2024-05-23/jonsson_1998.csv.dvc create mode 100644 snapshots/agriculture/2024-05-23/jonsson_1998.py diff --git a/dag/agriculture.yml b/dag/agriculture.yml index 16e76b97374..24584e57983 100644 --- a/dag/agriculture.yml +++ b/dag/agriculture.yml @@ -85,11 +85,17 @@ steps: - snapshot://agriculture/2024-05-23/floud_et_al_2011_daily_calories_europe.csv - snapshot://agriculture/2024-05-23/floud_et_al_2011_daily_calories_us.csv # + # Jonsson (1998) - Daily calories in Iceland. + # + data://meadow/agriculture/2024-05-23/jonsson_1998: + - snapshot://agriculture/2024-05-23/jonsson_1998.csv + # # Agriculture - Long-run daily calorie supply per person. # data://garden/agriculture/2024-05-23/daily_calories_per_person: - data://meadow/agriculture/2024-05-23/harris_et_al_2015 - data://meadow/agriculture/2024-05-23/floud_et_al_2011 + - data://meadow/agriculture/2024-05-23/jonsson_1998 data://grapher/agriculture/2024-05-23/daily_calories_per_person: - data://garden/agriculture/2024-05-23/daily_calories_per_person ###################################################################################################################### diff --git a/etl/steps/data/garden/agriculture/2024-05-23/daily_calories_per_person.py b/etl/steps/data/garden/agriculture/2024-05-23/daily_calories_per_person.py index 3bf3a4201b1..6dca083cfa5 100644 --- a/etl/steps/data/garden/agriculture/2024-05-23/daily_calories_per_person.py +++ b/etl/steps/data/garden/agriculture/2024-05-23/daily_calories_per_person.py @@ -20,6 +20,10 @@ def run(dest_dir: str) -> None: ds_floud = paths.load_dataset("floud_et_al_2011") tb_floud = ds_floud["floud_et_al_2011"].reset_index() + # Load Jonsson (1998) dataset and read its main table. + ds_jonsson = paths.load_dataset("jonsson_1998") + tb_jonsson = ds_jonsson["jonsson_1998"].reset_index() + # # Process data. # diff --git a/etl/steps/data/meadow/agriculture/2024-05-23/jonsson_1998.py b/etl/steps/data/meadow/agriculture/2024-05-23/jonsson_1998.py new file mode 100644 index 00000000000..ae84f20378c --- /dev/null +++ b/etl/steps/data/meadow/agriculture/2024-05-23/jonsson_1998.py @@ -0,0 +1,32 @@ +"""Load a snapshot and create a meadow dataset.""" + + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Retrieve snapshot and read its data. + snap = paths.load_snapshot("jonsson_1998.csv") + tb = snap.read() + + # + # Process data. + # + # Add a country column. + tb["country"] = "Iceland" + + # Format table conveniently. + tb = tb.format() + + # + # Save outputs. + # + # Create a new meadow dataset. + ds_meadow = create_dataset(dest_dir, tables=[tb], check_variables_metadata=True) + ds_meadow.save() diff --git a/snapshots/agriculture/2024-05-23/harris_et_al_2015.csv.dvc b/snapshots/agriculture/2024-05-23/harris_et_al_2015.csv.dvc index 730901ccbcb..d96fa7c97db 100644 --- a/snapshots/agriculture/2024-05-23/harris_et_al_2015.csv.dvc +++ b/snapshots/agriculture/2024-05-23/harris_et_al_2015.csv.dvc @@ -3,7 +3,8 @@ meta: origin: # Data product / Snapshot - title: Daily calories in England and Wales according to various authors + title: How Many Calories? Food Availability in England and Wales in the Eighteenth and Nineteenth Centuries + title_snapshot: How Many Calories? Food Availability in England and Wales in the Eighteenth and Nineteenth Centuries - Daily calories in England and Wales description: |- This dataset contains the table in the appendix of Harris et al. (2015) paper: "How Many Calories? Food Availability in England and Wales in the Eighteenth and Nineteenth Centuries". That table contains a compilation of daily calorie (supply or consumption) in England and Wales, according to various different studies. diff --git a/snapshots/agriculture/2024-05-23/jonsson_1998.csv.dvc b/snapshots/agriculture/2024-05-23/jonsson_1998.csv.dvc new file mode 100644 index 00000000000..a7cc48a8338 --- /dev/null +++ b/snapshots/agriculture/2024-05-23/jonsson_1998.csv.dvc @@ -0,0 +1,30 @@ +# Learn more at: +# http://docs.owid.io/projects/etl/architecture/metadata/reference/ +meta: + origin: + # Data product / Snapshot + title: Changes in food consumption in Iceland, 1770-1940 + title_snapshot: Changes in food consumption in Iceland, 1770-1940 - Daily calories in Iceland + description: |- + This dataset contains daily energy from Table 5 of Jonsson (1998) paper: "Changes in food consumption in Iceland, 1770-1940". + date_published: "1998-01-01" + + # Citation + producer: Jonsson + citation_full: |- + Jonsson, G.R. (1998), "Changes in food consumption in Iceland, 1770-1940". Scandinavian Economic History Review, 46, 24-41. + Data extracted from Table 5. + attribution_short: Jonsson (1998) + + # Files + url_main: https://www.tandfonline.com/doi/abs/10.1080/03585522.1998.10414677 + date_accessed: 2024-05-27 + + # License + license: + name: © Scandinavian Economic History Review 1998 + url: https://www.tandfonline.com/doi/abs/10.1080/03585522.1998.10414677 +outs: + - md5: 9637e39deb3ff3064e125c5141d273f1 + size: 180 + path: jonsson_1998.csv diff --git a/snapshots/agriculture/2024-05-23/jonsson_1998.py b/snapshots/agriculture/2024-05-23/jonsson_1998.py new file mode 100644 index 00000000000..c819cb394d4 --- /dev/null +++ b/snapshots/agriculture/2024-05-23/jonsson_1998.py @@ -0,0 +1,50 @@ +"""Script to create a snapshot of dataset.""" + +from io import StringIO +from pathlib import Path + +import click +import pandas as pd + +from etl.snapshot import Snapshot + +# Version for current snapshot dataset. +SNAPSHOT_VERSION = Path(__file__).parent.name + + +@click.command() +@click.option("--upload/--skip-upload", default=True, type=bool, help="Upload dataset to Snapshot") +def main(upload: bool) -> None: + # Create a new snapshot. + snap = Snapshot(f"agriculture/{SNAPSHOT_VERSION}/jonsson_1998.csv") + + # Data extracted using chatGPT 4o (and manually inspected and corrected). + data = """ +year,daily_calories +1770,3048 +1784,2322 +1795,2724 +1819,2887 +1840,3080 +1849,3381 +1855,2917 +1863,2885 +1870,2573 +1880,3002 +1890,3106 +1900,3316 +1910,3499 +1920,3610 +1930,4207 +1938,4066 + """ + + # Create a dataframe with the extracted data. + df = pd.read_csv(StringIO(data)) + + # Create snapshot. + snap.create_snapshot(upload=upload, data=df) + + +if __name__ == "__main__": + main()