Skip to content

Commit

Permalink
Add data from Jonsson 1998
Browse files Browse the repository at this point in the history
  • Loading branch information
pabloarosado committed May 27, 2024
1 parent 704afce commit ea87b56
Show file tree
Hide file tree
Showing 6 changed files with 124 additions and 1 deletion.
6 changes: 6 additions & 0 deletions dag/agriculture.yml
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,17 @@ steps:
- snapshot://agriculture/2024-05-23/floud_et_al_2011_daily_calories_europe.csv
- snapshot://agriculture/2024-05-23/floud_et_al_2011_daily_calories_us.csv
#
# Jonsson (1998) - Daily calories in Iceland.
#
data://meadow/agriculture/2024-05-23/jonsson_1998:
- snapshot://agriculture/2024-05-23/jonsson_1998.csv
#
# Agriculture - Long-run daily calorie supply per person.
#
data://garden/agriculture/2024-05-23/daily_calories_per_person:
- data://meadow/agriculture/2024-05-23/harris_et_al_2015
- data://meadow/agriculture/2024-05-23/floud_et_al_2011
- data://meadow/agriculture/2024-05-23/jonsson_1998
data://grapher/agriculture/2024-05-23/daily_calories_per_person:
- data://garden/agriculture/2024-05-23/daily_calories_per_person
######################################################################################################################
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ def run(dest_dir: str) -> None:
ds_floud = paths.load_dataset("floud_et_al_2011")
tb_floud = ds_floud["floud_et_al_2011"].reset_index()

# Load Jonsson (1998) dataset and read its main table.
ds_jonsson = paths.load_dataset("jonsson_1998")
tb_jonsson = ds_jonsson["jonsson_1998"].reset_index()

#
# Process data.
#
Expand Down
32 changes: 32 additions & 0 deletions etl/steps/data/meadow/agriculture/2024-05-23/jonsson_1998.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""Load a snapshot and create a meadow dataset."""


from etl.helpers import PathFinder, create_dataset

# Get paths and naming conventions for current step.
# The PathFinder is constructed from this file's own path; run() below uses the
# resulting object to load the snapshot.
paths = PathFinder(__file__)


def run(dest_dir: str) -> None:
    """Create the meadow dataset for Jonsson (1998) from its snapshot.

    Reads the "jonsson_1998.csv" snapshot, sets a constant "country" column to
    "Iceland", formats the table, and saves the result as a dataset.

    Args:
        dest_dir: Destination passed through to ``create_dataset``.
    """
    #
    # Load inputs.
    #
    # Fetch the snapshot and read its contents in a single step.
    tb_raw = paths.load_snapshot("jonsson_1998.csv").read()

    #
    # Process data.
    #
    # Every row gets the same country value.
    tb_raw["country"] = "Iceland"

    # Format the table by calling format() with its default arguments.
    tb_formatted = tb_raw.format()

    #
    # Save outputs.
    #
    # Build the meadow dataset from the single table and save it.
    create_dataset(dest_dir, tables=[tb_formatted], check_variables_metadata=True).save()
3 changes: 2 additions & 1 deletion snapshots/agriculture/2024-05-23/harris_et_al_2015.csv.dvc
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
meta:
origin:
# Data product / Snapshot
title: Daily calories in England and Wales according to various authors
title: How Many Calories? Food Availability in England and Wales in the Eighteenth and Nineteenth Centuries
title_snapshot: How Many Calories? Food Availability in England and Wales in the Eighteenth and Nineteenth Centuries - Daily calories in England and Wales
description: |-
This dataset contains the table in the appendix of the Harris et al. (2015) paper: "How Many Calories? Food Availability in England and Wales in the Eighteenth and Nineteenth Centuries".
That table contains a compilation of daily calorie figures (supply or consumption) in England and Wales, according to various studies.
Expand Down
30 changes: 30 additions & 0 deletions snapshots/agriculture/2024-05-23/jonsson_1998.csv.dvc
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Learn more at:
# http://docs.owid.io/projects/etl/architecture/metadata/reference/
meta:
origin:
# Data product / Snapshot
title: Changes in food consumption in Iceland, 1770-1940
title_snapshot: Changes in food consumption in Iceland, 1770-1940 - Daily calories in Iceland
description: |-
This dataset contains the daily energy data from Table 5 of the Jonsson (1998) paper: "Changes in food consumption in Iceland, 1770-1940".
date_published: "1998-01-01"

# Citation
producer: Jonsson
citation_full: |-
Jonsson, G.R. (1998), "Changes in food consumption in Iceland, 1770-1940". Scandinavian Economic History Review, 46, 24-41.
Data extracted from Table 5.
attribution_short: Jonsson (1998)

# Files
url_main: https://www.tandfonline.com/doi/abs/10.1080/03585522.1998.10414677
date_accessed: 2024-05-27

# License
license:
name: © Scandinavian Economic History Review 1998
url: https://www.tandfonline.com/doi/abs/10.1080/03585522.1998.10414677
outs:
- md5: 9637e39deb3ff3064e125c5141d273f1
size: 180
path: jonsson_1998.csv
50 changes: 50 additions & 0 deletions snapshots/agriculture/2024-05-23/jonsson_1998.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
"""Script to create a snapshot of dataset."""

from io import StringIO
from pathlib import Path

import click
import pandas as pd

from etl.snapshot import Snapshot

# Version for current snapshot dataset: the name of the directory that contains
# this script. main() interpolates it into the snapshot path.
SNAPSHOT_VERSION = Path(__file__).parent.name


# Command-line entry point: parses the hard-coded Jonsson (1998) values into a
# dataframe and creates the "jonsson_1998.csv" snapshot from it. The `upload` flag
# is forwarded unchanged to Snapshot.create_snapshot.
# (Kept as comments rather than a docstring, since click would show a docstring in --help.)
@click.command()
@click.option("--upload/--skip-upload", default=True, type=bool, help="Upload dataset to Snapshot")
def main(upload: bool) -> None:
    # Values were extracted using chatGPT 4o, then manually inspected and corrected.
    raw_csv = """
year,daily_calories
1770,3048
1784,2322
1795,2724
1819,2887
1840,3080
1849,3381
1855,2917
1863,2885
1870,2573
1880,3002
1890,3106
1900,3316
1910,3499
1920,3610
1930,4207
1938,4066
"""

    # Parse the inline CSV text into a dataframe.
    df_calories = pd.read_csv(StringIO(raw_csv))

    # Instantiate the snapshot for this version and create it from the dataframe.
    snapshot = Snapshot(f"agriculture/{SNAPSHOT_VERSION}/jonsson_1998.csv")
    snapshot.create_snapshot(upload=upload, data=df_calories)


# Invoke the click command only when this file is executed as a script.
if __name__ == "__main__":
    main()

0 comments on commit ea87b56

Please sign in to comment.