-
-
Notifications
You must be signed in to change notification settings - Fork 23
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add data from Floud et al. (2011) (WIP)
- Loading branch information
1 parent
e750a5a
commit 931ce73
Showing
7 changed files
with
199 additions
and
10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
45 changes: 45 additions & 0 deletions
45
etl/steps/data/meadow/agriculture/2024-05-23/floud_et_al_2011.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
"""Load a snapshot and create a meadow dataset.""" | ||
|
||
import owid.catalog.processing as pr | ||
|
||
from etl.helpers import PathFinder, create_dataset | ||
|
||
# Get paths and naming conventions for current step. | ||
paths = PathFinder(__file__) | ||
|
||
|
||
def run(dest_dir: str) -> None:
    """Build the meadow dataset of daily caloric intake from Floud et al. (2011).

    Loads the Western Europe and United States snapshots, reshapes both into a long format with
    "country", "year" and "daily_calories" columns, combines them, and saves the result as a
    meadow dataset.

    Args:
        dest_dir: Destination passed through to ``create_dataset``.
    """
    #
    # Load inputs.
    #
    # Retrieve snapshots.
    snap_europe = paths.load_snapshot("floud_et_al_2011_daily_calories_europe.csv")
    snap_us = paths.load_snapshot("floud_et_al_2011_daily_calories_us.csv")

    # Load data from snapshots.
    tb_europe = snap_europe.read()
    tb_us = snap_us.read()

    #
    # Process data.
    #
    # Transform Europe data to have a year column (the original table has one column per year).
    tb_europe = tb_europe.melt(id_vars=["country"], var_name="year", value_name="daily_calories")

    # The melted years are taken from the column headers (presumably read as strings), whereas the
    # US years come from a data column. Cast them explicitly so that the combined "year" column
    # has a single integer type instead of a mix of strings and integers.
    tb_europe["year"] = tb_europe["year"].astype(int)

    # Prepare US data: harmonize column names and add the country column.
    tb_us = tb_us.rename(columns={"Year": "year", "Calories": "daily_calories"}, errors="raise").assign(
        **{"country": "United States"}
    )

    # Combine both tables.
    tb = pr.concat([tb_europe, tb_us], ignore_index=True)

    # Format table conveniently.
    tb = tb.format(["country", "year"], short_name=paths.short_name)

    #
    # Save outputs.
    #
    # Create a new meadow dataset.
    ds_meadow = create_dataset(dest_dir, tables=[tb], check_variables_metadata=True)
    ds_meadow.save()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
"""Script to create a snapshot of dataset.""" | ||
|
||
from pathlib import Path | ||
|
||
import click | ||
import pandas as pd | ||
|
||
from etl.snapshot import Snapshot | ||
|
||
# Version for current snapshot dataset. | ||
SNAPSHOT_VERSION = Path(__file__).parent.name | ||
|
||
|
||
def _inline_csv_to_frame(text: str) -> pd.DataFrame:
    """Parse a small comma-separated text literal into a dataframe of strings.

    The first non-blank line is used as the header. Blank lines and leading/trailing whitespace
    on each line are ignored, so the result does not depend on how the literal is laid out in the
    source. All values are kept as strings, and empty fields become empty strings.
    """
    rows = [line.strip().split(",") for line in text.splitlines() if line.strip()]
    header, *records = rows
    return pd.DataFrame(records, columns=header)


@click.command()
@click.option("--upload/--skip-upload", default=True, type=bool, help="Upload dataset to Snapshot")
def main(upload: bool) -> None:
    """Create snapshots of daily caloric intake in the US and Western Europe (Floud et al., 2011)."""
    # Initialize new snapshots for daily caloric intake in the US and in Western Europe.
    snap_us = Snapshot(f"agriculture/{SNAPSHOT_VERSION}/floud_et_al_2011_daily_calories_us.csv")
    snap_europe = Snapshot(f"agriculture/{SNAPSHOT_VERSION}/floud_et_al_2011_daily_calories_europe.csv")

    # Data from Table 6.6 on US daily caloric intake, extracted using ChatGPT-4o (and manually inspected).
    data_us = """
Year,Calories
1800,2952
1810,2935
1820,2904
1830,2888
1840,3013
1850,2585
1860,2826
1870,3029
1880,3237
1890,3134
1900,3212
1910,3068
1920,3259
1930,3400
1940,3300
1952,3200
1960,3100
1970,3200
1980,3200
1990,3500
2000,3900
2004,3900
"""

    # Data from Table 5.5 on Western Europe daily caloric intake, extracted using ChatGPT-4o (and manually
    # inspected). Empty fields correspond to years with no estimate for that country.
    data_europe = """
country,1800,1810,1820,1830,1840,1850,1860,1870,1880,1890,1900,1910,1920,1930,1940,1950,1960
Belgium,2840,,,,,2423,2426,2553,2663,2851,2987,3278,,2940,,,3040
England,2436,,,,,2512,,,2773,,,2977,,2810,3060,3120,3280
Finland,,,,,,,1900,,,,,3000,,2950,,,3110
France,1846,,1984,2118,2377,2840,2854,3085,3085,3220,3192,3323,3133,,,,3050
Germany,2210,,,,,,2120,,,,,,,,,,2960
Iceland,,,2887,,3080,3381,,2573,3002,3106,3316,3499,,,,,
Italy,,,,,,,,2647,2197,2119,,2617,,2627,,,2730
Netherlands,,,,,,,2227,,2493,,2721,,,,,,
Norway,,1800,,,2250,,3300,,,,,,,,,,2930
"""

    # Create dataframes with the extracted data.
    df_us = _inline_csv_to_frame(data_us)
    df_europe = _inline_csv_to_frame(data_europe)

    # Create snapshots.
    snap_us.create_snapshot(upload=upload, data=df_us)
    snap_europe.create_snapshot(upload=upload, data=df_europe)
||
|
||
if __name__ == "__main__": | ||
main() |
30 changes: 30 additions & 0 deletions
30
snapshots/agriculture/2024-05-23/floud_et_al_2011_daily_calories_europe.csv.dvc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# Learn more at: | ||
# http://docs.owid.io/projects/etl/architecture/metadata/reference/ | ||
meta: | ||
origin: | ||
# Data product / Snapshot | ||
title: The Changing Body | ||
title_snapshot: The Changing Body - Daily calories in Western Europe | ||
description: |- | ||
This dataset contains estimates of daily caloric intake in the United States (Table 6.6) and Western Europe (Table 5.5) from "The Changing Body", by Floud et al. (2011). | ||
date_published: "2011-03-31" | ||
|
||
# Citation | ||
producer: Floud et al. | ||
citation_full: |- | ||
Floud, R., Fogel, R. W., Harris, B. and Hong, S. C. (2011), "The Changing Body," Cambridge Books, Cambridge University Press, number 9780521879750. | ||
Data extracted from Tables 5.5 and 6.6. | ||
attribution_short: Floud et al. (2011) | ||
|
||
# Files | ||
url_main: https://www.cambridge.org/core/books/changing-body/DE3BB0E3577205AC26823CF2120D8B7E | ||
date_accessed: 2024-05-27 | ||
|
||
# License | ||
license: | ||
name: © Cambridge University Press 2011 | ||
url: https://www.cambridge.org/core/books/changing-body/DE3BB0E3577205AC26823CF2120D8B7E | ||
outs: | ||
- md5: 4f31506ded236dc72a590695f8868a1c | ||
size: 554 | ||
path: floud_et_al_2011_daily_calories_europe.csv |
30 changes: 30 additions & 0 deletions
30
snapshots/agriculture/2024-05-23/floud_et_al_2011_daily_calories_us.csv.dvc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# Learn more at: | ||
# http://docs.owid.io/projects/etl/architecture/metadata/reference/ | ||
meta: | ||
origin: | ||
# Data product / Snapshot | ||
title: The Changing Body | ||
title_snapshot: The Changing Body - Daily calories in the United States | ||
description: |- | ||
This dataset contains estimates of daily caloric intake in the United States (Table 6.6) and Western Europe (Table 5.5) from "The Changing Body", by Floud et al. (2011). | ||
date_published: "2011-03-31" | ||
|
||
# Citation | ||
producer: Floud et al. | ||
citation_full: |- | ||
Floud, R., Fogel, R. W., Harris, B. and Hong, S. C. (2011), "The Changing Body," Cambridge Books, Cambridge University Press, number 9780521879750. | ||
Data extracted from Tables 5.5 and 6.6. | ||
attribution_short: Floud et al. (2011) | ||
|
||
# Files | ||
url_main: https://www.cambridge.org/core/books/changing-body/DE3BB0E3577205AC26823CF2120D8B7E | ||
date_accessed: 2024-05-27 | ||
|
||
# License | ||
license: | ||
name: © Cambridge University Press 2011 | ||
url: https://www.cambridge.org/core/books/changing-body/DE3BB0E3577205AC26823CF2120D8B7E | ||
outs: | ||
- md5: 4316767b9de23caf9710fe44caff5ec9 | ||
size: 234 | ||
path: floud_et_al_2011_daily_calories_us.csv |