From ee665c1fef81bd296122068cbb34c4b3e844214a Mon Sep 17 00:00:00 2001 From: owidbot Date: Thu, 28 Nov 2024 16:59:42 +0000 Subject: [PATCH] fasttrack: fasttrack/latest/cumulative_lives_saved_vaccination_shattock.csv --- ...lative_lives_saved_vaccination_shattock.py | 20 ++++++++++++++++++- ...e_lives_saved_vaccination_shattock.csv.dvc | 6 +++--- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/etl/steps/data/grapher/fasttrack/latest/cumulative_lives_saved_vaccination_shattock.py b/etl/steps/data/grapher/fasttrack/latest/cumulative_lives_saved_vaccination_shattock.py index d3b822d1f10..ed50f459b1a 100644 --- a/etl/steps/data/grapher/fasttrack/latest/cumulative_lives_saved_vaccination_shattock.py +++ b/etl/steps/data/grapher/fasttrack/latest/cumulative_lives_saved_vaccination_shattock.py @@ -1,3 +1,5 @@ +import pandas as pd + from etl.helpers import PathFinder, create_dataset, get_metadata_path from etl.snapshot import Snapshot @@ -11,8 +13,20 @@ def run(dest_dir: str) -> None: # load data tb = snap.read_csv() + # add dimensions with dim_ prefix + dims = [c for c in tb.columns if c.startswith("dim_")] + dims_without_prefix = [c[4:] for c in dims] + + if dims: + tb = tb.rename(columns={d: dw for d, dw in zip(dims, dims_without_prefix)}) + + if uses_dates(tb["year"]): + tb = tb.rename(columns={"year": "date"}).format(["country", "date"] + dims_without_prefix) + else: + tb = tb.format(["country", "year"] + dims_without_prefix) + # add table, update metadata from *.meta.yml and save - ds = create_dataset(dest_dir, tables=[tb.set_index(["country", "year"])], default_metadata=snap.metadata) + ds = create_dataset(dest_dir, tables=[tb], default_metadata=snap.metadata) # override metadata if necessary meta_path = get_metadata_path(dest_dir).with_suffix(".override.yml") @@ -20,3 +34,7 @@ def run(dest_dir: str) -> None: ds.update_metadata(meta_path) ds.save() + + +def uses_dates(s: pd.Series) -> bool: + return pd.to_datetime(s, errors="coerce", format="%Y-%m-%d").notnull().all() diff --git a/snapshots/fasttrack/latest/cumulative_lives_saved_vaccination_shattock.csv.dvc b/snapshots/fasttrack/latest/cumulative_lives_saved_vaccination_shattock.csv.dvc index 8c9c6de7258..6fee0afab30 100644 --- a/snapshots/fasttrack/latest/cumulative_lives_saved_vaccination_shattock.csv.dvc +++ b/snapshots/fasttrack/latest/cumulative_lives_saved_vaccination_shattock.csv.dvc @@ -1,14 +1,14 @@ meta: origin: producer: |- - Shattock et al. (2024). Contribution of vaccination to improved child survival: modelling 50 years of the Expanded Programme on Immunization. + Shattock et al. (2024). Contribution of vaccination to improved survival and health: modelling 50 years of the Expanded Programme on Immunization. title: cumulative_lives_saved_vaccination_shattock citation_full: |- - Shattock et al. (2024). Contribution of vaccination to improved child survival: modelling 50 years of the Expanded Programme on Immunization. The Lancet. + Shattock et al. (2024). Contribution of vaccination to improved survival and health: modelling 50 years of the Expanded Programme on Immunization. The Lancet. url_main: https://www.thelancet.com/journals/lancet/article/PIIS0140-6736(24)00850-X/fulltext url_download: |- https://docs.google.com/spreadsheets/d/e/2PACX-1vQHxzyufogWkSCuwEmHd6jF2c3JfqTdUY9ngwgpdwZfR5FA7JGQRzKNKhJ6hmAfGXReeAmgpHRMj8iM/pub?output=csv - date_accessed: '2024-05-03' + date_accessed: '2024-11-28' name: Cumulative lives saved from vaccinations since 1974 (Shattock et al. 2024) description: The cumulative number of lives saved thanks to vaccinations, from 1974 onwards. license: {}