Skip to content

Commit

Permalink
🐛 Remove Google Sheet from sources of fast-tracked datasets (#3618)
Browse files Browse the repository at this point in the history
* 🐛 Remove Google Sheet from sources of fast-tracked datasets
  • Loading branch information
Marigold authored Nov 28, 2024
1 parent 529fbd3 commit 6532def
Show file tree
Hide file tree
Showing 75 changed files with 4,014 additions and 3,804 deletions.
2 changes: 1 addition & 1 deletion apps/wizard/app_pages/fasttrack/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ def _parse_sources(sources_meta_df: pd.DataFrame) -> Optional[Source]:
source = sources[0]

if pd.isnull(source.get("date_accessed")):
source.pop("date_accessed")
source.pop("date_accessed", None)

if pd.isnull(source.get("publication_year")):
source.pop("publication_year")
Expand Down
2 changes: 1 addition & 1 deletion apps/wizard/app_pages/fasttrack/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ def _convert_percentages(data: pd.DataFrame, variables_meta_dict: Dict[str, Vari
"""Convert percentages to numbers."""
for col in data.columns:
if getattr(variables_meta_dict.get(col, {}), "unit", "") == "%":
data[col] = data[col].str.replace("%", "").astype(float)
data[col] = data[col].astype(str).str.replace("%", "").astype(float)
return data


Expand Down
Original file line number Diff line number Diff line change
@@ -1,40 +1,18 @@
dataset:
namespace: fasttrack
version: '2022-11-01'
short_name: lighting_efficiency_uk
title: Lighting efficiency and shares in the UK
description: ''
sources:
- name: 'Fouquet & Pearson (2006). Seven centuries of energy services: The price
and use of light in the United Kingdom (1300-2000).'
published_by: 'Fouquet, R., & Pearson, P. J. (2006). Seven centuries of energy
services: The price and use of light in the United Kingdom (1300-2000). The
energy journal, 27(1).'
url: https://www.jstor.org/stable/23296980
licenses:
- {}
tables:
lighting_efficiency_uk:
variables:
share_of_lighting_uk:
title: share_of_lighting_uk
short_unit: '%'
unit: '%'
short_unit: '%'
description: The share of lighting in the UK that was provided by each source.
sources:
- name: 'Fouquet & Pearson (2006). Seven centuries of energy services: The
price and use of light in the United Kingdom (1300-2000).'
published_by: 'Fouquet, R., & Pearson, P. J. (2006). Seven centuries of
energy services: The price and use of light in the United Kingdom (1300-2000).
The energy journal, 27(1).'
url: https://www.jstor.org/stable/23296980
efficiency_lighting_uk:
title: efficiency_lighting_uk
unit: lumen-hours per kWh
description: The efficiency of lighting measures the output of light per unit
of energy. It's measured in lumen-hours per kilowatt-hour (kWh).
sources:
- name: 'Fouquet & Pearson (2006). Seven centuries of energy services: The
price and use of light in the United Kingdom (1300-2000).'
published_by: 'Fouquet, R., & Pearson, P. J. (2006). Seven centuries of
energy services: The price and use of light in the United Kingdom (1300-2000).
The energy journal, 27(1).'
url: https://www.jstor.org/stable/23296980
description: |-
The efficiency of lighting measures the output of light per unit of energy. It's measured in lumen-hours per kilowatt-hour (kWh).
Original file line number Diff line number Diff line change
@@ -1,21 +1,40 @@
import pandas as pd
from owid import catalog

from etl.helpers import PathFinder
from etl.helpers import PathFinder, create_dataset, get_metadata_path
from etl.snapshot import Snapshot

N = PathFinder(__file__)
paths = PathFinder(__file__)


def run(dest_dir: str) -> None:
# load snapshot
data = pd.read_csv(Snapshot("fasttrack/2022-11-01/lighting_efficiency_uk.csv").path)
snap = Snapshot("fasttrack/2022-11-01/lighting_efficiency_uk.csv")

# create empty dataframe and table
ds = catalog.Dataset.create_empty(dest_dir)
tb = catalog.Table(data, short_name=N.short_name)
# load data
tb = snap.read_csv()

# treat dim_-prefixed columns as extra dimensions: strip the prefix and pass them to format() with country and year/date
dims = [c for c in tb.columns if c.startswith("dim_")]
dims_without_prefix = [c[4:] for c in dims]

if dims:
tb = tb.rename(columns={d: dw for d, dw in zip(dims, dims_without_prefix)})

if uses_dates(tb["year"]):
tb = tb.rename(columns={"year": "date"}).format(["country", "date"] + dims_without_prefix)
else:
tb = tb.format(["country", "year"] + dims_without_prefix)

# add table, update metadata from *.meta.yml and save
ds.add(tb)
ds.update_metadata(N.metadata_path)
ds = create_dataset(dest_dir, tables=[tb], default_metadata=snap.metadata)

# override metadata if necessary
meta_path = get_metadata_path(dest_dir).with_suffix(".override.yml")
if meta_path.exists():
ds.update_metadata(meta_path)

ds.save()


def uses_dates(s: pd.Series) -> bool:
    """Tell whether every value in *s* parses as a ``YYYY-MM-DD`` date.

    Values that cannot be parsed with that format are coerced to ``NaT``
    rather than raising, so a single unparseable or missing value makes the
    result false.
    """
    parsed = pd.to_datetime(s, format="%Y-%m-%d", errors="coerce")
    return parsed.notna().all()
Loading

0 comments on commit 6532def

Please sign in to comment.