Skip to content

Commit

Permalink
🔨 Improve metadata and follow license restrictions
Browse files Browse the repository at this point in the history
  • Loading branch information
larsyencken committed May 8, 2024
1 parent 8c0834b commit 54f23fc
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 16 deletions.
15 changes: 8 additions & 7 deletions lib/catalog/owid/catalog/charts.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

import pandas as pd

from .internal import _fetch_bundle, _GrapherBundle, _list_charts
from .internal import LicenseError, _fetch_bundle, _GrapherBundle, _list_charts # noqa


@dataclass
Expand All @@ -26,18 +26,19 @@ class Chart:
_bundle: Optional[_GrapherBundle] = None

@property
def config(self) -> str:
def bundle(self) -> _GrapherBundle:
# LARS: give a nice error if the chart does not exist
if self._bundle is None:
self._bundle = _fetch_bundle(self.slug)

return self._bundle.config # type: ignore
return self._bundle

@property
def get_data(self) -> pd.DataFrame:
if self._bundle is None:
self._bundle = _fetch_bundle(self.slug)
def config(self) -> dict:
return self.bundle.config # type: ignore

return self._bundle.to_frame() # type: ignore
def get_data(self) -> pd.DataFrame:
return self.bundle.to_frame()


def list_charts() -> List[Chart]:
Expand Down
44 changes: 36 additions & 8 deletions lib/catalog/owid/catalog/internal.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@
import requests


class LicenseError(Exception):
    """Signal that data may not be downloaded because of license restrictions.

    Raised when an indicator's metadata marks it as non-redistributable, so
    its data cannot be handed out through the API.
    """


@dataclass
class _Indicator:
data: dict
Expand All @@ -22,11 +26,18 @@ def to_dict(self):
return {"data": self.data, "metadata": self.metadata}

def to_frame(self):
if self.metadata.get("nonRedistributable"):
raise LicenseError(
"API download is disallowed for this indicator due to license restrictions from the data provider"
)

# getting a data frame is easy
df = pd.DataFrame.from_dict(self.data)

# turning entity ids into entity names
entities = pd.DataFrame.from_records(self.metadata["dimensions"]["entities"]["values"])
entities = pd.DataFrame.from_records(
self.metadata["dimensions"]["entities"]["values"]
)
id_to_name = entities.set_index("id").name.to_dict()
df["entities"] = df.entities.apply(id_to_name.__getitem__)

Expand All @@ -35,7 +46,9 @@ def to_frame(self):
df = df.rename(columns={"values": short_name})

# order the columns better
cols = ["entities", "years"] + sorted([c for c in df.columns if c not in ["entities", "years"]])
cols = ["entities", "years"] + sorted(
[c for c in df.columns if c not in ["entities", "years"]]
)
df = df[cols]

return df
Expand Down Expand Up @@ -74,12 +87,19 @@ def to_frame(self):

assert df is not None

# save some useful metadata
slug = self.config["slug"]
df.attrs["slug"] = slug
df.attrs["url"] = f"https://ourworldindata.org/grapher/{slug}"

if len(df.columns) == 3:
# use the slug as the column name for values
assert self.config
(value_col,) = [c for c in df.columns if c not in ["entities", "years"]]
slug = self.config["slug"].replace("-", "_")
df = df.rename(columns={value_col: slug})
short_name = slug.replace("-", "_")
df = df.rename(columns={value_col: short_name})

df.attrs["value_col"] = short_name

return df

Expand All @@ -94,12 +114,18 @@ def _fetch_grapher_config(slug):


def _fetch_dimension(id: int) -> _Indicator:
    """Download the data and metadata documents for a single indicator.

    Args:
        id: Numeric indicator id used to build the API URLs. (The name
            shadows the ``id`` builtin; it is kept unchanged so existing
            callers are unaffected.)

    Returns:
        An ``_Indicator`` built from the parsed ``.data.json`` and
        ``.metadata.json`` responses, in that order.
    """
    # Both documents live under the same per-indicator URL prefix; each one is
    # requested exactly once.
    base_url = f"https://api.ourworldindata.org/v1/indicators/{id}"
    data = requests.get(f"{base_url}.data.json").json()
    metadata = requests.get(f"{base_url}.metadata.json").json()
    return _Indicator(data, metadata)


def _fetch_bundle(slug: Optional[str] = None, indicator_id: Optional[int] = None) -> _GrapherBundle:
def _fetch_bundle(
slug: Optional[str] = None, indicator_id: Optional[int] = None
) -> _GrapherBundle:
indicator_ids: List[int]
if slug:
config = _fetch_grapher_config(slug)
Expand All @@ -110,7 +136,9 @@ def _fetch_bundle(slug: Optional[str] = None, indicator_id: Optional[int] = None
config = None
indicator_ids = [indicator_id]

dimensions = {indicator_id: _fetch_dimension(indicator_id) for indicator_id in indicator_ids}
dimensions = {
indicator_id: _fetch_dimension(indicator_id) for indicator_id in indicator_ids
}

origins = []
for d in dimensions.values():
Expand Down
11 changes: 10 additions & 1 deletion lib/catalog/tests/test_charts.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,25 @@
import pytest

from owid.catalog import charts
from owid.catalog.internal import LicenseError


def test_fetch_chart_data():
    """Fetching data for the "life-expectancy" chart yields a usable frame.

    Checks that the frame is non-empty and exposes the two dimension columns
    plus a value column named after the chart slug.
    """
    chart = charts.Chart("life-expectancy")

    # get_data is a plain method, so it has to be called; referencing it
    # without parentheses would only yield the bound method object.
    df = chart.get_data()

    assert df is not None
    assert len(df) > 0

    # dimension columns
    assert "entities" in df.columns
    assert "years" in df.columns

    # value column: the slug with dashes turned into underscores
    assert "life_expectancy" in df.columns


def test_fetch_non_redistributable_chart():
    """Requesting data for a non-redistributable chart must raise LicenseError."""
    restricted_chart = charts.Chart("eat-lancet-diet-comparison")

    # Only the data download itself is expected to fail, so the chart object
    # is built outside the raises-context.
    with pytest.raises(LicenseError):
        restricted_chart.get_data()


def test_list_charts():
cs = charts.list_charts()
assert len(cs) > 0
Expand Down

0 comments on commit 54f23fc

Please sign in to comment.