Skip to content

Commit

Permalink
🎉 Add owid.catalog.charts module
Browse files Browse the repository at this point in the history
Adds a draft API for fetching the data for a chart.

```
from owid.catalog.charts import Chart

df = Chart('life-expectancy').get_data  # `get_data` is a property, so no parentheses
```
  • Loading branch information
larsyencken committed May 7, 2024
1 parent 7a4538d commit 8c0834b
Show file tree
Hide file tree
Showing 4 changed files with 196 additions and 0 deletions.
5 changes: 5 additions & 0 deletions lib/catalog/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@ SRC = owid tests
# watch:
# poetry run watchmedo shell-command -c 'clear; make unittest' --recursive --drop .

# Install the project's packages with Poetry. The rule is re-run when
# poetry.toml, pyproject.toml or poetry.lock is newer than `.venv`; the final
# `touch` refreshes the timestamp of `.venv` after a successful install.
.venv: poetry.toml pyproject.toml poetry.lock
@echo '==> Installing packages'
poetry install
touch .venv

# Type-check the paths listed in $(SRC) with pyright, run through Poetry.
# Depends on `.venv`, so packages are installed first when needed.
check-typing: .venv
@echo '==> Checking types'
poetry run pyright $(SRC)
Expand Down
47 changes: 47 additions & 0 deletions lib/catalog/owid/catalog/charts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#
# owid.catalog.charts
#
#
# Access to data in OWID charts.
#

from dataclasses import dataclass
from typing import List, Optional

import pandas as pd

from .internal import _fetch_bundle, _GrapherBundle, _list_charts


@dataclass
class Chart:
    """
    A chart published on Our World in Data, for example:
    https://ourworldindata.org/grapher/life-expectancy

    Constructing a ``Chart`` only stores the slug. The chart's bundle is
    fetched with ``_fetch_bundle`` the first time ``config`` or ``get_data``
    is accessed, and is then cached on the instance for later accesses.
    """

    # Last path component of the chart's URL, e.g. "life-expectancy".
    slug: str

    # Cached bundle for this chart; stays None until it is first needed.
    _bundle: Optional["_GrapherBundle"] = None

    def _load(self) -> "_GrapherBundle":
        """Return the cached bundle, fetching it by slug on first use."""
        if self._bundle is None:
            self._bundle = _fetch_bundle(self.slug)
        return self._bundle

    @property
    def config(self) -> dict:
        """The configuration stored on the chart's bundle."""
        # The bundle declares its config as Optional[dict]; the value is
        # passed through unchanged, hence the ignore for the type checker.
        return self._load().config  # type: ignore

    @property
    def get_data(self) -> pd.DataFrame:
        """
        The chart's data as a data frame, as built by the bundle's ``to_frame()``.

        Note that this is a property: use ``chart.get_data`` without
        parentheses.
        """
        return self._load().to_frame()  # type: ignore


def list_charts() -> List[Chart]:
    """
    Return one ``Chart`` per slug reported by ``_list_charts()``.

    Each chart is constructed from its slug alone, in the order the slugs
    are returned.
    """
    return list(map(Chart, _list_charts()))
126 changes: 126 additions & 0 deletions lib/catalog/owid/catalog/internal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
#
# internal.py
#
# Internal APIs subject to change at any time.
#

import json
import re
from dataclasses import dataclass
from typing import Dict, List, Optional

import pandas as pd
import requests


@dataclass
class _Indicator:
    """
    A single indicator: its ``data`` payload together with its ``metadata``.
    """

    data: dict
    metadata: dict

    def to_dict(self):
        """Return both payloads in a plain dict keyed "data" and "metadata"."""
        payload = {"data": self.data, "metadata": self.metadata}
        return payload

    def to_frame(self):
        """
        Convert the indicator into a data frame.

        The frame is built from ``data``; its "entities" column is translated
        from entity ids to entity names using the entity records in
        ``metadata``, and its "values" column is renamed to the indicator's
        ``shortName`` (or ``_<id>`` when there is no short name). The
        "entities" and "years" columns come first, followed by the remaining
        columns in sorted order.
        """
        key_columns = ["entities", "years"]

        frame = pd.DataFrame.from_dict(self.data)

        # Map each entity id onto its name via the metadata's entity records.
        entity_records = self.metadata["dimensions"]["entities"]["values"]
        name_by_id = pd.DataFrame.from_records(entity_records).set_index("id").name.to_dict()
        frame["entities"] = frame.entities.apply(lambda entity_id: name_by_id[entity_id])

        # Give the generic "values" column an indicator-specific name.
        fallback_name = f'_{self.metadata["id"]}'
        value_name = self.metadata.get("shortName", fallback_name)
        frame = frame.rename(columns={"values": value_name})

        # Key columns first, everything else alphabetically after them.
        other_columns = sorted(col for col in frame.columns if col not in key_columns)
        return frame[key_columns + other_columns]


@dataclass
class _GrapherBundle:
    """
    A chart configuration together with the indicators it is built from.
    """

    # Chart configuration; may be None when the bundle has no chart config.
    config: Optional[dict]
    # Indicators of the bundle, keyed by an integer id.
    dimensions: Dict[int, "_Indicator"]
    # Origins entries kept alongside the indicators.
    origins: List[dict]

    def to_json(self):
        """Serialise the config, every indicator (via ``to_dict()``) and the origins to a JSON string."""
        return json.dumps(
            {
                "config": self.config,
                "dimensions": {k: i.to_dict() for k, i in self.dimensions.items()},
                "origins": self.origins,
            }
        )

    def size(self):
        """Return the length, in characters, of the bundle's JSON serialisation."""
        return len(self.to_json())

    @property
    def indicators(self) -> List["_Indicator"]:
        """The bundle's indicators as a list, in the order of ``dimensions``."""
        return list(self.dimensions.values())

    def to_frame(self):
        """
        Combine all indicators into one data frame.

        Each indicator's frame is outer-merged with the others on the
        "entities" and "years" columns. When the result has exactly one value
        column and the bundle has a config, that column is renamed to the
        config's slug with dashes replaced by underscores.

        Raises:
            ValueError: if the bundle has no indicators.
        """
        frames = [indicator.to_frame() for indicator in self.indicators]
        if not frames:
            raise ValueError("Cannot build a data frame from a bundle without indicators")

        df = frames[0]
        for to_merge in frames[1:]:
            df = pd.merge(df, to_merge, how="outer", on=["entities", "years"])

        # Only rename when a config is present: a bundle without one has no
        # slug to offer, so its single value column keeps the indicator's name.
        if len(df.columns) == 3 and self.config:
            (value_col,) = [c for c in df.columns if c not in ["entities", "years"]]
            slug = self.config["slug"].replace("-", "_")
            df = df.rename(columns={value_col: slug})

        return df

    def __repr__(self):
        # Deliberately abbreviated: dimensions and origins are elided.
        return f"GrapherBundle(config={self.config}, dimensions=..., origins=...)"


def _fetch_grapher_config(slug):
    """
    Download a chart's grapher page and return the JSON embedded in it.

    The page body is split on the ``//EMBEDDED_JSON`` marker and the piece
    following the first marker is parsed as JSON. An HTTP error status is
    raised by ``raise_for_status()``.
    """
    page_url = f"https://ourworldindata.org/grapher/{slug}"
    response = requests.get(page_url)
    response.raise_for_status()

    html = response.content.decode("utf-8")
    embedded = html.split("//EMBEDDED_JSON")[1]
    return json.loads(embedded)


def _fetch_dimension(id: int) -> _Indicator:
    """
    Download the data and metadata JSON documents of one indicator.

    Both documents are requested from the indicators API and wrapped in an
    ``_Indicator``.

    Raises:
        requests.HTTPError: if either request comes back with an error
            status, instead of trying to parse the error body as indicator
            JSON.
    """
    base_url = f"https://api.ourworldindata.org/v1/indicators/{id}"

    data_resp = requests.get(f"{base_url}.data.json")
    data_resp.raise_for_status()

    metadata_resp = requests.get(f"{base_url}.metadata.json")
    metadata_resp.raise_for_status()

    return _Indicator(data_resp.json(), metadata_resp.json())


def _fetch_bundle(slug: Optional[str] = None, indicator_id: Optional[int] = None) -> _GrapherBundle:
    """
    Fetch a bundle either for a chart (by slug) or for a single indicator.

    With a non-empty ``slug`` the chart config is fetched and every
    ``variableId`` listed in its dimensions is downloaded. Otherwise
    ``indicator_id`` must be given; only that indicator is downloaded and the
    bundle carries no config. Any non-empty "origins" entry is moved out of
    each indicator's metadata into the bundle's origins list.
    """
    ids_to_fetch: List[int]
    if not slug:
        assert indicator_id is not None
        print(f"Fetching indicator {indicator_id}")
        config = None
        ids_to_fetch = [indicator_id]
    else:
        config = _fetch_grapher_config(slug)
        ids_to_fetch = [dim["variableId"] for dim in config["dimensions"]]

    dimensions = {}
    for one_id in ids_to_fetch:
        dimensions[one_id] = _fetch_dimension(one_id)

    origins = []
    for indicator in dimensions.values():
        if not indicator.metadata.get("origins"):
            continue
        # pop() removes the entry from the indicator's metadata as it is collected.
        origins.append(indicator.metadata.pop("origins"))

    return _GrapherBundle(config, dimensions, origins)


def _list_charts() -> List[str]:
    """
    Return the slugs of the grapher links found on the charts page.

    Every double-quoted ``/grapher/...`` path in the page body is matched and
    reduced to its last path component; matches are returned in page order.
    """
    response = requests.get("https://ourworldindata.org/charts")
    html = response.content.decode("utf-8")

    hrefs = re.findall('"(/grapher/[^"]+)"', html)
    return [href.strip('"').rsplit("/", 1)[-1] for href in hrefs]
18 changes: 18 additions & 0 deletions lib/catalog/tests/test_charts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from owid.catalog import charts


def test_fetch_chart_data():
    """The life-expectancy chart yields a non-empty frame with the expected columns."""
    df = charts.Chart("life-expectancy").get_data

    assert df is not None
    assert len(df) > 0
    for column in ("entities", "years", "life_expectancy"):
        assert column in df.columns


def test_list_charts():
    """Listing charts returns ``Chart`` objects, including life-expectancy."""
    all_charts = charts.list_charts()

    assert len(all_charts) > 0
    for chart in all_charts:
        assert isinstance(chart, charts.Chart)

    slugs = [chart.slug for chart in all_charts]
    assert "life-expectancy" in slugs

0 comments on commit 8c0834b

Please sign in to comment.