-
-
Notifications
You must be signed in to change notification settings - Fork 23
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Adds a draft API for fetching the data for a chart.

```
from owid.catalog.charts import Chart

df = Chart('life-expectancy').get_data
```
- Loading branch information
1 parent
7a4538d
commit 8c0834b
Showing
4 changed files
with
196 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
# | ||
# owid.catalog.charts | ||
# | ||
# | ||
# Access to data in OWID charts. | ||
# | ||
|
||
from dataclasses import dataclass | ||
from typing import List, Optional | ||
|
||
import pandas as pd | ||
|
||
from .internal import _fetch_bundle, _GrapherBundle, _list_charts | ||
|
||
|
||
@dataclass
class Chart:
    """
    A chart published on Our World in Data, for example:
    https://ourworldindata.org/grapher/life-expectancy

    The chart's bundle is fetched on first access to ``config`` or
    ``get_data`` and then cached on the instance, so later accesses reuse it.

    Note that ``get_data`` is a property: read it as ``chart.get_data``,
    without parentheses.
    """

    # identifier of the chart, e.g. "life-expectancy"
    slug: str

    # cache for the fetched bundle; stays None until the first access
    _bundle: Optional[_GrapherBundle] = None

    def _load_bundle(self) -> _GrapherBundle:
        """Return the cached bundle, fetching it by slug on first use."""
        if self._bundle is None:
            self._bundle = _fetch_bundle(self.slug)
        return self._bundle

    @property
    def config(self) -> Optional[dict]:
        """The config stored on the chart's fetched bundle."""
        return self._load_bundle().config

    @property
    def get_data(self) -> pd.DataFrame:
        """The chart's data as a data frame, built from the fetched bundle."""
        return self._load_bundle().to_frame()
|
||
|
||
def list_charts() -> List[Chart]:
    """
    Return one ``Chart`` per slug reported by ``_list_charts()``, in the
    same order.
    """
    charts: List[Chart] = []
    for slug in _list_charts():
        charts.append(Chart(slug))
    return charts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,126 @@ | ||
# | ||
# internal.py | ||
# | ||
# Internal APIs subject to change at any time. | ||
# | ||
|
||
import json | ||
import re | ||
from dataclasses import dataclass | ||
from typing import Dict, List, Optional | ||
|
||
import pandas as pd | ||
import requests | ||
|
||
|
||
@dataclass | ||
class _Indicator: | ||
data: dict | ||
metadata: dict | ||
|
||
def to_dict(self): | ||
return {"data": self.data, "metadata": self.metadata} | ||
|
||
def to_frame(self): | ||
# getting a data frame is easy | ||
df = pd.DataFrame.from_dict(self.data) | ||
|
||
# turning entity ids into entity names | ||
entities = pd.DataFrame.from_records(self.metadata["dimensions"]["entities"]["values"]) | ||
id_to_name = entities.set_index("id").name.to_dict() | ||
df["entities"] = df.entities.apply(id_to_name.__getitem__) | ||
|
||
# make the "values" column more interestingly named | ||
short_name = self.metadata.get("shortName", f'_{self.metadata["id"]}') | ||
df = df.rename(columns={"values": short_name}) | ||
|
||
# order the columns better | ||
cols = ["entities", "years"] + sorted([c for c in df.columns if c not in ["entities", "years"]]) | ||
df = df[cols] | ||
|
||
return df | ||
|
||
|
||
@dataclass
class _GrapherBundle:
    """
    A chart config together with the indicators it uses.

    ``config`` may be None, ``dimensions`` maps an indicator id to its
    ``_Indicator``, and ``origins`` holds origin records kept separately
    from the indicators.
    """

    config: Optional[dict]
    dimensions: Dict[int, _Indicator]
    origins: List[dict]

    def to_json(self) -> str:
        """Serialise the config, indicators and origins to a JSON string."""
        return json.dumps(
            {
                "config": self.config,
                "dimensions": {k: i.to_dict() for k, i in self.dimensions.items()},
                "origins": self.origins,
            }
        )

    def size(self) -> int:
        """Return the length of the JSON serialisation, in characters."""
        return len(self.to_json())

    @property
    def indicators(self) -> List[_Indicator]:
        """The indicators of the bundle, in the order of ``dimensions``."""
        return list(self.dimensions.values())

    def to_frame(self) -> pd.DataFrame:
        """
        Outer-merge the frames of all indicators on "entities" and "years".

        When the result has a single value column and the config provides a
        slug, that column is renamed to the slug with dashes replaced by
        underscores. Without a config (or slug) the indicator's own column
        name is kept.

        Raises:
            ValueError: if the bundle has no indicators.
        """
        df = None
        for indicator in self.indicators:
            frame = indicator.to_frame()
            if df is None:
                df = frame
            else:
                df = pd.merge(df, frame, how="outer", on=["entities", "years"])

        # raise explicitly instead of asserting: asserts vanish under -O
        if df is None:
            raise ValueError("cannot build a data frame from a bundle with no indicators")

        # a bundle may have no config at all, in which case there is no slug
        # to rename to
        slug = self.config.get("slug") if self.config else None
        if len(df.columns) == 3 and slug:
            # use the slug as the column name for values
            (value_col,) = [c for c in df.columns if c not in ("entities", "years")]
            df = df.rename(columns={value_col: slug.replace("-", "_")})

        return df

    def __repr__(self) -> str:
        # dimensions and origins are elided from the representation
        return f"GrapherBundle(config={self.config}, dimensions=..., origins=...)"
|
||
|
||
def _fetch_grapher_config(slug: str) -> dict:
    """
    Download the grapher page for ``slug`` and return the JSON config
    embedded in it.

    The config is the text that follows the first ``//EMBEDDED_JSON`` marker
    in the page, up to the next marker if there is one.

    Raises:
        requests.HTTPError: if the page request fails.
        ValueError: if the page has no ``//EMBEDDED_JSON`` marker, or the
            embedded text is not valid JSON.
    """
    resp = requests.get(f"https://ourworldindata.org/grapher/{slug}")
    resp.raise_for_status()

    parts = resp.content.decode("utf-8").split("//EMBEDDED_JSON")
    if len(parts) < 2:
        # without this check a missing marker surfaces as a bare IndexError
        raise ValueError(f"no embedded config found on the grapher page for {slug!r}")

    return json.loads(parts[1])
|
||
|
||
def _fetch_dimension(id: int) -> _Indicator:
    """
    Fetch the data and the metadata of the indicator with the given id.

    Raises:
        requests.HTTPError: if either request returns an error status.
    """
    base_url = f"https://api.ourworldindata.org/v1/indicators/{id}"

    # check the status before parsing, so that an HTTP error is reported as
    # such rather than as a JSON decoding failure
    data_resp = requests.get(f"{base_url}.data.json")
    data_resp.raise_for_status()

    metadata_resp = requests.get(f"{base_url}.metadata.json")
    metadata_resp.raise_for_status()

    return _Indicator(data_resp.json(), metadata_resp.json())
|
||
|
||
def _fetch_bundle(slug: Optional[str] = None, indicator_id: Optional[int] = None) -> _GrapherBundle:
    """
    Fetch a bundle either for a chart (by slug) or for a single indicator.

    With a non-empty ``slug`` the chart config is fetched and every
    indicator listed in its "dimensions" is downloaded. Otherwise the single
    indicator ``indicator_id`` is downloaded and the bundle has no config.

    Raises:
        ValueError: if neither a slug nor an indicator id is given.
    """
    indicator_ids: List[int]
    if slug:
        config = _fetch_grapher_config(slug)
        indicator_ids = [d["variableId"] for d in config["dimensions"]]
    elif indicator_id is not None:
        print(f"Fetching indicator {indicator_id}")
        config = None
        indicator_ids = [indicator_id]
    else:
        # raise explicitly instead of asserting: asserts vanish under -O
        raise ValueError("either slug or indicator_id must be provided")

    dimensions = {i: _fetch_dimension(i) for i in indicator_ids}

    # Move the origins out of each indicator's metadata into the bundle.
    # Each indicator's "origins" value is appended as-is.
    # NOTE(review): if that value is itself a list, this yields a list of
    # lists rather than the List[dict] declared on _GrapherBundle - confirm.
    origins = []
    for d in dimensions.values():
        if d.metadata.get("origins"):
            origins.append(d.metadata.pop("origins"))
    return _GrapherBundle(config, dimensions, origins)
|
||
|
||
def _list_charts() -> List[str]:
    """
    Return the slugs of all charts linked from the charts listing page.

    Slugs are returned in the order they first appear on the page; a slug
    that is linked more than once is returned only once.

    Raises:
        requests.HTTPError: if the page request fails.
    """
    resp = requests.get("https://ourworldindata.org/charts")
    # fail on an HTTP error instead of scraping an error page
    resp.raise_for_status()
    content = resp.content.decode("utf-8")

    # the capture group excludes the surrounding quotes
    links = re.findall('"(/grapher/[^"]+)"', content)
    slugs = [link.split("/")[-1] for link in links]

    # dict.fromkeys drops duplicates while keeping first-seen order
    return list(dict.fromkeys(slugs))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
from owid.catalog import charts | ||
|
||
|
||
def test_fetch_chart_data():
    """The life-expectancy chart yields a non-empty frame with the expected columns."""
    life_expectancy = charts.Chart("life-expectancy")

    # get_data is read as an attribute, not called
    frame = life_expectancy.get_data

    assert frame is not None
    assert len(frame) > 0
    for expected in ("entities", "years", "life_expectancy"):
        assert expected in frame.columns
|
||
|
||
def test_list_charts():
    """Listing charts returns Chart objects, including life-expectancy."""
    listed = charts.list_charts()
    assert len(listed) > 0

    slugs = []
    for chart in listed:
        assert isinstance(chart, charts.Chart)
        slugs.append(chart.slug)

    assert "life-expectancy" in slugs