Skip to content

Commit

Permalink
✨ Jinja whitespaces and newlines
Browse files Browse the repository at this point in the history
  • Loading branch information
Marigold committed Nov 29, 2024
1 parent ee665c1 commit d2ccc34
Show file tree
Hide file tree
Showing 3 changed files with 123 additions and 58 deletions.
39 changes: 38 additions & 1 deletion docs/architecture/metadata/structuring-yaml.md
Original file line number Diff line number Diff line change
Expand Up @@ -233,4 +233,41 @@ tables:
{definitions.conflict_type_estimate}
```

Be cautious with line breaks and trailing whitespace when utilizing templates. Despite using good defaults, you might end up experimenting a lot to get the desired result.
Line breaks and whitespace can be tricky when using Jinja templates. We use reasonable defaults and strip leading and trailing whitespace from the rendered result, so in most cases you should be fine using `<%` and `%>`, but in more complex cases you might have to experiment with
more fine-grained [whitespace control](https://jinja.palletsprojects.com/en/stable/templates/#whitespace-control) using the tags `<%-` and `-%>`. This is most often needed in if-else blocks like this:

```yaml
age: |-
<% if age_group == "ALLAges" %>
...
<%- elif age_group == "Age-standardized" %>
...
<%- else %>
...
<%- endif %>
```

The most straightforward way to check your metadata is in Admin, although that means waiting for your step to finish. There's a faster way to check your YAML file directly. Create a `playground.ipynb` notebook in the same folder as your YAML file and copy this to the first cell:

```python
from etl import grapher_helpers as gh
dim_dict = {
"age_group": "YEARS0-4", "sex": "Male", "cause": "Drug use disorders"
}
d = gh.render_yaml_file("ghe.meta.yml", dim_dict=dim_dict)
d["tables"]["ghe"]["variables"]["death_count"]
```

An alternative is to render the `VariableMeta` of a single column directly:

```python
from etl import grapher_helpers as gh
from etl import paths
from owid.catalog import Dataset
tb = Dataset(paths.DATA_DIR / "garden/who/2024-07-30/ghe")['ghe']
# Sample a random row to get the dimension values
dim_dict = dict(zip(tb.index.names, tb.sample(1).index[0]))
gh.render_variable_meta(tb.death_count.m, dim_dict=dim_dict)
```
36 changes: 32 additions & 4 deletions etl/grapher_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from dataclasses import dataclass, field, is_dataclass
from functools import lru_cache
from pathlib import Path
from typing import Any, Dict, Iterable, List, Literal, Optional, Set, cast
from typing import Any, Dict, Iterable, List, Literal, Optional, Set, Union, cast

import jinja2
import numpy as np
Expand All @@ -13,7 +13,7 @@
from jinja2 import Environment
from owid import catalog
from owid.catalog import warnings
from owid.catalog.utils import underscore
from owid.catalog.utils import dynamic_yaml_load, dynamic_yaml_to_dict, underscore
from sqlalchemy import text
from sqlalchemy.engine import Engine
from sqlalchemy.orm import Session
Expand Down Expand Up @@ -209,14 +209,15 @@ def _expand_jinja_text(text: str, dim_dict: Dict[str, str]) -> str:
return text

try:
return _cached_jinja_template(text).render(dim_dict)
# NOTE: we're stripping the result to avoid trailing newlines
return _cached_jinja_template(text).render(dim_dict).strip()
except jinja2.exceptions.TemplateSyntaxError as e:
new_message = f"{e.message}\n\nDimensions:\n{dim_dict}\n\nTemplate:\n{text}\n"
raise e.__class__(new_message, e.lineno, e.name, e.filename) from e


def _expand_jinja(obj: Any, dim_dict: Dict[str, str]) -> Any:
"""Expand Jinja in all metadata fields."""
"""Expand Jinja in all metadata fields. This modifies the original object in place."""
if obj is None:
return None
elif isinstance(obj, str):
Expand All @@ -233,6 +234,33 @@ def _expand_jinja(obj: Any, dim_dict: Dict[str, str]) -> Any:
return obj


def render_yaml_file(path: Union[str, Path], dim_dict: Dict[str, str]) -> Dict[str, Any]:
    """Load YAML file and render Jinja in all fields. Return a dictionary.

    Args:
        path: Path to the YAML file to load.
        dim_dict: Values made available to the Jinja templates,
            e.g. ``{"sex": "male"}``.

    Returns:
        The content of the YAML file as a dictionary, with Jinja expanded
        in its fields.

    Usage:
        # Create a playground.ipynb next to YAML file and run this in notebook
        from etl import grapher_helpers as gh
        m = gh.render_yaml_file("ghe.meta.yml", dim_dict={"sex": "male"})
        m['tables']['ghe']['variables']['death_count']
    """
    # Convert the loaded YAML into a plain dictionary before expanding Jinja in it.
    meta = dynamic_yaml_to_dict(dynamic_yaml_load(path))
    return _expand_jinja(meta, dim_dict)


def render_variable_meta(meta: catalog.VariableMeta, dim_dict: Dict[str, str]) -> catalog.VariableMeta:
    """Render Jinja in all fields of VariableMeta. Return a new VariableMeta object.

    Args:
        meta: Variable metadata whose fields may contain Jinja templates.
        dim_dict: Values made available to the Jinja templates,
            e.g. ``{"sex": "male"}``.

    Returns:
        A copy of ``meta`` with Jinja expanded in its fields.

    Usage:
        from etl import grapher_helpers as gh
        from etl import paths
        from owid.catalog import Dataset
        tb = Dataset(paths.DATA_DIR / "garden/who/2024-07-30/ghe")['ghe']
        gh.render_variable_meta(tb.my_col.m, dim_dict={"sex": "male"})
    """
    # TODO: move this as a method to VariableMeta class
    # `_expand_jinja` modifies its argument in place, so work on a copy to
    # leave the caller's metadata untouched.
    return _expand_jinja(meta.copy(), dim_dict)


def _title_column_and_dimensions(title: str, dim_dict: Dict[str, Any]) -> str:
"""Create new title from column title and dimensions.
For instance `Deaths`, ["age", "sex"], ["10-18", "male"] will be converted into
Expand Down
106 changes: 53 additions & 53 deletions etl/steps/data/garden/who/2024-07-30/ghe.meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,109 +11,109 @@ definitions:
- World
processing_level: major
age: |-
<%- if age_group == "ALLAges" -%>
<% if age_group == "ALLAges" %>
all ages
<%- elif age_group == "age-standardized" -%>
<% elif age_group == "age-standardized" %>
an age-standardized population
<%- elif age_group == "YEARS0-14" -%>
<% elif age_group == "YEARS0-14" %>
0-14 year olds
<%- elif age_group == "YEARS0-4" -%>
<% elif age_group == "YEARS0-4" %>
0-4 year olds
<%- elif age_group == "YEARS5-14" -%>
<% elif age_group == "YEARS5-14" %>
5-14 year olds
<%- elif age_group == "YEARS15-19" -%>
<% elif age_group == "YEARS15-19" %>
15-19 year olds
<%- elif age_group == "YEARS15-49" -%>
<% elif age_group == "YEARS15-49" %>
15-49 year olds
<%- elif age_group == "YEARS20-24" -%>
<% elif age_group == "YEARS20-24" %>
20-24 year olds
<%- elif age_group == "YEARS25-34" -%>
<% elif age_group == "YEARS25-34" %>
25-34 year olds
<%- elif age_group == "YEARS35-44" -%>
<% elif age_group == "YEARS35-44" %>
35-44 year olds
<%- elif age_group == "YEARS45-54" -%>
<% elif age_group == "YEARS45-54" %>
45-54 year olds
<%- elif age_group == "YEARS50-69" -%>
<% elif age_group == "YEARS50-69" %>
50-69 year olds
<%- elif age_group == "YEARS55-64" -%>
<% elif age_group == "YEARS55-64" %>
55-64 year olds
<%- elif age_group == "YEARS65-74" -%>
<% elif age_group == "YEARS65-74" %>
65-74 year olds
<%- elif age_group == "YEARS70+" -%>
<% elif age_group == "YEARS70+" %>
70+ year olds
<%- elif age_group == "YEARS75-84" -%>
<% elif age_group == "YEARS75-84" %>
75-84 year olds
<%- elif age_group == "YEARS85PLUS" -%>
<% elif age_group == "YEARS85PLUS" %>
85+ year olds
<%- endif -%>
<% endif %>
sex: |-
<%- if sex == "Both sexes" %>both sexes<% elif sex == "Male" %>males<% elif sex == "Female" %>females<% endif -%>
<% if sex == "Both sexes" %>both sexes<% elif sex == "Male" %>males<% elif sex == "Female" %>females<% endif %>
deaths_title: |-
<%- if age_group == "ALLAges" -%>
<% if age_group == "ALLAges" %>
Total deaths from << cause.lower() >> among {definitions.sex}
<%- elif age_group == "Age-standardized" -%>
<% elif age_group == "Age-standardized" %>
Age-standardized deaths from << cause.lower() >> among {definitions.sex}
<%- else -%>
<% else %>
Deaths from << cause.lower() >> among {definitions.sex} aged {definitions.age}
<%- endif -%>
<% endif %>
deaths_description: |-
<%- if age_group == "ALLAges" -%>
<% if age_group == "ALLAges" %>
Estimated number of deaths from << cause.lower() >> in {definitions.sex}.
<%- elif age_group == "Age-standardized" -%>
<% elif age_group == "Age-standardized" %>
Estimated number of age-standardized deaths from << cause.lower() >> in {definitions.sex}.
<%- else -%>
Estimated number of deaths from << cause.lower() >> among {definitions.sex} aged {definitions.age}.
<%- endif -%>
<% else %>
Estimated number of deaths from << cause.lower() >> among {definitions.sex} aged {definitions.age}.
<% endif %>
death_rate_title: |-
<%- if age_group == "ALLAges" -%>
<% if age_group == "ALLAges" %>
Death rate from << cause.lower() >> among {definitions.sex}
<%- elif age_group == "Age-standardized" -%>
<% elif age_group == "Age-standardized" %>
Age-standardized death rate from << cause.lower() >> among {definitions.sex}
<%- else -%>
<% else %>
Death rate from << cause.lower() >> among {definitions.sex} aged {definitions.age}
<%- endif -%>
<% endif %>
death_rate_description: |-
<%- if age_group == "ALLAges" -%>
<% if age_group == "ALLAges" %>
Estimated number of deaths from << cause.lower() >> in {definitions.sex}, per 100,000 people.
<%- elif age_group == "Age-standardized" -%>
<% elif age_group == "Age-standardized" %>
Estimated number of age-standardized deaths from << cause.lower() >> in {definitions.sex}, per 100,000 people.
<%- else -%>
<% else %>
Estimated number of deaths from << cause.lower() >> among {definitions.sex} aged {definitions.age}, per 100,000 people.
<%- endif -%>
<% endif %>
dalys_title: |-
<%- if age_group == "ALLAges" -%>
<% if age_group == "ALLAges" %>
DALYs from << cause.lower() >> among {definitions.sex}
<%- elif age_group == "Age-standardized" -%>
<% elif age_group == "Age-standardized" %>
Age-standardized DALYs from << cause.lower() >> among {definitions.sex}
<%- else -%>
<% else %>
DALYs from << cause.lower() >> among {definitions.sex} aged {definitions.age}
<%- endif -%>
<% endif %>
dalys_description: |-
<%- if age_group == "ALLAges" -%>
<% if age_group == "ALLAges" %>
Estimated number of [DALYs](#dod:dalys) from << cause.lower() >> in {definitions.sex}.
<%- elif age_group == "Age-standardized" -%>
<% elif age_group == "Age-standardized" %>
Estimated number of age-standardized [DALYs](#dod:dalys) from << cause.lower() >> in {definitions.sex}.
<%- else -%>
<% else %>
Estimated number of [DALYs](#dod:dalys) from << cause.lower() >> among {definitions.sex} aged {definitions.age}.
<%- endif -%>
<% endif %>
dalys_rate_title: |-
<%- if age_group == "ALLAges" -%>
<% if age_group == "ALLAges" %>
DALYs from << cause.lower() >>, among {definitions.sex} per 100,000 people
<%- elif age_group == "Age-standardized" -%>
<% elif age_group == "Age-standardized" %>
Age-standardized DALYs from << cause.lower() >> among {definitions.sex}, per 100,000 people
<%- else -%>
<% else %>
DALYs from << cause.lower() >> among {definitions.sex} aged {definitions.age}, per 100,000 people
<%- endif -%>
<% endif %>
dalys_rate_description: |-
<%- if age_group == "ALLAges" -%>
<% if age_group == "ALLAges" %>
Estimated number of [DALYs](#dod:dalys) from << cause.lower() >> in {definitions.sex}, per 100,000 people.
<%- elif age_group == "Age-standardized" -%>
<% elif age_group == "Age-standardized" %>
Estimated number of age-standardized [DALYs](#dod:dalys) from << cause.lower() >> in {definitions.sex}, per 100,000 people.
<%- else -%>
<% else %>
Estimated number of [DALYs](#dod:dalys) from << cause.lower() >> among {definitions.sex} aged {definitions.age}, per 100,000 people.
<%- endif -%>
<% endif %>
footnote: |-
<%- if age == "Age-standardized" -%>To allow for comparisons between countries and over time, this metric is [age-standardized](#dod:age_standardized).<%- endif -%>
<% if age == "Age-standardized" %>To allow for comparisons between countries and over time, this metric is [age-standardized](#dod:age_standardized).<% endif %>
tables:
ghe:
variables:
Expand Down

0 comments on commit d2ccc34

Please sign in to comment.