From d2ccc34b65c6a6d7ab224b6093f0a3ff6272d99c Mon Sep 17 00:00:00 2001 From: Marigold Date: Fri, 29 Nov 2024 09:58:22 +0100 Subject: [PATCH] :sparkles: Jinja whitespaces and newlines --- .../architecture/metadata/structuring-yaml.md | 39 ++++++- etl/grapher_helpers.py | 36 +++++- .../data/garden/who/2024-07-30/ghe.meta.yml | 106 +++++++++--------- 3 files changed, 123 insertions(+), 58 deletions(-) diff --git a/docs/architecture/metadata/structuring-yaml.md b/docs/architecture/metadata/structuring-yaml.md index bb8a16b28627..f0c4091c5dae 100644 --- a/docs/architecture/metadata/structuring-yaml.md +++ b/docs/architecture/metadata/structuring-yaml.md @@ -233,4 +233,41 @@ tables: {definitions.conflict_type_estimate} ``` -Be cautious with line breaks and trailing whitespace when utilizing templates. Despite using good defaults, you might end up experimenting a lot to get the desired result. +Line breaks and whitespace can be tricky when using Jinja templates. We use reasonable defaults and strip whitespace, so in most cases you should be fine with using `<%` and `%>`, but in more complex cases, you might have to experiment with +more fine-grained [whitespace control](https://jinja.palletsprojects.com/en/stable/templates/#whitespace-control) using tags `<%-` and `-%>`. This is most often used in if-else blocks like this: + +```yaml +age: |- + <% if age_group == "ALLAges" %> + ... + <%- elif age_group == "Age-standardized" %> + ... + <%- else %> + ... + <%- endif %> +``` + +The most straightforward way to check your metadata is in Admin, although that means waiting for your step to finish. There's a faster way to check your YAML file directly. 
Create a `playground.ipynb` notebook in the same folder as your YAML file and copy this to the first cell: + +```python +from etl import grapher_helpers as gh +dim_dict = { + "age_group": "YEARS0-4", "sex": "Male", "cause": "Drug use disorders" +} +d = gh.render_yaml_file("ghe.meta.yml", dim_dict=dim_dict) +d["tables"]["ghe"]["variables"]["death_count"] +``` + +An alternative is to render the `VariableMeta` of a table column directly: + +```python +from etl import grapher_helpers as gh, paths +from owid.catalog import Dataset + +tb = Dataset(paths.DATA_DIR / "garden/who/2024-07-30/ghe")['ghe'] + +# Sample a random row to get the dimension values +dim_dict = dict(zip(tb.index.names, tb.sample(1).index[0])) + +gh.render_variable_meta(tb.death_count.m, dim_dict=dim_dict) +``` diff --git a/etl/grapher_helpers.py b/etl/grapher_helpers.py index 67bf0df7bd3c..a33fa2bf9cbc 100644 --- a/etl/grapher_helpers.py +++ b/etl/grapher_helpers.py @@ -2,7 +2,7 @@ from dataclasses import dataclass, field, is_dataclass from functools import lru_cache from pathlib import Path -from typing import Any, Dict, Iterable, List, Literal, Optional, Set, cast +from typing import Any, Dict, Iterable, List, Literal, Optional, Set, Union, cast import jinja2 import numpy as np @@ -13,7 +13,7 @@ from jinja2 import Environment from owid import catalog from owid.catalog import warnings -from owid.catalog.utils import underscore +from owid.catalog.utils import dynamic_yaml_load, dynamic_yaml_to_dict, underscore from sqlalchemy import text from sqlalchemy.engine import Engine from sqlalchemy.orm import Session @@ -209,14 +209,15 @@ def _expand_jinja_text(text: str, dim_dict: Dict[str, str]) -> str: return text try: - return _cached_jinja_template(text).render(dim_dict) + # NOTE: we're stripping the result to avoid leading/trailing whitespace and newlines + return _cached_jinja_template(text).render(dim_dict).strip() except jinja2.exceptions.TemplateSyntaxError as e: new_message = f"{e.message}\n\nDimensions:\n{dim_dict}\n\nTemplate:\n{text}\n" raise e.__class__(new_message, 
e.lineno, e.name, e.filename) from e def _expand_jinja(obj: Any, dim_dict: Dict[str, str]) -> Any: - """Expand Jinja in all metadata fields.""" + """Expand Jinja in all metadata fields. This modifies the original object in place.""" if obj is None: return None elif isinstance(obj, str): @@ -233,6 +234,33 @@ def _expand_jinja(obj: Any, dim_dict: Dict[str, str]) -> Any: return obj +def render_yaml_file(path: Union[str, Path], dim_dict: Dict[str, str]) -> Dict[str, Any]: + """Load YAML file and render Jinja in all fields. Return a dictionary. + + Usage: + # Create a playground.ipynb next to YAML file and run this in notebook + from etl import grapher_helpers as gh + m = gh.render_yaml_file("ghe.meta.yml", dim_dict={"sex": "male"}) + m['tables']['ghe']['variables']['death_count'] + """ + meta = dynamic_yaml_to_dict(dynamic_yaml_load(path)) + return _expand_jinja(meta, dim_dict) + + +def render_variable_meta(meta: catalog.VariableMeta, dim_dict: Dict[str, str]) -> catalog.VariableMeta: + """Render Jinja in all fields of VariableMeta. Return a new VariableMeta object. + + Usage: + from etl import grapher_helpers as gh, paths + from owid.catalog import Dataset + + tb = Dataset(paths.DATA_DIR / "garden/who/2024-07-30/ghe")['ghe'] + gh.render_variable_meta(tb.my_col.m, dim_dict={"sex": "male"}) + """ + # TODO: move this as a method to VariableMeta class + return _expand_jinja(meta.copy(), dim_dict) + + def _title_column_and_dimensions(title: str, dim_dict: Dict[str, Any]) -> str: """Create new title from column title and dimensions. 
For instance `Deaths`, ["age", "sex"], ["10-18", "male"] will be converted into diff --git a/etl/steps/data/garden/who/2024-07-30/ghe.meta.yml b/etl/steps/data/garden/who/2024-07-30/ghe.meta.yml index 0c75bdc41351..361e9040aa8d 100644 --- a/etl/steps/data/garden/who/2024-07-30/ghe.meta.yml +++ b/etl/steps/data/garden/who/2024-07-30/ghe.meta.yml @@ -11,109 +11,109 @@ definitions: - World processing_level: major age: |- - <%- if age_group == "ALLAges" -%> + <% if age_group == "ALLAges" %> all ages - <%- elif age_group == "age-standardized" -%> + <% elif age_group == "age-standardized" %> an age-standardized population - <%- elif age_group == "YEARS0-14" -%> + <% elif age_group == "YEARS0-14" %> 0-14 year olds - <%- elif age_group == "YEARS0-4" -%> + <% elif age_group == "YEARS0-4" %> 0-4 year olds - <%- elif age_group == "YEARS5-14" -%> + <% elif age_group == "YEARS5-14" %> 5-14 year olds - <%- elif age_group == "YEARS15-19" -%> + <% elif age_group == "YEARS15-19" %> 15-19 year olds - <%- elif age_group == "YEARS15-49" -%> + <% elif age_group == "YEARS15-49" %> 15-49 year olds - <%- elif age_group == "YEARS20-24" -%> + <% elif age_group == "YEARS20-24" %> 20-24 year olds - <%- elif age_group == "YEARS25-34" -%> + <% elif age_group == "YEARS25-34" %> 25-34 year olds - <%- elif age_group == "YEARS35-44" -%> + <% elif age_group == "YEARS35-44" %> 35-44 year olds - <%- elif age_group == "YEARS45-54" -%> + <% elif age_group == "YEARS45-54" %> 45-54 year olds - <%- elif age_group == "YEARS50-69" -%> + <% elif age_group == "YEARS50-69" %> 50-69 year olds - <%- elif age_group == "YEARS55-64" -%> + <% elif age_group == "YEARS55-64" %> 55-64 year olds - <%- elif age_group == "YEARS65-74" -%> + <% elif age_group == "YEARS65-74" %> 65-74 year olds - <%- elif age_group == "YEARS70+" -%> + <% elif age_group == "YEARS70+" %> 70+ year olds - <%- elif age_group == "YEARS75-84" -%> + <% elif age_group == "YEARS75-84" %> 75-84 year olds - <%- elif age_group == "YEARS85PLUS" -%> + <% 
elif age_group == "YEARS85PLUS" %> 85+ year olds - <%- endif -%> + <% endif %> sex: |- - <%- if sex == "Both sexes" %>both sexes<% elif sex == "Male" %>males<% elif sex == "Female" %>females<% endif -%> + <% if sex == "Both sexes" %>both sexes<% elif sex == "Male" %>males<% elif sex == "Female" %>females<% endif %> deaths_title: |- - <%- if age_group == "ALLAges" -%> + <% if age_group == "ALLAges" %> Total deaths from << cause.lower() >> among {definitions.sex} - <%- elif age_group == "Age-standardized" -%> + <% elif age_group == "Age-standardized" %> Age-standardized deaths from << cause.lower() >> among {definitions.sex} - <%- else -%> + <% else %> Deaths from << cause.lower() >> among {definitions.sex} aged {definitions.age} - <%- endif -%> + <% endif %> deaths_description: |- - <%- if age_group == "ALLAges" -%> + <% if age_group == "ALLAges" %> Estimated number of deaths from << cause.lower() >> in {definitions.sex}. - <%- elif age_group == "Age-standardized" -%> + <% elif age_group == "Age-standardized" %> Estimated number of age-standardized deaths from << cause.lower() >> in {definitions.sex}. - <%- else -%> - Estimated number of deaths from << cause.lower() >> among {definitions.sex} aged {definitions.age}. - <%- endif -%> + <% else %> + Estimated number of deaths from << cause.lower() >> among {definitions.sex} aged {definitions.age}. 
+ <% endif %> death_rate_title: |- - <%- if age_group == "ALLAges" -%> + <% if age_group == "ALLAges" %> Death rate from << cause.lower() >> among {definitions.sex} - <%- elif age_group == "Age-standardized" -%> + <% elif age_group == "Age-standardized" %> Age-standardized death rate from << cause.lower() >> among {definitions.sex} - <%- else -%> + <% else %> Death rate from << cause.lower() >> among {definitions.sex} aged {definitions.age} - <%- endif -%> + <% endif %> death_rate_description: |- - <%- if age_group == "ALLAges" -%> + <% if age_group == "ALLAges" %> Estimated number of deaths from << cause.lower() >> in {definitions.sex}, per 100,000 people. - <%- elif age_group == "Age-standardized" -%> + <% elif age_group == "Age-standardized" %> Estimated number of age-standardized deaths from << cause.lower() >> in {definitions.sex}, per 100,000 people. - <%- else -%> + <% else %> Estimated number of deaths from << cause.lower() >> among {definitions.sex} aged {definitions.age}, per 100,000 people. - <%- endif -%> + <% endif %> dalys_title: |- - <%- if age_group == "ALLAges" -%> + <% if age_group == "ALLAges" %> DALYs from << cause.lower() >> among {definitions.sex} - <%- elif age_group == "Age-standardized" -%> + <% elif age_group == "Age-standardized" %> Age-standardized DALYs from << cause.lower() >> among {definitions.sex} - <%- else -%> + <% else %> DALYs from << cause.lower() >> among {definitions.sex} aged {definitions.age} - <%- endif -%> + <% endif %> dalys_description: |- - <%- if age_group == "ALLAges" -%> + <% if age_group == "ALLAges" %> Estimated number of [DALYs](#dod:dalys) from << cause.lower() >> in {definitions.sex}. - <%- elif age_group == "Age-standardized" -%> + <% elif age_group == "Age-standardized" %> Estimated number of age-standardized [DALYs](#dod:dalys) from << cause.lower() >> in {definitions.sex}. 
- <%- else -%> + <% else %> Estimated number of [DALYs](#dod:dalys) from << cause.lower() >> among {definitions.sex} aged {definitions.age}. - <%- endif -%> + <% endif %> dalys_rate_title: |- - <%- if age_group == "ALLAges" -%> + <% if age_group == "ALLAges" %> DALYs from << cause.lower() >>, among {definitions.sex} per 100,000 people - <%- elif age_group == "Age-standardized" -%> + <% elif age_group == "Age-standardized" %> Age-standardized DALYs from << cause.lower() >> among {definitions.sex}, per 100,000 people - <%- else -%> + <% else %> DALYs from << cause.lower() >> among {definitions.sex} aged {definitions.age}, per 100,000 people - <%- endif -%> + <% endif %> dalys_rate_description: |- - <%- if age_group == "ALLAges" -%> + <% if age_group == "ALLAges" %> Estimated number of [DALYs](#dod:dalys) from << cause.lower() >> in {definitions.sex}, per 100,000 people. - <%- elif age_group == "Age-standardized" -%> + <% elif age_group == "Age-standardized" %> Estimated number of age-standardized [DALYs](#dod:dalys) from << cause.lower() >> in {definitions.sex}, per 100,000 people. - <%- else -%> + <% else %> Estimated number of [DALYs](#dod:dalys) from << cause.lower() >> among {definitions.sex} aged {definitions.age}, per 100,000 people. - <%- endif -%> + <% endif %> footnote: |- - <%- if age == "Age-standardized" -%>To allow for comparisons between countries and over time, this metric is [age-standardized](#dod:age_standardized).<%- endif -%> + <% if age == "Age-standardized" %>To allow for comparisons between countries and over time, this metric is [age-standardized](#dod:age_standardized).<% endif %> tables: ghe: variables: