diff --git a/docs/architecture/metadata/structuring-yaml.md b/docs/architecture/metadata/structuring-yaml.md index bb8a16b2862..f0c4091c5da 100644 --- a/docs/architecture/metadata/structuring-yaml.md +++ b/docs/architecture/metadata/structuring-yaml.md @@ -233,4 +233,41 @@ tables: {definitions.conflict_type_estimate} ``` -Be cautious with line breaks and trailing whitespace when utilizing templates. Despite using good defaults, you might end up experimenting a lot to get the desired result. +Line breaks and whitespace can be tricky when using Jinja templates. We use reasonable defaults and strip whitespace, so in most cases you should be fine with using `<%` and `%>`, but in more complex cases, you might have to experiment with +more fine-grained [whitespace control](https://jinja.palletsprojects.com/en/stable/templates/#whitespace-control) using tags `<%-` and `-%>`. This is most often used in if-else blocks like this: + +```yaml +age: |- + <% if age_group == "ALLAges" %> + ... + <%- elif age_group == "Age-standardized" %> + ... + <%- else %> + ... + <%- endif %> +``` + +The most straightforward way to check your metadata is in Admin, although that means waiting for your step to finish. There's a faster way to check your YAML file directly. 
Create a `playground.ipynb` notebook in the same folder as your YAML file and copy this to the first cell: + +```python +from etl import grapher_helpers as gh +dim_dict = { + "age_group": "YEARS0-4", "sex": "Male", "cause": "Drug use disorders" +} +d = gh.render_yaml_file("ghe.meta.yml", dim_dict=dim_dict) +d["tables"]["ghe"]["variables"]["death_count"] +``` + +An alternative is examining `VariableMeta`: + +```python +from owid.catalog import Dataset +from etl import grapher_helpers as gh, paths + +tb = Dataset(paths.DATA_DIR / "garden/who/2024-07-30/ghe")['ghe'] + +# Sample a random row to get the dimension values +dim_dict = dict(zip(tb.index.names, tb.sample(1).index[0])) + +gh.render_variable_meta(tb.death_count.m, dim_dict=dim_dict) +``` diff --git a/etl/grapher_helpers.py b/etl/grapher_helpers.py index 67bf0df7bd3..f755de6493a 100644 --- a/etl/grapher_helpers.py +++ b/etl/grapher_helpers.py @@ -2,7 +2,7 @@ from dataclasses import dataclass, field, is_dataclass from functools import lru_cache from pathlib import Path -from typing import Any, Dict, Iterable, List, Literal, Optional, Set, cast +from typing import Any, Dict, Iterable, List, Literal, Optional, Set, Union, cast import jinja2 import numpy as np @@ -10,10 +10,9 @@ import pymysql import sqlalchemy import structlog -from jinja2 import Environment from owid import catalog from owid.catalog import warnings -from owid.catalog.utils import underscore +from owid.catalog.utils import dynamic_yaml_load, dynamic_yaml_to_dict, underscore from sqlalchemy import text from sqlalchemy.engine import Engine from sqlalchemy.orm import Session @@ -23,7 +22,7 @@ log = structlog.get_logger() -jinja_env = Environment( +jinja_env = jinja2.Environment( block_start_string="<%", block_end_string="%>", variable_start_string="<<", @@ -32,8 +31,17 @@ comment_end_string="#>", trim_blocks=True, lstrip_blocks=True, + undefined=jinja2.StrictUndefined, ) + +# Helper function to raise an error with << raise("uh oh...") >> +def raise_helper(msg): + raise 
Exception(msg) + + +jinja_env.globals["raise"] = raise_helper + # this might work too pd.api.types.is_integer_dtype(col) INT_TYPES = tuple( {f"{n}{b}{p}" for n in ("int", "Int", "uint", "UInt") for b in ("8", "16", "32", "64") for p in ("", "[pyarrow]")} @@ -209,14 +217,18 @@ def _expand_jinja_text(text: str, dim_dict: Dict[str, str]) -> str: return text try: - return _cached_jinja_template(text).render(dim_dict) + # NOTE: we're stripping the result to avoid trailing newlines + return _cached_jinja_template(text).render(dim_dict).strip() except jinja2.exceptions.TemplateSyntaxError as e: new_message = f"{e.message}\n\nDimensions:\n{dim_dict}\n\nTemplate:\n{text}\n" raise e.__class__(new_message, e.lineno, e.name, e.filename) from e + except jinja2.exceptions.UndefinedError as e: + new_message = f"{e.message}\n\nDimensions:\n{dim_dict}\n\nTemplate:\n{text}\n" + raise e.__class__(new_message) from e def _expand_jinja(obj: Any, dim_dict: Dict[str, str]) -> Any: - """Expand Jinja in all metadata fields.""" + """Expand Jinja in all metadata fields. This modifies the original object in place.""" if obj is None: return None elif isinstance(obj, str): @@ -233,6 +245,33 @@ def _expand_jinja(obj: Any, dim_dict: Dict[str, str]) -> Any: return obj +def render_yaml_file(path: Union[str, Path], dim_dict: Dict[str, str]) -> Dict[str, Any]: + """Load YAML file and render Jinja in all fields. Return a dictionary. + + Usage: + # Create a playground.ipynb next to YAML file and run this in notebook + from etl import grapher_helpers as gh + + m = gh.render_yaml_file("ghe.meta.yml", dim_dict={"sex": "male"}) + m['tables']['ghe']['variables']['death_count'] + """ + meta = dynamic_yaml_to_dict(dynamic_yaml_load(path)) + return _expand_jinja(meta, dim_dict) + + +def render_variable_meta(meta: catalog.VariableMeta, dim_dict: Dict[str, str]) -> catalog.VariableMeta: + """Render Jinja in all fields of VariableMeta. Return a new VariableMeta object. 
+ + Usage: + from owid.catalog import Dataset + from etl import grapher_helpers as gh, paths + tb = Dataset(paths.DATA_DIR / "garden/who/2024-07-30/ghe")['ghe'] + gh.render_variable_meta(tb.my_col.m, dim_dict={"sex": "male"}) + """ + # TODO: move this as a method to VariableMeta class + return _expand_jinja(meta.copy(), dim_dict) + + def _title_column_and_dimensions(title: str, dim_dict: Dict[str, Any]) -> str: """Create new title from column title and dimensions. For instance `Deaths`, ["age", "sex"], ["10-18", "male"] will be converted into diff --git a/etl/steps/data/garden/covid/latest/sequence.meta.yml b/etl/steps/data/garden/covid/latest/sequence.meta.yml index 1f79931b216..acebff8b8e4 100644 --- a/etl/steps/data/garden/covid/latest/sequence.meta.yml +++ b/etl/steps/data/garden/covid/latest/sequence.meta.yml @@ -21,13 +21,13 @@ tables: num_sequences: title: "Number of sequenced COVID-19 genomes - Variant: << variant >>" description_short: |- - <% if variant == 'non_who' %> - The number of analyzed sequences in the preceding two weeks that correspond to non-relevant variant groups. This number may not reflect the complete breakdown of cases since only a fraction of all cases are sequenced. - <% elif variant == 'other' %> - The number of analyzed sequences in the preceding two weeks that correspond to non-categorised variant groups. This number may not reflect the complete breakdown of cases since only a fraction of all cases are sequenced. - <% else %> - The number of analyzed sequences in the preceding two weeks that correspond to variant group '<< variant >>'. This number may not reflect the complete breakdown of cases since only a fraction of all cases are sequenced. - <%- endif -%> + <% set mapping = dict( + non_who="The number of analyzed sequences in the preceding two weeks that correspond to non-relevant variant groups. 
This number may not reflect the complete breakdown of cases since only a fraction of all cases are sequenced.", + other="The number of analyzed sequences in the preceding two weeks that correspond to non-categorised variant groups. This number may not reflect the complete breakdown of cases since only a fraction of all cases are sequenced.", + else="The number of analyzed sequences in the preceding two weeks that correspond to variant group '<< variant >>'. This number may not reflect the complete breakdown of cases since only a fraction of all cases are sequenced." + ) %> + + << mapping.get(variant, mapping['else']) >> unit: "sequenced genomes" display: tolerance: 28 diff --git a/etl/steps/data/garden/covid/latest/yougov.meta.yml b/etl/steps/data/garden/covid/latest/yougov.meta.yml index d4ac7be9e20..03e09284fd1 100644 --- a/etl/steps/data/garden/covid/latest/yougov.meta.yml +++ b/etl/steps/data/garden/covid/latest/yougov.meta.yml @@ -17,111 +17,65 @@ definitions: part1: |- Have you had the first or second doses of a Coronavirus (COVID-19) vaccine? 
question_mapper: |- - <%- if (question == 'activities_improve_health') -%> - {definitions.questions_templates.standard.part1} 'I feel it is important to carry out activities which will improve my health' - <%- elif (question == 'avoided_crowded_areas') -%> - {definitions.questions_templates.i12.part1} 'Avoided crowded areas' {definitions.questions_templates.i12.part2} - <%- elif (question == 'avoided_going_out') -%> - {definitions.questions_templates.i12.part1} 'Avoided going out in general' {definitions.questions_templates.i12.part2} - <%- elif (question == 'avoided_guests_at_home') -%> - {definitions.questions_templates.i12.part1} 'Avoided having guests to your home' {definitions.questions_templates.i12.part2} - <%- elif (question == 'avoided_healthcare_settings') -%> - {definitions.questions_templates.i12.part1} 'Avoided going to hospital or other healthcare settings' {definitions.questions_templates.i12.part2} - <%- elif (question == 'avoided_large_gatherings') -%> - {definitions.questions_templates.i12.part1} 'Avoided large-sized social gatherings (more than 10 people)' {definitions.questions_templates.i12.part2} - <%- elif (question == 'avoided_medium_gatherings') -%> - {definitions.questions_templates.i12.part1} 'Avoided medium-sized social gatherings (between 3 and 10 people)' {definitions.questions_templates.i12.part2} - <%- elif (question == 'avoided_objects_public') -%> - {definitions.questions_templates.i12.part1} 'Avoided touching objects in public (e.g. 
elevator buttons or doors)' {definitions.questions_templates.i12.part2} - <%- elif (question == 'avoided_people_with_symptoms') -%> - {definitions.questions_templates.i12.part1} 'Avoided contact with people who have symptoms or you think may have been exposed to the coronavirus' {definitions.questions_templates.i12.part2} - <%- elif (question == 'avoided_public_events') -%> - {definitions.questions_templates.i12.part1} 'Avoided attending public events, such as sports matches, festivals, theatres, clubs, or going to religious services' {definitions.questions_templates.i12.part2} - <%- elif (question == 'avoided_public_transport') -%> - {definitions.questions_templates.i12.part1} 'Avoided taking public transport' {definitions.questions_templates.i12.part2} - <%- elif (question == 'avoided_shops') -%> - {definitions.questions_templates.i12.part1} 'Avoided going to shops' {definitions.questions_templates.i12.part2} - <%- elif (question == 'avoided_small_gatherings') -%> - {definitions.questions_templates.i12.part1} 'Avoided small social gatherings (not more than 2 people)' {definitions.questions_templates.i12.part2} - <%- elif (question == 'avoided_working_outside_home') -%> - {definitions.questions_templates.i12.part1} 'Avoided working outside your home' {definitions.questions_templates.i12.part2} - <%- elif (question == 'children_avoided_school') -%> - {definitions.questions_templates.i12.part1} 'Avoided letting your children go to school/university' {definitions.questions_templates.i12.part2} - <%- elif (question == 'cleaned_surfaces_home') -%> - {definitions.questions_templates.i12.part1} 'Cleaned frequently touched surfaces in the home (e.g. 
doorknobs, toilets, taps)' {definitions.questions_templates.i12.part2} - <%- elif (question == 'covered_mouth_sneeze') -%> - {definitions.questions_templates.i12.part1} 'Covered your nose and mouth when sneezing or coughing' {definitions.questions_templates.i12.part2} + <% set standard = "{definitions.questions_templates.standard.part1}" %> + <% set i12_part1 = "{definitions.questions_templates.i12.part1}" %> + <% set i12_part2 = "{definitions.questions_templates.i12.part2}" %> - <%- elif (question == 'covid_dangerous_to_me') -%> - {definitions.questions_templates.standard.part1} 'Coronavirus (COVID-19) is very dangerous for me' - <%- elif (question == 'covid_vaccine_important_health') -%> - How important do you think getting a COVID-19 vaccine is for your health? - <%- elif (question == 'covid_vaccine_received_one_or_two_doses') -%> - Have you had the first or second doses of a Coronavirus (COVID-19) vaccine? + <% set question_mapper = dict( + activities_improve_health=standard ~ " 'I feel it is important to carry out activities which will improve my health'", + avoided_crowded_areas=i12_part1 ~ " 'Avoided crowded areas' " ~ i12_part2, + avoided_going_out=i12_part1 ~ " 'Avoided going out in general' " ~ i12_part2, + avoided_guests_at_home=i12_part1 ~ " 'Avoided having guests to your home' " ~ i12_part2, + avoided_healthcare_settings=i12_part1 ~ " 'Avoided going to hospital or other healthcare settings' " ~ i12_part2, + avoided_large_gatherings=i12_part1 ~ " 'Avoided large-sized social gatherings (more than 10 people)' " ~ i12_part2, + avoided_medium_gatherings=i12_part1 ~ " 'Avoided medium-sized social gatherings (between 3 and 10 people)' " ~ i12_part2, + avoided_objects_public=i12_part1 ~ " 'Avoided touching objects in public (e.g. 
elevator buttons or doors)' " ~ i12_part2, + avoided_people_with_symptoms=i12_part1 ~ " 'Avoided contact with people who have symptoms or you think may have been exposed to the coronavirus' " ~ i12_part2, + avoided_public_events=i12_part1 ~ " 'Avoided attending public events, such as sports matches, festivals, theatres, clubs, or going to religious services' " ~ i12_part2, + avoided_public_transport=i12_part1 ~ " 'Avoided taking public transport' " ~ i12_part2, + avoided_shops=i12_part1 ~ " 'Avoided going to shops' " ~ i12_part2, + avoided_small_gatherings=i12_part1 ~ " 'Avoided small social gatherings (not more than 2 people)' " ~ i12_part2, + avoided_working_outside_home=i12_part1 ~ " 'Avoided working outside your home' " ~ i12_part2, + children_avoided_school=i12_part1 ~ " 'Avoided letting your children go to school/university' " ~ i12_part2, + cleaned_surfaces_home=i12_part1 ~ " 'Cleaned frequently touched surfaces in the home (e.g. doorknobs, toilets, taps)' " ~ i12_part2, + covered_mouth_sneeze=i12_part1 ~ " 'Covered your nose and mouth when sneezing or coughing' " ~ i12_part2, + covid_dangerous_to_me=standard ~ " 'Coronavirus (COVID-19) is very dangerous for me'", + covid_vaccine_important_health="How important do you think getting a COVID-19 vaccine is for your health?", + covid_vaccine_received_one_or_two_doses="Have you had the first or second doses of a Coronavirus (COVID-19) vaccine?", + covid_vaccine_will_prevent_transmission=standard ~ " 'A vaccine will completely prevent those who receive it from transmitting COVID19 to others'", + covid_vaccine_will_protect_health=standard ~ " 'A vaccine will completely protect those who receive it from possible health effects of COVID19'", + difficult_to_isolate="If you were advised to do so by a healthcare professional or public health authority, how easy or difficult would it be for you to self-isolate for 7 days?", + eaten_separately=i12_part1 ~ " 'Eaten separately at home, when normally you would eat a meal 
with others' " ~ i12_part2, + govt_will_provide_effective_covid_vaccine=standard ~ " 'I believe government health authorities in my country will provide me with an effective COVID19 vaccine'", + hand_sanitiser=i12_part1 ~ " 'Used hand sanitiser' " ~ i12_part2, + handwashing_yesterday="Thinking about yesterday… about how many times, would you say you washed your hands with soap or used hand sanitiser?", + household_members_contact="About how many people from your household have you come into physical contact with (within 2 meters / 6 feet)?", + life_greatly_affected=standard ~ " 'My life has been greatly affected by coronavirus (COVID-19)'", + likely_get_covid_future=standard ~ " 'It is likely that I will get coronavirus (COVID-19) in the future'", + mask_at_home=i12_part1 ~ " 'Worn a face mask inside your home' " ~ i12_part2, + mask_at_work=i12_part1 ~ " 'Worn a face mask at your place of work' " ~ i12_part2, + mask_clothing_store=i12_part1 ~ " 'Worn a face mask inside a clothing / footwear shop' " ~ i12_part2, + mask_grocery_store=i12_part1 ~ " 'Worn a face mask inside a grocery store / supermarket' " ~ i12_part2, + mask_not_possible=standard ~ " 'Wearing a mask to protect me against coronavirus (COVID-19) is not possible for me'", + mask_outside_home=i12_part1 ~ " 'Worn a face mask outside your home (e.g. 
when on public transport, going to a supermarket, going to a main road)' " ~ i12_part2, + mask_protect_me=standard ~ " 'Wearing a mask will protect me against coronavirus (COVID-19)'", + mask_protect_others=standard ~ " 'Wearing a mask will protect others against coronavirus (COVID-19)'", + mask_public_transport=i12_part1 ~ " 'Worn a face mask on public transportation' " ~ i12_part2, + people_contact_outside_household="Not including those people in your household, about how many people have you come into physical contact with (within 2 meters / 6 feet)?", + slept_separate_bedrooms=i12_part1 ~ " 'Slept in separate bedrooms at home, when normally you would share a bedroom' " ~ i12_part2, + times_left_home_yesterday="How many times did you leave your home yesterday? If you are not staying at home, how many times did you leave where you are staying yesterday?", + trust_covid_vaccines="How much do you trust COVID-19 vaccines?", + uncertain_covid_vaccinate_this_week=standard ~ " 'If a Covid-19 vaccine were made available to me this week, I would definitely get it' (neutral)", + unwillingness_covid_vaccinate_this_week=standard ~ " 'If a Covid-19 vaccine were made available to me this week, I would definitely get it' (disagreement)", + washed_hands=i12_part1 ~ " 'Washed hands with soap and water' " ~ i12_part2, + willingness_covid_vaccinate_this_week=standard ~ " 'If a Covid-19 vaccine were made available to me this week, I would definitely get it' (agreement)", + willingness_isolate_if_advised=i12_part1 ~ " 'Washed hands with soap and water' " ~ i12_part2, + willingness_isolate_if_symptoms="Thinking about the next 7 days… would you isolate yourself after feeling unwell or having any of the following new symptoms: a dry cough, fever, loss of sense of smell, loss of sense of taste, shortness of breath or difficulty breathing?", + worried_covid_vaccine_side_effects=standard ~ " 'I am worried about potential side effects of a COVID19 vaccine'" + ) %> - <%- elif (question == 
'covid_vaccine_will_prevent_transmission') -%> - {definitions.questions_templates.standard.part1} 'A vaccine will completely prevent those who receive it from transmitting COVID19 to others' - <%- elif (question == 'covid_vaccine_will_protect_health') -%> - {definitions.questions_templates.standard.part1} 'A vaccine will completely protect those who receive it from possible health effects of COVID19' - <%- elif (question == 'difficult_to_isolate') -%> - If you were advised to do so by a healthcare professional or public health authority, how easy or difficult would it be for you be to self-isolate for 7 days? + << question_mapper[question] >> - <%- elif (question == 'eaten_separately') -%> - {definitions.questions_templates.i12.part1} 'Eaten separately at home, when normally you would eat a meal with others' {definitions.questions_templates.i12.part2} - - <%- elif (question == 'govt_will_provide_effective_covid_vaccine') -%> - {definitions.questions_templates.standard.part1} 'I believe government health authorities in my country will provide me with an effective COVID19 vaccine' - <%- elif (question == 'hand_sanitiser') -%> - {definitions.questions_templates.i12.part1} 'Used hand sanitiser' {definitions.questions_templates.i12.part2} - <%- elif (question == 'handwashing_yesterday') -%> - Thinking about yesterday… about how many times, would you say you washed your hands with soap or used hand sanitiser? - <%- elif (question == 'household_members_contact') -%> - About how many people from your household have you come into physical contact with (within 2 meters / 6 feet)? 
- <%- elif (question == 'life_greatly_affected') -%> - {definitions.questions_templates.standard.part1} 'My life has been greatly affected by coronavirus (COVID-19)' - <%- elif (question == 'likely_get_covid_future') -%> - {definitions.questions_templates.standard.part1} 'It is likely that I will get coronavirus (COVID-19) in the future' - <%- elif (question == 'mask_at_home') -%> - {definitions.questions_templates.i12.part1} 'Worn a face mask inside your home' {definitions.questions_templates.i12.part2} - <%- elif (question == 'mask_at_work') -%> - {definitions.questions_templates.i12.part1} 'Worn a face mask at your place of work' {definitions.questions_templates.i12.part2} - <%- elif (question == 'mask_clothing_store') -%> - {definitions.questions_templates.i12.part1} 'Worn a face mask inside a clothing / footwear shop' {definitions.questions_templates.i12.part2} - <%- elif (question == 'mask_grocery_store') -%> - {definitions.questions_templates.i12.part1} 'Worn a face mask inside a grocery store / supermarket' {definitions.questions_templates.i12.part2} - <%- elif (question == 'mask_not_possible') -%> - {definitions.questions_templates.standard.part1} 'Wearing a mask to protect me against coronavirus (COVID-19) is not possible for me' - <%- elif (question == 'mask_outside_home') -%> - {definitions.questions_templates.i12.part1} 'Worn a face mask outside your home (e.g. 
when on public transport, going to a supermarket, going to a main road)' {definitions.questions_templates.i12.part2} - <%- elif (question == 'mask_protect_me') -%> - {definitions.questions_templates.standard.part1} 'Wearing a mask will protect me against coronavirus (COVID-19)' - <%- elif (question == 'mask_protect_others') -%> - {definitions.questions_templates.standard.part1} 'Wearing a mask will protect others against coronavirus (COVID-19)' - <%- elif (question == 'mask_public_transport') -%> - {definitions.questions_templates.i12.part1} 'Worn a face mask on public transportation' {definitions.questions_templates.i12.part2} - <%- elif (question == 'people_contact_outside_household') -%> - Not including those people in your household, about how many people have you come into physical contact with (within 2 meters / 6 feet)? - <%- elif (question == 'slept_separate_bedrooms') -%> - {definitions.questions_templates.i12.part1} 'Slept in separate bedrooms at home, when normally you would share a bedroom' {definitions.questions_templates.i12.part2} - <%- elif (question == 'times_left_home_yesterday') -%> - How many times did you leave your home yesterday? If you are not staying at home, how many times did you leave where you are staying yesterday? - <%- elif (question == 'trust_covid_vaccines') -%> - How much do you trust COVID-19 vaccines? 
- <%- elif (question == 'uncertain_covid_vaccinate_this_week') -%> - {definitions.questions_templates.standard.part1} 'If a Covid-19 vaccine were made available to me this week, I would definitely get it' (neutral) - <%- elif (question == 'unwillingness_covid_vaccinate_this_week') -%> - {definitions.questions_templates.standard.part1} 'If a Covid-19 vaccine were made available to me this week, I would definitely get it' (disagreement) - <%- elif (question == 'washed_hands') -%> - <%- elif (question == 'willingness_covid_vaccinate_this_week') -%> - {definitions.questions_templates.standard.part1} 'If a Covid-19 vaccine were made available to me this week, I would definitely get it' (agreement) - - <%- elif (question == 'willingness_isolate_if_advised') -%> - {definitions.questions_templates.i12.part1} 'Washed hands with soap and water' {definitions.questions_templates.i12.part2} - <%- elif (question == 'willingness_isolate_if_symptoms') -%> - Thinking about the next 7 days… would you isolate yourself after feeling unwell or having any of the following new symptoms: a dry cough, fever, loss of sense of smell, loss of sense of taste, shortness of breath or difficulty breathing? - <%- elif (question == 'worried_covid_vaccine_side_effects') -%> - {definitions.questions_templates.standard.part1} 'I am worried about potential side effects of a COVID19 vaccine' - <%- endif -%> # Learn more about the available fields: # http://docs.owid.io/projects/etl/architecture/metadata/reference/