Skip to content

Commit

Permalink
Merge branch 'master' into data/vdem-2
Browse files Browse the repository at this point in the history
  • Loading branch information
lucasrodes committed Apr 18, 2024
2 parents 284c2d3 + 0ac820e commit de8deea
Show file tree
Hide file tree
Showing 37 changed files with 2,810 additions and 13 deletions.
47 changes: 42 additions & 5 deletions apps/staging_sync/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import datetime as dt
import re
from pathlib import Path
from typing import Any, Dict, Optional, Set
from typing import Any, Dict, Literal, Optional, Set

import click
import pandas as pd
Expand Down Expand Up @@ -53,6 +53,24 @@
help="""Staging server UTC creation date. It is used to warn about charts that have been
updated in production. Default is branch creation date.""",
)
@click.option(
"--include",
default=None,
type=str,
help="""Include only charts with variables whose catalogPath matches the provided string.""",
)
@click.option(
"--exclude",
default=None,
type=str,
help="""Exclude charts with variables whose catalogPath matches the provided string.""",
)
@click.option(
"--errors",
default="raise",
type=click.Choice(["raise", "warn"]),
help="""How to handle errors when syncing charts. 'warn' will skip the chart and continue.""",
)
@click.option(
"--dry-run/--no-dry-run",
default=False,
Expand All @@ -66,6 +84,9 @@ def cli(
publish: bool,
approve_revisions: bool,
staging_created_at: Optional[dt.datetime],
include: Optional[str],
exclude: Optional[str],
errors: Literal["warn", "raise"],
dry_run: bool,
) -> None:
"""Sync Grapher charts and revisions from an environment to the main environment.
Expand Down Expand Up @@ -145,12 +166,28 @@ def cli(
_remove_nonexisting_column_slug(source_chart, source_session)

try:
target_chart = source_chart.migrate_to_db(source_session, target_session)
target_chart = source_chart.migrate_to_db(
source_session, target_session, include=include, exclude=exclude
)
except ValueError as e:
if "variables.catalogPath not found in target" in str(e):
raise ValueError("ETL deploy hasn't finished yet. Check the repository.") from e
if errors == "warn":
log.warning("staging_sync.error", chart_id=chart_id, error=str(e))
continue
else:
raise e
if "variables.catalogPath not found in target" in str(e):
raise ValueError("ETL deploy hasn't finished yet. Check the repository.") from e
else:
raise e

# skip charts that migrate_to_db filtered out via --include/--exclude (it returns None for them)
if target_chart is None:
log.info(
"staging_sync.skip",
slug=source_chart.config["slug"],
reason="filtered by --include/--exclude",
chart_id=chart_id,
)
continue

# try getting chart with the same slug
try:
Expand Down
1 change: 1 addition & 0 deletions apps/wizard/pages/charts/variable_config.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Concerns the second stage of wizard charts, when the variable mapping is constructed."""

from typing import Any, Dict, List

import pandas as pd
Expand Down
9 changes: 9 additions & 0 deletions dag/archive/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,15 @@ steps:
data://grapher/wvs/2023-03-08/wvs_trust:
- data://garden/wvs/2023-03-08/wvs_trust

# UNDP HDR
data://meadow/un/2022-11-29/undp_hdr:
- snapshot://un/2022-11-29/undp_hdr.csv
- snapshot://un/2022-11-29/undp_hdr.xlsx
data://garden/un/2022-11-29/undp_hdr:
- data://meadow/un/2022-11-29/undp_hdr
data://grapher/un/2022-11-29/undp_hdr:
- data://garden/un/2022-11-29/undp_hdr

# Include all active steps plus all archive steps.
include:
- dag/main.yml
Expand Down
36 changes: 36 additions & 0 deletions dag/health.yml
Original file line number Diff line number Diff line change
Expand Up @@ -512,3 +512,39 @@ steps:
- data://garden/demography/2023-03-31/population
data://grapher/health/2024-04-02/organ_donation_and_transplantation:
- data://garden/health/2024-04-02/organ_donation_and_transplantation

# Polio AFP surveillance
data://meadow/who/2024-04-08/polio_afp:
- snapshot://who/2024-04-08/polio_afp.csv

# Polio historical data
data://meadow/who/2024-04-09/polio_historical:
- snapshot://who/2024-04-09/polio_historical.xls
data://garden/who/2024-04-09/polio_historical:
- data://meadow/who/2024-04-09/polio_historical

# Combining polio datasets
data://garden/who/2024-04-08/polio:
- data://meadow/who/2024-04-08/polio_afp
- data://meadow/who/2024-04-09/polio_historical
- data://garden/wb/2023-04-30/income_groups
- data://garden/regions/2023-01-01/regions
- data://garden/demography/2023-03-31/population
- snapshot://fasttrack/latest/gpei.csv
- snapshot://health/2024-04-12/polio_status.csv
data://grapher/who/2024-04-08/polio:
- data://garden/who/2024-04-08/polio

# Polio certification status
data://meadow/health/2024-04-12/polio_status:
- snapshot://health/2024-04-12/polio_status.csv

# Polio-free countries
data://meadow/health/2024-04-12/polio_free_countries:
- snapshot://health/2024-04-12/polio_free_countries.csv
data://garden/health/2024-04-12/polio_free_countries:
- data://meadow/health/2024-04-12/polio_status
- data://meadow/health/2024-04-12/polio_free_countries
- data://garden/regions/2023-01-01/regions
data://grapher/health/2024-04-12/polio_free_countries:
- data://garden/health/2024-04-12/polio_free_countries
13 changes: 6 additions & 7 deletions dag/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -170,13 +170,12 @@ steps:
- data://meadow/hmd/2022-12-07/life_tables

# UNDP
data://meadow/un/2022-11-29/undp_hdr:
- snapshot://un/2022-11-29/undp_hdr.csv
- snapshot://un/2022-11-29/undp_hdr.xlsx
data://garden/un/2022-11-29/undp_hdr:
- data://meadow/un/2022-11-29/undp_hdr
data://grapher/un/2022-11-29/undp_hdr:
- data://garden/un/2022-11-29/undp_hdr
data://meadow/un/2024-04-09/undp_hdr:
- snapshot://un/2024-04-09/undp_hdr.csv
data://garden/un/2024-04-09/undp_hdr:
- data://meadow/un/2024-04-09/undp_hdr
data://grapher/un/2024-04-09/undp_hdr:
- data://garden/un/2024-04-09/undp_hdr

#
# EM-DAT Natural disasters (2023).
Expand Down
24 changes: 23 additions & 1 deletion etl/grapher_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
It has been slightly modified since then.
"""
import json
import re
from datetime import date, datetime
from pathlib import Path
from typing import Annotated, Any, Dict, List, Literal, Optional, TypedDict, Union, get_args
Expand Down Expand Up @@ -329,7 +330,13 @@ def load_chart_variables(self, session: Session) -> Dict[int, "Variable"]:

return variables

def migrate_to_db(self, source_session: Session, target_session: Session) -> "Chart":
def migrate_to_db(
self,
source_session: Session,
target_session: Session,
include: Optional[str] = None,
exclude: Optional[str] = None,
) -> Optional["Chart"]:
"""Remap variable ids from source to target session. Variable in source is uniquely identified
by its catalogPath if available, or by name and datasetId otherwise. It is looked up
by this identifier in the target session to get the new variable id.
Expand All @@ -340,6 +347,21 @@ def migrate_to_db(self, source_session: Session, target_session: Session) -> "Ch
assert self.id, "Chart must come from a database"
source_variables = self.load_chart_variables(source_session)

# if chart contains a variable that is excluded, skip the whole chart
if exclude:
for source_var in source_variables.values():
if source_var.catalogPath and re.search(exclude, source_var.catalogPath):
return None

# a chart must contain at least one variable matching include, otherwise skip it
if include:
matching = False
for source_var in source_variables.values():
if source_var.catalogPath and re.search(include, source_var.catalogPath):
matching = True
if not matching:
return None

remap_ids = {}
for source_var_id, source_var in source_variables.items():
if source_var.catalogPath:
Expand Down
Loading

0 comments on commit de8deea

Please sign in to comment.