Skip to content

Commit

Permalink
🎉 Update long_to_wide functionality and metadata handling in grapher …
Browse files Browse the repository at this point in the history
…steps
  • Loading branch information
Marigold committed Dec 4, 2024
1 parent efb3477 commit 6339162
Show file tree
Hide file tree
Showing 4 changed files with 9 additions and 6 deletions.
2 changes: 1 addition & 1 deletion etl/grapher_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ def long_to_wide(long_tb: catalog.Table) -> catalog.Table:
short_names.append(short_name)

# Create metadata for the column from dimensions
- metadatas.append(_metadata_for_dimensions(long_tb[dims[0]].metadata, dim_dict, column))
+ metadatas.append(_metadata_for_dimensions(long_tb[dims[0]].metadata.copy(), dim_dict, column))

# Set column names to new short names and use proper metadata
wide_tb.columns = short_names
Expand Down
3 changes: 3 additions & 0 deletions etl/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,9 @@ def create_dataset(

# Expand long to wide
if long_to_wide:
+ if ds.metadata.channel != "grapher":
+ log.warning("long_to_wide=True should be only used in grapher channel")
+
dim_names = set(table.index.names) - {"country", "year", "date"}
if dim_names:
# First pass to update metadata from YAML
Expand Down
6 changes: 3 additions & 3 deletions etl/steps/data/garden/cancer/2024-08-30/gco_alcohol.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,14 @@ def run(dest_dir: str) -> None:
ds_meadow = paths.load_dataset("gco_alcohol")

# Read table from meadow dataset.
- tb = ds_meadow.read("gco_alcohol")
+ tb = ds_meadow["gco_alcohol"].reset_index()

#
# Process data.
#
tb = geo.harmonize_countries(df=tb, countries_file=paths.country_mapping_path)

- # To display on grapher we need to replace "<0.1" with "0.05" and set the decimal places to 1 so that it shows up as <0.1 on the chart
+ # To display on grapher we need to replace "<0.1" with "0.05" and set the decimal places to 1 so that it shows up as <0.1 on the chart.
tb["value"] = tb["value"].replace("<0.1", "0.05")

tb = tb.format(["country", "year", "sex", "cancer", "indicator"])
Expand All @@ -31,7 +31,7 @@ def run(dest_dir: str) -> None:
#
# Create a new garden dataset with the same metadata as the meadow dataset.
ds_garden = create_dataset(
- dest_dir, tables=[tb], long_to_wide=True, check_variables_metadata=True, default_metadata=ds_meadow.metadata
+ dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=ds_meadow.metadata
)

# Save changes in the new garden dataset.
Expand Down
4 changes: 2 additions & 2 deletions etl/steps/data/grapher/cancer/2024-08-30/gco_alcohol.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ def run(dest_dir: str) -> None:
#
# Save outputs.
#
- # Create a new grapher dataset with the same metadata as the garden dataset.
+ # Create a new grapher dataset with the same metadata as the garden dataset
ds_grapher = create_dataset(
- dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=ds_garden.metadata
+ dest_dir, tables=[tb], long_to_wide=True, check_variables_metadata=True, default_metadata=ds_garden.metadata
)

# Save changes in the new grapher dataset.
Expand Down

0 comments on commit 6339162

Please sign in to comment.