Skip to content

Commit

Permalink
Merge branch 'master' into data-mean-paternal-age
Browse files Browse the repository at this point in the history
  • Loading branch information
lucasrodes committed Dec 11, 2024
2 parents 2f8b202 + b8d4a0c commit ec5c7ed
Show file tree
Hide file tree
Showing 87 changed files with 2,910 additions and 246 deletions.
6 changes: 0 additions & 6 deletions .flake8

This file was deleted.

6 changes: 0 additions & 6 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,17 +37,11 @@
"**/docs/architecture/*.md"
],
"files.exclude": {
"etl/steps/archive": true,
"snapshots/archive": true,
"**/dataset_*_config.json": true,
"**/dataset_*_values.json": true,
"**/dataset_*.json.dvc": true,
"**/dataset_*.feather.dvc": true
},
"search.exclude": {
"etl/steps/archive": true,
"snapshots/archive": true
},
"yaml.format.printWidth": 999,
"ruff.path": [
".venv/bin/ruff"
Expand Down
16 changes: 16 additions & 0 deletions apps/wizard/app_pages/chart_diff/chart_diff.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import datetime as dt
import difflib
import json
import pprint
from typing import Any, Dict, List, Optional

Expand Down Expand Up @@ -653,6 +654,7 @@ def _modified_chart_configs_on_staging(
select
c.id as chartId,
MD5(cc.full) as chartChecksum,
cc.full as chartConfig,
c.lastEditedByUserId as chartLastEditedByUserId,
c.publishedByUserId as chartPublishedByUserId,
c.lastEditedAt as chartLastEditedAt
Expand Down Expand Up @@ -699,6 +701,20 @@ def _modified_chart_configs_on_staging(
diff = source_df.copy()
diff["configEdited"] = source_df["chartChecksum"] != target_df["chartChecksum"]

# Go through edited configs and do a more detailed comparison
ix = diff["configEdited"] & target_df["chartChecksum"].notnull()
equal_configs = []
for chart_id, row in diff.loc[ix].iterrows():
source_config = json.loads(row["chartConfig"])
target_config = json.loads(target_df.loc[chart_id, "chartConfig"])

# Compare configs
if configs_are_equal(source_config, target_config):
equal_configs.append(chart_id)

# Exclude configs whose chartChecksum differs but which are actually the same (e.g. they differ only in version)
diff = diff[~diff.index.isin(equal_configs)]

# Add flag 'edited in staging'
diff["chartEditedInStaging"] = True

Expand Down
48 changes: 48 additions & 0 deletions dag/archive/artificial_intelligence.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
steps:
##############################################################################################################
# EPOCH archive (monthly updates)

# Artificial Intelligence (EPOCH)
data://meadow/artificial_intelligence/latest/epoch:
- snapshot://artificial_intelligence/latest/epoch.csv
Expand Down Expand Up @@ -236,6 +237,53 @@ steps:
data://grapher/artificial_intelligence/2024-10-01/epoch_compute_intensive_domain:
- data://garden/artificial_intelligence/2024-10-01/epoch_compute_intensive_domain

# Main EPOCH dataset
data://meadow/artificial_intelligence/2024-11-03/epoch:
- snapshot://artificial_intelligence/2024-11-03/epoch.csv
data://garden/artificial_intelligence/2024-11-03/epoch:
- data://meadow/artificial_intelligence/2024-11-03/epoch
data://grapher/artificial_intelligence/2024-11-03/epoch:
- data://garden/artificial_intelligence/2024-11-03/epoch

# Main EPOCH dataset regression lines
data://garden/artificial_intelligence/2024-11-03/epoch_regressions:
- data://garden/artificial_intelligence/2024-11-03/epoch
data://grapher/artificial_intelligence/2024-11-03/epoch_regressions:
- data://garden/artificial_intelligence/2024-11-03/epoch_regressions

# EPOCH aggregates by domain
data://garden/artificial_intelligence/2024-11-03/epoch_aggregates_domain:
- data://meadow/artificial_intelligence/2024-11-03/epoch
data://grapher/artificial_intelligence/2024-11-03/epoch_aggregates_domain:
- data://garden/artificial_intelligence/2024-11-03/epoch_aggregates_domain

# EPOCH aggregates by researcher affiliation
data://garden/artificial_intelligence/2024-11-03/epoch_aggregates_affiliation:
- data://garden/artificial_intelligence/2024-11-03/epoch
data://grapher/artificial_intelligence/2024-11-03/epoch_aggregates_affiliation:
- data://garden/artificial_intelligence/2024-11-03/epoch_aggregates_affiliation

# EPOCH dataset on Compute Intensive AI
data://meadow/artificial_intelligence/2024-11-03/epoch_compute_intensive:
- snapshot://artificial_intelligence/2024-11-03/epoch_compute_intensive.csv
data://garden/artificial_intelligence/2024-11-03/epoch_compute_intensive:
- data://meadow/artificial_intelligence/2024-11-03/epoch_compute_intensive


# EPOCH dataset on Compute Intensive AI, aggregates by country
data://garden/artificial_intelligence/2024-11-03/epoch_compute_intensive_countries:
- data://meadow/artificial_intelligence/2024-11-03/epoch_compute_intensive
data://grapher/artificial_intelligence/2024-11-03/epoch_compute_intensive_countries:
- data://garden/artificial_intelligence/2024-11-03/epoch_compute_intensive_countries

# EPOCH dataset on Compute Intensive AI, aggregates by domain
data://garden/artificial_intelligence/2024-11-03/epoch_compute_intensive_domain:
- data://meadow/artificial_intelligence/2024-11-03/epoch_compute_intensive
data://grapher/artificial_intelligence/2024-11-03/epoch_compute_intensive_domain:
- data://garden/artificial_intelligence/2024-11-03/epoch_compute_intensive_domain



##############################################################################################################

# AI Incidents
Expand Down
60 changes: 30 additions & 30 deletions dag/artificial_intelligence.yml
Original file line number Diff line number Diff line change
@@ -1,49 +1,49 @@
steps:
########### UPDATED MONTHLY #############################################################################
# Main EPOCH dataset
data://meadow/artificial_intelligence/2024-11-03/epoch:
- snapshot://artificial_intelligence/2024-11-03/epoch.csv
data://garden/artificial_intelligence/2024-11-03/epoch:
- data://meadow/artificial_intelligence/2024-11-03/epoch
data://grapher/artificial_intelligence/2024-11-03/epoch:
- data://garden/artificial_intelligence/2024-11-03/epoch
data://meadow/artificial_intelligence/2024-12-05/epoch:
- snapshot://artificial_intelligence/2024-12-05/epoch.csv
data://garden/artificial_intelligence/2024-12-05/epoch:
- data://meadow/artificial_intelligence/2024-12-05/epoch
data://grapher/artificial_intelligence/2024-12-05/epoch:
- data://garden/artificial_intelligence/2024-12-05/epoch

# Main EPOCH dataset regression lines
data://garden/artificial_intelligence/2024-11-03/epoch_regressions:
- data://garden/artificial_intelligence/2024-11-03/epoch
data://grapher/artificial_intelligence/2024-11-03/epoch_regressions:
- data://garden/artificial_intelligence/2024-11-03/epoch_regressions
data://garden/artificial_intelligence/2024-12-05/epoch_regressions:
- data://garden/artificial_intelligence/2024-12-05/epoch
data://grapher/artificial_intelligence/2024-12-05/epoch_regressions:
- data://garden/artificial_intelligence/2024-12-05/epoch_regressions

# EPOCH aggregates by domain
data://garden/artificial_intelligence/2024-11-03/epoch_aggregates_domain:
- data://meadow/artificial_intelligence/2024-11-03/epoch
data://grapher/artificial_intelligence/2024-11-03/epoch_aggregates_domain:
- data://garden/artificial_intelligence/2024-11-03/epoch_aggregates_domain
data://garden/artificial_intelligence/2024-12-05/epoch_aggregates_domain:
- data://meadow/artificial_intelligence/2024-12-05/epoch
data://grapher/artificial_intelligence/2024-12-05/epoch_aggregates_domain:
- data://garden/artificial_intelligence/2024-12-05/epoch_aggregates_domain

# EPOCH aggregates by researcher affiliation
data://garden/artificial_intelligence/2024-11-03/epoch_aggregates_affiliation:
- data://garden/artificial_intelligence/2024-11-03/epoch
data://grapher/artificial_intelligence/2024-11-03/epoch_aggregates_affiliation:
- data://garden/artificial_intelligence/2024-11-03/epoch_aggregates_affiliation
data://garden/artificial_intelligence/2024-12-05/epoch_aggregates_affiliation:
- data://garden/artificial_intelligence/2024-12-05/epoch
data://grapher/artificial_intelligence/2024-12-05/epoch_aggregates_affiliation:
- data://garden/artificial_intelligence/2024-12-05/epoch_aggregates_affiliation

# EPOCH dataset on Compute Intensive AI
data://meadow/artificial_intelligence/2024-11-03/epoch_compute_intensive:
- snapshot://artificial_intelligence/2024-11-03/epoch_compute_intensive.csv
data://garden/artificial_intelligence/2024-11-03/epoch_compute_intensive:
- data://meadow/artificial_intelligence/2024-11-03/epoch_compute_intensive
data://meadow/artificial_intelligence/2024-12-05/epoch_compute_intensive:
- snapshot://artificial_intelligence/2024-12-05/epoch_compute_intensive.csv
data://garden/artificial_intelligence/2024-12-05/epoch_compute_intensive:
- data://meadow/artificial_intelligence/2024-12-05/epoch_compute_intensive


# EPOCH dataset on Compute Intensive AI, aggregates by country
data://garden/artificial_intelligence/2024-11-03/epoch_compute_intensive_countries:
- data://meadow/artificial_intelligence/2024-11-03/epoch_compute_intensive
data://grapher/artificial_intelligence/2024-11-03/epoch_compute_intensive_countries:
- data://garden/artificial_intelligence/2024-11-03/epoch_compute_intensive_countries
data://garden/artificial_intelligence/2024-12-05/epoch_compute_intensive_countries:
- data://meadow/artificial_intelligence/2024-12-05/epoch_compute_intensive
data://grapher/artificial_intelligence/2024-12-05/epoch_compute_intensive_countries:
- data://garden/artificial_intelligence/2024-12-05/epoch_compute_intensive_countries

# EPOCH dataset on Compute Intensive AI, aggregates by domain
data://garden/artificial_intelligence/2024-11-03/epoch_compute_intensive_domain:
- data://meadow/artificial_intelligence/2024-11-03/epoch_compute_intensive
data://grapher/artificial_intelligence/2024-11-03/epoch_compute_intensive_domain:
- data://garden/artificial_intelligence/2024-11-03/epoch_compute_intensive_domain
data://garden/artificial_intelligence/2024-12-05/epoch_compute_intensive_domain:
- data://meadow/artificial_intelligence/2024-12-05/epoch_compute_intensive
data://grapher/artificial_intelligence/2024-12-05/epoch_compute_intensive_domain:
- data://garden/artificial_intelligence/2024-12-05/epoch_compute_intensive_domain

############### OTHERS #####################################################################################

Expand Down
4 changes: 2 additions & 2 deletions dag/demography.yml
Original file line number Diff line number Diff line change
Expand Up @@ -117,9 +117,9 @@ steps:
data://garden/ggdc/2024-01-19/maddison_federico_paper:
- data://meadow/ggdc/2024-01-19/maddison_federico_paper

# UN WPP experiments
# UN WPP largest age-group per country
data://garden/un/2024-03-14/un_wpp_most:
- data://garden/un/2022-07-11/un_wpp
- data://garden/un/2024-07-12/un_wpp
data://grapher/un/2024-03-14/un_wpp_most:
- data://garden/un/2024-03-14/un_wpp_most

Expand Down
4 changes: 2 additions & 2 deletions dag/fasttrack.yml
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,6 @@ steps:
- snapshot-private://fasttrack/latest/pain_hours_hen_systems.csv
data-private://grapher/fasttrack/latest/antibiotic_usage_livestock:
- snapshot-private://fasttrack/latest/antibiotic_usage_livestock.csv
data-private://grapher/fasttrack/latest/antimicrobial_usage_livestock:
- snapshot-private://fasttrack/latest/antimicrobial_usage_livestock.csv
data://grapher/fasttrack/2023-08-07/pain_hours_days_hen_systems:
- snapshot://fasttrack/2023-08-07/pain_hours_days_hen_systems.csv
data-private://grapher/fasttrack/latest/historical_france_mortality_cause:
Expand Down Expand Up @@ -240,3 +238,5 @@ steps:
- snapshot://fasttrack/latest/useful_energy_cost_way.csv
data://grapher/fasttrack/2023-06-19/world_population_comparison:
- snapshot://fasttrack/2023-06-19/world_population_comparison.csv
data://grapher/fasttrack/latest/antimicrobial_usage_livestock:
- snapshot://fasttrack/latest/antimicrobial_usage_livestock.csv
21 changes: 10 additions & 11 deletions dag/urbanization.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,17 +45,6 @@ steps:
data://grapher/un/2024-01-17/urban_agglomerations_definition_count:
- data://garden/un/2024-01-17/urban_agglomerations_definition_count
#
# GHSL degree of urbanization.
#
data://meadow/urbanization/2024-01-26/ghsl_degree_of_urbanisation:
- snapshot://urbanization/2024-01-26/ghsl_degree_of_urbanisation.zip
data://garden/urbanization/2024-01-26/ghsl_degree_of_urbanisation:
- data://meadow/urbanization/2024-01-26/ghsl_degree_of_urbanisation
- data://garden/wb/2023-04-30/income_groups
- data://garden/regions/2023-01-01/regions
data://grapher/urbanization/2024-01-26/ghsl_degree_of_urbanisation:
- data://garden/urbanization/2024-01-26/ghsl_degree_of_urbanisation
#
# UN SDG indicators related to urbanization.
#
data://meadow/un/2024-02-14/sdgs_urbanization:
Expand All @@ -76,3 +65,13 @@ steps:
- data://garden/regions/2023-01-01/regions
data://grapher/urbanization/2024-10-14/ghsl_degree_of_urbanisation:
- data://garden/urbanization/2024-10-14/ghsl_degree_of_urbanisation

# GHSL urban centers.
data://meadow/urbanization/2024-12-02/ghsl_urban_centers:
- snapshot://urbanization/2024-12-02/ghsl_urban_centers.xlsx
data://garden/urbanization/2024-12-02/ghsl_urban_centers:
- data://meadow/urbanization/2024-12-02/ghsl_urban_centers
- data://garden/wb/2024-07-29/income_groups
- data://garden/regions/2023-01-01/regions
data://grapher/urbanization/2024-12-02/ghsl_urban_centers:
- data://garden/urbanization/2024-12-02/ghsl_urban_centers
2 changes: 0 additions & 2 deletions etl/paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@

# Snapshots
SNAPSHOTS_DIR = BASE_DIR / "snapshots"
SNAPSHOTS_DIR_ARCHIVE = BASE_DIR / "snapshots_archive"

# ETL library
ETL_DIR = BASE_DIR / "etl"
Expand All @@ -32,7 +31,6 @@
STEPS_MEADOW_DIR = STEPS_DATA_DIR / "meadow"
STEPS_GARDEN_DIR = STEPS_DATA_DIR / "garden"
STEPS_GRAPHER_DIR = STEPS_DATA_DIR / "grapher"
STEP_DIR_ARCHIVE = STEP_DIR / "archive"

# Apps
APPS_DIR = BASE_DIR / "apps"
Expand Down
6 changes: 1 addition & 5 deletions etl/snapshot.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,7 @@ def path(self) -> Path:
@property
def metadata_path(self) -> Path:
"""Path to metadata file."""
archive_path = Path(f"{paths.SNAPSHOTS_DIR_ARCHIVE / self.uri}.dvc")
if archive_path.exists():
return archive_path
else:
return Path(f"{paths.SNAPSHOTS_DIR / self.uri}.dvc")
return Path(f"{paths.SNAPSHOTS_DIR / self.uri}.dvc")

def _download_dvc_file(self, md5: str) -> None:
"""Download file from remote to self.path."""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
WHO_REGION_MEMBERS = {
"African Region (WHO)": 47,
"World": 194,
"Eastern Mediterranean (WHO)": 21,
"Eastern Mediterranean (WHO)": 22,
"European Region (WHO)": 53,
"Region of the Americas (WHO)": 35,
"South-East Asia Region (WHO)": 11,
Expand Down
Loading

0 comments on commit ec5c7ed

Please sign in to comment.