From 82241e421a3eb1af1c2703bf7e6571cef151a0a3 Mon Sep 17 00:00:00 2001 From: Pablo Arriagada <63430031+paarriagadap@users.noreply.github.com> Date: Tue, 10 Dec 2024 10:47:41 +0000 Subject: [PATCH 01/10] =?UTF-8?q?=F0=9F=93=8A=20wb:=20add=20series=20conne?= =?UTF-8?q?cting=20estimates=20and=20projections=20of=20poverty=20from=20W?= =?UTF-8?q?orld=20Bank=20(#3709)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 📊 wb: add series connecting estimates and projections of poverty from World Bank * :lipstick: change attribution * :sparkles: add historical + projections --- .../wb/2024-12-03/poverty_projections.meta.yml | 4 +++- .../garden/wb/2024-12-03/poverty_projections.py | 16 ++++++++++++++++ ...ity_package_poverty_prosperity_planet.zip.dvc | 1 + 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/etl/steps/data/garden/wb/2024-12-03/poverty_projections.meta.yml b/etl/steps/data/garden/wb/2024-12-03/poverty_projections.meta.yml index fd736fb1874..d70aa817927 100644 --- a/etl/steps/data/garden/wb/2024-12-03/poverty_projections.meta.yml +++ b/etl/steps/data/garden/wb/2024-12-03/poverty_projections.meta.yml @@ -33,6 +33,8 @@ definitions: Estimates are based on household surveys or extrapolated up until the year of the data release using GDP growth estimates and forecasts. For more details about the methodology, please refer to the [World Bank PIP documentation](https://datanalytics.worldbank.org/PIP-Methodology/lineupestimates.html#nowcasts). <% elif scenario == "Current forecast + historical growth" %> This data is a projection of the estimates based on GDP growth projections from the World Bank's Global Economic Prospects and the the Macro Poverty Outlook, together with IMF's World Economic Outlook, in the period 2025-2029. For the period 2030-2050, the data is projected using the average annual historical GDP per capita growth over 2010-2019. 
+ <% elif scenario == "Historical + current forecast + historical growth" %> + This data combines data based on household surveys or extrapolated up until the year of the data release using GDP growth estimates and forecasts, with projections based on GDP growth projections from the World Bank's Global Economic Prospects and the the Macro Poverty Outlook, together with IMF's World Economic Outlook, in the period 2025-2029. For the period 2030-2050, the data is projected using the average annual historical GDP per capita growth over 2010-2019. <% elif scenario == "2% growth" %> This data is a projection of the estimates based on a scenario of 2% average GDP per capita growth, while keeping income inequality constant. <% elif scenario == "2% growth + Gini reduction 1%" %> @@ -48,7 +50,7 @@ definitions: <%- endif -%> isprojection_by_scenario: |- - <% if scenario == "Historical" %> + <% if scenario == "Historical" or scenario == "Historical + current forecast + historical growth" %> false <% else %> true diff --git a/etl/steps/data/garden/wb/2024-12-03/poverty_projections.py b/etl/steps/data/garden/wb/2024-12-03/poverty_projections.py index fa62ff97730..65617353e71 100644 --- a/etl/steps/data/garden/wb/2024-12-03/poverty_projections.py +++ b/etl/steps/data/garden/wb/2024-12-03/poverty_projections.py @@ -92,6 +92,10 @@ def connect_estimates_with_projections(tb: Table) -> Table: tb = tb.copy() + # Save tb_historical and tb_current_forecast, by filtering scenario in historical and current_forecast + tb_historical = tb[tb["scenario"] == "historical"].copy().reset_index(drop=True) + tb_current_forecast = tb[tb["scenario"] == "current_forecast"].copy().reset_index(drop=True) + # Make table wider, by using scenario as columns tb = tb.pivot(index=["country", "year", "povertyline"], columns="scenario", values=INDICATOR_COLUMNS) @@ -116,4 +120,16 @@ def connect_estimates_with_projections(tb: Table) -> Table: for indicator in INDICATOR_COLUMNS: tb[indicator] = 
tb[indicator].copy_metadata(tb["country"]) + # Combine historical and current_forecast, by concatenating tb_historical and tb_current_forecast + tb_connected = pr.concat([tb_historical, tb_current_forecast], ignore_index=True) + + # Rename scenario column to "Historical + current forecast + historical growth" + tb_connected["scenario"] = "Historical + current forecast + historical growth" + + # Keep only the columns in INDEX_COLUMNS and INDICATOR_COLUMNS + tb_connected = tb_connected[INDEX_COLUMNS + INDICATOR_COLUMNS] + + # Concatenate tb and tb_connected + tb = pr.concat([tb, tb_connected], ignore_index=True) + return tb diff --git a/snapshots/wb/2024-12-03/reproducibility_package_poverty_prosperity_planet.zip.dvc b/snapshots/wb/2024-12-03/reproducibility_package_poverty_prosperity_planet.zip.dvc index 2e560863971..08689d46ba7 100644 --- a/snapshots/wb/2024-12-03/reproducibility_package_poverty_prosperity_planet.zip.dvc +++ b/snapshots/wb/2024-12-03/reproducibility_package_poverty_prosperity_planet.zip.dvc @@ -12,6 +12,7 @@ meta: producer: Lakner et al. citation_full: |- Lakner, C., Genoni, M. E., Stemmler, H., Yonzan, N., & Tetteh Baah, S. K. (2024). Reproducibility package for Poverty, Prosperity and Planet Report 2024. World Bank. https://doi.org/10.60572/KGE4-CX54 + attribution: Lakner et al. (2024). 
Reproducibility package for Poverty, Prosperity and Planet Report 2024 # Files url_main: https://reproducibility.worldbank.org/index.php/catalog/189/ From 53227435376e0594cd3786b934db9ebd45a814b7 Mon Sep 17 00:00:00 2001 From: Pablo Arriagada <63430031+paarriagadap@users.noreply.github.com> Date: Tue, 10 Dec 2024 16:19:12 +0000 Subject: [PATCH 02/10] =?UTF-8?q?=F0=9F=93=8A=20wb:=20modify=20poverty=20p?= =?UTF-8?q?rojections=20metadata=20(#3712)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 📊 wb: modify poverty projections metadata * :sparkles: changes in metadata * empty * :bug: Exclude chart diffs with indistinguishable configs --------- Co-authored-by: Marigold --- .../wizard/app_pages/chart_diff/chart_diff.py | 16 +++++++++++++++ .../2024-12-03/poverty_projections.meta.yml | 20 +++++++++---------- .../wb/2024-12-03/poverty_projections.py | 18 ++++++++--------- 3 files changed, 35 insertions(+), 19 deletions(-) diff --git a/apps/wizard/app_pages/chart_diff/chart_diff.py b/apps/wizard/app_pages/chart_diff/chart_diff.py index af74d1fe8f2..3ed8b71f3a0 100644 --- a/apps/wizard/app_pages/chart_diff/chart_diff.py +++ b/apps/wizard/app_pages/chart_diff/chart_diff.py @@ -1,5 +1,6 @@ import datetime as dt import difflib +import json import pprint from typing import Any, Dict, List, Optional @@ -653,6 +654,7 @@ def _modified_chart_configs_on_staging( select c.id as chartId, MD5(cc.full) as chartChecksum, + cc.full as chartConfig, c.lastEditedByUserId as chartLastEditedByUserId, c.publishedByUserId as chartPublishedByUserId, c.lastEditedAt as chartLastEditedAt @@ -699,6 +701,20 @@ def _modified_chart_configs_on_staging( diff = source_df.copy() diff["configEdited"] = source_df["chartChecksum"] != target_df["chartChecksum"] + # Go through edited configs and do a more detailed comparison + ix = diff["configEdited"] & target_df["chartChecksum"].notnull() + equal_configs = [] + for chart_id, row in diff.loc[ix].iterrows(): + 
source_config = json.loads(row["chartConfig"]) + target_config = json.loads(target_df.loc[chart_id, "chartConfig"]) + + # Compare configs + if configs_are_equal(source_config, target_config): + equal_configs.append(chart_id) + + # Exclude configs that have different chartChecksum, but are actually the same (e.g. have just different version) + diff = diff[~diff.index.isin(equal_configs)] + # Add flag 'edited in staging' diff["chartEditedInStaging"] = True diff --git a/etl/steps/data/garden/wb/2024-12-03/poverty_projections.meta.yml b/etl/steps/data/garden/wb/2024-12-03/poverty_projections.meta.yml index d70aa817927..17af424e414 100644 --- a/etl/steps/data/garden/wb/2024-12-03/poverty_projections.meta.yml +++ b/etl/steps/data/garden/wb/2024-12-03/poverty_projections.meta.yml @@ -29,28 +29,28 @@ definitions: Non-market sources of income, including food grown by subsistence farmers for their own consumption, are taken into account. description_key_scenarios: |- - <% if scenario == "Historical" %> + <% if scenario == "Historical estimates" %> Estimates are based on household surveys or extrapolated up until the year of the data release using GDP growth estimates and forecasts. For more details about the methodology, please refer to the [World Bank PIP documentation](https://datanalytics.worldbank.org/PIP-Methodology/lineupestimates.html#nowcasts). - <% elif scenario == "Current forecast + historical growth" %> + <% elif scenario == "Current forecast + historical growth projections" %> This data is a projection of the estimates based on GDP growth projections from the World Bank's Global Economic Prospects and the the Macro Poverty Outlook, together with IMF's World Economic Outlook, in the period 2025-2029. For the period 2030-2050, the data is projected using the average annual historical GDP per capita growth over 2010-2019. 
- <% elif scenario == "Historical + current forecast + historical growth" %> + <% elif scenario == "Historical estimates + current forecast + historical growth projections" %> This data combines data based on household surveys or extrapolated up until the year of the data release using GDP growth estimates and forecasts, with projections based on GDP growth projections from the World Bank's Global Economic Prospects and the the Macro Poverty Outlook, together with IMF's World Economic Outlook, in the period 2025-2029. For the period 2030-2050, the data is projected using the average annual historical GDP per capita growth over 2010-2019. - <% elif scenario == "2% growth" %> + <% elif scenario == "2% growth projections" %> This data is a projection of the estimates based on a scenario of 2% average GDP per capita growth, while keeping income inequality constant. - <% elif scenario == "2% growth + Gini reduction 1%" %> + <% elif scenario == "2% growth + Gini reduction 1% projections" %> This data is a projection of the estimates based on a scenatio of 2% average GDP per capita growth, while reducing income inequality by 1% of the Gini coefficient per year. - <% elif scenario == "2% growth + Gini reduction 2%" %> + <% elif scenario == "2% growth + Gini reduction 2% projections" %> This data is a projection of the estimates based on a scenatio of 2% average GDP per capita growth, while reducing income inequality by 2% of the Gini coefficient per year. - <% elif scenario == "4% growth" %> + <% elif scenario == "4% growth projections" %> This data is a projection of the estimates based on a scenario of 4% average GDP per capita growth, while keeping income inequality constant. - <% elif scenario == "6% growth" %> + <% elif scenario == "6% growth projections" %> This data is a projection of the estimates based on a scenario of 6% average GDP per capita growth, while keeping income inequality constant. 
- <% elif scenario == "8% growth" %> + <% elif scenario == "8% growth projections" %> This data is a projection of the estimates based on a scenario of 8% average GDP per capita growth, while keeping income inequality constant. <%- endif -%> isprojection_by_scenario: |- - <% if scenario == "Historical" or scenario == "Historical + current forecast + historical growth" %> + <% if scenario == "Historical estimates" or scenario == "Historical estimates + current forecast + historical growth projections" %> false <% else %> true diff --git a/etl/steps/data/garden/wb/2024-12-03/poverty_projections.py b/etl/steps/data/garden/wb/2024-12-03/poverty_projections.py index 65617353e71..66e637c2fcd 100644 --- a/etl/steps/data/garden/wb/2024-12-03/poverty_projections.py +++ b/etl/steps/data/garden/wb/2024-12-03/poverty_projections.py @@ -18,14 +18,14 @@ # Define scenarios and new names SCENARIOS = { - "historical": "Historical", - "current_forecast": "Current forecast + historical growth", - "2pct": "2% growth", - "2pct_gini1": "2% growth + Gini reduction 1%", - "2pct_gini2": "2% growth + Gini reduction 2%", - "4pct": "4% growth", - "6pct": "6% growth", - "8pct": "8% growth", + "historical": "Historical estimates", + "current_forecast": "Current forecast + historical growth projections", + "2pct": "2% growth projections", + "2pct_gini1": "2% growth + Gini reduction 1% projections", + "2pct_gini2": "2% growth + Gini reduction 2% projections", + "4pct": "4% growth projections", + "6pct": "6% growth projections", + "8pct": "8% growth projections", } # Define index columns @@ -124,7 +124,7 @@ def connect_estimates_with_projections(tb: Table) -> Table: tb_connected = pr.concat([tb_historical, tb_current_forecast], ignore_index=True) # Rename scenario column to "Historical + current forecast + historical growth" - tb_connected["scenario"] = "Historical + current forecast + historical growth" + tb_connected["scenario"] = "Historical estimates + projections" # Keep only the columns in 
INDEX_COLUMNS and INDICATOR_COLUMNS tb_connected = tb_connected[INDEX_COLUMNS + INDICATOR_COLUMNS] From 88d719085fa74cf2a99747ee75468c44740c5497 Mon Sep 17 00:00:00 2001 From: Pablo Arriagada Date: Tue, 10 Dec 2024 17:35:39 +0000 Subject: [PATCH 03/10] :bug: add the right scenario for estimates + projections --- .../data/garden/wb/2024-12-03/poverty_projections.meta.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/etl/steps/data/garden/wb/2024-12-03/poverty_projections.meta.yml b/etl/steps/data/garden/wb/2024-12-03/poverty_projections.meta.yml index 17af424e414..f3f04bbd41d 100644 --- a/etl/steps/data/garden/wb/2024-12-03/poverty_projections.meta.yml +++ b/etl/steps/data/garden/wb/2024-12-03/poverty_projections.meta.yml @@ -33,7 +33,7 @@ definitions: Estimates are based on household surveys or extrapolated up until the year of the data release using GDP growth estimates and forecasts. For more details about the methodology, please refer to the [World Bank PIP documentation](https://datanalytics.worldbank.org/PIP-Methodology/lineupestimates.html#nowcasts). <% elif scenario == "Current forecast + historical growth projections" %> This data is a projection of the estimates based on GDP growth projections from the World Bank's Global Economic Prospects and the the Macro Poverty Outlook, together with IMF's World Economic Outlook, in the period 2025-2029. For the period 2030-2050, the data is projected using the average annual historical GDP per capita growth over 2010-2019. 
- <% elif scenario == "Historical estimates + current forecast + historical growth projections" %> + <% elif scenario == "Historical estimates + projections" %> This data combines data based on household surveys or extrapolated up until the year of the data release using GDP growth estimates and forecasts, with projections based on GDP growth projections from the World Bank's Global Economic Prospects and the the Macro Poverty Outlook, together with IMF's World Economic Outlook, in the period 2025-2029. For the period 2030-2050, the data is projected using the average annual historical GDP per capita growth over 2010-2019. <% elif scenario == "2% growth projections" %> This data is a projection of the estimates based on a scenario of 2% average GDP per capita growth, while keeping income inequality constant. @@ -50,7 +50,7 @@ definitions: <%- endif -%> isprojection_by_scenario: |- - <% if scenario == "Historical estimates" or scenario == "Historical estimates + current forecast + historical growth projections" %> + <% if scenario == "Historical estimates" or scenario == "Historical estimates + projections" %> false <% else %> true From 936f85c9437f17dc203d841010d2cb11c1feb308 Mon Sep 17 00:00:00 2001 From: owidbot Date: Wed, 11 Dec 2024 06:28:32 +0000 Subject: [PATCH 04/10] :robot: automatic excess mortality update --- snapshots/excess_mortality/latest/hmd_stmf.csv.dvc | 2 +- snapshots/excess_mortality/latest/wmd.csv.dvc | 2 +- snapshots/excess_mortality/latest/xm_karlinsky_kobak.csv.dvc | 2 +- .../excess_mortality/latest/xm_karlinsky_kobak_ages.csv.dvc | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/snapshots/excess_mortality/latest/hmd_stmf.csv.dvc b/snapshots/excess_mortality/latest/hmd_stmf.csv.dvc index bdc68216782..29102377237 100644 --- a/snapshots/excess_mortality/latest/hmd_stmf.csv.dvc +++ b/snapshots/excess_mortality/latest/hmd_stmf.csv.dvc @@ -13,7 +13,7 @@ meta: HMD provides an online STMF visualization toolkit 
(https://mpidr.shinyapps.io/stmortality). url: https://www.mortality.org/Data/STMF source_data_url: https://www.mortality.org/File/GetDocument/Public/STMF/Outputs/stmf.csv - date_accessed: 2024-12-10 + date_accessed: 2024-12-11 publication_date: 2024-11-11 publication_year: 2024 published_by: |- diff --git a/snapshots/excess_mortality/latest/wmd.csv.dvc b/snapshots/excess_mortality/latest/wmd.csv.dvc index ffdbd6a06db..d7b4d86e4fd 100644 --- a/snapshots/excess_mortality/latest/wmd.csv.dvc +++ b/snapshots/excess_mortality/latest/wmd.csv.dvc @@ -13,7 +13,7 @@ meta: Published paper available at https://elifesciences.org/articles/69336. url: https://github.com/akarlinsky/world_mortality/ source_data_url: https://raw.githubusercontent.com/akarlinsky/world_mortality/main/world_mortality.csv - date_accessed: 2024-12-10 + date_accessed: 2024-12-11 publication_date: '2021-06-30' publication_year: 2021 published_by: |- diff --git a/snapshots/excess_mortality/latest/xm_karlinsky_kobak.csv.dvc b/snapshots/excess_mortality/latest/xm_karlinsky_kobak.csv.dvc index 71e3a116720..91a48ea6e6d 100644 --- a/snapshots/excess_mortality/latest/xm_karlinsky_kobak.csv.dvc +++ b/snapshots/excess_mortality/latest/xm_karlinsky_kobak.csv.dvc @@ -7,7 +7,7 @@ meta: For more details, refer to https://github.com/dkobak/excess-mortality#excess-mortality-during-the-covid-19-pandemic. 
url: https://github.com/dkobak/excess-mortality source_data_url: https://raw.githubusercontent.com/dkobak/excess-mortality/main/baselines-per-year.csv - date_accessed: 2024-12-10 + date_accessed: 2024-12-11 publication_date: '2021-06-30' publication_year: 2021 published_by: |- diff --git a/snapshots/excess_mortality/latest/xm_karlinsky_kobak_ages.csv.dvc b/snapshots/excess_mortality/latest/xm_karlinsky_kobak_ages.csv.dvc index b3cf92134a5..4571161e0bd 100644 --- a/snapshots/excess_mortality/latest/xm_karlinsky_kobak_ages.csv.dvc +++ b/snapshots/excess_mortality/latest/xm_karlinsky_kobak_ages.csv.dvc @@ -6,7 +6,7 @@ meta: For more details, refer to https://github.com/dkobak/excess-mortality#excess-mortality-during-the-covid-19-pandemic. url: https://github.com/dkobak/excess-mortality source_data_url: https://raw.githubusercontent.com/dkobak/excess-mortality/main/baselines-stmf.csv - date_accessed: 2024-12-10 + date_accessed: 2024-12-11 publication_date: '2021-06-30' publication_year: 2021 published_by: |- From ddc77f9427b9a2dfa56fc7dd5307b32c417b6d4c Mon Sep 17 00:00:00 2001 From: owidbot Date: Wed, 11 Dec 2024 06:28:56 +0000 Subject: [PATCH 05/10] :robot: update: monkeypox --- snapshots/health/latest/global_health_mpox.csv.dvc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/snapshots/health/latest/global_health_mpox.csv.dvc b/snapshots/health/latest/global_health_mpox.csv.dvc index 516ded55647..4d27ab747a3 100644 --- a/snapshots/health/latest/global_health_mpox.csv.dvc +++ b/snapshots/health/latest/global_health_mpox.csv.dvc @@ -22,6 +22,6 @@ meta: url: https://global.health/terms-of-use/ outs: - - md5: 7928d79ed3caf862d86ba729737fc255 - size: 16733780 + - md5: 08388d2230adafbb7fe28ddcd1eb0dc8 + size: 16813136 path: global_health_mpox.csv From 0b8133a565ecd0ee210abae4334cebf941992087 Mon Sep 17 00:00:00 2001 From: owidbot Date: Wed, 11 Dec 2024 06:29:32 +0000 Subject: [PATCH 06/10] :robot: update: covid-19 cases and deaths --- 
snapshots/covid/latest/cases_deaths.csv.dvc | 2 +- snapshots/who/latest/fluid.csv.dvc | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/snapshots/covid/latest/cases_deaths.csv.dvc b/snapshots/covid/latest/cases_deaths.csv.dvc index 2552d99dd6e..247507293dc 100644 --- a/snapshots/covid/latest/cases_deaths.csv.dvc +++ b/snapshots/covid/latest/cases_deaths.csv.dvc @@ -22,7 +22,7 @@ meta: version_producer: WHO COVID-19 Dashboard - Daily cases and deaths url_main: https://covid19.who.int/ url_download: https://srhdpeuwpubsa.blob.core.windows.net/whdh/COVID/WHO-COVID-19-global-daily-data.csv - date_accessed: 2024-12-10 + date_accessed: 2024-12-11 date_published: '2024-07-07' license: name: CC BY 4.0 diff --git a/snapshots/who/latest/fluid.csv.dvc b/snapshots/who/latest/fluid.csv.dvc index c98234464de..30b2f569464 100644 --- a/snapshots/who/latest/fluid.csv.dvc +++ b/snapshots/who/latest/fluid.csv.dvc @@ -16,6 +16,6 @@ meta: The platform accommodates both qualitative and quantitative data which facilitates the tracking of global trends, spread, intensity, and impact of influenza. These data are made freely available to health policy makers in order to assist them in making informed decisions regarding the management of influenza. 
wdir: ../../../data/snapshots/who/latest outs: - - md5: 516f378e03682d099c5bdcecb732b38b - size: 168097330 + - md5: 811f5ca9e719e680bc1cde286e599f9d + size: 168107745 path: fluid.csv From 3d39bb3cba79016e49ba15609cf1046daa79c07e Mon Sep 17 00:00:00 2001 From: owidbot Date: Wed, 11 Dec 2024 06:29:37 +0000 Subject: [PATCH 07/10] :robot: automatic flunet update --- snapshots/who/latest/flunet.csv.dvc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/snapshots/who/latest/flunet.csv.dvc b/snapshots/who/latest/flunet.csv.dvc index dc62b167acc..6a11439d09e 100644 --- a/snapshots/who/latest/flunet.csv.dvc +++ b/snapshots/who/latest/flunet.csv.dvc @@ -16,6 +16,6 @@ meta: The data are provided remotely by National Influenza Centres (NICs) of the Global Influenza Surveillance and Response System (GISRS) and other national influenza reference laboratories collaborating actively with GISRS, or are uploaded from WHO regional databases. wdir: ../../../data/snapshots/who/latest outs: - - md5: 50775d6806b50d572bc79031134bc3e3 - size: 27221232 + - md5: b687f5f92351d148e71bb3b5d60c0c50 + size: 27222953 path: flunet.csv From c45148c40772d247f7b697a65949a545a75f5707 Mon Sep 17 00:00:00 2001 From: owidbot Date: Wed, 11 Dec 2024 06:30:31 +0000 Subject: [PATCH 08/10] :robot: automatic wildfires update --- snapshots/climate/latest/weekly_wildfires.csv.dvc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/snapshots/climate/latest/weekly_wildfires.csv.dvc b/snapshots/climate/latest/weekly_wildfires.csv.dvc index 555d87e87a4..d103b37e950 100644 --- a/snapshots/climate/latest/weekly_wildfires.csv.dvc +++ b/snapshots/climate/latest/weekly_wildfires.csv.dvc @@ -9,12 +9,12 @@ meta: citation_full: Global Wildfire Information System attribution_short: GWIS url_main: https://gwis.jrc.ec.europa.eu/apps/gwis.statistics/seasonaltrend - date_accessed: 2024-12-10 - date_published: 2024-12-10 + date_accessed: 2024-12-11 + date_published: 2024-12-11 license: 
name: CC BY 4.0 url: https://gwis.jrc.ec.europa.eu/about-gwis/data-license outs: - - md5: d1de4bd7ac3c08a0dcc6eb63f891f71b - size: 12799309 + - md5: fc6f8b908a2988b2d8048707526c460a + size: 12799310 path: weekly_wildfires.csv From 2dcc5b42e481234d5e6ab9b00f105a9c62bdc681 Mon Sep 17 00:00:00 2001 From: Marigold Date: Wed, 11 Dec 2024 14:19:51 +0100 Subject: [PATCH 09/10] :bug: fix jinja for covid sequences --- .../data/garden/covid/latest/sequence.meta.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/etl/steps/data/garden/covid/latest/sequence.meta.yml b/etl/steps/data/garden/covid/latest/sequence.meta.yml index acebff8b8e4..1f79931b216 100644 --- a/etl/steps/data/garden/covid/latest/sequence.meta.yml +++ b/etl/steps/data/garden/covid/latest/sequence.meta.yml @@ -21,13 +21,13 @@ tables: num_sequences: title: "Number of sequenced COVID-19 genomes - Variant: << variant >>" description_short: |- - <% set mapping = dict( - non_who="The number of analyzed sequences in the preceding two weeks that correspond to non-relevant variant groups. This number may not reflect the complete breakdown of cases since only a fraction of all cases are sequenced.", - other="The number of analyzed sequences in the preceding two weeks that correspond to non-categorised variant groups. This number may not reflect the complete breakdown of cases since only a fraction of all cases are sequenced.", - else="The number of analyzed sequences in the preceding two weeks that correspond to variant group '<< variant >>'. This number may not reflect the complete breakdown of cases since only a fraction of all cases are sequenced." - ) %> - - << mapping.get(variant, mapping['else']) >> + <% if variant == 'non_who' %> + The number of analyzed sequences in the preceding two weeks that correspond to non-relevant variant groups. This number may not reflect the complete breakdown of cases since only a fraction of all cases are sequenced. 
+ <% elif variant == 'other' %> + The number of analyzed sequences in the preceding two weeks that correspond to non-categorised variant groups. This number may not reflect the complete breakdown of cases since only a fraction of all cases are sequenced. + <% else %> + The number of analyzed sequences in the preceding two weeks that correspond to variant group '<< variant >>'. This number may not reflect the complete breakdown of cases since only a fraction of all cases are sequenced. + <%- endif -%> unit: "sequenced genomes" display: tolerance: 28 From b8d4a0cc44fcf28b8ae144c178e425ef883a0d41 Mon Sep 17 00:00:00 2001 From: Marigold Date: Wed, 11 Dec 2024 15:27:50 +0100 Subject: [PATCH 10/10] :bug: fix unittests when OWID_STRICT is set --- lib/catalog/owid/catalog/datasets.py | 4 ++-- tests/test_datadiff.py | 5 +++++ tests/test_steps.py | 7 ++++++- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/lib/catalog/owid/catalog/datasets.py b/lib/catalog/owid/catalog/datasets.py index 745563a2d81..d24f55d2c5f 100644 --- a/lib/catalog/owid/catalog/datasets.py +++ b/lib/catalog/owid/catalog/datasets.py @@ -119,7 +119,7 @@ def add( utils.validate_underscore(col, "Variable's name") if not table.primary_key: - if "OWID_STRICT" in environ: + if environ.get("OWID_STRICT"): raise PrimaryKeyMissing( f"Table `{table.metadata.short_name}` does not have a primary_key -- please use t.set_index([col, ...], verify_integrity=True) to indicate dimensions before saving" ) @@ -128,7 +128,7 @@ def add( f"Table `{table.metadata.short_name}` does not have a primary_key -- please use t.set_index([col, ...], verify_integrity=True) to indicate dimensions before saving" ) - if not table.index.is_unique and "OWID_STRICT" in environ: + if not table.index.is_unique and environ.get("OWID_STRICT"): [(k, dups)] = table.index.value_counts().head(1).to_dict().items() raise NonUniqueIndex( f"Table `{table.metadata.short_name}` has duplicate values in the index -- could you have made a 
mistake?\n\n" diff --git a/tests/test_datadiff.py b/tests/test_datadiff.py index 316ecfdeaf1..be4466781c1 100644 --- a/tests/test_datadiff.py +++ b/tests/test_datadiff.py @@ -1,3 +1,6 @@ +import os +from unittest.mock import patch + import pandas as pd from owid.catalog import Dataset, DatasetMeta, Table @@ -19,6 +22,7 @@ def _create_datasets(tmp_path): return ds_a, ds_b +@patch.dict(os.environ, {"OWID_STRICT": ""}) def test_DatasetDiff_summary(tmp_path): ds_a, ds_b = _create_datasets(tmp_path) @@ -43,6 +47,7 @@ def test_DatasetDiff_summary(tmp_path): ] +@patch.dict(os.environ, {"OWID_STRICT": ""}) def test_new_data(tmp_path): ds_a, ds_b = _create_datasets(tmp_path) diff --git a/tests/test_steps.py b/tests/test_steps.py index ff266f1917d..5693fcd05fd 100644 --- a/tests/test_steps.py +++ b/tests/test_steps.py @@ -15,6 +15,7 @@ from unittest.mock import patch import pandas as pd +import requests from owid.catalog import Dataset from etl import paths @@ -162,7 +163,11 @@ def test_select_dirty_steps(): def test_get_etag(): - etag = get_etag("https://raw.githubusercontent.com/owid/owid-grapher/master/README.md") + try: + etag = get_etag("https://raw.githubusercontent.com/owid/owid-grapher/master/README.md") + # ignore SSL errors + except requests.exceptions.SSLError: + return assert etag