From 8e37b0727a3cc6024b7c9ff42787ef18638e8906 Mon Sep 17 00:00:00 2001 From: spoonerf Date: Thu, 5 Dec 2024 13:38:21 +0000 Subject: [PATCH 1/3] =?UTF-8?q?=E2=9C=A8=20microbe=20remove=20fill=20na?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From 40064d4d350d3c2f9b5bcfb95c78249362a5c380 Mon Sep 17 00:00:00 2001 From: spoonerf Date: Thu, 5 Dec 2024 13:40:09 +0000 Subject: [PATCH 2/3] removing fill na for amr attributable charts --- etl/steps/data/garden/antibiotics/2024-12-02/microbe_amr.py | 4 +--- .../garden/antibiotics/2024-12-02/microbe_neonatal_amr.py | 4 +--- .../antibiotics/2024-12-02/total_pathogen_bloodstream_amr.py | 4 +--- .../antibiotics/2024-12-04/microbe_total_pathogens_amr.py | 4 +--- 4 files changed, 4 insertions(+), 12 deletions(-) diff --git a/etl/steps/data/garden/antibiotics/2024-12-02/microbe_amr.py b/etl/steps/data/garden/antibiotics/2024-12-02/microbe_amr.py index b9b7d253de0..399c2beba5a 100644 --- a/etl/steps/data/garden/antibiotics/2024-12-02/microbe_amr.py +++ b/etl/steps/data/garden/antibiotics/2024-12-02/microbe_amr.py @@ -35,9 +35,7 @@ def run(dest_dir: str) -> None: ) tb_total = tb_total.rename(columns={"value": "total_deaths"}, errors="raise").drop(columns=["lower", "upper"]) - tb = tb_amr.merge(tb_total, on=["country", "year", "infectious_syndrome"], how="right") - # Fill missing values with 0 - tb["amr_attributable_deaths"] = tb["amr_attributable_deaths"].replace(pd.NA, 0) + tb = tb_amr.merge(tb_total, on=["country", "year", "infectious_syndrome"], how="inner") tb["non_amr_attributable_deaths"] = tb["total_deaths"] - tb["amr_attributable_deaths"] # Rename syndromes to be shorter for use in stacked bar charts tb = rename_syndromes(tb) diff --git a/etl/steps/data/garden/antibiotics/2024-12-02/microbe_neonatal_amr.py b/etl/steps/data/garden/antibiotics/2024-12-02/microbe_neonatal_amr.py index bf345a63809..17e913c7a07 100644 --- a/etl/steps/data/garden/antibiotics/2024-12-02/microbe_neonatal_amr.py +++ b/etl/steps/data/garden/antibiotics/2024-12-02/microbe_neonatal_amr.py @@ -39,9 +39,7 @@ def run(dest_dir: str) -> None: ) tb_total = tb_total[tb_total["year"] == 2021] - tb = tb_amr.merge(tb_total, on=["country", "year", "pathogen"], how="right") - # Fill missing values with 0 - tb["amr_attributable_deaths"] = tb["amr_attributable_deaths"].replace(pd.NA, 0) + tb = tb_amr.merge(tb_total, on=["country", "year", "pathogen"], how="inner") tb["non_amr_attributable_deaths"] = tb["total_deaths"] - tb["amr_attributable_deaths"] tb = geo.harmonize_countries(df=tb, countries_file=paths.country_mapping_path) diff --git a/etl/steps/data/garden/antibiotics/2024-12-02/total_pathogen_bloodstream_amr.py b/etl/steps/data/garden/antibiotics/2024-12-02/total_pathogen_bloodstream_amr.py index e4301c23e42..6c09aa1c119 100644 --- a/etl/steps/data/garden/antibiotics/2024-12-02/total_pathogen_bloodstream_amr.py +++ b/etl/steps/data/garden/antibiotics/2024-12-02/total_pathogen_bloodstream_amr.py @@ -28,9 +28,7 @@ def run(dest_dir: str) -> None: .rename(columns={"value": "total_deaths"}) ) - tb = tb.merge(tb_total, on=["country", "year", "pathogen", "pathogen_type"], how="right") - - tb["amr_attributable_deaths"] = tb["amr_attributable_deaths"].fillna(0) + tb = tb.merge(tb_total, on=["country", "year", "pathogen", "pathogen_type"], how="inner") tb["non_amr_attributable_deaths"] = tb["total_deaths"] - tb["amr_attributable_deaths"] # Process data. tb = tb.drop(columns=["country", "pathogen_type"]).rename(columns={"pathogen": "country"}) diff --git a/etl/steps/data/garden/antibiotics/2024-12-04/microbe_total_pathogens_amr.py b/etl/steps/data/garden/antibiotics/2024-12-04/microbe_total_pathogens_amr.py index ca8e9998c57..0c71e6b28f7 100644 --- a/etl/steps/data/garden/antibiotics/2024-12-04/microbe_total_pathogens_amr.py +++ b/etl/steps/data/garden/antibiotics/2024-12-04/microbe_total_pathogens_amr.py @@ -34,9 +34,7 @@ def run(dest_dir: str) -> None: countries_file=paths.country_mapping_path, ) - tb = tb.merge(tb_total, on=["country", "year", "pathogen", "pathogen_type"], how="right") - - tb["amr_attributable_deaths"] = tb["amr_attributable_deaths"].fillna(0) + tb = tb.merge(tb_total, on=["country", "year", "pathogen", "pathogen_type"], how="inner") tb["non_amr_attributable_deaths"] = tb["total_deaths"] - tb["amr_attributable_deaths"] # Process data. tb = tb.drop(columns=["country", "pathogen_type"]).rename(columns={"pathogen": "country"}) From ce22bdd1ec3a5652598d868fc8bfac784a54d6b4 Mon Sep 17 00:00:00 2001 From: spoonerf Date: Thu, 5 Dec 2024 13:42:45 +0000 Subject: [PATCH 3/3] remove pandas import --- etl/steps/data/garden/antibiotics/2024-12-02/microbe_amr.py | 1 - .../data/garden/antibiotics/2024-12-02/microbe_neonatal_amr.py | 2 -- 2 files changed, 3 deletions(-) diff --git a/etl/steps/data/garden/antibiotics/2024-12-02/microbe_amr.py b/etl/steps/data/garden/antibiotics/2024-12-02/microbe_amr.py index 399c2beba5a..62db655bce1 100644 --- a/etl/steps/data/garden/antibiotics/2024-12-02/microbe_amr.py +++ b/etl/steps/data/garden/antibiotics/2024-12-02/microbe_amr.py @@ -1,6 +1,5 @@ """Load a meadow dataset and create a garden dataset.""" -import pandas as pd from owid.catalog import Table from etl.data_helpers import geo diff --git a/etl/steps/data/garden/antibiotics/2024-12-02/microbe_neonatal_amr.py b/etl/steps/data/garden/antibiotics/2024-12-02/microbe_neonatal_amr.py index 17e913c7a07..aab5b70b399 100644 --- a/etl/steps/data/garden/antibiotics/2024-12-02/microbe_neonatal_amr.py +++ b/etl/steps/data/garden/antibiotics/2024-12-02/microbe_neonatal_amr.py @@ -1,6 +1,4 @@ """Load a meadow dataset and create a garden dataset.""" -import pandas as pd - from etl.data_helpers import geo from etl.helpers import PathFinder, create_dataset