✨ Remove 2023 and change name of other channel

owid · Oct 23, 2024 · ad98a70 · ad98a70
1 parent 23cf4ac
commit ad98a70
Show file tree

Hide file tree

Showing 2 changed files with 52 additions and 33 deletions.
diff --git a/etl/steps/data/garden/oecd/2024-08-21/official_development_assistance.py b/etl/steps/data/garden/oecd/2024-08-21/official_development_assistance.py
@@ -479,38 +479,6 @@ def add_aid_by_channel(tb: Table, tb_channels_donor: Table, tb_channels_recipien
     return tb
 
 
-def remove_jumps_in_the_data_and_unneeded_cols(tb: Table) -> Table:
-    """
-    Remove jumps in the data generated by own calculation.
-    This is most likely because of aggregations of population and GNI not properly done by the source.
-    This is a temporary solution until the source fixes the data. It is already reported.
-
-    Also, remove redundant columns.
-    """
-
-    # For i_oda_net_disbursements_share_gni
-    tb.loc[
-        (tb["country"] == "Non-DAC countries (OECD)"),
-        "i_oda_net_disbursements_share_gni",
-    ] = None
-
-    # For i_oda_net_disbursements_per_capita
-    tb.loc[
-        (tb["country"] == "Non-DAC countries (OECD)") & (tb["year"] == 2007), "i_oda_net_disbursements_per_capita"
-    ] = None
-
-    # Remove rows where country = Non-DAC countries (OECD) and year 2023
-    # This is because the data is not complete until December 2024
-    tb = tb[~((tb["country"] == "Non-DAC countries (OECD)") & (tb["year"] == 2023))].reset_index(drop=True)
-
-    # Remove columns
-    tb = tb.drop(
-        columns=["oda_bilateral_2_grant_equivalents", "oda_multilateral_2_grant_equivalents", "i_oda_grant_equivalents"]
-    )
-
-    return tb
-
-
 def limit_grant_equivalents_from_2018_only(tb: Table) -> Table:
     """
     Limit grant equivalent indicators from year 2018 onwards.
@@ -541,3 +509,35 @@ def combine_net_and_grant_equivalents(tb: Table) -> Table:
     tb.loc[tb["year"] < 2018, "oda_official_estimate_share_gni"] = tb["i_oda_net_disbursements_share_gni"]
 
     return tb
+
+
+def remove_jumps_in_the_data_and_unneeded_cols(tb: Table) -> Table:
+    """
+    Remove jumps in the data that are introduced by our own calculations.
+    They most likely arise because the source does not aggregate population and GNI properly.
+    This is a temporary workaround until the source fixes the data; the issue has already been reported.
+
+    Also, remove redundant columns.
+    """
+
+    # For i_oda_net_disbursements_share_gni
+    tb.loc[
+        (tb["country"] == "Non-DAC countries (OECD)"),
+        "i_oda_net_disbursements_share_gni",
+    ] = None
+
+    # For i_oda_net_disbursements_per_capita
+    tb.loc[
+        (tb["country"] == "Non-DAC countries (OECD)") & (tb["year"] == 2007), "i_oda_net_disbursements_per_capita"
+    ] = None
+
+    # Remove rows where country = Non-DAC countries (OECD) and year 2023
+    # This is because the data is not complete until December 2024
+    tb = tb[~((tb["country"] == "Non-DAC countries (OECD)") & (tb["year"] == 2023))].reset_index(drop=True)
+
+    # Remove columns
+    tb = tb.drop(
+        columns=["oda_bilateral_2_grant_equivalents", "oda_multilateral_2_grant_equivalents", "i_oda_grant_equivalents"]
+    )
+
+    return tb
diff --git a/etl/steps/data/garden/one/2024-10-02/official_development_assistance_one.py b/etl/steps/data/garden/one/2024-10-02/official_development_assistance_one.py
@@ -1,3 +1,4 @@
+# NOTE: After December 2024 update, check the steps in `remove_data_for_most_recent_year`
 """Load a meadow dataset and create a garden dataset."""
 
 import owid.catalog.processing as pr
@@ -13,6 +14,9 @@
 INDEX_SECTORS = ["donor_name", "recipient_name", "year", "sector_name"]
 INDEX_CHANNELS = ["donor_name", "recipient_name", "year", "channel_name"]
 
+# Most recent year in the data; run() drops the rows for this year from both tables.
+MOST_RECENT_YEAR = 2023
+
 # Define mapping for sectors, including new names, sub-sectors, and sectors.
 SECTORS_MAPPING = {
     "I.1.a. Education, Level Unspecified": {
@@ -254,7 +258,7 @@
     "4": "Multilateral organizations",
     "5": "University, college or other teaching institution, research institute or think-tank",
     "6": "Private sector institutions",
-    "9": "Other",
+    "9": "Unspecified",
 }
 
 # Define multiplier for values
@@ -275,6 +279,10 @@ def run(dest_dir: str) -> None:
     #
     # Process data.
     #
+    # Drop MOST_RECENT_YEAR rows from both tables before any further processing.
+    tb_sectors = remove_data_for_most_recent_year(tb=tb_sectors, year=MOST_RECENT_YEAR)
+    tb_channels = remove_data_for_most_recent_year(tb=tb_channels, year=MOST_RECENT_YEAR)
+
     tb_sectors = geo.harmonize_countries(
         df=tb_sectors,
         country_col="donor_name",
@@ -486,3 +494,14 @@ def rename_and_aggregate_channels(tb: Table) -> Table:
     tb = tb.groupby(INDEX_CHANNELS, observed=True, dropna=False)["value"].sum().reset_index()
 
     return tb
+
+
+def remove_data_for_most_recent_year(tb: Table, year: int) -> Table:
+    """
+    Drop every row of `tb` whose "year" column equals `year` and reset the index.
+    """
+
+    # Filter the table to remove the most recent year
+    tb = tb[tb["year"] != year].reset_index(drop=True)
+
+    return tb