Skip to content

Commit

Permalink
✨ Remove 2023 and change name of other channel
Browse files Browse the repository at this point in the history
  • Loading branch information
paarriagadap committed Oct 23, 2024
1 parent 23cf4ac commit ad98a70
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 33 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -479,38 +479,6 @@ def add_aid_by_channel(tb: Table, tb_channels_donor: Table, tb_channels_recipien
return tb


def remove_jumps_in_the_data_and_unneeded_cols(tb: Table) -> Table:
    """
    Remove jumps in the data generated by own calculation, and drop redundant columns.

    The jumps are most likely because of aggregations of population and GNI not properly done by the source.
    This is a temporary solution until the source fixes the data. It is already reported.

    The table passed in is left untouched: all corrections are applied to a copy, which is returned.
    """

    # Work on a copy so the `.loc` assignments below do not silently modify the caller's table.
    tb = tb.copy()

    # Every correction targets the same aggregate, so build its mask once.
    is_non_dac = tb["country"] == "Non-DAC countries (OECD)"

    # For i_oda_net_disbursements_share_gni: blank the indicator for all years of the aggregate.
    tb.loc[is_non_dac, "i_oda_net_disbursements_share_gni"] = None

    # For i_oda_net_disbursements_per_capita: only the 2007 value is blanked.
    tb.loc[is_non_dac & (tb["year"] == 2007), "i_oda_net_disbursements_per_capita"] = None

    # Remove rows where country = Non-DAC countries (OECD) and year 2023
    # This is because the data is not complete until December 2024
    tb = tb[~(is_non_dac & (tb["year"] == 2023))].reset_index(drop=True)

    # Remove columns
    tb = tb.drop(
        columns=[
            "oda_bilateral_2_grant_equivalents",
            "oda_multilateral_2_grant_equivalents",
            "i_oda_grant_equivalents",
        ]
    )

    return tb


def limit_grant_equivalents_from_2018_only(tb: Table) -> Table:
"""
Limit grant equivalent indicators from year 2018 onwards.
Expand Down Expand Up @@ -541,3 +509,35 @@ def combine_net_and_grant_equivalents(tb: Table) -> Table:
tb.loc[tb["year"] < 2018, "oda_official_estimate_share_gni"] = tb["i_oda_net_disbursements_share_gni"]

return tb


def remove_jumps_in_the_data_and_unneeded_cols(tb: Table) -> Table:
    """
    Remove jumps in the data generated by own calculation, and drop redundant columns.

    The jumps are most likely because of aggregations of population and GNI not properly done by the source.
    This is a temporary solution until the source fixes the data. It is already reported.

    The table passed in is left untouched: all corrections are applied to a copy, which is returned.
    """

    # Work on a copy so the `.loc` assignments below do not silently modify the caller's table.
    tb = tb.copy()

    # Every correction targets the same aggregate, so build its mask once.
    is_non_dac = tb["country"] == "Non-DAC countries (OECD)"

    # For i_oda_net_disbursements_share_gni: blank the indicator for all years of the aggregate.
    tb.loc[is_non_dac, "i_oda_net_disbursements_share_gni"] = None

    # For i_oda_net_disbursements_per_capita: only the 2007 value is blanked.
    tb.loc[is_non_dac & (tb["year"] == 2007), "i_oda_net_disbursements_per_capita"] = None

    # Remove rows where country = Non-DAC countries (OECD) and year 2023
    # This is because the data is not complete until December 2024
    tb = tb[~(is_non_dac & (tb["year"] == 2023))].reset_index(drop=True)

    # Remove columns
    tb = tb.drop(
        columns=[
            "oda_bilateral_2_grant_equivalents",
            "oda_multilateral_2_grant_equivalents",
            "i_oda_grant_equivalents",
        ]
    )

    return tb
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# NOTE: After the December 2024 update, revisit the steps in `remove_data_for_most_recent_year` (and the `MOST_RECENT_YEAR` constant it is called with).
"""Load a meadow dataset and create a garden dataset."""

import owid.catalog.processing as pr
Expand All @@ -13,6 +14,9 @@
INDEX_SECTORS = ["donor_name", "recipient_name", "year", "sector_name"]
INDEX_CHANNELS = ["donor_name", "recipient_name", "year", "channel_name"]

# Define most recent year in the data
MOST_RECENT_YEAR = 2023

# Define mapping for sectors, including new names, sub-sectors, and sectors.
SECTORS_MAPPING = {
"I.1.a. Education, Level Unspecified": {
Expand Down Expand Up @@ -254,7 +258,7 @@
"4": "Multilateral organizations",
"5": "University, college or other teaching institution, research institute or think-tank",
"6": "Private sector institutions",
"9": "Other",
"9": "Unspecified",
}

# Define multiplier for values
Expand All @@ -275,6 +279,10 @@ def run(dest_dir: str) -> None:
#
# Process data.
#
# Remove data for the most recent year.
tb_sectors = remove_data_for_most_recent_year(tb=tb_sectors, year=MOST_RECENT_YEAR)
tb_channels = remove_data_for_most_recent_year(tb=tb_channels, year=MOST_RECENT_YEAR)

tb_sectors = geo.harmonize_countries(
df=tb_sectors,
country_col="donor_name",
Expand Down Expand Up @@ -486,3 +494,14 @@ def rename_and_aggregate_channels(tb: Table) -> Table:
tb = tb.groupby(INDEX_CHANNELS, observed=True, dropna=False)["value"].sum().reset_index()

return tb


def remove_data_for_most_recent_year(tb: Table, year: int) -> Table:
    """
    Drop every row whose "year" equals `year` and renumber the index from zero.
    """

    # Rows from any other year are the ones to keep.
    rows_to_keep = tb["year"] != year
    tb_without_year = tb[rows_to_keep]

    return tb_without_year.reset_index(drop=True)

0 comments on commit ad98a70

Please sign in to comment.