diff --git a/etl/steps/data/garden/education/2024-12-11/people_with_education.countries.json b/etl/steps/data/garden/education/2024-12-11/people_with_education.countries.json deleted file mode 100644 index ed57831356b..00000000000 --- a/etl/steps/data/garden/education/2024-12-11/people_with_education.countries.json +++ /dev/null @@ -1,202 +0,0 @@ -{ - "Afghanistan": "Afghanistan", - "Albania": "Albania", - "Algeria": "Algeria", - "Angola": "Angola", - "Antigua and Barbuda": "Antigua and Barbuda", - "Argentina": "Argentina", - "Armenia": "Armenia", - "Aruba": "Aruba", - "Australia": "Australia", - "Austria": "Austria", - "Azerbaijan": "Azerbaijan", - "Bahamas": "Bahamas", - "Bahrain": "Bahrain", - "Bangladesh": "Bangladesh", - "Barbados": "Barbados", - "Belarus": "Belarus", - "Belgium": "Belgium", - "Belize": "Belize", - "Benin": "Benin", - "Bhutan": "Bhutan", - "Bolivia (Plurinational State of)": "Bolivia", - "Bosnia and Herzegovina": "Bosnia and Herzegovina", - "Botswana": "Botswana", - "Brazil": "Brazil", - "Brunei Darussalam": "Brunei", - "Bulgaria": "Bulgaria", - "Burkina Faso": "Burkina Faso", - "Burundi": "Burundi", - "Cabo Verde": "Cape Verde", - "Cambodia": "Cambodia", - "Cameroon": "Cameroon", - "Canada": "Canada", - "Central African Republic": "Central African Republic", - "Chad": "Chad", - "Chile": "Chile", - "China": "China", - "China, Hong Kong SAR": "Hong Kong", - "China, Macao SAR": "Macao", - "Colombia": "Colombia", - "Comoros": "Comoros", - "Congo": "Congo", - "Costa Rica": "Costa Rica", - "Cote d'Ivoire": "Cote d'Ivoire", - "Croatia": "Croatia", - "Cuba": "Cuba", - "Curacao": "Curacao", - "Cyprus": "Cyprus", - "Czechia": "Czechia", - "Denmark": "Denmark", - "Djibouti": "Djibouti", - "Dominican Republic": "Dominican Republic", - "Ecuador": "Ecuador", - "Egypt": "Egypt", - "El Salvador": "El Salvador", - "Equatorial Guinea": "Equatorial Guinea", - "Eritrea": "Eritrea", - "Estonia": "Estonia", - "Eswatini": "Eswatini", - "Ethiopia": "Ethiopia", - 
"Fiji": "Fiji", - "Finland": "Finland", - "France": "France", - "French Guiana": "French Guiana", - "French Polynesia": "French Polynesia", - "Gabon": "Gabon", - "Gambia": "Gambia", - "Georgia": "Georgia", - "Germany": "Germany", - "Ghana": "Ghana", - "Greece": "Greece", - "Grenada": "Grenada", - "Guadeloupe": "Guadeloupe", - "Guam": "Guam", - "Guatemala": "Guatemala", - "Guinea": "Guinea", - "Guinea-Bissau": "Guinea-Bissau", - "Guyana": "Guyana", - "Haiti": "Haiti", - "Honduras": "Honduras", - "Hungary": "Hungary", - "Iceland": "Iceland", - "India": "India", - "Indonesia": "Indonesia", - "Iran (Islamic Republic of)": "Iran", - "Iraq": "Iraq", - "Ireland": "Ireland", - "Israel": "Israel", - "Italy": "Italy", - "Jamaica": "Jamaica", - "Japan": "Japan", - "Jordan": "Jordan", - "Kazakhstan": "Kazakhstan", - "Kenya": "Kenya", - "Kiribati": "Kiribati", - "Kuwait": "Kuwait", - "Kyrgyzstan": "Kyrgyzstan", - "Latvia": "Latvia", - "Lebanon": "Lebanon", - "Lesotho": "Lesotho", - "Liberia": "Liberia", - "Libya": "Libya", - "Lithuania": "Lithuania", - "Luxembourg": "Luxembourg", - "Madagascar": "Madagascar", - "Malawi": "Malawi", - "Malaysia": "Malaysia", - "Maldives": "Maldives", - "Mali": "Mali", - "Malta": "Malta", - "Martinique": "Martinique", - "Mauritania": "Mauritania", - "Mauritius": "Mauritius", - "Mayotte": "Mayotte", - "Mexico": "Mexico", - "Micronesia (Fed. 
States of)": "Micronesia (country)", - "Mongolia": "Mongolia", - "Montenegro": "Montenegro", - "Morocco": "Morocco", - "Mozambique": "Mozambique", - "Myanmar": "Myanmar", - "Namibia": "Namibia", - "Nepal": "Nepal", - "Netherlands": "Netherlands", - "New Caledonia": "New Caledonia", - "New Zealand": "New Zealand", - "Nicaragua": "Nicaragua", - "Niger": "Niger", - "Nigeria": "Nigeria", - "North Macedonia": "North Macedonia", - "Norway": "Norway", - "Oman": "Oman", - "Pakistan": "Pakistan", - "Panama": "Panama", - "Papua New Guinea": "Papua New Guinea", - "Paraguay": "Paraguay", - "Peru": "Peru", - "Philippines": "Philippines", - "Poland": "Poland", - "Portugal": "Portugal", - "Puerto Rico": "Puerto Rico", - "Qatar": "Qatar", - "Republic of Korea": "South Korea", - "Republic of Moldova": "Moldova", - "Reunion": "Reunion", - "Romania": "Romania", - "Russian Federation": "Russia", - "Rwanda": "Rwanda", - "Saint Lucia": "Saint Lucia", - "Samoa": "Samoa", - "Sao Tome and Principe": "Sao Tome and Principe", - "Saudi Arabia": "Saudi Arabia", - "Senegal": "Senegal", - "Serbia": "Serbia", - "Seychelles": "Seychelles", - "Sierra Leone": "Sierra Leone", - "Singapore": "Singapore", - "Slovakia": "Slovakia", - "Slovenia": "Slovenia", - "Solomon Islands": "Solomon Islands", - "Somalia": "Somalia", - "South Africa": "South Africa", - "South Sudan": "South Sudan", - "Spain": "Spain", - "Sri Lanka": "Sri Lanka", - "St. 
Vincent and the Grenadines": "Saint Vincent and the Grenadines", - "State of Palestine": "Palestine", - "Sudan": "Sudan", - "Suriname": "Suriname", - "Sweden": "Sweden", - "Switzerland": "Switzerland", - "Syrian Arab Republic": "Syria", - "Tajikistan": "Tajikistan", - "Thailand": "Thailand", - "Timor-Leste": "East Timor", - "Togo": "Togo", - "Tonga": "Tonga", - "Trinidad and Tobago": "Trinidad and Tobago", - "Tunisia": "Tunisia", - "Turkey": "Turkey", - "Turkmenistan": "Turkmenistan", - "Uganda": "Uganda", - "Ukraine": "Ukraine", - "United Arab Emirates": "United Arab Emirates", - "United Kingdom": "United Kingdom", - "United Republic of Tanzania": "Tanzania", - "United States Virgin Islands": "United States Virgin Islands", - "United States of America": "United States", - "Uruguay": "Uruguay", - "Uzbekistan": "Uzbekistan", - "Vanuatu": "Vanuatu", - "Venezuela (Bolivarian Republic of)": "Venezuela", - "Viet Nam": "Vietnam", - "Western Sahara": "Western Sahara", - "Yemen": "Yemen", - "Zambia": "Zambia", - "Zimbabwe": "Zimbabwe", - "China, Taiwan Province of China": "Taiwan", - "Dem. People's Rep. of Korea": "North Korea", - "Dem. Republic of the Congo": "Democratic Republic of Congo", - "Lao People's Dem. 
Republic": "Laos" - } \ No newline at end of file diff --git a/etl/steps/data/garden/education/2024-12-11/people_with_education.yml b/etl/steps/data/garden/education/2024-12-11/people_with_education.meta.yml similarity index 56% rename from etl/steps/data/garden/education/2024-12-11/people_with_education.yml rename to etl/steps/data/garden/education/2024-12-11/people_with_education.meta.yml index d16b7cfc417..b24e45ac0c9 100644 --- a/etl/steps/data/garden/education/2024-12-11/people_with_education.yml +++ b/etl/steps/data/garden/education/2024-12-11/people_with_education.meta.yml @@ -7,54 +7,37 @@ definitions: - Global Education display: numDecimalPlaces: 1 - short_unit: '%' - unit: '%' + short_unit: "%" + unit: "%" description_key: - - Historical data for educational attainment between 1870 to 2005 comes from van Zanden, J. et al. (2014). - - Data for 2010 and 2015 is sourced via World Bank and is based on the Wittgenstein Centre for Demography and Global Human Capita. These projections are based on collected census and survey data for the base year 2010 and the Medium Shared Socioeconomic Pathways (SSP2) projection model. The SSP2 is a middle-of-the-road scenario that combines medium fertility with medium mortality, medium migration, and the Global Education Trend (GET) education scenario. For more information and other projection models, consult the Wittgenstein Centre for Demography and Global Human Capital's website - http://www.oeaw.ac.at/vid/dataexplorer/. - - Data for 2020 onwards is also based on the Medium Shared Socioeconomic Pathways (SSP2) Wittgenstein Centre for Demography and Global Human Capita projections. However, these have been updated to reflect recent global demographic changes, including COVID-19 impacts, changes in mortality, fertility, and migration trends, while maintaining the original long-term assumptions. For more information, see https://zenodo.org/records/7921989. 
+ - Historical data for educational attainment from 1870 to 1950 comes from van Zanden, J. et al. (2014). + - "Data for 1950 to 2015 is sourced from the Wittgenstein Centre for Demography and Global Human Capital. These projections are based on collected census and survey data and the Medium Shared Socioeconomic Pathways (SSP2) projection model. The SSP2 is a middle-of-the-road scenario that combines medium fertility with medium mortality, medium migration, and the Global Education Trend (GET) education scenario. For more information and other projection models, consult the Wittgenstein Centre for Demography and Global Human Capital's website: https://dataexplorer.wittgensteincentre.org/." + - Data for 2020 onwards is also based on the Medium Shared Socioeconomic Pathways (SSP2) Wittgenstein Centre for Demography and Global Human Capital projections. For more information, see https://pure.iiasa.ac.at/id/eprint/19487/. -# Learn more about the available fields: -# http://docs.owid.io/projects/etl/architecture/metadata/reference/dataset/ dataset: update_period_days: 365 + title: People with formal basic education (Wittgenstein Centre, OECD) - -# Learn more about the available fields: -# http://docs.owid.io/projects/etl/architecture/metadata/reference/tables/ - title: Wittgenstein Center Population and Human Capital Projections tables: - wittgenstein_center_data: - # Learn more about the available fields: - # http://docs.owid.io/projects/etl/architecture/metadata/reference/indicator/ + people_with_education: variables: - no_formal_education: + no_basic_education: title: Share of population with no education description_short: Share of people aged 15 or older who have not received some kind of formal [primary](#dod:primary-education), [secondary](#dod:secondary-education), or [tertiary](#dod:tertiary-education) education. - description_processing: > + description_processing: |- For each country and year, the share of the population aged 15 and older with no formal education was calculated. 
This involved summing up the population with no formal education and dividing it by the total population aged 15 and older for each country and year, then converting this ratio into a percentage. - - A global estimate was calculated for each year by summing the total population aged 15 and older across all countries and the total population within this age group with no formal education. The share of the global population aged 15+ with no formal education was then computed for each year. - - - Historical data from van Zanden, J. et al. (2014) with estimates from 1870 to 2005 was combined with educational attainment estimates from Wittgenstein Centre for Demography and Global Human Capita. - population_with_basic_education: + Historical data from van Zanden, J. et al. (2014) with estimates from 1870 to 1950 was combined with educational attainment estimates from the Wittgenstein Centre for Demography and Global Human Capital. + basic_education: title: Share of population with at least some basic education description_short: Share of people aged 15 or older who have received at least some kind of formal [primary](#dod:primary-education), [secondary](#dod:secondary-education), or [tertiary](#dod:tertiary-education) education. - description_processing: > + description_processing: |- For each country and year, the share of the population aged 15 and older with no formal education was calculated. This involved summing up the population with no formal education and dividing it by the total population aged 15 and older for each country and year, then converting this ratio into a percentage. - - A global estimate was calculated for each year by summing the total population aged 15 and older across all countries and the total population within this age group with no formal education. The share of the global population aged 15+ with no formal education was then computed for each year. - - - Historical data from van Zanden, J. et al. 
(2014) with estimates from 1870 to 2005 was combined with educational attainment estimates from Wittgenstein Centre for Demography and Global Human Capita. - - + Historical data from van Zanden, J. et al. (2014) with estimates from 1870 to 1950 was combined with educational attainment estimates from the Wittgenstein Centre for Demography and Global Human Capital. To calculate the share of the population with at least some basic education, the share of the population with no formal education was subtracted from 100%. diff --git a/etl/steps/data/garden/education/2024-12-11/people_with_education.py b/etl/steps/data/garden/education/2024-12-11/people_with_education.py index f05fbe4529d..4f9a79e7bad 100644 --- a/etl/steps/data/garden/education/2024-12-11/people_with_education.py +++ b/etl/steps/data/garden/education/2024-12-11/people_with_education.py @@ -21,69 +21,66 @@ def run(dest_dir: str) -> None: ds_oecd = paths.load_dataset("oecd_education") tb_oecd = ds_oecd.read("oecd_education") - # Filter the for years above 2020 (New Wittgenstein Center data starts at 2020) - tb_oecd = tb_oecd.loc[ - tb_oecd["year"] < 2020, ["country", "year", "no_formal_education", "population_with_basic_education"] - ].reset_index(drop=True) - # # Process data. 
# - # tb = geo.harmonize_countries(df=tb, countries_file=paths.country_mapping_path) - tb_wc[ - (tb_wc.scenario == 2) - & (tb_wc.country == "World") - & (tb_wc.sex == "total") - & (tb_wc.age == "total") - & (tb_wc.education == "no_education") - ] - # Filter the dataset for individuals aged 15 and older and with 'No Education' - age_15_and_above = tb_wc["age_group"].apply(lambda x: x not in ["0-4", "5-9", "10-14"]) - no_education = tb_wc["educational_attainment"] == "No Education" - filtered_df = tb_wc[age_15_and_above & no_education] - - # Calculate the share of people 15+ with no formal education for each country and year - # First, calculate the total population aged 15+ for each country and year - total_population_15_plus = tb_wc[age_15_and_above].groupby(["country", "year"])["population"].sum() - - # Then, calculate the population with no formal education for each country and year - no_education_population = filtered_df.groupby(["country", "year"])["population"].sum() - - # Calculate the share - share_no_education = (no_education_population / total_population_15_plus) * 100 - - # Create a yearly global estimate - # Sum up the total population aged 15+ and no education population for each year - global_total_population_15_plus = total_population_15_plus.groupby("year").sum() - global_no_education_population = no_education_population.groupby("year").sum() - - # Calculate the global share for each year - global_share_no_education = (global_no_education_population / global_total_population_15_plus) * 100 - # Renaming the columns for clarity - share_no_education = share_no_education.rename("no_formal_education") - global_share_no_education = global_share_no_education.rename("no_formal_education") - - # Resetting the index to prepare for concatenation - share_no_education = share_no_education.reset_index() - global_share_no_education = global_share_no_education.reset_index() - global_share_no_education["country"] = "World" - - tb_combined = 
pr.concat([global_share_no_education, share_no_education]) - tb_combined["population_with_basic_education"] = 100 - tb_combined["no_formal_education"] - tb_combined_with_oecd = pr.merge( - tb_combined, - tb_below_2020, - on=["country", "year", "no_formal_education", "population_with_basic_education"], - how="outer", - ) - tb_combined_with_oecd = tb_combined_with_oecd.set_index(["country", "year"], verify_integrity=True) - tb_combined_with_oecd.metadata = tb.metadata + # Prepare OECD + tb_oecd = make_oecd(tb_oecd) + countries_oecd = set(tb_oecd["country"].unique()) + + # Prepare Wittgenstein Center + tb_wc = make_wc(tb_wc) + countries_wc = set(tb_wc["country"].unique()) + + # Combine tables + tb = pr.concat([tb_oecd, tb_wc], short_name="education") + # Keep only relevant countries + countries = countries_oecd.intersection(countries_wc) + tb = tb.loc[tb["country"].isin(countries)] + # Format + tb = tb.format(["country", "year"], short_name="people_with_education") # # Save outputs. # # Create a new garden dataset with the same metadata as the meadow dataset. - ds_garden = create_dataset(dest_dir, tables=[tb_combined_with_oecd], check_variables_metadata=True) + ds_garden = create_dataset(dest_dir, tables=[tb], check_variables_metadata=True) # Save changes in the new garden dataset. 
ds_garden.save() + + +def make_oecd(tb): + # Keep only years before 1950 (per the dataset metadata, Wittgenstein Centre data is used from 1950 onwards) + tb = tb.loc[ + tb["year"] < 1950, ["country", "year", "no_formal_education", "population_with_basic_education"] + ].reset_index(drop=True) + + # Rename columns to the indicator names shared with the Wittgenstein Centre table + tb = tb.rename( + columns={ + "no_formal_education": "no_basic_education", + "population_with_basic_education": "basic_education", + } + ) + return tb + + +def make_wc(tb): + tb = tb.loc[ + (tb["scenario"] == 2) + # NOTE: no filter on country here (rows for all countries are kept) + & (tb["sex"] == "total") + & (tb["age"] == "15+") + & (tb["education"].isin(["no_education"])), + ["country", "year", "prop"], + ] + assert tb.groupby(["country", "year"]).size().max() == 1, "Only 1 rows per country-year accepted" + + # "no basic education": rename "prop" of the rows filtered to education == "no_education" + tb = tb.rename(columns={"prop": "no_basic_education"}) + + # "with basic education": complement of "no basic education" to 100 + tb["basic_education"] = 100 - tb["no_basic_education"] + + return tb diff --git a/etl/steps/data/grapher/education/2024-12-11/people_with_education.py b/etl/steps/data/grapher/education/2024-12-11/people_with_education.py new file mode 100644 index 00000000000..7de59114b85 --- /dev/null +++ b/etl/steps/data/grapher/education/2024-12-11/people_with_education.py @@ -0,0 +1,32 @@ +"""Load a garden dataset and create a grapher dataset.""" + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load garden dataset. + ds_garden = paths.load_dataset("people_with_education") + + # Read all tables from garden dataset. + tables = list(ds_garden) + + # + # Process data. + # + + # + # Save outputs. + # + # Create a new grapher dataset with the same metadata as the garden dataset. 
+ ds_grapher = create_dataset( + dest_dir, tables=tables, check_variables_metadata=True, default_metadata=ds_garden.metadata + ) + + # Save changes in the new grapher dataset. + ds_grapher.save()