diff --git a/dag/health.yml b/dag/health.yml index 825473ed8b0..055219b6aab 100644 --- a/dag/health.yml +++ b/dag/health.yml @@ -563,6 +563,35 @@ steps: data://grapher/who/2024-04-26/avian_influenza_ah5n1: - data://garden/who/2024-04-26/avian_influenza_ah5n1 + + # WHO Preventive Chemotherapy - Neglected Tropical Diseases + # Lymphatic filariasis + data://meadow/neglected_tropical_diseases/2024-05-02/lymphatic_filariasis: + - snapshot://neglected_tropical_diseases/2024-05-02/lymphatic_filariasis.xlsx + data://garden/neglected_tropical_diseases/2024-05-02/lymphatic_filariasis: + - data://meadow/neglected_tropical_diseases/2024-05-02/lymphatic_filariasis + - data://garden/regions/2023-01-01/regions + data://grapher/neglected_tropical_diseases/2024-05-02/lymphatic_filariasis: + - data://garden/neglected_tropical_diseases/2024-05-02/lymphatic_filariasis + + # Schistosomiasis + data://meadow/neglected_tropical_diseases/2024-05-02/schistosomiasis: + - snapshot://neglected_tropical_diseases/2024-05-02/schistosomiasis.xlsx + data://garden/neglected_tropical_diseases/2024-05-02/schistosomiasis: + - data://meadow/neglected_tropical_diseases/2024-05-02/schistosomiasis + - data://garden/regions/2023-01-01/regions + data://grapher/neglected_tropical_diseases/2024-05-02/schistosomiasis: + - data://garden/neglected_tropical_diseases/2024-05-02/schistosomiasis + + # Soil-transmitted helminthiases + data://meadow/neglected_tropical_diseases/2024-05-02/soil_transmitted_helminthiases: + - snapshot://neglected_tropical_diseases/2024-05-02/soil_transmitted_helminthiases.xlsx + data://garden/neglected_tropical_diseases/2024-05-02/soil_transmitted_helminthiases: + - data://meadow/neglected_tropical_diseases/2024-05-02/soil_transmitted_helminthiases + - data://garden/regions/2023-01-01/regions + data://grapher/neglected_tropical_diseases/2024-05-02/soil_transmitted_helminthiases: + - data://garden/neglected_tropical_diseases/2024-05-02/soil_transmitted_helminthiases + # Neglected Tropical 
Diseases Funding data://meadow/neglected_tropical_diseases/2024-05-18/funding: - snapshot://neglected_tropical_diseases/2024-05-18/funding.xlsx @@ -570,3 +599,4 @@ steps: - data://meadow/neglected_tropical_diseases/2024-05-18/funding data://grapher/neglected_tropical_diseases/2024-05-18/funding: - data://garden/neglected_tropical_diseases/2024-05-18/funding + diff --git a/etl/steps/data/garden/neglected_tropical_diseases/2024-05-02/lymphatic_filariasis.countries.json b/etl/steps/data/garden/neglected_tropical_diseases/2024-05-02/lymphatic_filariasis.countries.json new file mode 100644 index 00000000000..86829941884 --- /dev/null +++ b/etl/steps/data/garden/neglected_tropical_diseases/2024-05-02/lymphatic_filariasis.countries.json @@ -0,0 +1,84 @@ +{ + "American Samoa": "American Samoa", + "Angola": "Angola", + "Bangladesh": "Bangladesh", + "Benin": "Benin", + "Brazil": "Brazil", + "Brunei Darussalam": "Brunei", + "Burkina Faso": "Burkina Faso", + "Burundi": "Burundi", + "Cabo Verde": "Cape Verde", + "Cambodia": "Cambodia", + "Cameroon": "Cameroon", + "Central African Republic": "Central African Republic", + "Chad": "Chad", + "Comoros": "Comoros", + "Congo": "Congo", + "Cook Islands": "Cook Islands", + "Costa Rica": "Costa Rica", + "C\u00f4te d'Ivoire": "Cote d'Ivoire", + "Democratic Republic of the Congo": "Democratic Republic of Congo", + "Dominican Republic": "Dominican Republic", + "Egypt": "Egypt", + "Equatorial Guinea": "Equatorial Guinea", + "Eritrea": "Eritrea", + "Ethiopia": "Ethiopia", + "Fiji": "Fiji", + "French Polynesia": "French Polynesia", + "Gabon": "Gabon", + "Gambia": "Gambia", + "Ghana": "Ghana", + "Guinea": "Guinea", + "Guinea-Bissau": "Guinea-Bissau", + "Guyana": "Guyana", + "Haiti": "Haiti", + "India": "India", + "Indonesia": "Indonesia", + "Kenya": "Kenya", + "Kiribati": "Kiribati", + "Lao People's Democratic Republic": "Laos", + "Liberia": "Liberia", + "Madagascar": "Madagascar", + "Malawi": "Malawi", + "Malaysia": "Malaysia", + "Maldives": 
"Maldives", + "Mali": "Mali", + "Marshall Islands": "Marshall Islands", + "Mauritius": "Mauritius", + "Micronesia (Federated States of)": "Micronesia (country)", + "Mozambique": "Mozambique", + "Myanmar": "Myanmar", + "Nepal": "Nepal", + "New Caledonia": "New Caledonia", + "Niger": "Niger", + "Nigeria": "Nigeria", + "Niue": "Niue", + "Palau": "Palau", + "Papua New Guinea": "Papua New Guinea", + "Philippines": "Philippines", + "Rwanda": "Rwanda", + "Samoa": "Samoa", + "Sao Tome and Principe": "Sao Tome and Principe", + "Senegal": "Senegal", + "Seychelles": "Seychelles", + "Sierra Leone": "Sierra Leone", + "Solomon Islands": "Solomon Islands", + "South Sudan": "South Sudan", + "Sri Lanka": "Sri Lanka", + "Sudan": "Sudan", + "Suriname": "Suriname", + "Thailand": "Thailand", + "Timor-Leste": "East Timor", + "Togo": "Togo", + "Tonga": "Tonga", + "Trinidad and Tobago": "Trinidad and Tobago", + "Tuvalu": "Tuvalu", + "Uganda": "Uganda", + "United Republic of Tanzania": "Tanzania", + "Vanuatu": "Vanuatu", + "Viet Nam": "Vietnam", + "Wallis and Futuna": "Wallis and Futuna", + "Yemen": "Yemen", + "Zambia": "Zambia", + "Zimbabwe": "Zimbabwe" +} \ No newline at end of file diff --git a/etl/steps/data/garden/neglected_tropical_diseases/2024-05-02/lymphatic_filariasis.meta.yml b/etl/steps/data/garden/neglected_tropical_diseases/2024-05-02/lymphatic_filariasis.meta.yml new file mode 100644 index 00000000000..a063cb71708 --- /dev/null +++ b/etl/steps/data/garden/neglected_tropical_diseases/2024-05-02/lymphatic_filariasis.meta.yml @@ -0,0 +1,73 @@ +# NOTE: To learn more about the fields, hover over their names. 
+definitions: + common: + presentation: + topic_tags: + - Global Health + # - Neglected Tropical Diseases # Need to add once the tag is upgraded to topic tag and we have a slug for the page + processing_level: minor +# Learn more about the available fields: +# http://docs.owid.io/projects/etl/architecture/metadata/reference/ +dataset: + update_period_days: 365 + + title: Preventive Chemotherapy (PC) Data Portal +tables: + lymphatic_filariasis: + variables: + current_status_of_mda: + title: Current status of MDA + unit: "" + number_of_ius_covered: + title: Number of implementation units covered + unit: "" + display: + numDecimalPlaces: 0 + geographical_coverage__pct: + title: Geographical coverage (%) + description_short: "Geographical coverage of preventive chemotherapy for [lymphatic filariasis](#dod:lymphatic-filariasis)." + unit: "%" + display: + numDecimalPlaces: 1 + total_population_of_ius: + title: Total population of implementation units + description_short: "Total population of implementation units. Implementation units are defined as geographic areas where health interventions are specifically designed, executed, and monitored to control or eliminate neglected tropical diseases effectively." + unit: "people" + display: + numDecimalPlaces: 0 + reported_number_of_people_treated: + title: Reported number of people treated + description_short: "Reported number of people treated for [lymphatic filariasis](#dod:lymphatic-filariasis)." + unit: "people" + display: + numDecimalPlaces: 0 + programme__drug__coverage__pct: + title: Programme coverage + description_short: "Programme coverage for preventive chemotherapy for [lymphatic filariasis](#dod:lymphatic-filariasis). The share of people who require preventive chemotherapy for [lymphatic filariasis](#dod:lymphatic-filariasis) who actually receive it." 
+ unit: "%" + short_unit: "%" + display: + numDecimalPlaces: 3 + lymphatic_filariasis_national: + variables: + national_coverage__pct: + title: National coverage + description_short: "Drug coverage out of estimated population who require it." + unit: "%" + short_unit: "%" + display: + numDecimalPlaces: 1 + population_requiring_pc_for_lf: + title: Population requiring preventive chemotherapy for lymphatic filariasis + description_short: "Population requiring preventive chemotherapy for [lymphatic filariasis](#dod:lymphatic-filariasis)." + unit: "people" + display: + numDecimalPlaces: 0 + estimated_number_of_people_treated: + title: Estimated number of people treated + description_short: "Estimated number of people treated for [lymphatic filariasis](#dod:lymphatic-filariasis)." + description_processing: To calculate the estimated number of people treated, we multiply the population requiring preventive chemotherapy by the national coverage. + unit: "people" + display: + numDecimalPlaces: 0 + processing_level: major diff --git a/etl/steps/data/garden/neglected_tropical_diseases/2024-05-02/lymphatic_filariasis.py b/etl/steps/data/garden/neglected_tropical_diseases/2024-05-02/lymphatic_filariasis.py new file mode 100644 index 00000000000..9226808e2c0 --- /dev/null +++ b/etl/steps/data/garden/neglected_tropical_diseases/2024-05-02/lymphatic_filariasis.py @@ -0,0 +1,84 @@ +"""Load a meadow dataset and create a garden dataset.""" + +from typing import List + +import numpy as np +from owid.catalog import Dataset, Table +from owid.catalog import processing as pr + +from etl.data_helpers import geo +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) +REGIONS = ["North America", "South America", "Europe", "Africa", "Asia", "Oceania", "World"] + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load meadow dataset. 
+ ds_meadow = paths.load_dataset("lymphatic_filariasis") + # Load regions dataset. + ds_regions = paths.load_dataset("regions") + + # Read table from meadow dataset. + tb = ds_meadow["lymphatic_filariasis"].reset_index() + # + # Harmonize countries + tb = geo.harmonize_countries(df=tb, countries_file=paths.country_mapping_path) + # Process data. + # There are separate rows for each combination of drugs used, but `national_coverage__pct` is repeated across them, so we extract it (together with `population_requiring_pc_for_lf`) into a separate national table + + # In many cases there are two identical values of `national_coverage__pct` for a country-year; this de-duplicates them + tb_nat = ( + tb[["country", "year", "national_coverage__pct", "population_requiring_pc_for_lf"]].copy().drop_duplicates() + ) + tb_nat["estimated_number_of_people_treated"] = ( + tb_nat["national_coverage__pct"] * tb_nat["population_requiring_pc_for_lf"] / 100 + ) + tb_nat = add_regions_to_selected_vars( + tb_nat, + cols=["country", "year", "population_requiring_pc_for_lf", "estimated_number_of_people_treated"], + ds_regions=ds_regions, + ) + # A few country-year combos have two differing values; we are not sure which is correct, so only the first row is kept. NOTE(review): this runs after the regional aggregation above, so both rows were presumably already included in the region totals - consider de-duplicating before aggregating + tb_nat = tb_nat.drop_duplicates(subset=["country", "year"]) + tb_nat.metadata.short_name = "lymphatic_filariasis_national" + # Drop the national-level columns (now held in tb_nat) and unused source columns from tb + tb = tb.drop( + columns=["national_coverage__pct", "population_requiring_pc_for_lf", "region", "country_code", "mapping_status"] + ) + # Replace "No data" with NaN + tb = tb.replace("No data", np.nan) + # Format the tables + tb = tb.format(["country", "year", "type_of_mda"]) + tb_nat = tb_nat.format(["country", "year"]) + # + # Save outputs. + # + # Create a new garden dataset with the same metadata as the meadow dataset. 
+ ds_garden = create_dataset( + dest_dir, tables=[tb, tb_nat], check_variables_metadata=True, default_metadata=ds_meadow.metadata + ) + + # Save changes in the new garden dataset. + ds_garden.save() + + +def add_regions_to_selected_vars(tb: Table, cols: List[str], ds_regions: Dataset) -> Table: + """ + Adding regions to selected variables in the table and then combining the table with the original table + """ + + tb_agg = geo.add_regions_to_table( + tb[cols], + regions=REGIONS, + ds_regions=ds_regions, + min_num_values_per_year=1, + ) + tb_agg = tb_agg[tb_agg["country"].isin(REGIONS)] + tb = pr.concat([tb, tb_agg], axis=0, ignore_index=True) + + return tb diff --git a/etl/steps/data/garden/neglected_tropical_diseases/2024-05-02/schistosomiasis.countries.json b/etl/steps/data/garden/neglected_tropical_diseases/2024-05-02/schistosomiasis.countries.json new file mode 100644 index 00000000000..0a002795c3e --- /dev/null +++ b/etl/steps/data/garden/neglected_tropical_diseases/2024-05-02/schistosomiasis.countries.json @@ -0,0 +1,60 @@ +{ + "Angola": "Angola", + "Benin": "Benin", + "Botswana": "Botswana", + "Brazil": "Brazil", + "Burkina Faso": "Burkina Faso", + "Burundi": "Burundi", + "Cambodia": "Cambodia", + "Cameroon": "Cameroon", + "Central African Republic": "Central African Republic", + "Chad": "Chad", + "China": "China", + "Congo": "Congo", + "C\u00f4te d'Ivoire": "Cote d'Ivoire", + "Democratic Republic of the Congo": "Democratic Republic of Congo", + "Dominican Republic": "Dominican Republic", + "Egypt": "Egypt", + "Equatorial Guinea": "Equatorial Guinea", + "Eritrea": "Eritrea", + "Eswatini": "Eswatini", + "Ethiopia": "Ethiopia", + "Gabon": "Gabon", + "Gambia": "Gambia", + "Ghana": "Ghana", + "Guinea": "Guinea", + "Guinea-Bissau": "Guinea-Bissau", + "Indonesia": "Indonesia", + "Iraq": "Iraq", + "Kenya": "Kenya", + "Lao People's Democratic Republic": "Laos", + "Liberia": "Liberia", + "Libya": "Libya", + "Madagascar": "Madagascar", + "Malawi": "Malawi", + "Mali": 
"Mali", + "Mauritania": "Mauritania", + "Mozambique": "Mozambique", + "Namibia": "Namibia", + "Niger": "Niger", + "Nigeria": "Nigeria", + "Oman": "Oman", + "Philippines": "Philippines", + "Rwanda": "Rwanda", + "Sao Tome and Principe": "Sao Tome and Principe", + "Saudi Arabia": "Saudi Arabia", + "Senegal": "Senegal", + "Sierra Leone": "Sierra Leone", + "Somalia": "Somalia", + "South Africa": "South Africa", + "South Sudan": "South Sudan", + "Sudan": "Sudan", + "Suriname": "Suriname", + "Togo": "Togo", + "Uganda": "Uganda", + "United Republic of Tanzania": "Tanzania", + "Venezuela (Bolivarian Republic of)": "Venezuela", + "Yemen": "Yemen", + "Zambia": "Zambia", + "Zimbabwe": "Zimbabwe" +} \ No newline at end of file diff --git a/etl/steps/data/garden/neglected_tropical_diseases/2024-05-02/schistosomiasis.meta.yml b/etl/steps/data/garden/neglected_tropical_diseases/2024-05-02/schistosomiasis.meta.yml new file mode 100644 index 00000000000..f54f6863468 --- /dev/null +++ b/etl/steps/data/garden/neglected_tropical_diseases/2024-05-02/schistosomiasis.meta.yml @@ -0,0 +1,59 @@ +# NOTE: To learn more about the fields, hover over their names. +definitions: + common: + presentation: + topic_tags: + - Global Health + # - Neglected Tropical Diseases # Need to add once the tag is upgraded to topic tag and we have a slug for the page + processing_level: minor +# Learn more about the available fields: +# http://docs.owid.io/projects/etl/architecture/metadata/reference/ +dataset: + update_period_days: 365 + + title: Preventive Chemotherapy (PC) Data Portal +tables: + schistosomiasis: + variables: + population_requiring_pc_for_sch_annually: + title: Population requiring preventive chemotherapy annually + description_short: "Population requiring preventive chemotherapy for schistosomiasis." 
+ unit: "people" + display: + numDecimalPlaces: 0 + sac_population_requiring_pc_for_sch_annually: + title: School-age children population requiring preventive chemotherapy annually + description_short: "School-age children population requiring preventive chemotherapy for schistosomiasis." + unit: "school-age children" + display: + numDecimalPlaces: 0 + number_of_people_targeted: + title: Number of people targeted + description_short: "Number of people targeted for preventive chemotherapy for schistosomiasis." + unit: "people" + display: + numDecimalPlaces: 0 + reported_number_of_people_treated: + title: Reported number of people treated + description_short: "Reported number of people treated for schistosomiasis." + unit: "people" + display: + numDecimalPlaces: 0 + reported_number_of_sac_treated: + title: Reported number of school-age children treated + description_short: "Reported number of school-age children treated for schistosomiasis." + unit: "school-age children" + display: + numDecimalPlaces: 0 + programme_coverage__pct: + title: Programme coverage + description_short: "Programme coverage for preventive chemotherapy for schistosomiasis." + unit: "%" + display: + numDecimalPlaces: 1 + national_coverage__pct: + title: National coverage (%) + description_short: "Drug coverage out of estimated population who require it." + unit: "%" + display: + numDecimalPlaces: 1 diff --git a/etl/steps/data/garden/neglected_tropical_diseases/2024-05-02/schistosomiasis.py b/etl/steps/data/garden/neglected_tropical_diseases/2024-05-02/schistosomiasis.py new file mode 100644 index 00000000000..cc24350cbca --- /dev/null +++ b/etl/steps/data/garden/neglected_tropical_diseases/2024-05-02/schistosomiasis.py @@ -0,0 +1,48 @@ +"""Load a meadow dataset and create a garden dataset.""" + +import numpy as np + +from etl.data_helpers import geo +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. 
+paths = PathFinder(__file__) +REGIONS = ["North America", "South America", "Europe", "Africa", "Asia", "Oceania", "World"] + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load meadow dataset. + ds_meadow = paths.load_dataset("schistosomiasis") + # Load regions dataset. + ds_regions = paths.load_dataset("regions") + # Read table from meadow dataset. + tb = ds_meadow["schistosomiasis"].reset_index() + + # + # Process data. + # + tb = geo.harmonize_countries(df=tb, countries_file=paths.country_mapping_path) + tb = tb.drop(columns=["region", "age_group", "country_code"]) + tb = geo.add_regions_to_table( + tb, + regions=REGIONS, + ds_regions=ds_regions, + min_num_values_per_year=1, + ) + # Replace regional values in percentage columns with NaN + tb.loc[tb["country"].isin(REGIONS), ["programme_coverage__pct", "national_coverage__pct"]] = np.nan + tb = tb.format(["country", "year"]) + + # + # Save outputs. + # + # Create a new garden dataset with the same metadata as the meadow dataset. + ds_garden = create_dataset( + dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=ds_meadow.metadata + ) + + # Save changes in the new garden dataset. 
+ ds_garden.save() diff --git a/etl/steps/data/garden/neglected_tropical_diseases/2024-05-02/soil_transmitted_helminthiases.countries.json b/etl/steps/data/garden/neglected_tropical_diseases/2024-05-02/soil_transmitted_helminthiases.countries.json new file mode 100644 index 00000000000..6ee04e62bde --- /dev/null +++ b/etl/steps/data/garden/neglected_tropical_diseases/2024-05-02/soil_transmitted_helminthiases.countries.json @@ -0,0 +1,140 @@ +{ + "Afghanistan": "Afghanistan", + "American Samoa": "American Samoa", + "Angola": "Angola", + "Antigua and Barbuda": "Antigua and Barbuda", + "Argentina": "Argentina", + "Armenia": "Armenia", + "Azerbaijan": "Azerbaijan", + "Bahamas": "Bahamas", + "Bangladesh": "Bangladesh", + "Belize": "Belize", + "Benin": "Benin", + "Bhutan": "Bhutan", + "Bolivia (Plurinational State of)": "Bolivia", + "Botswana": "Botswana", + "Brazil": "Brazil", + "Brunei Darussalam": "Brunei", + "Burkina Faso": "Burkina Faso", + "Burundi": "Burundi", + "Cabo Verde": "Cape Verde", + "Cambodia": "Cambodia", + "Cameroon": "Cameroon", + "Central African Republic": "Central African Republic", + "Chad": "Chad", + "Chile": "Chile", + "China": "China", + "Colombia": "Colombia", + "Comoros": "Comoros", + "Congo": "Congo", + "Cook Islands": "Cook Islands", + "Costa Rica": "Costa Rica", + "Cuba": "Cuba", + "C\u00f4te d'Ivoire": "Cote d'Ivoire", + "Democratic People's Republic of Korea": "North Korea", + "Democratic Republic of the Congo": "Democratic Republic of Congo", + "Djibouti": "Djibouti", + "Dominica": "Dominica", + "Dominican Republic": "Dominican Republic", + "Ecuador": "Ecuador", + "Egypt": "Egypt", + "El Salvador": "El Salvador", + "Equatorial Guinea": "Equatorial Guinea", + "Eritrea": "Eritrea", + "Eswatini": "Eswatini", + "Ethiopia": "Ethiopia", + "Fiji": "Fiji", + "French Polynesia": "French Polynesia", + "Gabon": "Gabon", + "Gambia": "Gambia", + "Georgia": "Georgia", + "Ghana": "Ghana", + "Grenada": "Grenada", + "Guatemala": "Guatemala", + "Guinea": 
"Guinea", + "Guinea-Bissau": "Guinea-Bissau", + "Guyana": "Guyana", + "Haiti": "Haiti", + "Honduras": "Honduras", + "India": "India", + "Indonesia": "Indonesia", + "Iraq": "Iraq", + "Jamaica": "Jamaica", + "Jordan": "Jordan", + "Kenya": "Kenya", + "Kiribati": "Kiribati", + "Kyrgyzstan": "Kyrgyzstan", + "Lao People's Democratic Republic": "Laos", + "Lebanon": "Lebanon", + "Lesotho": "Lesotho", + "Liberia": "Liberia", + "Madagascar": "Madagascar", + "Malawi": "Malawi", + "Malaysia": "Malaysia", + "Maldives": "Maldives", + "Mali": "Mali", + "Marshall Islands": "Marshall Islands", + "Mauritania": "Mauritania", + "Mauritius": "Mauritius", + "Mexico": "Mexico", + "Micronesia (Federated States of)": "Micronesia (country)", + "Montenegro": "Montenegro", + "Mozambique": "Mozambique", + "Myanmar": "Myanmar", + "Namibia": "Namibia", + "Nauru": "Nauru", + "Nepal": "Nepal", + "New Caledonia": "New Caledonia", + "Nicaragua": "Nicaragua", + "Niger": "Niger", + "Nigeria": "Nigeria", + "Niue": "Niue", + "Pakistan": "Pakistan", + "Palau": "Palau", + "Panama": "Panama", + "Papua New Guinea": "Papua New Guinea", + "Paraguay": "Paraguay", + "Peru": "Peru", + "Philippines": "Philippines", + "Republic of Moldova": "Moldova", + "Romania": "Romania", + "Rwanda": "Rwanda", + "Saint Kitts and Nevis": "Saint Kitts and Nevis", + "Saint Lucia": "Saint Lucia", + "Saint Vincent and the Grenadines": "Saint Vincent and the Grenadines", + "Samoa": "Samoa", + "Sao Tome and Principe": "Sao Tome and Principe", + "Senegal": "Senegal", + "Serbia": "Serbia", + "Sierra Leone": "Sierra Leone", + "Solomon Islands": "Solomon Islands", + "Somalia": "Somalia", + "South Africa": "South Africa", + "South Sudan": "South Sudan", + "Sri Lanka": "Sri Lanka", + "Sudan": "Sudan", + "Suriname": "Suriname", + "Syrian Arab Republic": "Syria", + "Tajikistan": "Tajikistan", + "Thailand": "Thailand", + "Timor-Leste": "East Timor", + "Togo": "Togo", + "Tonga": "Tonga", + "Trinidad and Tobago": "Trinidad and Tobago", + 
"Tunisia": "Tunisia", + "Turkey": "Turkey", + "Tuvalu": "Tuvalu", + "Uganda": "Uganda", + "United Republic of Tanzania": "Tanzania", + "Uruguay": "Uruguay", + "Uzbekistan": "Uzbekistan", + "Vanuatu": "Vanuatu", + "Venezuela (Bolivarian Republic of)": "Venezuela", + "Viet Nam": "Vietnam", + "Wallis and Futuna": "Wallis and Futuna", + "Yemen": "Yemen", + "Zambia": "Zambia", + "Zimbabwe": "Zimbabwe", + "Malasya": "Malaysia", + "The former Yugoslav Republic of Macedonia": "North Macedonia" +} \ No newline at end of file diff --git a/etl/steps/data/garden/neglected_tropical_diseases/2024-05-02/soil_transmitted_helminthiases.meta.yml b/etl/steps/data/garden/neglected_tropical_diseases/2024-05-02/soil_transmitted_helminthiases.meta.yml new file mode 100644 index 00000000000..42d04ce1f1a --- /dev/null +++ b/etl/steps/data/garden/neglected_tropical_diseases/2024-05-02/soil_transmitted_helminthiases.meta.yml @@ -0,0 +1,103 @@ +# NOTE: To learn more about the fields, hover over their names. +definitions: + common: + presentation: + topic_tags: + - Global Health + # - Neglected Tropical Diseases # Need to add once the tag is upgraded to topic tag and we have a slug for the page + processing_level: minor +# Learn more about the available fields: +# http://docs.owid.io/projects/etl/architecture/metadata/reference/ +dataset: + update_period_days: 365 + + title: Preventive Chemotherapy (PC) Data Portal +tables: + soil_transmitted_helminthiases_pre_sac: + variables: + number_targeted: + title: Number of pre-school age children targeted + description_short: "Number of pre-school age children targeted for preventive chemotherapy for soil transmitted helminthiases." + unit: "pre-school age children" + display: + numDecimalPlaces: 0 + reported_number_treated: + title: Reported number of pre-school age children treated + description_short: "Reported number of pre-school age children treated for soil transmitted helminthiases." 
+ unit: "pre-school age children" + display: + numDecimalPlaces: 0 + programme_coverage__pct: + title: Programme coverage of pre-school age children + description_short: "Programme coverage of pre-school age children for preventive chemotherapy for soil transmitted helminthiases." + unit: "%" + short_unit: "%" + display: + numDecimalPlaces: 1 + soil_transmitted_helminthiases_national_pre_sac: + variables: + national_coverage__pre_sac__pct: + title: National coverage pre-school age children + description_short: "Drug coverage out of estimated population of pre-school age children who require it." + unit: "%" + short_unit: "%" + display: + numDecimalPlaces: 1 + population_requiring_pc_for_sth__pre_sac: + title: Population of pre-school age children requiring preventive chemotherapy + description_short: "Population of pre-school age children requiring preventive chemotherapy for soil transmitted helminthiases." + unit: "pre-school age children" + display: + numDecimalPlaces: 0 + estimated_number_of_pre_sac_treated: + title: Estimated number of pre-school age children treated + description_short: "Estimated number of pre-school age children treated for soil transmitted helminthiases." + description_processing: To calculate the estimated number of people treated, we multiply the population requiring preventive chemotherapy by the national coverage. + unit: "pre-school age children" + processing_level: minor + display: + numDecimalPlaces: 0 + soil_transmitted_helminthiases_sac: + variables: + number_targeted: + title: Number of school age children targeted + unit: "school age children" + description_short: "Number of school age children targeted for preventive chemotherapy for soil transmitted helminthiases." + display: + numDecimalPlaces: 0 + reported_number_treated: + title: Reported number of school age children treated + unit: "school age children" + description_short: "Reported number of school age children treated for soil transmitted helminthiases." 
+ display: + numDecimalPlaces: 0 + programme_coverage__pct: + title: Programme coverage of school age children + description_short: "Programme coverage of school age children for preventive chemotherapy for soil transmitted helminthiases." + unit: "%" + short_unit: "%" + display: + numDecimalPlaces: 1 + soil_transmitted_helminthiases_national_sac: + variables: + national_coverage__sac__pct: + title: National coverage school age children + unit: "%" + description_short: "Drug coverage out of estimated population of school age children who require it." + short_unit: "%" + display: + numDecimalPlaces: 1 + population_requiring_pc_for_sth__sac: + title: Population of school age children requiring preventive chemotherapy for soil transmitted helminthiases + unit: "school age children" + description_short: "Population of school age children requiring preventive chemotherapy for soil transmitted helminthiases." + display: + numDecimalPlaces: 0 + estimated_number_of_sac_treated: + title: Estimated number of school age children treated + description_short: "Estimated number of school age children treated for soil transmitted helminthiases." + description_processing: To calculate the estimated number of people treated, we multiply the population requiring preventive chemotherapy by the national coverage. 
+ unit: "school age children" + processing_level: minor + display: + numDecimalPlaces: 0 diff --git a/etl/steps/data/garden/neglected_tropical_diseases/2024-05-02/soil_transmitted_helminthiases.py b/etl/steps/data/garden/neglected_tropical_diseases/2024-05-02/soil_transmitted_helminthiases.py new file mode 100644 index 00000000000..3227f1d1de7 --- /dev/null +++ b/etl/steps/data/garden/neglected_tropical_diseases/2024-05-02/soil_transmitted_helminthiases.py @@ -0,0 +1,136 @@ +"""Load a meadow dataset and create a garden dataset.""" + +from typing import List + +from owid.catalog import Dataset, Table +from owid.catalog import processing as pr + +from etl.data_helpers import geo +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) +REGIONS = ["North America", "South America", "Europe", "Africa", "Asia", "Oceania", "World"] + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load meadow dataset. + ds_meadow = paths.load_dataset("soil_transmitted_helminthiases") + # Load regions dataset. + ds_regions = paths.load_dataset("regions") + # Read table from meadow dataset. + tb = ds_meadow["soil_transmitted_helminthiases"].reset_index() + + # + # Process data. 
+ # + tb = geo.harmonize_countries(df=tb, countries_file=paths.country_mapping_path) + # Split out the national coverage variables into separate tables, SAC = school age children, pre-SAC = pre-school age children + tb_nat_sac = ( + tb[["country", "year", "national_coverage__sac__pct", "population_requiring_pc_for_sth__sac"]] + .copy() + .drop_duplicates() + .dropna(subset=["national_coverage__sac__pct"]) + .drop_duplicates(subset=["country", "year"]) + ) + # Calculating the number of SAC treated + tb_nat_sac["estimated_number_of_sac_treated"] = ( + tb["national_coverage__sac__pct"] * tb["population_requiring_pc_for_sth__sac"] / 100 + ) + tb_nat_pre_sac = ( + tb[["country", "year", "national_coverage__pre_sac__pct", "population_requiring_pc_for_sth__pre_sac"]] + .copy() + .drop_duplicates() + .dropna(subset=["national_coverage__pre_sac__pct"]) + .drop_duplicates(subset=["country", "year"]) + ) + # Calculating the number of pre-SAC treated + tb_nat_pre_sac["estimated_number_of_pre_sac_treated"] = ( + tb["national_coverage__pre_sac__pct"] * tb["population_requiring_pc_for_sth__pre_sac"] / 100 + ) + # Adding region aggregates to selected variables + tb_nat_sac = add_regions_to_selected_vars( + tb_nat_sac, + cols=["country", "year", "population_requiring_pc_for_sth__sac", "estimated_number_of_sac_treated"], + ds_regions=ds_regions, + ) + tb_nat_pre_sac = add_regions_to_selected_vars( + tb_nat_pre_sac, + cols=["country", "year", "population_requiring_pc_for_sth__pre_sac", "estimated_number_of_pre_sac_treated"], + ds_regions=ds_regions, + ) + # Splitting the table into two tables for pre-sac and sac + age_groups = ["pre_sac", "sac"] + tbs = {} + for age_group in age_groups: + cols = [ + "country", + "year", + f"drug_combination__{age_group}", + f"number_of_{age_group}_targeted", + f"reported_number_of_{age_group}_treated", + f"programme_coverage__{age_group}__pct", + ] + tbs[f"tb_{age_group}"] = tb[cols].copy() + tbs[f"tb_{age_group}"].columns = [ + "country", + "year", 
+ "drug_combination", + "number_targeted", + "reported_number_treated", + "programme_coverage__pct", + ] + tbs[f"tb_{age_group}"] = tbs[f"tb_{age_group}"].dropna( + subset=[ + "drug_combination", + "number_targeted", + "reported_number_treated", + "programme_coverage__pct", + ], + ) + # There are some rows which seem to be erroneous duplicates, we will drop these e.g. Burundi 2015 for sac + tbs[f"tb_{age_group}"] = tbs[f"tb_{age_group}"].drop_duplicates(subset=["country", "year", "drug_combination"]) + + tb_pre_sac = tbs["tb_pre_sac"] + tb_sac = tbs["tb_sac"] + + tb_sac = tb_sac.format(["country", "year", "drug_combination"], short_name="soil_transmitted_helminthiases_sac") + tb_pre_sac = tb_pre_sac.format( + ["country", "year", "drug_combination"], short_name="soil_transmitted_helminthiases_pre_sac" + ) + tb_nat_sac = tb_nat_sac.format(["country", "year"], short_name="soil_transmitted_helminthiases_national_sac") + tb_nat_pre_sac = tb_nat_pre_sac.format( + ["country", "year"], short_name="soil_transmitted_helminthiases_national_pre_sac" + ) + # Save outputs. + # + # Create a new garden dataset with the same metadata as the meadow dataset. + ds_garden = create_dataset( + dest_dir, + tables=[tb_sac, tb_pre_sac, tb_nat_sac, tb_nat_pre_sac], + check_variables_metadata=True, + default_metadata=ds_meadow.metadata, + ) + + # Save changes in the new garden dataset. 
+ ds_garden.save() + + +def add_regions_to_selected_vars(tb: Table, cols: List[str], ds_regions: Dataset) -> Table: + """ + Aggregate the columns in `cols` to the regions listed in REGIONS and append the resulting region rows to `tb`. + + The aggregation is computed on `tb[cols]` only. Rows of the aggregated table whose country is not in REGIONS are discarded, and the remaining region rows are concatenated below the original rows of `tb` with a fresh index. + """ + + # NOTE(review): min_num_values_per_year=1 presumably allows a region value whenever at least one member country has data; confirm against geo.add_regions_to_table. + tb_agg = geo.add_regions_to_table( + tb[cols], + regions=REGIONS, + ds_regions=ds_regions, + min_num_values_per_year=1, + ) + # Keep only the region rows; the original rows come from `tb` in the concat below. + tb_agg = tb_agg[tb_agg["country"].isin(REGIONS)] + tb = pr.concat([tb, tb_agg], axis=0, ignore_index=True) + + return tb diff --git a/etl/steps/data/grapher/neglected_tropical_diseases/2024-05-02/lymphatic_filariasis.py b/etl/steps/data/grapher/neglected_tropical_diseases/2024-05-02/lymphatic_filariasis.py new file mode 100644 index 00000000000..d66c00738d0 --- /dev/null +++ b/etl/steps/data/grapher/neglected_tropical_diseases/2024-05-02/lymphatic_filariasis.py @@ -0,0 +1,29 @@ +"""Load a garden dataset and create a grapher dataset.""" + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load garden dataset. + ds_garden = paths.load_dataset("lymphatic_filariasis") + + # Read the two tables from the garden dataset. + tb = ds_garden["lymphatic_filariasis"] + tb_nat = ds_garden["lymphatic_filariasis_national"] + + # + # Save outputs. + # + # Create a new grapher dataset with the same metadata as the garden dataset; both tables are passed through unchanged. + ds_grapher = create_dataset( + dest_dir, tables=[tb, tb_nat], check_variables_metadata=True, default_metadata=ds_garden.metadata + ) + + # Save changes in the new grapher dataset.
+ ds_grapher.save() diff --git a/etl/steps/data/grapher/neglected_tropical_diseases/2024-05-02/schistosomiasis.py b/etl/steps/data/grapher/neglected_tropical_diseases/2024-05-02/schistosomiasis.py new file mode 100644 index 00000000000..22d80e83d42 --- /dev/null +++ b/etl/steps/data/grapher/neglected_tropical_diseases/2024-05-02/schistosomiasis.py @@ -0,0 +1,32 @@ +"""Load a garden dataset and create a grapher dataset.""" + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load garden dataset. + ds_garden = paths.load_dataset("schistosomiasis") + + # Read table from garden dataset. + tb = ds_garden["schistosomiasis"] + + # + # Process data. + # + # No processing is done in this step: the garden table is handed to create_dataset as it was read. + + # + # Save outputs. + # + # Create a new grapher dataset with the same metadata as the garden dataset. + ds_grapher = create_dataset( + dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=ds_garden.metadata + ) + + # Save changes in the new grapher dataset. + ds_grapher.save() diff --git a/etl/steps/data/grapher/neglected_tropical_diseases/2024-05-02/soil_transmitted_helminthiases.py b/etl/steps/data/grapher/neglected_tropical_diseases/2024-05-02/soil_transmitted_helminthiases.py new file mode 100644 index 00000000000..c4a5367eea0 --- /dev/null +++ b/etl/steps/data/grapher/neglected_tropical_diseases/2024-05-02/soil_transmitted_helminthiases.py @@ -0,0 +1,33 @@ +"""Load a garden dataset and create a grapher dataset.""" + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load garden dataset. + ds_garden = paths.load_dataset("soil_transmitted_helminthiases") + + # Read table from garden dataset.
+ # Each variable is named after the garden table it holds (SAC = school age children, pre-SAC = pre-school age children). + tb_pre_sac = ds_garden["soil_transmitted_helminthiases_pre_sac"] + tb_sac = ds_garden["soil_transmitted_helminthiases_sac"] + tb_nat_sac = ds_garden["soil_transmitted_helminthiases_national_sac"] + tb_nat_pre_sac = ds_garden["soil_transmitted_helminthiases_national_pre_sac"] + # + # Save outputs. + # + # Create a new grapher dataset with the same metadata as the garden dataset. + ds_grapher = create_dataset( + dest_dir, + tables=[tb_pre_sac, tb_sac, tb_nat_sac, tb_nat_pre_sac], + check_variables_metadata=True, + default_metadata=ds_garden.metadata, + ) + + # Save changes in the new grapher dataset. + ds_grapher.save() diff --git a/etl/steps/data/meadow/neglected_tropical_diseases/2024-05-02/lymphatic_filariasis.py b/etl/steps/data/meadow/neglected_tropical_diseases/2024-05-02/lymphatic_filariasis.py new file mode 100644 index 00000000000..1118bfa9fe2 --- /dev/null +++ b/etl/steps/data/meadow/neglected_tropical_diseases/2024-05-02/lymphatic_filariasis.py @@ -0,0 +1,36 @@ +"""Load a snapshot and create a meadow dataset.""" + +import numpy as np + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Retrieve snapshot. + snap = paths.load_snapshot("lymphatic_filariasis.xlsx") + + # Load data from snapshot. + tb = snap.read() + + # + # Process data. + # + # errors="raise" makes the rename fail if any of the expected source columns is missing. + tb = tb.rename(columns={"Country": "country", "Year": "year", "Type of MDA": "type_of_mda"}, errors="raise") + # The "No data" placeholder used in the source is turned into NaN. + tb = tb.replace("No data", np.nan) + # Ensure all columns are snake-case, set an appropriate index, and sort conveniently. + tb = tb.format(["country", "year", "type_of_mda"]) + + # + # Save outputs. + # + # Create a new meadow dataset with the same metadata as the snapshot. + ds_meadow = create_dataset(dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=snap.metadata) + + # Save changes in the new meadow dataset.
+ ds_meadow.save() diff --git a/etl/steps/data/meadow/neglected_tropical_diseases/2024-05-02/schistosomiasis.py b/etl/steps/data/meadow/neglected_tropical_diseases/2024-05-02/schistosomiasis.py new file mode 100644 index 00000000000..2902af5f166 --- /dev/null +++ b/etl/steps/data/meadow/neglected_tropical_diseases/2024-05-02/schistosomiasis.py @@ -0,0 +1,53 @@ +"""Load a snapshot and create a meadow dataset.""" + +import numpy as np +from owid.catalog import processing as pr + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + +# NOTE(review): `pr` is imported above but is not used anywhere in this step. + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Retrieve snapshot. + snap = paths.load_snapshot("schistosomiasis.xlsx") + + # Load data from snapshot. + tb = snap.read() + + # + # Process data. + # + # Placeholder strings that are replaced with NaN in the columns listed below. + values_to_replace = [ + "To be defined", + "No PC required", + "Surveillance", + " ", + ] + + # Columns in which the placeholders are replaced. + columns_to_check = [ + "Population requiring PC for SCH annually", + "SAC population requiring PC for SCH annually", + "Programme coverage (%)", + ] + + # Replace the placeholders, then cast the coverage column to float. + tb[columns_to_check] = tb[columns_to_check].replace(values_to_replace, np.nan) + tb["Programme coverage (%)"] = tb["Programme coverage (%)"].astype(float) + # Ensure all columns are snake-case, set an appropriate index, and sort conveniently. + tb = tb.format(["country", "year"]) + + # + # Save outputs. + # + # Create a new meadow dataset with the same metadata as the snapshot. + ds_meadow = create_dataset(dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=snap.metadata) + + # Save changes in the new meadow dataset.
+ ds_meadow.save() diff --git a/etl/steps/data/meadow/neglected_tropical_diseases/2024-05-02/soil_transmitted_helminthiases.py b/etl/steps/data/meadow/neglected_tropical_diseases/2024-05-02/soil_transmitted_helminthiases.py new file mode 100644 index 00000000000..4cafdba50c6 --- /dev/null +++ b/etl/steps/data/meadow/neglected_tropical_diseases/2024-05-02/soil_transmitted_helminthiases.py @@ -0,0 +1,60 @@ +"""Load a snapshot and create a meadow dataset.""" + + +import numpy as np +import pandas as pd +from owid.catalog import processing as pr + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + +# NOTE(review): `pd` and `pr` are imported above but are not used anywhere in this step. + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Retrieve snapshot. + snap = paths.load_snapshot("soil_transmitted_helminthiases.xlsx") + + # Load data from snapshot. + tb = snap.read() + tb = tb.drop(columns="country_code") + + # + # Process data. + # + # Placeholder strings that are replaced with NaN in the columns listed below. + values_to_replace = [ + "To be defined", + "No PC required", + "No data", + "No data available", + " ", + ] + + # Columns in which to replace the values - so that they can be floats rather than objects + columns_to_check = [ + "Population requiring PC for STH, Pre-SAC", + "Programme coverage, Pre-SAC (%)", + "Population requiring PC for STH, SAC", + "Programme coverage, SAC (%)", + ] + + # Replacing the values + tb[columns_to_check] = tb[columns_to_check].replace(values_to_replace, np.nan) + + # Set the index on country, year and the two drug-combination columns. + cols = ["country", "year", "Drug combination, Pre-SAC", "Drug combination, SAC"] + # For some reason format doesn't work here, so the index is set directly with set_index instead. + # tb = tb.format(cols) + # verify_integrity=True makes set_index raise if the index keys are not unique. + tb = tb.set_index(cols, verify_integrity=True) + + # + # Save outputs. + # + # Create a new meadow dataset with the same metadata as the snapshot.
+ ds_meadow = create_dataset(dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=snap.metadata) + + # Save changes in the new meadow dataset. + ds_meadow.save() diff --git a/snapshots/neglected_tropical_diseases/2024-05-02/lymphatic_filariasis.py b/snapshots/neglected_tropical_diseases/2024-05-02/lymphatic_filariasis.py new file mode 100644 index 00000000000..cd0edc09d9d --- /dev/null +++ b/snapshots/neglected_tropical_diseases/2024-05-02/lymphatic_filariasis.py @@ -0,0 +1,24 @@ +"""Script to create a snapshot of dataset.""" + +from pathlib import Path + +import click + +from etl.snapshot import Snapshot + +# Version for current snapshot dataset, taken from the name of the directory that contains this script. +SNAPSHOT_VERSION = Path(__file__).parent.name + + +@click.command() +@click.option("--upload/--skip-upload", default=True, type=bool, help="Upload dataset to Snapshot") +def main(upload: bool) -> None: + # Create a new snapshot; the path is built from the namespace, SNAPSHOT_VERSION and the file name. + snap = Snapshot(f"neglected_tropical_diseases/{SNAPSHOT_VERSION}/lymphatic_filariasis.xlsx") + + # Download data from source, add file to DVC and upload to S3. + # The --upload/--skip-upload flag (default: upload) is forwarded as `upload`. + snap.create_snapshot(upload=upload) + + +if __name__ == "__main__": + main() diff --git a/snapshots/neglected_tropical_diseases/2024-05-02/lymphatic_filariasis.xlsx.dvc b/snapshots/neglected_tropical_diseases/2024-05-02/lymphatic_filariasis.xlsx.dvc new file mode 100644 index 00000000000..5aff99abae2 --- /dev/null +++ b/snapshots/neglected_tropical_diseases/2024-05-02/lymphatic_filariasis.xlsx.dvc @@ -0,0 +1,33 @@ +# Learn more at: +# http://docs.owid.io/projects/etl/architecture/metadata/reference/ +meta: + origin: + # Data product / Snapshot + title: Preventive Chemotherapy (PC) Data Portal - Lymphatic filariasis + description: |- + Progress towards achieving the Roadmap targets for control and elimination of the Neglected Tropical Diseases.
+ + date_published: "2024-03-14" + title_snapshot: PCT Databank - Lymphatic filariasis + description_snapshot: |- + Lymphatic filariasis situational analyses, core implementation data and trend data are provided through direct access to the interactive reports. + + # Citation + producer: World Health Organization + citation_full: |- + PCT Databank - Lymphatic filariasis, World Health Organization (2024) + attribution_short: WHO + # Files + url_main: https://www.who.int/teams/control-of-neglected-tropical-diseases/data-platforms/pct-databank/lymphatic-filariasis + url_download: https://apps.who.int/neglected_diseases/ntddata/data/LF_data.xlsx?ua=1 + date_accessed: 2024-05-02 + + # License + license: + name: CC BY-NC-SA 3.0 IGO + url: https://www.who.int/about/policies/publishing/copyright + +# DVC record of the snapshot file (md5, size, path). +outs: + - md5: 698b0341d985d5a568893f63dee4d5bd + size: 134496 + path: lymphatic_filariasis.xlsx diff --git a/snapshots/neglected_tropical_diseases/2024-05-02/schistosomiasis.py b/snapshots/neglected_tropical_diseases/2024-05-02/schistosomiasis.py new file mode 100644 index 00000000000..3abc647e36c --- /dev/null +++ b/snapshots/neglected_tropical_diseases/2024-05-02/schistosomiasis.py @@ -0,0 +1,24 @@ +"""Script to create a snapshot of dataset.""" + +from pathlib import Path + +import click + +from etl.snapshot import Snapshot + +# Version for current snapshot dataset, taken from the name of the directory that contains this script. +SNAPSHOT_VERSION = Path(__file__).parent.name + + +@click.command() +@click.option("--upload/--skip-upload", default=True, type=bool, help="Upload dataset to Snapshot") +def main(upload: bool) -> None: + # Create a new snapshot; the path is built from the namespace, SNAPSHOT_VERSION and the file name. + snap = Snapshot(f"neglected_tropical_diseases/{SNAPSHOT_VERSION}/schistosomiasis.xlsx") + + # Download data from source, add file to DVC and upload to S3.
+ snap.create_snapshot(upload=upload) + + +if __name__ == "__main__": + main() diff --git a/snapshots/neglected_tropical_diseases/2024-05-02/schistosomiasis.xlsx.dvc b/snapshots/neglected_tropical_diseases/2024-05-02/schistosomiasis.xlsx.dvc new file mode 100644 index 00000000000..d8302a945f7 --- /dev/null +++ b/snapshots/neglected_tropical_diseases/2024-05-02/schistosomiasis.xlsx.dvc @@ -0,0 +1,33 @@ +# Learn more at: +# http://docs.owid.io/projects/etl/architecture/metadata/reference/ +meta: + origin: + # Data product / Snapshot + title: Preventive Chemotherapy (PC) Data Portal - Schistosomiasis + description: |- + Progress towards achieving the Roadmap targets for control and elimination of the Neglected Tropical Diseases. + + date_published: "2024-03-14" + title_snapshot: PCT Databank - Schistosomiasis + description_snapshot: |- + Schistosomiasis situational analyses, core implementation data and trend data are provided through direct access to the interactive reports. + + # Citation + producer: World Health Organization + citation_full: |- + PCT Databank - Schistosomiasis, World Health Organization (2024) + attribution_short: WHO + # Files + url_main: https://www.who.int/teams/control-of-neglected-tropical-diseases/data-platforms/pct-databank/schistosomiasis + url_download: https://apps.who.int/neglected_diseases/ntddata/data/SCH_data.xlsx?ua=1 + date_accessed: 2024-05-02 + + # License + license: + name: CC BY-NC-SA 3.0 IGO + url: https://www.who.int/about/policies/publishing/copyright + +outs: + - md5: b8e16cc7fb3aec5f8cdae5466123d914 + size: 87864 + path: schistosomiasis.xlsx diff --git a/snapshots/neglected_tropical_diseases/2024-05-02/soil_transmitted_helminthiases.py b/snapshots/neglected_tropical_diseases/2024-05-02/soil_transmitted_helminthiases.py new file mode 100644 index 00000000000..205115749a0 --- /dev/null +++ b/snapshots/neglected_tropical_diseases/2024-05-02/soil_transmitted_helminthiases.py @@ -0,0 +1,24 @@ +"""Script to create a snapshot of 
dataset.""" + +from pathlib import Path + +import click + +from etl.snapshot import Snapshot + +# Version for current snapshot dataset, taken from the name of the directory that contains this script. +SNAPSHOT_VERSION = Path(__file__).parent.name + + +@click.command() +@click.option("--upload/--skip-upload", default=True, type=bool, help="Upload dataset to Snapshot") +def main(upload: bool) -> None: + # Create a new snapshot; the path is built from the namespace, SNAPSHOT_VERSION and the file name. + snap = Snapshot(f"neglected_tropical_diseases/{SNAPSHOT_VERSION}/soil_transmitted_helminthiases.xlsx") + + # Download data from source, add file to DVC and upload to S3. + # The --upload/--skip-upload flag (default: upload) is forwarded as `upload`. + snap.create_snapshot(upload=upload) + + +if __name__ == "__main__": + main() diff --git a/snapshots/neglected_tropical_diseases/2024-05-02/soil_transmitted_helminthiases.xlsx.dvc b/snapshots/neglected_tropical_diseases/2024-05-02/soil_transmitted_helminthiases.xlsx.dvc new file mode 100644 index 00000000000..35ee86ddc23 --- /dev/null +++ b/snapshots/neglected_tropical_diseases/2024-05-02/soil_transmitted_helminthiases.xlsx.dvc @@ -0,0 +1,33 @@ +# Learn more at: +# http://docs.owid.io/projects/etl/architecture/metadata/reference/ +meta: + origin: + # Data product / Snapshot + title: Preventive Chemotherapy (PC) Data Portal - Soil-transmitted helminthiases + description: |- + Progress towards achieving the Roadmap targets for control and elimination of the Neglected Tropical Diseases. + + date_published: "2024-03-14" + title_snapshot: PCT Databank - Soil-transmitted helminthiases + description_snapshot: |- + Soil-transmitted helminthiases situational analyses, core implementation data and trend data are provided through direct access to the interactive reports.
+ + # Citation + producer: World Health Organization + citation_full: |- + PCT Databank - Soil-transmitted helminthiases, World Health Organization (2024) + attribution_short: WHO + # Files + url_main: https://www.who.int/teams/control-of-neglected-tropical-diseases/data-platforms/pct-databank/soil-transmitted-helminthiases + url_download: https://apps.who.int/neglected_diseases/ntddata/data/STH_data.xlsx?ua=1 + date_accessed: 2024-05-02 + + # License + license: + name: CC BY-NC-SA 3.0 IGO + url: https://www.who.int/about/policies/publishing/copyright + +outs: + - md5: ed7074d412dea543f937987eeeb62e0f + size: 328507 + path: soil_transmitted_helminthiases.xlsx