From e61483149383e94e84467c30e1ef2521a3897d22 Mon Sep 17 00:00:00 2001
From: Mojmir Vinkler
Date: Thu, 22 Aug 2024 19:30:50 +0200
Subject: [PATCH] :honeybee: Upgrade pyright to match VSCode errors (#3148)

* :honeybee: update pyright to version 1.1.373
---
 apps/backport/datasync/datasync.py | 2 +-
 apps/metadata_migrate/cli.py | 8 +--
 apps/metagpt/cli.py | 2 +-
 apps/utils/gpt.py | 6 +-
 apps/utils/profile.py | 2 +-
 apps/wizard/app_pages/fasttrack/load.py | 4 +-
 apps/wizard/app_pages/owidle.py | 10 +--
 apps/wizard/cli.py | 4 +-
 apps/wizard/config/__init__.py | 3 +-
 apps/wizard/etl_steps/express.py | 2 +-
 apps/wizard/etl_steps/garden.py | 2 +-
 apps/wizard/etl_steps/grapher.py | 2 +-
 apps/wizard/etl_steps/meadow.py | 2 +-
 apps/wizard/etl_steps/snapshot.py | 4 +-
 apps/wizard/home.py | 2 +-
 apps/wizard/utils/__init__.py | 2 +-
 apps/wizard/utils/step_form.py | 4 +-
 etl/compare.py | 13 ++--
 etl/data_helpers/geo.py | 7 +-
 etl/data_helpers/misc.py | 4 +-
 etl/datadiff.py | 2 +-
 etl/git_helpers.py | 6 +-
 etl/grapher_helpers.py | 6 +-
 etl/grapher_import.py | 8 +--
 etl/helpers.py | 12 ++--
 etl/match_variables.py | 2 +-
 .../archive/migrate_to_new_metadata.py | 2 +-
 etl/scripts/faostat/create_chart_revisions.py | 8 ++-
 etl/scripts/faostat/update_custom_metadata.py | 2 +-
 etl/snapshot.py | 1 +
 .../2024-05-24/world_inequality_database.py | 2 +-
 .../data/meadow/wvs/2023-03-08/wvs_trust.py | 3 +-
 etl/steps/open_numbers.py | 1 +
 lib/catalog/owid/catalog/datasets.py | 2 +-
 lib/catalog/owid/catalog/meta.py | 2 +-
 lib/catalog/owid/catalog/processing_log.py | 2 +-
 lib/catalog/owid/catalog/s3_utils.py | 1 +
 lib/catalog/owid/catalog/tables.py | 72 ++++++++++---------
 lib/catalog/owid/catalog/variables.py | 40 +++++------
 lib/catalog/poetry.lock | 21 ++----
 lib/catalog/pyproject.toml | 5 +-
 lib/catalog/tests/mocking.py | 6 ++
 lib/datautils/owid/datautils/dataframes.py | 6 +-
 lib/datautils/owid/datautils/google/api.py | 2 +-
 lib/datautils/owid/datautils/google/sheets.py | 8 +--
 lib/datautils/poetry.lock | 21 ++----
 lib/datautils/pyproject.toml | 2 +-
 lib/datautils/tests/google/test_config.py | 4 +-
 lib/datautils/tests/google/test_sheets.py | 2 +-
 lib/datautils/tests/io/test_df.py | 2 +-
 lib/repack/poetry.lock | 8 +--
 lib/repack/pyproject.toml | 2 +-
 poetry.lock | 12 ++--
 pyproject.toml | 4 +-
 scripts/add_ignore_pyright.py | 40 +++++++++++
 .../2023-06-14/ai_private_investment.py | 2 +-
 .../biodiversity/2023-08-14/iucn_animal.py | 2 +-
 .../cardiovascular_diseases/2023-10-10/esc.py | 2 +-
 .../2024-01-31/climate_change_impacts.py | 4 +-
 .../2024-02-22/weekly_wildfires_2003_2023.py | 2 +-
 .../2024-03-11/climate_change_impacts.py | 4 +-
 .../2024-04-17/climate_change_impacts.py | 4 +-
 .../2024-05-20/climate_change_impacts.py | 4 +-
 .../2024-07-23/climate_change_impacts.py | 4 +-
 snapshots/climate/latest/weekly_wildfires.py | 2 +-
 .../2024-07-10/tree_cover_loss_by_driver.py | 2 +-
 snapshots/health/2023-08-09/unaids.py | 2 +-
 snapshots/ihme_gbd/2024-05-20/gbd_cause.py | 2 +-
 .../2024-05-20/gbd_child_mortality.py | 2 +-
 .../ihme_gbd/2024-05-20/gbd_drug_risk.py | 2 +-
 .../ihme_gbd/2024-05-20/gbd_mental_health.py | 2 +-
 .../2024-05-20/gbd_mental_health_burden.py | 2 +-
 .../ihme_gbd/2024-05-20/gbd_prevalence.py | 2 +-
 snapshots/ihme_gbd/2024-05-20/gbd_risk.py | 2 +-
 snapshots/ihme_gbd/2024-05-20/impairments.py | 2 +-
 .../news/2024-05-07/guardian_mentions.py | 2 +-
 snapshots/space/2024-01-04/object_launches.py | 2 +-
 snapshots/statins/2023-10-05/bmj_2022.py | 2 +-
 snapshots/statins/2023-10-05/lancet_2022.py | 2 +-
 ...cs_for_mineral_and_material_commodities.py | 6 +-
 snapshots/wb/2023-09-19/us_cpi.py | 2 +-
 snapshots/wb/2024-01-17/pip_api.py | 2 +-
 .../2024-03-26/food_prices_for_nutrition.py | 2 +-
 snapshots/wb/2024-03-27/pip_api.py | 4 +-
 snapshots/who/2022-09-30/ghe.py | 2 +-
 snapshots/who/2024-07-30/ghe.py | 2 +-
 tests/data_helpers/test_geo.py | 9 ++-
 87 files changed, 270 insertions(+), 221 deletions(-)
 create mode 100755 scripts/add_ignore_pyright.py

diff --git a/apps/backport/datasync/datasync.py b/apps/backport/datasync/datasync.py
index b003261e6d3..a5c9a63c537 100644
--- a/apps/backport/datasync/datasync.py
+++ b/apps/backport/datasync/datasync.py
@@ -38,7 +38,7 @@ def upload_gzip_string(s: str, s3_path: str, private: bool = False) -> None:
         retry=retry_if_exception_type((EndpointConnectionError, SSLError)),
     ):
         with attempt:
-            client.put_object(
+            client.put_object(  # type: ignore[reportAttributeAccessIssue]
                 Bucket=bucket,
                 Body=body_gzip,
                 Key=key,
diff --git a/apps/metadata_migrate/cli.py b/apps/metadata_migrate/cli.py
index 63aece3326c..bc0c93955cd 100644
--- a/apps/metadata_migrate/cli.py
+++ b/apps/metadata_migrate/cli.py
@@ -324,22 +324,22 @@ def _create_origin_from_source(ds: Dataset, source: Source, license: Optional[Li
         description += source.description

     origin = Origin(
-        title=ds.metadata.title,
-        producer=source.name,
+        title=ds.metadata.title,  # type: ignore[reportArgumentType]
+        producer=source.name,  # type: ignore[reportArgumentType]
         citation_full=source.published_by,
         license=license,
         description=description,
         url_main=source.url,
         url_download=source.source_data_url,
         date_accessed=source.date_accessed,
-        date_published=source.publication_date or source.publication_year,
+        date_published=source.publication_date or source.publication_year,  # type: ignore[reportArgumentType]
     )

     if not origin.date_published:
         log.warning(
             f"missing publication_date and publication_year in source, using date_accessed: {origin.date_accessed}"
         )
-        origin.date_published = origin.date_accessed
+        origin.date_published = origin.date_accessed  # type: ignore

     return origin
diff --git a/apps/metagpt/cli.py b/apps/metagpt/cli.py
index 78ba022dc8e..f2cf33c96a0 100644
--- a/apps/metagpt/cli.py
+++ b/apps/metagpt/cli.py
@@ -87,7 +87,7 @@ def main(path_to_file: str, output_dir: str, overwrite: bool, model: str) -> Non
 class MetadataGPTUpdater:
     """Update metadata file using Chat GPT."""

-    def __init__(self: Self, path_to_file: str, model: str) -> None:
+    def __init__(self: Self, path_to_file: str, model: str) -> None:  # type: ignore[reportInvalidTypeVarUse]
         """Initialize the metadata updater."""
         # Name of the model
         self.model: str = model
diff --git a/apps/utils/gpt.py b/apps/utils/gpt.py
index ac9c52a3b87..c3e2c63725c 100644
--- a/apps/utils/gpt.py
+++ b/apps/utils/gpt.py
@@ -72,7 +72,7 @@ class GPTResponse(ChatCompletion):

     message_content_dix: Optional[Dict[str, Any]] = field(default_factory=dict)

-    def __init__(self: Self, chat_completion_instance: ChatCompletion | None = None, **kwargs) -> None:
+    def __init__(self: Self, chat_completion_instance: ChatCompletion | None = None, **kwargs) -> None:  # type: ignore[reportInvalidTypeVarUse]
         """Initialize OpenAI API wrapper."""
         if chat_completion_instance:
             super().__init__(**chat_completion_instance.dict())
@@ -102,7 +102,7 @@ def message_content_as_dict(self: Self) -> Dict[str, Any]:
             self.message_content_dix = yaml.safe_load(self.message_content)
         else:
             raise ValueError("`message_content` is empty!")
-        return self.message_content_dix
+        return self.message_content_dix  # type:
ignore[reportReturnType] @property def cost(self) -> float | None: @@ -167,7 +167,7 @@ def to_dict(self: Self) -> Dict[str, Any]: class OpenAIWrapper(OpenAI): """Wrapper for OpenAI API.""" - def __init__(self: Self, **kwargs) -> None: + def __init__(self: Self, **kwargs) -> None: # type: ignore[reportInvalidTypeVarUse] """Initialize OpenAI API wrapper.""" super().__init__(**kwargs) diff --git a/apps/utils/profile.py b/apps/utils/profile.py index 2853e6203c8..de41bde1e13 100644 --- a/apps/utils/profile.py +++ b/apps/utils/profile.py @@ -86,7 +86,7 @@ def cli(step: str, cpu: bool, mem: bool, functions: tuple[str]) -> None: # Profile the run function if mem: - memory_usage((mem_profile(lp_wrapper), [dest_dir])) + memory_usage((mem_profile(lp_wrapper), [dest_dir])) # type: ignore[reportArgumentType] else: lp_wrapper(dest_dir) diff --git a/apps/wizard/app_pages/fasttrack/load.py b/apps/wizard/app_pages/fasttrack/load.py index 0b447be1ff0..38fe57d209c 100644 --- a/apps/wizard/app_pages/fasttrack/load.py +++ b/apps/wizard/app_pages/fasttrack/load.py @@ -208,7 +208,7 @@ def parse_metadata_from_csv( date_published=str(dt.date.today()), # type: ignore ) - return DatasetMeta(**dataset_dict), {k: VariableMeta(**v) for k, v in variables_dict.items()}, origin + return DatasetMeta(**dataset_dict), {k: VariableMeta(**v) for k, v in variables_dict.items()}, origin # type: ignore ################################### @@ -301,7 +301,7 @@ def _parse_sources(sources_meta_df: pd.DataFrame) -> Optional[Source]: # short_name is not used anymore source.pop("short_name", None) - return Source(**source) + return Source(**source) # type: ignore[reportCallIssue] def _parse_origins(origins_meta_df: pd.DataFrame) -> Optional[Origin]: diff --git a/apps/wizard/app_pages/owidle.py b/apps/wizard/app_pages/owidle.py index 616bf141490..6afe082f53d 100644 --- a/apps/wizard/app_pages/owidle.py +++ b/apps/wizard/app_pages/owidle.py @@ -298,7 +298,7 @@ def load_data(placeholder: str) -> Tuple[pd.DataFrame, gpd.GeoDataFrame]: ].drop_duplicates() # df_geo = df_geo.to_crs(3310) - return tb_indicator, df_geo + return tb_indicator, df_geo # type: ignore[reportReturnType] @st.cache_data @@ -827,16 +827,16 @@ def plot_chart_gdp_pc(countries_guessed: List[str], years_guessed: List[str], so countries_guessed, years_guessed=years_guessed, solution=solution, - column_indicator=GDP_INDICATOR, - title=gdp_indicator_titles[GDP_INDICATOR], + column_indicator=GDP_INDICATOR, # type: ignore[reportArgumentType] + title=gdp_indicator_titles[GDP_INDICATOR], # type: ignore[reportArgumentType] column_country="location", ) else: _plot_chart( countries_guessed, solution=solution, - column_indicator=GDP_INDICATOR, - title=gdp_indicator_titles[GDP_INDICATOR], + column_indicator=GDP_INDICATOR, # type: ignore[reportArgumentType] + title=gdp_indicator_titles[GDP_INDICATOR], # type: ignore[reportArgumentType] column_country="location", ) diff --git a/apps/wizard/cli.py b/apps/wizard/cli.py index a3317715dd9..c492d707ad0 100644 --- a/apps/wizard/cli.py +++ b/apps/wizard/cli.py @@ -30,7 +30,7 @@ @click.command(cls=RichCommand, context_settings=dict(show_default=True)) @click.argument( "phase", - type=click.Choice(WIZARD_PHASES.__args__), # type: ignore + type=click.Choice(WIZARD_PHASES), default="all", ) @click.option( @@ -52,7 +52,7 @@ help="Application port.", ) def cli( - phase: Iterable[WIZARD_PHASES], + phase: Iterable[str], run_checks: bool, dummy_data: bool, port: int, diff --git a/apps/wizard/config/__init__.py b/apps/wizard/config/__init__.py index 
7c94d7bf6e0..0366998ded4 100644 --- a/apps/wizard/config/__init__.py +++ b/apps/wizard/config/__init__.py @@ -2,7 +2,6 @@ It basically reads the configuration from .wizard.yml and renders the home page and other details. """ -from typing import Literal import yaml @@ -106,7 +105,7 @@ def _check_wizard_config(config: dict): _aliases.append(app["alias"]) ## Add aliases from etl steps and 'all' _aliases = tuple(_aliases + list(WIZARD_CONFIG["etl"]["steps"].keys()) + ["all"]) -WIZARD_PHASES = Literal[_aliases] # type: ignore +WIZARD_PHASES = _aliases # Get all pages by alias _pages = [ww for w in WIZARD_CONFIG["sections"] for ww in w["apps"]] diff --git a/apps/wizard/etl_steps/express.py b/apps/wizard/etl_steps/express.py index ce3eb291ecb..bf9c516d0f8 100644 --- a/apps/wizard/etl_steps/express.py +++ b/apps/wizard/etl_steps/express.py @@ -90,7 +90,7 @@ class ExpressForm(utils.StepForm): namespace_custom: str | None = None update_period_date: date - def __init__(self: Self, **data: str | date | bool | int) -> None: + def __init__(self: Self, **data: str | date | bool | int) -> None: # type: ignore[reportInvalidTypeVarUse] """Construct class.""" data["add_to_dag"] = data["dag_file"] != utils.ADD_DAG_OPTIONS[0] diff --git a/apps/wizard/etl_steps/garden.py b/apps/wizard/etl_steps/garden.py index 88a8340a8df..92926cf0964 100644 --- a/apps/wizard/etl_steps/garden.py +++ b/apps/wizard/etl_steps/garden.py @@ -83,7 +83,7 @@ class GardenForm(utils.StepForm): update_period_days: int topic_tags: List[str] - def __init__(self: Self, **data: str | bool) -> None: + def __init__(self: Self, **data: str | bool) -> None: # type: ignore[reportInvalidTypeVarUse] """Construct class.""" data["add_to_dag"] = data["dag_file"] != utils.ADD_DAG_OPTIONS[0] diff --git a/apps/wizard/etl_steps/grapher.py b/apps/wizard/etl_steps/grapher.py index 8c9ae0396ce..85b8cf69f44 100644 --- a/apps/wizard/etl_steps/grapher.py +++ b/apps/wizard/etl_steps/grapher.py @@ -59,7 +59,7 @@ class GrapherForm(utils.StepForm): dag_file: str is_private: bool - def __init__(self: Self, **data: str | bool) -> None: + def __init__(self: Self, **data: str | bool) -> None: # type: ignore[reportInvalidTypeVarUse] """Construct class.""" data["add_to_dag"] = data["dag_file"] != utils.ADD_DAG_OPTIONS[0] diff --git a/apps/wizard/etl_steps/meadow.py b/apps/wizard/etl_steps/meadow.py index 45b3a7ccf5e..ceb314294bb 100644 --- a/apps/wizard/etl_steps/meadow.py +++ b/apps/wizard/etl_steps/meadow.py @@ -65,7 +65,7 @@ class MeadowForm(utils.StepForm): generate_notebook: bool is_private: bool - def __init__(self: Self, **data: str | bool) -> None: + def __init__(self: Self, **data: str | bool) -> None: # type: ignore[reportInvalidTypeVarUse] """Construct class.""" data["add_to_dag"] = data["dag_file"] != utils.ADD_DAG_OPTIONS[0] diff --git a/apps/wizard/etl_steps/snapshot.py b/apps/wizard/etl_steps/snapshot.py index d2895bfc002..e0869c78682 100644 --- a/apps/wizard/etl_steps/snapshot.py +++ b/apps/wizard/etl_steps/snapshot.py @@ -101,7 +101,7 @@ class SnapshotForm(utils.StepForm): license_url: str license_name: str - def __init__(self: Self, **data: str | int) -> None: + def __init__(self: Self, **data: str | int) -> None: # type: ignore[reportInvalidTypeVarUse] """Construct form.""" # Change name for certain fields (and remove old ones) data["license_url"] = data["origin.license.url"] @@ -177,7 +177,7 @@ def validate(self: "SnapshotForm") -> None: self.errors["origin.attribution_custom"] = "Please introduce the name of the custom attribute!" 
@property - def metadata(self: Self) -> Dict[str, Any]: + def metadata(self: Self) -> Dict[str, Any]: # type: ignore[reportIncompatibleMethodOverride] """Define metadata for easy YAML-export.""" license_field = { "name": self.license_name, diff --git a/apps/wizard/home.py b/apps/wizard/home.py index 07b65633c0e..b3635a585cf 100644 --- a/apps/wizard/home.py +++ b/apps/wizard/home.py @@ -77,7 +77,7 @@ def create_card( # text=f"Press {i + 1}", # text=["This is a test card", "This is a subtext"], styles=styles, - on_click=lambda: None, + on_click=lambda: None, # type: ignore[reportArgumentType] ) if go_to_page: st.switch_page(entrypoint) diff --git a/apps/wizard/utils/__init__.py b/apps/wizard/utils/__init__.py index 2c47f2d0453..c812059dd9c 100644 --- a/apps/wizard/utils/__init__.py +++ b/apps/wizard/utils/__init__.py @@ -161,7 +161,7 @@ def remove_from_dag(step: str, dag_path: Path = DAG_WIZARD_PATH) -> None: class classproperty(property): """Decorator.""" - def __get__(self, owner_self: Self, owner_cls: Self): + def __get__(self, owner_self: Self, owner_cls: Self): # type: ignore[reportIncompatibleMethodOverride] return self.fget(owner_cls) # type: ignore diff --git a/apps/wizard/utils/step_form.py b/apps/wizard/utils/step_form.py index 30c185a2553..1d0a4ecea94 100644 --- a/apps/wizard/utils/step_form.py +++ b/apps/wizard/utils/step_form.py @@ -18,7 +18,7 @@ class StepForm(BaseModel): errors: Dict[str, Any] = {} step_name: str - def __init__(self: Self, **kwargs: str | int) -> None: + def __init__(self: Self, **kwargs: str | int) -> None: # type: ignore[reportInvalidTypeVarUse] """Construct parent class.""" super().__init__(**kwargs) self.validate() @@ -36,7 +36,7 @@ def from_state(cls: Type[Self]) -> Self: # st.write(data) return cls(**data) - def validate(self: Self) -> None: + def validate(self: Self) -> None: # type: ignore[reportIncompatibleMethodOverride] """Validate form fields.""" raise NotImplementedError("Needs to be implemented in the child class!") diff --git a/etl/compare.py b/etl/compare.py index a690224f3b3..2cb11f19de0 100644 --- a/etl/compare.py +++ b/etl/compare.py @@ -10,6 +10,7 @@ import rich from dotenv import dotenv_values from owid import catalog +from owid.catalog import CHANNEL from owid.repack import repack_frame from rich import print from rich_click.rich_command import RichCommand @@ -116,7 +117,7 @@ def diff_print( @click.pass_context def etl_catalog( ctx: click.core.Context, - channel: str, + channel: CHANNEL, namespace: str, dataset: str, table: str, @@ -140,7 +141,11 @@ def etl_catalog( """ try: remote_df = catalog.find_latest( - table=table, namespace=namespace, dataset=dataset, channels=[channel], version=version + table=table, + namespace=namespace, + dataset=dataset, + channels=[channel], + version=version, # type: ignore[reportArgumentType] ) except Exception as e: if debug: @@ -157,7 +162,7 @@ def etl_catalog( namespace=namespace, dataset=dataset, channel=cast(catalog.CHANNEL, channel), - version=version, + version=version, # type: ignore[reportArgumentType] ) except ValueError as e: # try again after reindexing @@ -168,7 +173,7 @@ def etl_catalog( namespace=namespace, dataset=dataset, channel=cast(catalog.CHANNEL, channel), - version=version, + version=version, # type: ignore[reportArgumentType] ) else: raise e diff --git a/etl/data_helpers/geo.py b/etl/data_helpers/geo.py index e96f8b0857c..7a1f2b59dee 100644 --- a/etl/data_helpers/geo.py +++ b/etl/data_helpers/geo.py @@ -5,7 +5,7 @@ import warnings from datetime import datetime from pathlib import 
Path -from typing import Any, Dict, List, Literal, Optional, Set, TypeVar, Union, cast +from typing import Any, Dict, Hashable, List, Literal, Optional, Set, TypeVar, Union, cast import numpy as np import owid.catalog.processing as pr @@ -848,7 +848,8 @@ def add_gdp_to_table( def create_table_of_regions_and_subregions(ds_regions: Dataset, subregion_type: str = "members") -> Table: # Subregion type can be "members" or "successors" (or in principle also "related"). # Get the main table from the regions dataset. - tb_regions = ds_regions["regions"][["name", subregion_type]] + tb_regions = ds_regions["regions"] + tb_regions = tb_regions.loc[:, ["name", subregion_type]] # Get a mapping from code to region name. mapping = tb_regions["name"].to_dict() @@ -1002,7 +1003,7 @@ def list_members_of_region( def detect_overlapping_regions( df: TableOrDataFrame, index_columns: List[str], - regions_and_members: Dict[str, List[str]], + regions_and_members: Dict[Hashable, List[str]], country_col: str = "country", year_col: str = "year", ignore_overlaps_of_zeros: bool = True, diff --git a/etl/data_helpers/misc.py b/etl/data_helpers/misc.py index bea29ab5feb..df8c687c155 100644 --- a/etl/data_helpers/misc.py +++ b/etl/data_helpers/misc.py @@ -165,7 +165,7 @@ def add_origins_to_mortality_database(tb_who: Table) -> Table: producer="World Health Organisation", url_main="https://platform.who.int/mortality/themes/theme-details/MDB/all-causes", date_accessed="2023-08-01", - date_published="2023-08-01", + date_published="2023-08-01", # type: ignore citation_full="Mortality Database, World Health Organization. Licence: CC BY-NC-SA 3.0 IGO.", description="The WHO mortality database is a collection death registration data including cause-of-death information from member states. Where they are collected, death registration data are the best source of information on key health indicators, such as life expectancy, and death registration data with cause-of-death information are the best source of information on mortality by cause, such as maternal mortality and suicide mortality. WHO requests from all countries annual data by age, sex, and complete ICD code (e.g., 4-digit code if the 10th revision of ICD was used). Countries have reported deaths by cause of death, year, sex, and age for inclusion in the WHO Mortality Database since 1950. Data are included only for countries reporting data properly coded according to the International Classification of Diseases (ICD). Today the database is maintained by the WHO Division of Data, Analytics and Delivery for Impact (DDI) and contains data from over 120 countries and areas. Data reported by member states and selected areas are displayed in this portal’s interactive visualizations if the data are reported to the WHO mortality database in the requested format and at least 65% of deaths were recorded in each country and year.", license=License(name="CC BY 4.0"), @@ -197,7 +197,7 @@ def add_origins_to_global_burden_of_disease(tb_gbd: Table) -> Table: producer="Institute of Health Metrics and Evaluation", url_main="https://vizhub.healthdata.org/gbd-results/", date_accessed="2021-12-01", - date_published="2020-10-17", + date_published="2020-10-17", # type: ignore citation_full="Global Burden of Disease Collaborative Network. Global Burden of Disease Study 2019 (GBD 2019). 
Seattle, United States: Institute for Health Metrics and Evaluation (IHME), 2020.", description="The Global Burden of Disease (GBD) provides a comprehensive picture of mortality and disability across countries, time, age, and sex. It quantifies health loss from hundreds of diseases, injuries, and risk factors, so that health systems can be improved and disparities eliminated. GBD research incorporates both the prevalence of a given disease or risk factor and the relative harm it causes. With these tools, decision-makers can compare different health issues and their effects.", license=License( diff --git a/etl/datadiff.py b/etl/datadiff.py index 8c6fc32c44a..37d3f382528 100644 --- a/etl/datadiff.py +++ b/etl/datadiff.py @@ -225,7 +225,7 @@ def _diff_tables(self, ds_a: Dataset, ds_b: Dataset, table_name: str): changed.append("changed [u]metadata[/u]") if new_index.any(): changed.append("new [u]data[/u]") - if (~eq_data[~new_index]).any(): + if (~eq_data[~new_index]).any(): # type: ignore[reportCallIssue] changed.append("changed [u]data[/u]") if changed: diff --git a/etl/git_helpers.py b/etl/git_helpers.py index 487a6311505..8b687aab963 100644 --- a/etl/git_helpers.py +++ b/etl/git_helpers.py @@ -49,9 +49,9 @@ def ensure_cloned(self, shallow: bool = True) -> None: if not dest_dir.is_dir(): dest_dir.parent.mkdir(parents=True, exist_ok=True) if shallow: - sh.git("clone", "--depth=1", self.github_url, dest_dir.as_posix(), _fg=True) + sh.git("clone", "--depth=1", self.github_url, dest_dir.as_posix(), _fg=True) # type: ignore[reportCallIssue] else: - sh.git("clone", self.github_url, dest_dir.as_posix(), _fg=True) + sh.git("clone", self.github_url, dest_dir.as_posix(), _fg=True) # type: ignore[reportCallIssue] else: self.update_and_reset() @@ -86,7 +86,7 @@ def _git(self, *args: str, **kwargs: Any) -> str: "Execute a git command in the context of this repo." 
return cast( str, - sh.git("--no-pager", *args, _cwd=self.cache_dir.as_posix(), **kwargs).stdout.decode("utf8").strip(), + sh.git("--no-pager", *args, _cwd=self.cache_dir.as_posix(), **kwargs).stdout.decode("utf8").strip(), # type: ignore[reportCallIssue] ) def is_up_to_date(self) -> bool: diff --git a/etl/grapher_helpers.py b/etl/grapher_helpers.py index 291d5b6bd4d..27adad7b3a4 100644 --- a/etl/grapher_helpers.py +++ b/etl/grapher_helpers.py @@ -392,11 +392,11 @@ def country_to_entity_id( assert by == "name", "create_entities works only with `by='name'`" ix = entity_id.isnull() # cast to float to fix issues with categories - entity_id[ix] = ( - country[ix].map(_get_and_create_entities_in_db(set(country[ix].unique()), engine=engine)).astype(float) + entity_id[ix] = ( # type: ignore[reportCallIssue] + country[ix].map(_get_and_create_entities_in_db(set(country[ix].unique()), engine=engine)).astype(float) # type: ignore[reportCallIssue] ) - assert not entity_id.isnull().any(), f"Some countries have not been mapped: {set(country[entity_id.isnull()])}" + assert not entity_id.isnull().any(), f"Some countries have not been mapped: {set(country[entity_id.isnull()])}" # type: ignore[reportCallIssue] return cast(pd.Series, entity_id.astype(int)) diff --git a/etl/grapher_import.py b/etl/grapher_import.py index 936e610a2e3..cab9413178f 100644 --- a/etl/grapher_import.py +++ b/etl/grapher_import.py @@ -221,11 +221,11 @@ def upsert_table( ) table = table.reorder_levels(["year", "entity_id"]) assert ( - table.index.dtypes.iloc[0] in gh.INT_TYPES - ), f"year must be of an integer type but was: {table.index.dtypes.iloc[0]}" + table.index.dtypes.iloc[0] in gh.INT_TYPES # type: ignore[reportAttributeAccessIssue] + ), f"year must be of an integer type but was: {table.index.dtypes.iloc[0]}" # type: ignore[reportAttributeAccessIssue] assert ( - table.index.dtypes.iloc[1] in gh.INT_TYPES - ), f"entity_id must be of an integer type but was: {table.index.dtypes.iloc[1]}" + table.index.dtypes.iloc[1] in gh.INT_TYPES # type: ignore[reportAttributeAccessIssue] + ), f"entity_id must be of an integer type but was: {table.index.dtypes.iloc[1]}" # type: ignore[reportAttributeAccessIssue] utils.validate_underscore(table.metadata.short_name, "Table's short_name") utils.validate_underscore(table.columns[0], "Variable's name") diff --git a/etl/helpers.py b/etl/helpers.py index fdc90dc341d..4933e9da18c 100644 --- a/etl/helpers.py +++ b/etl/helpers.py @@ -444,8 +444,8 @@ def dag(self): return self._dag @property - def channel(self) -> str: - return self.f.parent.parent.parent.name + def channel(self) -> CHANNEL: + return self.f.parent.parent.parent.name # type: ignore @property def namespace(self) -> str: @@ -605,7 +605,7 @@ def dependencies(self) -> List[str]: def get_dependency_step_name( self, short_name: str, - channel: Optional[str] = None, + channel: Optional[CHANNEL] = None, namespace: Optional[str] = None, version: Optional[Union[str, int]] = None, is_private: Optional[bool] = None, @@ -646,7 +646,7 @@ def get_dependency_step_name( def load_dependency( self, short_name: str, - channel: Optional[str] = None, + channel: Optional[CHANNEL] = None, namespace: Optional[str] = None, version: Optional[Union[str, int]] = None, is_private: Optional[bool] = None, @@ -673,7 +673,7 @@ def load_dependency( ) dataset = catalog.Dataset(dataset_path) - return dataset + return dataset # type: ignore[reportReturnType] def load_snapshot(self, short_name: Optional[str] = None, **kwargs) -> Snapshot: """Load snapshot dependency. 
short_name defaults to the current step's short_name.""" @@ -690,7 +690,7 @@ def read_snap_table(self, short_name: Optional[str] = None, **kwargs) -> Table: def load_dataset( self, short_name: Optional[str] = None, - channel: Optional[str] = None, + channel: Optional[CHANNEL] = None, namespace: Optional[str] = None, version: Optional[Union[str, int]] = None, ) -> catalog.Dataset: diff --git a/etl/match_variables.py b/etl/match_variables.py index d410506ab86..d791270e203 100644 --- a/etl/match_variables.py +++ b/etl/match_variables.py @@ -453,7 +453,7 @@ def consolidate_mapping_suggestions_with_user( name_old = suggestion["old"]["name_old"] id_old = suggestion["old"]["id_old"] missing_new = suggestion["new"] - missing_new = missing_new[~missing_new["id_new"].isin(ids_new_ignore)] + missing_new = missing_new[~missing_new["id_new"].isin(ids_new_ignore)] # type: ignore[reportCallIssue] new_indexes = missing_new.index.tolist() # display comparison to user diff --git a/etl/scripts/faostat/archive/migrate_to_new_metadata.py b/etl/scripts/faostat/archive/migrate_to_new_metadata.py index 8ec346ea94d..d750d9a3850 100644 --- a/etl/scripts/faostat/archive/migrate_to_new_metadata.py +++ b/etl/scripts/faostat/archive/migrate_to_new_metadata.py @@ -44,7 +44,7 @@ def main(): ) continue try: - dataset_id = db.get_dataset_id(dataset_name=dataset_name, version=VERSION) + dataset_id = db.get_dataset_id(dataset_name=dataset_name, version=VERSION) # type: ignore[reportArgumentType] except AssertionError: log.error( f"Grapher dataset for {domain} could not be found in the database. " diff --git a/etl/scripts/faostat/create_chart_revisions.py b/etl/scripts/faostat/create_chart_revisions.py index 2e2c4aec818..80c9b779846 100644 --- a/etl/scripts/faostat/create_chart_revisions.py +++ b/etl/scripts/faostat/create_chart_revisions.py @@ -97,14 +97,18 @@ def get_grapher_data_for_old_and_new_variables( try: # Get old and new dataset ids. 
dataset_id_old = db.get_dataset_id( - db_conn=db_conn, dataset_name=dataset_old.metadata.title, version=dataset_old.metadata.version + db_conn=db_conn, + dataset_name=dataset_old.metadata.title, # type: ignore + version=dataset_old.metadata.version, # type: ignore[reportArgumentType] ) except AssertionError: log.error(f"Dataset {dataset_old.metadata.title} not found in grapher DB.") return None, None try: dataset_id_new = db.get_dataset_id( - db_conn=db_conn, dataset_name=dataset_new.metadata.title, version=dataset_new.metadata.version + db_conn=db_conn, + dataset_name=dataset_new.metadata.title, # type: ignore + version=dataset_new.metadata.version, # type: ignore[reportArgumentType] ) except AssertionError: log.error(f"Dataset {dataset_new.metadata.title} not found in grapher DB.") diff --git a/etl/scripts/faostat/update_custom_metadata.py b/etl/scripts/faostat/update_custom_metadata.py index f4e739ee36d..3368fa6c1c1 100644 --- a/etl/scripts/faostat/update_custom_metadata.py +++ b/etl/scripts/faostat/update_custom_metadata.py @@ -13,7 +13,7 @@ import pandas as pd from owid.catalog import Dataset -from shared import INCLUDED_DATASETS_CODES, VERSION +from shared import INCLUDED_DATASETS_CODES, VERSION # type: ignore[reportMissingImports] from tqdm.auto import tqdm from etl.paths import DATA_DIR, STEP_DIR diff --git a/etl/snapshot.py b/etl/snapshot.py index ec89e107a88..ce19adf77e8 100644 --- a/etl/snapshot.py +++ b/etl/snapshot.py @@ -161,6 +161,7 @@ def dvc_add(self, upload: bool) -> None: # Upload to S3 md5 = checksum_file(self.path) bucket = config.R2_SNAPSHOTS_PUBLIC if self.metadata.is_public else config.R2_SNAPSHOTS_PRIVATE + assert self.metadata.is_public is not None s3_utils.upload(f"s3://{bucket}/{md5[:2]}/{md5[2:]}", str(self.path), public=self.metadata.is_public) # Update metadata file diff --git a/etl/steps/data/meadow/wid/2024-05-24/world_inequality_database.py b/etl/steps/data/meadow/wid/2024-05-24/world_inequality_database.py index 7ecd287a9c3..2778c1d89dc 100644 --- a/etl/steps/data/meadow/wid/2024-05-24/world_inequality_database.py +++ b/etl/steps/data/meadow/wid/2024-05-24/world_inequality_database.py @@ -208,7 +208,7 @@ def harmonize_countries(tb: Table, tb_regions: Table, iso2_missing: dict, iso_2_ ) # Drop rows without match (MER if there was not any error) - tb = tb[~(tb["name"] == "nan")].reset_index(drop=True) + tb = tb.loc[~(tb["name"] == "nan"), :].reset_index(drop=True) # Drop old country and ISO alpha 2 variable. Rename the newly built variable as `country` tb = tb.drop(columns=["country", "iso_alpha2"]) diff --git a/etl/steps/data/meadow/wvs/2023-03-08/wvs_trust.py b/etl/steps/data/meadow/wvs/2023-03-08/wvs_trust.py index 8139a8b5a9e..860858b4ace 100644 --- a/etl/steps/data/meadow/wvs/2023-03-08/wvs_trust.py +++ b/etl/steps/data/meadow/wvs/2023-03-08/wvs_trust.py @@ -5,7 +5,6 @@ from structlog import get_logger from etl.helpers import PathFinder, create_dataset -from etl.snapshot import Snapshot # Initialize logger. log = get_logger() @@ -118,7 +117,7 @@ def run(dest_dir: str) -> None: # Load inputs. # # Retrieve snapshot. - snap: Snapshot = paths.load_dependency("wvs_trust.csv") + snap = paths.load_snapshot("wvs_trust.csv") # Load data from snapshot. 
df = pd.read_csv(snap.path) diff --git a/etl/steps/open_numbers.py b/etl/steps/open_numbers.py index 80c09cd1731..607bda3a3de 100644 --- a/etl/steps/open_numbers.py +++ b/etl/steps/open_numbers.py @@ -1,3 +1,4 @@ +# type: ignore # # open_numbers.py # etl diff --git a/lib/catalog/owid/catalog/datasets.py b/lib/catalog/owid/catalog/datasets.py index 5a583589f3e..d98434fe34f 100644 --- a/lib/catalog/owid/catalog/datasets.py +++ b/lib/catalog/owid/catalog/datasets.py @@ -41,7 +41,7 @@ assert SUPPORTED_FORMATS[0] == PREFERRED_FORMAT # available channels in the catalog -CHANNEL = Literal["garden", "meadow", "grapher", "open_numbers", "examples", "explorers", "external"] +CHANNEL = Literal["snapshot", "garden", "meadow", "grapher", "open_numbers", "examples", "explorers", "external"] # all pandas nullable dtypes NULLABLE_DTYPES = [f"{sign}{typ}{size}" for typ in ("Int", "Float") for sign in ("", "U") for size in (8, 16, 32, 64)] diff --git a/lib/catalog/owid/catalog/meta.py b/lib/catalog/owid/catalog/meta.py index 47ec7897e45..406d0c0698a 100644 --- a/lib/catalog/owid/catalog/meta.py +++ b/lib/catalog/owid/catalog/meta.py @@ -35,7 +35,7 @@ def __hash__(self): """Hash that uniquely identifies an object (without needing frozen dataclass).""" return _hash_any(self) - def __eq__(self, other: Self) -> bool: + def __eq__(self, other: Self) -> bool: # type: ignore if not isinstance(other, self.__class__): return False return self.__hash__() == other.__hash__() diff --git a/lib/catalog/owid/catalog/processing_log.py b/lib/catalog/owid/catalog/processing_log.py index 7f77dc6349c..3db0e73097c 100644 --- a/lib/catalog/owid/catalog/processing_log.py +++ b/lib/catalog/owid/catalog/processing_log.py @@ -73,7 +73,7 @@ class ProcessingLog(List[LogEntry]): def as_dict(self) -> List[Dict[str, Any]]: return [r.to_dict() for r in self] - def clear(self) -> "ProcessingLog": + def clear(self) -> "ProcessingLog": # type: ignore if enabled(): super().clear() return self diff --git a/lib/catalog/owid/catalog/s3_utils.py b/lib/catalog/owid/catalog/s3_utils.py index 67dcbd9c6ff..9ce690591d2 100644 --- a/lib/catalog/owid/catalog/s3_utils.py +++ b/lib/catalog/owid/catalog/s3_utils.py @@ -8,6 +8,7 @@ from typing import Dict, List, Optional, Tuple from urllib.parse import urlparse +# import botocore.client.S3 as BaseClient import structlog from botocore.client import BaseClient from botocore.exceptions import ClientError diff --git a/lib/catalog/owid/catalog/tables.py b/lib/catalog/owid/catalog/tables.py index 5bb8bc0d511..1146b258fc1 100644 --- a/lib/catalog/owid/catalog/tables.py +++ b/lib/catalog/owid/catalog/tables.py @@ -13,6 +13,7 @@ Any, Callable, Dict, + Iterable, Iterator, List, Literal, @@ -571,21 +572,21 @@ def set_index( return cast(Table, t) @overload - def reset_index(self, *, inplace: Literal[True], **kwargs) -> None: + def reset_index(self, level=None, *, inplace: Literal[True], **kwargs) -> None: ... @overload - def reset_index(self, *, inplace: Literal[False], **kwargs) -> "Table": + def reset_index(self, level=None, *, inplace: Literal[False], **kwargs) -> "Table": ... @overload - def reset_index(self, **kwargs) -> "Table": + def reset_index(self, level=None, *, inplace: bool = False, **kwargs) -> "Table": ... 
- def reset_index(self, *args, **kwargs) -> Optional["Table"]: # type: ignore + def reset_index(self, level=None, *, inplace: bool = False, **kwargs) -> Optional["Table"]: # type: ignore """Fix type signature of reset_index.""" - t = super().reset_index(*args, **kwargs) - if kwargs.get("inplace"): + t = super().reset_index(level=level, inplace=inplace, **kwargs) # type: ignore + if inplace: return None else: # preserve metadata in _fields, calling reset_index() on a table drops it @@ -627,7 +628,7 @@ def join(self, other: Union[pd.DataFrame, "Table"], *args, **kwargs) -> "Table": return t # type: ignore def _repr_html_(self): - html = super()._repr_html_() + html = super()._repr_html_() # type: ignore if self.DEBUG: self.check_metadata() return f""" @@ -890,7 +891,7 @@ def sort_values(self, by: Union[str, List[str]], *args, **kwargs) -> "Table": def sum(self, *args, **kwargs) -> variables.Variable: variable_name = variables.UNNAMED_VARIABLE - variable = variables.Variable(super().sum(*args, **kwargs), name=variable_name) + variable = variables.Variable(super().sum(*args, **kwargs), name=variable_name) # type: ignore variable.metadata = variables.combine_variables_metadata( variables=[self[column] for column in self.columns], operation="+", name=variable_name ) @@ -899,7 +900,7 @@ def sum(self, *args, **kwargs) -> variables.Variable: def prod(self, *args, **kwargs) -> variables.Variable: variable_name = variables.UNNAMED_VARIABLE - variable = variables.Variable(super().prod(*args, **kwargs), name=variable_name) + variable = variables.Variable(super().prod(*args, **kwargs), name=variable_name) # type: ignore variable.metadata = variables.combine_variables_metadata( variables=[self[column] for column in self.columns], operation="*", name=variable_name ) @@ -909,8 +910,11 @@ def prod(self, *args, **kwargs) -> variables.Variable: def assign(self, *args, **kwargs) -> "Table": return super().assign(*args, **kwargs) # type: ignore + def reorder_levels(self, *args, **kwargs) -> "Table": + return super().reorder_levels(*args, **kwargs) # type: ignore + @staticmethod - def _update_log(tb: "Table", other: Union[Scalar, Series, variables.Variable, "Table"], operation: str) -> None: + def _update_log(tb: "Table", other: Union[Scalar, Series, variables.Variable, "Table"], operation: str) -> None: # type: ignore # The following would have a parents only the scalar, not the scalar and the corresponding variable. # tb = update_log(table=tb, operation="+", parents=[other], variable_names=tb.columns) # Instead, update the processing log of each variable in the table. 
@@ -921,60 +925,60 @@ def _update_log(tb: "Table", other: Union[Scalar, Series, variables.Variable, "T parents = [tb[column], other] tb[column].update_log(parents=parents, operation=operation) - def __add__(self, other: Union[Scalar, Series, variables.Variable, "Table"]) -> "Table": + def __add__(self, other: Union[Scalar, Series, variables.Variable, "Table"]) -> "Table": # type: ignore tb = cast(Table, Table(super().__add__(other=other)).copy_metadata(self)) self._update_log(tb, other, "+") return tb - def __iadd__(self, other: Union[Scalar, Series, variables.Variable, "Table"]) -> "Table": + def __iadd__(self, other: Union[Scalar, Series, variables.Variable, "Table"]) -> "Table": # type: ignore return self.__add__(other) - def __sub__(self, other: Union[Scalar, Series, variables.Variable, "Table"]) -> "Table": + def __sub__(self, other: Union[Scalar, Series, variables.Variable, "Table"]) -> "Table": # type: ignore tb = cast(Table, Table(super().__sub__(other=other)).copy_metadata(self)) self._update_log(tb, other, "-") return tb - def __isub__(self, other: Union[Scalar, Series, variables.Variable, "Table"]) -> "Table": + def __isub__(self, other: Union[Scalar, Series, variables.Variable, "Table"]) -> "Table": # type: ignore return self.__sub__(other) - def __mul__(self, other: Union[Scalar, Series, variables.Variable, "Table"]) -> "Table": + def __mul__(self, other: Union[Scalar, Series, variables.Variable, "Table"]) -> "Table": # type: ignore tb = cast(Table, Table(super().__mul__(other=other)).copy_metadata(self)) self._update_log(tb, other, "*") return tb - def __imul__(self, other: Union[Scalar, Series, variables.Variable, "Table"]) -> "Table": + def __imul__(self, other: Union[Scalar, Series, variables.Variable, "Table"]) -> "Table": # type: ignore return self.__mul__(other) - def __truediv__(self, other: Union[Scalar, Series, variables.Variable, "Table"]) -> "Table": + def __truediv__(self, other: Union[Scalar, Series, variables.Variable, "Table"]) -> "Table": # type: ignore tb = cast(Table, Table(super().__truediv__(other=other)).copy_metadata(self)) self._update_log(tb, other, "/") return tb - def __itruediv__(self, other: Union[Scalar, Series, variables.Variable, "Table"]) -> "Table": + def __itruediv__(self, other: Union[Scalar, Series, variables.Variable, "Table"]) -> "Table": # type: ignore return self.__truediv__(other) - def __floordiv__(self, other: Union[Scalar, Series, variables.Variable, "Table"]) -> "Table": + def __floordiv__(self, other: Union[Scalar, Series, variables.Variable, "Table"]) -> "Table": # type: ignore tb = cast(Table, Table(super().__floordiv__(other=other)).copy_metadata(self)) self._update_log(tb, other, "//") return tb - def __ifloordiv__(self, other: Union[Scalar, Series, variables.Variable, "Table"]) -> "Table": + def __ifloordiv__(self, other: Union[Scalar, Series, variables.Variable, "Table"]) -> "Table": # type: ignore return self.__floordiv__(other) - def __mod__(self, other: Union[Scalar, Series, variables.Variable, "Table"]) -> "Table": + def __mod__(self, other: Union[Scalar, Series, variables.Variable, "Table"]) -> "Table": # type: ignore tb = cast(Table, Table(super().__mod__(other=other)).copy_metadata(self)) self._update_log(tb, other, "%") return tb - def __imod__(self, other: Union[Scalar, Series, variables.Variable, "Table"]) -> "Table": + def __imod__(self, other: Union[Scalar, Series, variables.Variable, "Table"]) -> "Table": # type: ignore return self.__mod__(other) - def __pow__(self, other: Union[Scalar, Series, 
variables.Variable, "Table"]) -> "Table": + def __pow__(self, other: Union[Scalar, Series, variables.Variable, "Table"]) -> "Table": # type: ignore tb = cast(Table, Table(super().__pow__(other=other)).copy_metadata(self)) self._update_log(tb, other, "**") return tb - def __ipow__(self, other: Union[Scalar, Series, variables.Variable, "Table"]) -> "Table": + def __ipow__(self, other: Union[Scalar, Series, variables.Variable, "Table"]) -> "Table": # type: ignore return self.__pow__(other) def sort_index(self, *args, **kwargs) -> "Table": @@ -986,7 +990,7 @@ def groupby(self, *args, **kwargs) -> "TableGroupBy": def rolling(self, *args, **kwargs) -> "TableRolling": """Rolling operation that preserves metadata.""" - return TableRolling(super().rolling(*args, **kwargs), self.metadata, self._fields) + return TableRolling(super().rolling(*args, **kwargs), self.metadata, self._fields) # type: ignore def check_metadata(self, ignore_columns: Optional[List[str]] = None) -> None: """Check that all variables in the table have origins.""" @@ -1394,7 +1398,7 @@ def concat( table = Table( # use our concatenate that gracefully handles categoricals dataframes.concatenate( - objs=objs, + objs=objs, # type: ignore axis=axis, # type: ignore join=join, ignore_index=ignore_index, @@ -1854,7 +1858,7 @@ def copy_metadata(from_table: Table, to_table: Table, deep=False) -> Table: return tab -def get_unique_sources_from_tables(tables: List[Table]) -> List[Source]: +def get_unique_sources_from_tables(tables: Iterable[Table]) -> List[Source]: # Make a list of all sources of all variables in all tables. sources = [] for table in tables: @@ -1864,7 +1868,7 @@ def get_unique_sources_from_tables(tables: List[Table]) -> List[Source]: return sources -def get_unique_licenses_from_tables(tables: List[Table]) -> List[License]: +def get_unique_licenses_from_tables(tables: Iterable[Table]) -> List[License]: # Make a list of all licenses of all variables in all tables. licenses = [] for table in tables: @@ -1874,7 +1878,7 @@ def get_unique_licenses_from_tables(tables: List[Table]) -> List[License]: return licenses -def _get_metadata_value_from_tables_if_all_identical(tables: List[Table], field: str) -> Optional[Any]: +def _get_metadata_value_from_tables_if_all_identical(tables: Iterable[Table], field: str) -> Optional[Any]: # Get unique values from list, ignoring Nones. 
unique_values = set( [getattr(table.metadata, field) for table in tables if getattr(table.metadata, field) is not None] @@ -1887,15 +1891,15 @@ def _get_metadata_value_from_tables_if_all_identical(tables: List[Table], field: return combined_value -def combine_tables_title(tables: List[Table]) -> Optional[str]: +def combine_tables_title(tables: Iterable[Table]) -> Optional[str]: return _get_metadata_value_from_tables_if_all_identical(tables=tables, field="title") -def combine_tables_description(tables: List[Table]) -> Optional[str]: +def combine_tables_description(tables: Iterable[Table]) -> Optional[str]: return _get_metadata_value_from_tables_if_all_identical(tables=tables, field="description") -def combine_tables_datasetmeta(tables: List[Table]) -> Optional[DatasetMeta]: +def combine_tables_datasetmeta(tables: Iterable[Table]) -> Optional[DatasetMeta]: return _get_metadata_value_from_tables_if_all_identical(tables=tables, field="dataset") @@ -1911,7 +1915,7 @@ def combine_tables_metadata(tables: List[Table], short_name: Optional[str] = Non return metadata -def combine_tables_update_period_days(tables: List[Table]) -> Optional[int]: +def combine_tables_update_period_days(tables: Iterable[Table]) -> Optional[int]: # NOTE: This is a metadata field that is extracted from the dataset, not the table itself. # Gather all update_period_days from all tables (technically, from their dataset metadata). @@ -1930,7 +1934,7 @@ def combine_tables_update_period_days(tables: List[Table]) -> Optional[int]: return update_period_days_combined -def check_all_variables_have_metadata(tables: List[Table], fields: Optional[List[str]] = None) -> None: +def check_all_variables_have_metadata(tables: Iterable[Table], fields: Optional[List[str]] = None) -> None: if fields is None: fields = ["origins"] diff --git a/lib/catalog/owid/catalog/variables.py b/lib/catalog/owid/catalog/variables.py index 9aab355c623..d32a7a3d318 100644 --- a/lib/catalog/owid/catalog/variables.py +++ b/lib/catalog/owid/catalog/variables.py @@ -167,67 +167,67 @@ def _repr_html_(self):
{}
""".format(self.name, html) - def __add__(self, other: Union[Scalar, Series, "Variable"]) -> "Variable": + def __add__(self, other: Union[Scalar, Series, "Variable"]) -> "Variable": # type: ignore variable_name = self.name or UNNAMED_VARIABLE variable = Variable(super().__add__(other), name=variable_name) variable.metadata = combine_variables_metadata(variables=[self, other], operation="+", name=variable_name) return variable - def __iadd__(self, other: Union[Scalar, Series, "Variable"]) -> "Variable": + def __iadd__(self, other: Union[Scalar, Series, "Variable"]) -> "Variable": # type: ignore return self.__add__(other) - def __sub__(self, other: Union[Scalar, Series, "Variable"]) -> "Variable": + def __sub__(self, other: Union[Scalar, Series, "Variable"]) -> "Variable": # type: ignore variable_name = self.name or UNNAMED_VARIABLE variable = Variable(super().__sub__(other), name=variable_name) variable.metadata = combine_variables_metadata(variables=[self, other], operation="-", name=variable_name) return variable - def __isub__(self, other: Union[Scalar, Series, "Variable"]) -> "Variable": + def __isub__(self, other: Union[Scalar, Series, "Variable"]) -> "Variable": # type: ignore return self.__sub__(other) - def __mul__(self, other: Union[Scalar, Series, "Variable"]) -> "Variable": + def __mul__(self, other: Union[Scalar, Series, "Variable"]) -> "Variable": # type: ignore variable_name = self.name or UNNAMED_VARIABLE variable = Variable(super().__mul__(other), name=variable_name) variable.metadata = combine_variables_metadata(variables=[self, other], operation="*", name=variable_name) return variable - def __imul__(self, other: Union[Scalar, Series, "Variable"]) -> "Variable": + def __imul__(self, other: Union[Scalar, Series, "Variable"]) -> "Variable": # type: ignore return self.__mul__(other) - def __truediv__(self, other: Union[Scalar, Series, "Variable"]) -> "Variable": + def __truediv__(self, other: Union[Scalar, Series, "Variable"]) -> "Variable": # type: ignore variable_name = self.name or UNNAMED_VARIABLE variable = Variable(super().__truediv__(other), name=variable_name) variable.metadata = combine_variables_metadata(variables=[self, other], operation="/", name=variable_name) return variable - def __itruediv__(self, other: Union[Scalar, Series, "Variable"]) -> "Variable": + def __itruediv__(self, other: Union[Scalar, Series, "Variable"]) -> "Variable": # type: ignore return self.__truediv__(other) - def __floordiv__(self, other: Union[Scalar, Series, "Variable"]) -> "Variable": + def __floordiv__(self, other: Union[Scalar, Series, "Variable"]) -> "Variable": # type: ignore variable_name = self.name or UNNAMED_VARIABLE variable = Variable(super().__floordiv__(other), name=variable_name) variable.metadata = combine_variables_metadata(variables=[self, other], operation="//", name=variable_name) return variable - def __ifloordiv__(self, other: Union[Scalar, Series, "Variable"]) -> "Variable": + def __ifloordiv__(self, other: Union[Scalar, Series, "Variable"]) -> "Variable": # type: ignore return self.__floordiv__(other) - def __mod__(self, other: Union[Scalar, Series, "Variable"]) -> "Variable": + def __mod__(self, other: Union[Scalar, Series, "Variable"]) -> "Variable": # type: ignore variable_name = self.name or UNNAMED_VARIABLE variable = Variable(super().__mod__(other), name=variable_name) variable.metadata = combine_variables_metadata(variables=[self, other], operation="%", name=variable_name) return variable - def __imod__(self, other: Union[Scalar, Series, "Variable"]) -> 
"Variable": + def __imod__(self, other: Union[Scalar, Series, "Variable"]) -> "Variable": # type: ignore return self.__mod__(other) - def __pow__(self, other: Union[Scalar, Series, "Variable"]) -> "Variable": + def __pow__(self, other: Union[Scalar, Series, "Variable"]) -> "Variable": # type: ignore variable_name = self.name or UNNAMED_VARIABLE variable = Variable(super().__pow__(other), name=variable_name) variable.metadata = combine_variables_metadata(variables=[self, other], operation="**", name=variable_name) return variable - def __ipow__(self, other: Union[Scalar, Series, "Variable"]) -> "Variable": + def __ipow__(self, other: Union[Scalar, Series, "Variable"]) -> "Variable": # type: ignore return self.__pow__(other) def fillna(self, value=None, *args, **kwargs) -> "Variable": @@ -260,27 +260,27 @@ def dropna(self, *args, **kwargs) -> "Variable": ) return variable - def add(self, other: Union[Scalar, Series, "Variable"], *args, **kwargs) -> "Variable": + def add(self, other: Union[Scalar, Series, "Variable"], *args, **kwargs) -> "Variable": # type: ignore if args or kwargs: raise NotImplementedError("This feature may exist in pandas, but not in owid.catalog.") return self.__add__(other=other) - def sub(self, other: Union[Scalar, Series, "Variable"], *args, **kwargs) -> "Variable": + def sub(self, other: Union[Scalar, Series, "Variable"], *args, **kwargs) -> "Variable": # type: ignore if args or kwargs: raise NotImplementedError("This feature may exist in pandas, but not in owid.catalog.") return self.__sub__(other=other) - def mul(self, other: Union[Scalar, Series, "Variable"], *args, **kwargs) -> "Variable": + def mul(self, other: Union[Scalar, Series, "Variable"], *args, **kwargs) -> "Variable": # type: ignore if args or kwargs: raise NotImplementedError("This feature may exist in pandas, but not in owid.catalog.") return self.__mul__(other=other) - def truediv(self, other: Union[Scalar, Series, "Variable"], *args, **kwargs) -> "Variable": + def truediv(self, other: Union[Scalar, Series, "Variable"], *args, **kwargs) -> "Variable": # type: ignore if args or kwargs: raise NotImplementedError("This feature may exist in pandas, but not in owid.catalog.") return self.__truediv__(other=other) - def div(self, other: Union[Scalar, Series, "Variable"], *args, **kwargs) -> "Variable": + def div(self, other: Union[Scalar, Series, "Variable"], *args, **kwargs) -> "Variable": # type: ignore return self.truediv(other=other, *args, **kwargs) def pct_change(self, *args, **kwargs) -> "Variable": @@ -320,7 +320,7 @@ def update_log( def rolling(self, *args, **kwargs) -> "VariableRolling": """Rolling operation that preserves metadata.""" - return VariableRolling(super().rolling(*args, **kwargs), self.metadata.copy(), self.name) + return VariableRolling(super().rolling(*args, **kwargs), self.metadata.copy(), self.name) # type: ignore def copy_metadata(self, from_variable: "Variable", inplace: bool = False) -> Optional["Variable"]: return copy_metadata(to_variable=self, from_variable=from_variable, inplace=inplace) # type: ignore diff --git a/lib/catalog/poetry.lock b/lib/catalog/poetry.lock index b6a3b440edc..d545179ebcb 100644 --- a/lib/catalog/poetry.lock +++ b/lib/catalog/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. 
[[package]] name = "argh" @@ -1376,13 +1376,13 @@ diagrams = ["jinja2", "railroad-diagrams"] [[package]] name = "pyright" -version = "1.1.288" +version = "1.1.373" description = "Command line wrapper for pyright" optional = false python-versions = ">=3.7" files = [ - {file = "pyright-1.1.288-py3-none-any.whl", hash = "sha256:ab5da004e2de3b0567c685aa8d38bba68d872b1b4a20f1013400ace571a7efc7"}, - {file = "pyright-1.1.288.tar.gz", hash = "sha256:c45594c5833b01d5125bc291d2498d4ed0f2c2e3dd4fd8236fbdaf597099f617"}, + {file = "pyright-1.1.373-py3-none-any.whl", hash = "sha256:b805413227f2c209f27b14b55da27fe5e9fb84129c9f1eb27708a5d12f6f000e"}, + {file = "pyright-1.1.373.tar.gz", hash = "sha256:f41bcfc8b9d1802b09921a394d6ae1ce19694957b628bc657629688daf8a83ff"}, ] [package.dependencies] @@ -1481,7 +1481,6 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, - {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -1489,16 +1488,8 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, - {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, - {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = 
"sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, - {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, - {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -1515,7 +1506,6 @@ files = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, - {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -1523,7 +1513,6 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, - {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, @@ -2008,4 +1997,4 @@ viz = ["matplotlib", "nc-time-axis", "seaborn"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<4" -content-hash = "11848a884ed393ee7234ffa9bf949d2a58db62cc7abfc36e527e7e2b0f519426" +content-hash = "46b18e901be825f0acf871b4a0c4b2d13cd1e1845837ccab6f7021349d830fa5" diff --git a/lib/catalog/pyproject.toml b/lib/catalog/pyproject.toml index d124d8c5415..0056787ce06 100644 --- a/lib/catalog/pyproject.toml +++ b/lib/catalog/pyproject.toml @@ -35,7 +35,7 @@ pytest-cov = ">=2.12.1" watchdog = ">=2.1.5" argh = ">=0.26.2" # unpinning those would introduce tons of type errors -pyright = "1.1.288" +pyright 
= "1.1.373" pandas-stubs = "1.2.0.62" ruff = "0.1.6" @@ -43,6 +43,9 @@ ruff = "0.1.6" extend = "../../pyproject.toml" [tool.pyright] +reportIncompatibleMethodOverride = "none" +reportOverlappingOverload = "none" + [build-system] requires = ["poetry-core>=1.0.0"] diff --git a/lib/catalog/tests/mocking.py b/lib/catalog/tests/mocking.py index 6be8d42463e..cf42a213e5e 100644 --- a/lib/catalog/tests/mocking.py +++ b/lib/catalog/tests/mocking.py @@ -81,6 +81,12 @@ def mock(_type: type) -> Any: elif getattr(_type, "__origin__", None) == Literal: return random.choice(_type.__args__) # type: ignore + elif getattr(_type, "__origin__", None) == Union: + return mock(random.choice(_type.__args__)) + + elif _type is type(None): + return None + raise ValueError(f"don't know how to mock type: {_type}") diff --git a/lib/datautils/owid/datautils/dataframes.py b/lib/datautils/owid/datautils/dataframes.py index f47d139205b..8eac2d987a2 100644 --- a/lib/datautils/owid/datautils/dataframes.py +++ b/lib/datautils/owid/datautils/dataframes.py @@ -225,7 +225,7 @@ def are_equal( absolute_tolerance=absolute_tolerance, relative_tolerance=relative_tolerance, ) - all_values_equal = compared.all().all() + all_values_equal = compared.all().all() # type: ignore if not all_values_equal: summary += "\n* Values differ by more than the given absolute and relative" " tolerances." @@ -392,7 +392,7 @@ def multi_merge(dfs: List[pd.DataFrame], on: Union[List[str], str], how: str = " """ merged = dfs[0].copy() for df in dfs[1:]: - merged = pd.merge(merged, df, how=how, on=on) + merged = pd.merge(merged, df, how=how, on=on) # type: ignore return merged @@ -470,7 +470,7 @@ def map_series( missing = series_mapped.isnull() & (~series.isin(values_mapped_to_nan)) if missing.any(): # Replace those nans by their original values. 
- series_mapped.loc[missing] = series[missing] + series_mapped.loc[missing] = series[missing] # type: ignore[reportCallIssue] if warn_on_missing_mappings: unmapped = set(series) - set(mapping) diff --git a/lib/datautils/owid/datautils/google/api.py b/lib/datautils/owid/datautils/google/api.py index 03624995dec..981b5e65a2a 100644 --- a/lib/datautils/owid/datautils/google/api.py +++ b/lib/datautils/owid/datautils/google/api.py @@ -132,4 +132,4 @@ def list_files(self, parent_id: str) -> GoogleDriveFileList: request = f"'{parent_id}' in parents and trashed=false" # Get list of files files = self.drive.ListFile({"q": request}).GetList() - return files + return files # type: ignore[reportReturnType] diff --git a/lib/datautils/owid/datautils/google/sheets.py b/lib/datautils/owid/datautils/google/sheets.py index 0aafbce8078..b966d2a1676 100644 --- a/lib/datautils/owid/datautils/google/sheets.py +++ b/lib/datautils/owid/datautils/google/sheets.py @@ -54,8 +54,8 @@ def get(self, spreadsheet_id: str, worksheet_id: Optional[int] = None) -> Union[ """ ssheet = self.sheets.get(spreadsheet_id) if worksheet_id: - return ssheet.get(worksheet_id) - return ssheet + return ssheet.get(worksheet_id) # type: ignore[reportReturnType] + return ssheet # type: ignore[reportReturnType] def download_worksheet( self, @@ -84,7 +84,7 @@ def download_worksheet( """ sheet = self.get(spreadsheet_id, worksheet_id) if output_path: - sheet.to_csv(output_path, encoding=encoding, **kwargs) + sheet.to_csv(output_path, encoding=encoding, **kwargs) # type: ignore[reportCallIssue] else: make_filename = "%(title)s.csv" sheet.to_csv(make_filename=make_filename, encoding=encoding, **kwargs) @@ -129,5 +129,5 @@ def worksheet_to_df(self, spreadsheet_id: str, worksheet_id: int) -> pd.DataFram Dataframe with the data from the worksheet. """ ws = self.get(spreadsheet_id, worksheet_id) - df: pd.DataFrame = ws.to_frame() + df: pd.DataFrame = ws.to_frame() # type: ignore[reportAttributeAccessIssue] return df diff --git a/lib/datautils/poetry.lock b/lib/datautils/poetry.lock index 686d57c0fd0..3a531eb8504 100644 --- a/lib/datautils/poetry.lock +++ b/lib/datautils/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. 
[[package]] name = "alabaster" @@ -2170,13 +2170,13 @@ diagrams = ["jinja2", "railroad-diagrams"] [[package]] name = "pyright" -version = "1.1.288" +version = "1.1.373" description = "Command line wrapper for pyright" optional = false python-versions = ">=3.7" files = [ - {file = "pyright-1.1.288-py3-none-any.whl", hash = "sha256:ab5da004e2de3b0567c685aa8d38bba68d872b1b4a20f1013400ace571a7efc7"}, - {file = "pyright-1.1.288.tar.gz", hash = "sha256:c45594c5833b01d5125bc291d2498d4ed0f2c2e3dd4fd8236fbdaf597099f617"}, + {file = "pyright-1.1.373-py3-none-any.whl", hash = "sha256:b805413227f2c209f27b14b55da27fe5e9fb84129c9f1eb27708a5d12f6f000e"}, + {file = "pyright-1.1.373.tar.gz", hash = "sha256:f41bcfc8b9d1802b09921a394d6ae1ce19694957b628bc657629688daf8a83ff"}, ] [package.dependencies] @@ -2298,7 +2298,6 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, - {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -2306,16 +2305,8 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, - {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, - {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = 
"sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, - {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, - {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -2332,7 +2323,6 @@ files = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, - {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -2340,7 +2330,6 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, - {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, @@ -2950,4 +2939,4 @@ test = ["big-O", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-it [metadata] lock-version = "2.0" python-versions = ">=3.9,<4.0" -content-hash = "acbb4c176e2ce346657f271298da3ab55ebe27adce8ace9bb9e065eb56ffad9e" +content-hash = "4d481445b3eab71b784f4250893a07ac4fd9cb07efb6039899661c0f9f86d1bc" diff --git a/lib/datautils/pyproject.toml b/lib/datautils/pyproject.toml index b0c7b7bbdaa..87730e6a857 100644 --- a/lib/datautils/pyproject.toml +++ b/lib/datautils/pyproject.toml @@ -44,7 +44,7 @@ ipdb = ">=0.13.9" ipykernel = ">=6.13.0" openpyxl = ">=3.0.10" lxml = ">=4.9.1" -pyright = "1.1.288" 
+pyright = "1.1.373" ruff = "0.1.6" [tool.ruff] diff --git a/lib/datautils/tests/google/test_config.py b/lib/datautils/tests/google/test_config.py index 6ab64b81706..b1404869ea0 100644 --- a/lib/datautils/tests/google/test_config.py +++ b/lib/datautils/tests/google/test_config.py @@ -78,8 +78,8 @@ def test_google_config_init_error(): google_config_init(client_secrets_file) -@mock.patch.object(pydrive2.auth.GoogleAuth, "__init__", return_value=None) -@mock.patch.object(pydrive2.auth.GoogleAuth, "CommandLineAuth", return_value=None) +@mock.patch.object(pydrive2.auth.GoogleAuth, "__init__", return_value=None) # type: ignore[reportAttributeAccessIssue] +@mock.patch.object(pydrive2.auth.GoogleAuth, "CommandLineAuth", return_value=None) # type: ignore[reportAttributeAccessIssue] def test_google_config_init_1(mocker_google_1, mocker_google_2): # with tempfile.TemporaryDirectory() as config_dir: config_dir = next(tempfile._get_candidate_names()) # type: ignore diff --git a/lib/datautils/tests/google/test_sheets.py b/lib/datautils/tests/google/test_sheets.py index 6427f63e83b..8783e015be5 100644 --- a/lib/datautils/tests/google/test_sheets.py +++ b/lib/datautils/tests/google/test_sheets.py @@ -55,7 +55,7 @@ def test_init(self, mock_init): def test_sheets(self, mock_init, mock_sheets_from_files): api = GSheetsApi(self.clients_secrets, self.credentials_path) _ = api.sheets - assert api.sheets.name == "hello" + assert api.sheets.name == "hello" # type: ignore[reportAttributeAccessIssue] @mock.patch("gsheets.Sheets.from_files", side_effect=MockSheets.from_files) @mock.patch("gsheets.Sheets.get", side_effect=MockSheets.get) diff --git a/lib/datautils/tests/io/test_df.py b/lib/datautils/tests/io/test_df.py index 2b255c2784a..7e8f29bf280 100644 --- a/lib/datautils/tests/io/test_df.py +++ b/lib/datautils/tests/io/test_df.py @@ -13,7 +13,7 @@ class TestLoadDf: - df_original = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "b", "c"]) + df_original = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "b", "c"]) # type: ignore def test_from_file_basic(self, tmpdir): output_methods = { diff --git a/lib/repack/poetry.lock b/lib/repack/poetry.lock index 2a2d760f204..017190a82ca 100644 --- a/lib/repack/poetry.lock +++ b/lib/repack/poetry.lock @@ -193,13 +193,13 @@ testing = ["pytest", "pytest-benchmark"] [[package]] name = "pyright" -version = "1.1.365" +version = "1.1.373" description = "Command line wrapper for pyright" optional = false python-versions = ">=3.7" files = [ - {file = "pyright-1.1.365-py3-none-any.whl", hash = "sha256:194d767a039f9034376b7ec8423841880ac6efdd061f3e283b4ad9fcd484a659"}, - {file = "pyright-1.1.365.tar.gz", hash = "sha256:d7e69000939aed4bf823707086c30c84c005bdd39fac2dfb370f0e5be16c2ef2"}, + {file = "pyright-1.1.373-py3-none-any.whl", hash = "sha256:b805413227f2c209f27b14b55da27fe5e9fb84129c9f1eb27708a5d12f6f000e"}, + {file = "pyright-1.1.373.tar.gz", hash = "sha256:f41bcfc8b9d1802b09921a394d6ae1ce19694957b628bc657629688daf8a83ff"}, ] [package.dependencies] @@ -318,4 +318,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = ">=3.9" -content-hash = "529eb9776d8049cf0a0998146761d7b0c9afc679b56525d7c57fcdd5feeccc1b" +content-hash = "eb08f539f8afc33c41296f42099951825103f79718f7987377ab91915a30d170" diff --git a/lib/repack/pyproject.toml b/lib/repack/pyproject.toml index dd1b6f1fee3..b26a7424933 100644 --- a/lib/repack/pyproject.toml +++ b/lib/repack/pyproject.toml @@ -16,7 +16,7 @@ pandas = "^2.2.1" [tool.poetry.group.dev.dependencies] pytest = ">=7.2.0" -pyright = 
">=1.1.285" +pyright = "1.1.373" ruff = "0.1.6" [tool.ruff] diff --git a/poetry.lock b/poetry.lock index c3b2d40cd50..1cb91ceb593 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. [[package]] name = "affine" @@ -5347,13 +5347,13 @@ certifi = "*" [[package]] name = "pyright" -version = "1.1.288" +version = "1.1.373" description = "Command line wrapper for pyright" optional = false python-versions = ">=3.7" files = [ - {file = "pyright-1.1.288-py3-none-any.whl", hash = "sha256:ab5da004e2de3b0567c685aa8d38bba68d872b1b4a20f1013400ace571a7efc7"}, - {file = "pyright-1.1.288.tar.gz", hash = "sha256:c45594c5833b01d5125bc291d2498d4ed0f2c2e3dd4fd8236fbdaf597099f617"}, + {file = "pyright-1.1.373-py3-none-any.whl", hash = "sha256:b805413227f2c209f27b14b55da27fe5e9fb84129c9f1eb27708a5d12f6f000e"}, + {file = "pyright-1.1.373.tar.gz", hash = "sha256:f41bcfc8b9d1802b09921a394d6ae1ce19694957b628bc657629688daf8a83ff"}, ] [package.dependencies] @@ -6992,7 +6992,7 @@ files = [ ] [package.dependencies] -greenlet = {version = "!=0.4.17", markers = "python_version < \"3.13\" and (platform_machine == \"win32\" or platform_machine == \"WIN32\" or platform_machine == \"AMD64\" or platform_machine == \"amd64\" or platform_machine == \"x86_64\" or platform_machine == \"ppc64le\" or platform_machine == \"aarch64\")"} +greenlet = {version = "!=0.4.17", markers = "python_version < \"3.13\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")"} typing-extensions = ">=4.6.0" [package.extras] @@ -8375,4 +8375,4 @@ test = ["pytest", "pytest-cov"] [metadata] lock-version = "2.0" python-versions = ">=3.10, <3.12" -content-hash = "34ba37a861e229ba8035b2e6be9d61fc1c17fdfb153c82228dc92d73b36e37e9" +content-hash = "504c3f7b7b0dfaddd6112254c3733102bbd488a7a92df5ba60b1ed99090da07d" diff --git a/pyproject.toml b/pyproject.toml index 16691c5fc7e..e115ba0fc51 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -105,7 +105,7 @@ hydra-core = ">=1.3.0" boto3-stubs = {version = "^1.34.154", extras = ["s3"]} gspread = "^5.10.0" # unpinning those would introduce tons of type errors -pyright = "1.1.288" +pyright = "1.1.373" pandas-stubs = "1.2.0.62" jsonref = "^1.1.0" mkdocs-jupyter = "^0.24.2" @@ -153,6 +153,8 @@ exclude = [ "playground/", ".cachedir/", ".cache/", + "etl/steps", + "snapshots/", ] [tool.pytest.ini_options] diff --git a/scripts/add_ignore_pyright.py b/scripts/add_ignore_pyright.py new file mode 100755 index 00000000000..a6fb8c8a173 --- /dev/null +++ b/scripts/add_ignore_pyright.py @@ -0,0 +1,40 @@ +import json +import sys + +"""Usage: + +poetry run pyright owid tests --outputjson | python ../../scripts/add_ignore_pyright.py +""" + + +def add_type_ignore_to_lines(file_path, line_number, rule): + """Reads a file, adds # type: ignore to the specified line, and writes it back.""" + with open(file_path, "r") as file: + lines = file.readlines() + + # We need to insert # type: ignore[rule] at the end of the line + target_line = lines[line_number - 1].rstrip() # Pyright uses 1-based indexing for lines + if "# type: ignore" not in target_line: + lines[line_number - 1] = f"{target_line} # type: ignore[{rule}]\n" + + with open(file_path, "w") as file: + 
file.writelines(lines) + + +def process_pyright_json_from_pipe(): + """Reads JSON input from stdin (pipe) and adds # type: ignore for errors.""" + data = json.load(sys.stdin) # Reading from stdin + + for diagnostic in data.get("generalDiagnostics", []): + file_path = diagnostic["file"] + line_number = diagnostic["range"]["start"]["line"] + 1 # Pyright uses 0-based index + rule = diagnostic["rule"] + print(f"Adding # type: ignore[{rule}] to {file_path} on line {line_number}") + + # Add the # type: ignore comment to the corresponding file and line + add_type_ignore_to_lines(file_path, line_number, rule) + + +# Entry point for reading from stdin +if __name__ == "__main__": + process_pyright_json_from_pipe() diff --git a/snapshots/artificial_intelligence/2023-06-14/ai_private_investment.py b/snapshots/artificial_intelligence/2023-06-14/ai_private_investment.py index 98593d34d0c..dd732c9735b 100644 --- a/snapshots/artificial_intelligence/2023-06-14/ai_private_investment.py +++ b/snapshots/artificial_intelligence/2023-06-14/ai_private_investment.py @@ -23,7 +23,7 @@ def main(upload: bool) -> None: # Create a new snapshot. snap = Snapshot(f"artificial_intelligence/{SNAPSHOT_VERSION}/ai_private_investment.csv") all_dfs = get_data() - df_to_file(all_dfs, file_path=snap.path) + df_to_file(all_dfs, file_path=snap.path) # type: ignore[reportArgumentType] # Add file to DVC and upload to S3. snap.dvc_add(upload=upload) diff --git a/snapshots/biodiversity/2023-08-14/iucn_animal.py b/snapshots/biodiversity/2023-08-14/iucn_animal.py index 3178e0a69cb..b410e2ded0a 100644 --- a/snapshots/biodiversity/2023-08-14/iucn_animal.py +++ b/snapshots/biodiversity/2023-08-14/iucn_animal.py @@ -41,7 +41,7 @@ def main(upload: bool) -> None: driver = webdriver.Chrome(options=chrome_options) if snap.metadata.source is not None: # Only access "url" if snap.metadata.source is not None - driver.get(snap.metadata.source.url) + driver.get(snap.metadata.source.url) # type: ignore[reportArgumentType] else: # Handle the case when snap.metadata.source is None print("snap.metadata.source is None, cannot access 'url'.") diff --git a/snapshots/cardiovascular_diseases/2023-10-10/esc.py b/snapshots/cardiovascular_diseases/2023-10-10/esc.py index 4392bb8a5e8..3481fb49975 100644 --- a/snapshots/cardiovascular_diseases/2023-10-10/esc.py +++ b/snapshots/cardiovascular_diseases/2023-10-10/esc.py @@ -68,7 +68,7 @@ def main(upload: bool) -> None: dfs.append(df) all_dfs = pd.concat(dfs, ignore_index=True) - df_to_file(all_dfs, file_path=snap.path) + df_to_file(all_dfs, file_path=snap.path) # type: ignore[reportArgumentType] # Download data from source, add file to DVC and upload to S3. snap.dvc_add(upload=upload) diff --git a/snapshots/climate/2024-01-31/climate_change_impacts.py b/snapshots/climate/2024-01-31/climate_change_impacts.py index 61063ec48b8..f406eac8dbf 100644 --- a/snapshots/climate/2024-01-31/climate_change_impacts.py +++ b/snapshots/climate/2024-01-31/climate_change_impacts.py @@ -129,7 +129,7 @@ def find_date_published(snap: Snapshot) -> Optional[str]: # Extract text from data file. url = snap.metadata.origin.url_download # type: ignore - response = requests.get(url) + response = requests.get(url) # type: ignore[reportArgumentType] for line in response.text.split("\n"): # At the top of the file, there is a line like "Last updated 11 December 2023 by J.E. Dore". if "Last updated" in line: @@ -148,7 +148,7 @@ def find_date_published(snap: Snapshot) -> Optional[str]: # Extract text from data file. 
url = snap.metadata.origin.url_download # type: ignore - response = requests.get(url) + response = requests.get(url) # type: ignore[reportArgumentType] for line in response.text.split("\n"): # At the top of the file, there is a line like "Last updated 11 December 2023 by J.E. Dore". if "File Creation" in line: diff --git a/snapshots/climate/2024-02-22/weekly_wildfires_2003_2023.py b/snapshots/climate/2024-02-22/weekly_wildfires_2003_2023.py index 825dd0017dc..dbe6fd3f990 100644 --- a/snapshots/climate/2024-02-22/weekly_wildfires_2003_2023.py +++ b/snapshots/climate/2024-02-22/weekly_wildfires_2003_2023.py @@ -164,7 +164,7 @@ def main(upload: bool) -> None: # Combine both fires and emissions data into a final DataFrame. df_final = pd.concat([dfs_fires, dfs_emissions]) # Save the final DataFrame to the specified file path in the snapshot. - df_to_file(df_final, file_path=snap.path) + df_to_file(df_final, file_path=snap.path) # type: ignore[reportArgumentType] # Add the file to DVC and optionally upload it to S3, based on the `upload` parameter. snap.dvc_add(upload=upload) diff --git a/snapshots/climate/2024-03-11/climate_change_impacts.py b/snapshots/climate/2024-03-11/climate_change_impacts.py index 61063ec48b8..f406eac8dbf 100644 --- a/snapshots/climate/2024-03-11/climate_change_impacts.py +++ b/snapshots/climate/2024-03-11/climate_change_impacts.py @@ -129,7 +129,7 @@ def find_date_published(snap: Snapshot) -> Optional[str]: # Extract text from data file. url = snap.metadata.origin.url_download # type: ignore - response = requests.get(url) + response = requests.get(url) # type: ignore[reportArgumentType] for line in response.text.split("\n"): # At the top of the file, there is a line like "Last updated 11 December 2023 by J.E. Dore". if "Last updated" in line: @@ -148,7 +148,7 @@ def find_date_published(snap: Snapshot) -> Optional[str]: # Extract text from data file. url = snap.metadata.origin.url_download # type: ignore - response = requests.get(url) + response = requests.get(url) # type: ignore[reportArgumentType] for line in response.text.split("\n"): # At the top of the file, there is a line like "Last updated 11 December 2023 by J.E. Dore". if "File Creation" in line: diff --git a/snapshots/climate/2024-04-17/climate_change_impacts.py b/snapshots/climate/2024-04-17/climate_change_impacts.py index 61063ec48b8..f406eac8dbf 100644 --- a/snapshots/climate/2024-04-17/climate_change_impacts.py +++ b/snapshots/climate/2024-04-17/climate_change_impacts.py @@ -129,7 +129,7 @@ def find_date_published(snap: Snapshot) -> Optional[str]: # Extract text from data file. url = snap.metadata.origin.url_download # type: ignore - response = requests.get(url) + response = requests.get(url) # type: ignore[reportArgumentType] for line in response.text.split("\n"): # At the top of the file, there is a line like "Last updated 11 December 2023 by J.E. Dore". if "Last updated" in line: @@ -148,7 +148,7 @@ def find_date_published(snap: Snapshot) -> Optional[str]: # Extract text from data file. url = snap.metadata.origin.url_download # type: ignore - response = requests.get(url) + response = requests.get(url) # type: ignore[reportArgumentType] for line in response.text.split("\n"): # At the top of the file, there is a line like "Last updated 11 December 2023 by J.E. Dore". 
if "File Creation" in line: diff --git a/snapshots/climate/2024-05-20/climate_change_impacts.py b/snapshots/climate/2024-05-20/climate_change_impacts.py index 61063ec48b8..f406eac8dbf 100644 --- a/snapshots/climate/2024-05-20/climate_change_impacts.py +++ b/snapshots/climate/2024-05-20/climate_change_impacts.py @@ -129,7 +129,7 @@ def find_date_published(snap: Snapshot) -> Optional[str]: # Extract text from data file. url = snap.metadata.origin.url_download # type: ignore - response = requests.get(url) + response = requests.get(url) # type: ignore[reportArgumentType] for line in response.text.split("\n"): # At the top of the file, there is a line like "Last updated 11 December 2023 by J.E. Dore". if "Last updated" in line: @@ -148,7 +148,7 @@ def find_date_published(snap: Snapshot) -> Optional[str]: # Extract text from data file. url = snap.metadata.origin.url_download # type: ignore - response = requests.get(url) + response = requests.get(url) # type: ignore[reportArgumentType] for line in response.text.split("\n"): # At the top of the file, there is a line like "Last updated 11 December 2023 by J.E. Dore". if "File Creation" in line: diff --git a/snapshots/climate/2024-07-23/climate_change_impacts.py b/snapshots/climate/2024-07-23/climate_change_impacts.py index 61063ec48b8..f406eac8dbf 100644 --- a/snapshots/climate/2024-07-23/climate_change_impacts.py +++ b/snapshots/climate/2024-07-23/climate_change_impacts.py @@ -129,7 +129,7 @@ def find_date_published(snap: Snapshot) -> Optional[str]: # Extract text from data file. url = snap.metadata.origin.url_download # type: ignore - response = requests.get(url) + response = requests.get(url) # type: ignore[reportArgumentType] for line in response.text.split("\n"): # At the top of the file, there is a line like "Last updated 11 December 2023 by J.E. Dore". if "Last updated" in line: @@ -148,7 +148,7 @@ def find_date_published(snap: Snapshot) -> Optional[str]: # Extract text from data file. url = snap.metadata.origin.url_download # type: ignore - response = requests.get(url) + response = requests.get(url) # type: ignore[reportArgumentType] for line in response.text.split("\n"): # At the top of the file, there is a line like "Last updated 11 December 2023 by J.E. Dore". if "File Creation" in line: diff --git a/snapshots/climate/latest/weekly_wildfires.py b/snapshots/climate/latest/weekly_wildfires.py index c4261e3537d..3ee9c51ba59 100644 --- a/snapshots/climate/latest/weekly_wildfires.py +++ b/snapshots/climate/latest/weekly_wildfires.py @@ -166,7 +166,7 @@ def main(upload: bool) -> None: # Combine both fires and emissions data into a final DataFrame. df_final = pd.concat([dfs_fires, dfs_emissions]) # Save the final DataFrame to the specified file path in the snapshot. - df_to_file(df_final, file_path=snap.path) + df_to_file(df_final, file_path=snap.path) # type: ignore[reportArgumentType] # Add date_accessed snap = modify_metadata(snap) diff --git a/snapshots/forests/2024-07-10/tree_cover_loss_by_driver.py b/snapshots/forests/2024-07-10/tree_cover_loss_by_driver.py index b106c7b7e82..50639f7a73a 100644 --- a/snapshots/forests/2024-07-10/tree_cover_loss_by_driver.py +++ b/snapshots/forests/2024-07-10/tree_cover_loss_by_driver.py @@ -48,7 +48,7 @@ def main(upload: bool) -> None: tb = pd.concat(tables) # Download data from source, add file to DVC and upload to S3. 
- snap.create_snapshot(upload=upload, data=tb) + snap.create_snapshot(upload=upload, data=tb) # type: ignore[reportArgumentType] if __name__ == "__main__": diff --git a/snapshots/health/2023-08-09/unaids.py b/snapshots/health/2023-08-09/unaids.py index e7b75a2a985..ad3091695df 100644 --- a/snapshots/health/2023-08-09/unaids.py +++ b/snapshots/health/2023-08-09/unaids.py @@ -98,7 +98,7 @@ def get_all_data_from_api(path: str) -> pd.DataFrame: # Define requests session session = requests.Session() - retry = Retry(connect=3, backoff_factor=0.5) + retry = Retry(connect=3, backoff_factor=0.5) # type: ignore[reportArgumentType] adapter = HTTPAdapter(max_retries=retry) session.mount("http://", adapter) session.mount("https://", adapter) diff --git a/snapshots/ihme_gbd/2024-05-20/gbd_cause.py b/snapshots/ihme_gbd/2024-05-20/gbd_cause.py index da248df1477..ab60d4d31f8 100644 --- a/snapshots/ihme_gbd/2024-05-20/gbd_cause.py +++ b/snapshots/ihme_gbd/2024-05-20/gbd_cause.py @@ -25,7 +25,7 @@ import pandas as pd from owid.datautils.dataframes import concatenate from owid.repack import repack_frame -from shared import download_data +from shared import download_data # type: ignore[reportMissingImports] from structlog import get_logger from etl.snapshot import Snapshot diff --git a/snapshots/ihme_gbd/2024-05-20/gbd_child_mortality.py b/snapshots/ihme_gbd/2024-05-20/gbd_child_mortality.py index d81b11ad59a..d669455d64d 100644 --- a/snapshots/ihme_gbd/2024-05-20/gbd_child_mortality.py +++ b/snapshots/ihme_gbd/2024-05-20/gbd_child_mortality.py @@ -43,7 +43,7 @@ import pandas as pd from owid.datautils.dataframes import concatenate from owid.repack import repack_frame -from shared import download_data +from shared import download_data # type: ignore[reportMissingImports] from structlog import get_logger from etl.snapshot import Snapshot diff --git a/snapshots/ihme_gbd/2024-05-20/gbd_drug_risk.py b/snapshots/ihme_gbd/2024-05-20/gbd_drug_risk.py index b97f1881de5..4363b21e6a5 100644 --- a/snapshots/ihme_gbd/2024-05-20/gbd_drug_risk.py +++ b/snapshots/ihme_gbd/2024-05-20/gbd_drug_risk.py @@ -34,7 +34,7 @@ import pandas as pd from owid.datautils.dataframes import concatenate from owid.repack import repack_frame -from shared import download_data +from shared import download_data # type: ignore[reportMissingImports] from structlog import get_logger from etl.snapshot import Snapshot diff --git a/snapshots/ihme_gbd/2024-05-20/gbd_mental_health.py b/snapshots/ihme_gbd/2024-05-20/gbd_mental_health.py index 17817618cbf..9ee4afb93bd 100644 --- a/snapshots/ihme_gbd/2024-05-20/gbd_mental_health.py +++ b/snapshots/ihme_gbd/2024-05-20/gbd_mental_health.py @@ -24,7 +24,7 @@ import pandas as pd from owid.datautils.dataframes import concatenate from owid.repack import repack_frame -from shared import download_data +from shared import download_data # type: ignore[reportMissingImports] from structlog import get_logger from etl.snapshot import Snapshot diff --git a/snapshots/ihme_gbd/2024-05-20/gbd_mental_health_burden.py b/snapshots/ihme_gbd/2024-05-20/gbd_mental_health_burden.py index 636199b2174..510b37bea82 100644 --- a/snapshots/ihme_gbd/2024-05-20/gbd_mental_health_burden.py +++ b/snapshots/ihme_gbd/2024-05-20/gbd_mental_health_burden.py @@ -24,7 +24,7 @@ import pandas as pd from owid.datautils.dataframes import concatenate from owid.repack import repack_frame -from shared import download_data +from shared import download_data # type: ignore[reportMissingImports] from structlog import get_logger from etl.snapshot import 
Snapshot diff --git a/snapshots/ihme_gbd/2024-05-20/gbd_prevalence.py b/snapshots/ihme_gbd/2024-05-20/gbd_prevalence.py index 2956d6e3c5a..da6ab0f5cc8 100644 --- a/snapshots/ihme_gbd/2024-05-20/gbd_prevalence.py +++ b/snapshots/ihme_gbd/2024-05-20/gbd_prevalence.py @@ -24,7 +24,7 @@ import pandas as pd from owid.datautils.dataframes import concatenate from owid.repack import repack_frame -from shared import download_data +from shared import download_data # type: ignore[reportMissingImports] from structlog import get_logger from etl.snapshot import Snapshot diff --git a/snapshots/ihme_gbd/2024-05-20/gbd_risk.py b/snapshots/ihme_gbd/2024-05-20/gbd_risk.py index 169c8a74246..4898b4e17da 100644 --- a/snapshots/ihme_gbd/2024-05-20/gbd_risk.py +++ b/snapshots/ihme_gbd/2024-05-20/gbd_risk.py @@ -37,7 +37,7 @@ import pandas as pd from owid.datautils.dataframes import concatenate from owid.repack import repack_frame -from shared import download_data +from shared import download_data # type: ignore[reportMissingImports] from structlog import get_logger from etl.snapshot import Snapshot diff --git a/snapshots/ihme_gbd/2024-05-20/impairments.py b/snapshots/ihme_gbd/2024-05-20/impairments.py index 5a75bdf3d0a..d73d0442231 100644 --- a/snapshots/ihme_gbd/2024-05-20/impairments.py +++ b/snapshots/ihme_gbd/2024-05-20/impairments.py @@ -27,7 +27,7 @@ import pandas as pd from owid.datautils.dataframes import concatenate from owid.repack import repack_frame -from shared import download_data +from shared import download_data # type: ignore[reportMissingImports] from structlog import get_logger from etl.snapshot import Snapshot diff --git a/snapshots/news/2024-05-07/guardian_mentions.py b/snapshots/news/2024-05-07/guardian_mentions.py index 91d6382a2a7..17fcdd0d858 100644 --- a/snapshots/news/2024-05-07/guardian_mentions.py +++ b/snapshots/news/2024-05-07/guardian_mentions.py @@ -707,7 +707,7 @@ def get_country_name_variations(country_names: Optional[Set[str]] = None): "United Kingdom": "UK", "United States": "US", } - name_variations = {country_names_guardian.get(c, c): names for c, names in name_variations.items()} + name_variations = {country_names_guardian.get(c, c): names for c, names in name_variations.items()} # type: ignore[reportCallIssue] # Sort names_sorted = sorted(name_variations) # type: ignore diff --git a/snapshots/space/2024-01-04/object_launches.py b/snapshots/space/2024-01-04/object_launches.py index c7806df16a6..7454c653a50 100644 --- a/snapshots/space/2024-01-04/object_launches.py +++ b/snapshots/space/2024-01-04/object_launches.py @@ -54,7 +54,7 @@ def main(upload: bool) -> None: assert len(data) == n # Add file to DVC and upload to S3. - snap.create_snapshot(data=data, upload=upload) + snap.create_snapshot(data=data, upload=upload) # type: ignore[reportArgumentType] if __name__ == "__main__": diff --git a/snapshots/statins/2023-10-05/bmj_2022.py b/snapshots/statins/2023-10-05/bmj_2022.py index 6ee3ed9e801..0a5230df6c5 100644 --- a/snapshots/statins/2023-10-05/bmj_2022.py +++ b/snapshots/statins/2023-10-05/bmj_2022.py @@ -39,7 +39,7 @@ def main(upload: bool) -> None: # Attempt to fetch data from the source URL. if snap.metadata.origin is not None: - response = requests.get(snap.metadata.origin.url_download, headers=headers) + response = requests.get(snap.metadata.origin.url_download, headers=headers) # type: ignore[reportArgumentType] # Proceed only if the request was successful (HTTP Status Code 200). 
if response.status_code == 200: diff --git a/snapshots/statins/2023-10-05/lancet_2022.py b/snapshots/statins/2023-10-05/lancet_2022.py index ee1f8216f1d..8cd5426610c 100644 --- a/snapshots/statins/2023-10-05/lancet_2022.py +++ b/snapshots/statins/2023-10-05/lancet_2022.py @@ -36,7 +36,7 @@ def main(upload: bool) -> None: # Attempt to fetch data from the source URL. if snap.metadata.origin is not None: - response = requests.get(snap.metadata.origin.url_download, headers=headers) + response = requests.get(snap.metadata.origin.url_download, headers=headers) # type: ignore[reportArgumentType] # Proceed only if the request was successful (HTTP Status Code 200). if response.status_code == 200: diff --git a/snapshots/usgs/2024-07-15/historical_statistics_for_mineral_and_material_commodities.py b/snapshots/usgs/2024-07-15/historical_statistics_for_mineral_and_material_commodities.py index 854dc10b7e2..911e9df6b16 100644 --- a/snapshots/usgs/2024-07-15/historical_statistics_for_mineral_and_material_commodities.py +++ b/snapshots/usgs/2024-07-15/historical_statistics_for_mineral_and_material_commodities.py @@ -132,12 +132,14 @@ def download_all_files(df: pd.DataFrame, snapshot_path: Path) -> None: for index, row in tqdm(df.iterrows(), total=df.shape[0], desc="Downloading files"): if row["supply_demand_url"] != "NA": download_file_from_url( - url=row["supply_demand_url"], local_path=supply_demand_dir / f"{underscore(row['commodity'])}.xlsx" + url=row["supply_demand_url"], + local_path=supply_demand_dir / f"{underscore(row['commodity'])}.xlsx", # type: ignore[reportArgumentType] ) if row["end_use_url"] != "NA": download_file_from_url( - url=row["end_use_url"], local_path=end_use_dir / f"{underscore(row['commodity'])}.xlsx" + url=row["end_use_url"], + local_path=end_use_dir / f"{underscore(row['commodity'])}.xlsx", # type: ignore[reportArgumentType] ) # Create the zip file at the snapshot path. diff --git a/snapshots/wb/2023-09-19/us_cpi.py b/snapshots/wb/2023-09-19/us_cpi.py index 85e932b71ea..5002fe9d18a 100644 --- a/snapshots/wb/2023-09-19/us_cpi.py +++ b/snapshots/wb/2023-09-19/us_cpi.py @@ -22,7 +22,7 @@ def main(upload: bool) -> None: url = snap.metadata.origin.url_download # type: ignore df = import_US_cpi_API(url) # Save the resulting dataframe to a single csv file - df_to_file(df, file_path=snap.path) + df_to_file(df, file_path=snap.path) # type: ignore[reportArgumentType] # Add the snapshot to DVC snap.dvc_add(upload=upload) diff --git a/snapshots/wb/2024-01-17/pip_api.py b/snapshots/wb/2024-01-17/pip_api.py index 31a81deb553..c2cace2143b 100644 --- a/snapshots/wb/2024-01-17/pip_api.py +++ b/snapshots/wb/2024-01-17/pip_api.py @@ -41,7 +41,7 @@ import requests from botocore.exceptions import ClientError from joblib import Memory -from owid.catalog import connect_r2_cached +from owid.catalog import connect_r2_cached # type: ignore[reportAttributeAccessIssue] from structlog import get_logger from tenacity import retry from tenacity.stop import stop_after_attempt diff --git a/snapshots/wb/2024-03-26/food_prices_for_nutrition.py b/snapshots/wb/2024-03-26/food_prices_for_nutrition.py index 20c8ea3d804..15f477867cc 100644 --- a/snapshots/wb/2024-03-26/food_prices_for_nutrition.py +++ b/snapshots/wb/2024-03-26/food_prices_for_nutrition.py @@ -33,7 +33,7 @@ def main(upload: bool) -> None: # Load data for each variable. # Get data for all variables one by one. - data = [] + data: list[pd.DataFrame] = [] # Note: This takes a few minutes and could possibly be parallelized. 
     for variable in tqdm(variables.items):
         # Load data for current variable and add it to the list of all dataframes.
diff --git a/snapshots/wb/2024-03-27/pip_api.py b/snapshots/wb/2024-03-27/pip_api.py
index 862d218ecc8..c231e912907 100644
--- a/snapshots/wb/2024-03-27/pip_api.py
+++ b/snapshots/wb/2024-03-27/pip_api.py
@@ -258,7 +258,7 @@ def _fetch_csv(url: str) -> pd.DataFrame:
 
     # try to get it from cache
     try:
-        obj = r2.get_object(Bucket=r2_bucket, Key=r2_key)
+        obj = r2.get_object(Bucket=r2_bucket, Key=r2_key)  # type: ignore[reportAttributeAccessIssue]
         s = obj["Body"].read().decode("utf-8")
         # we might have cached invalid responses, in that case fetch it again
         if "Server Error" not in s:
@@ -275,7 +275,7 @@ def _fetch_csv(url: str) -> pd.DataFrame:
     log.info("fetch_csv.success", url=url, t=response.elapsed.total_seconds())
 
     # save the result to R2 cache
-    r2.put_object(
+    r2.put_object(  # type: ignore[reportAttributeAccessIssue]
         Body=response.content,
         Bucket=r2_bucket,
         Key=r2_key,
diff --git a/snapshots/who/2022-09-30/ghe.py b/snapshots/who/2022-09-30/ghe.py
index 52106be4778..49a0ae0372f 100644
--- a/snapshots/who/2022-09-30/ghe.py
+++ b/snapshots/who/2022-09-30/ghe.py
@@ -66,7 +66,7 @@ def download_cause_data(causes) -> pd.DataFrame:
     all_df = pd.concat(all_data)
     all_df = all_df.reset_index()
-    return repack_frame(all_df)
+    return repack_frame(all_df)  # type: ignore[reportArgumentType]
 
 
 if __name__ == "__main__":
diff --git a/snapshots/who/2024-07-30/ghe.py b/snapshots/who/2024-07-30/ghe.py
index 467742ca26a..e0ef674f57f 100644
--- a/snapshots/who/2024-07-30/ghe.py
+++ b/snapshots/who/2024-07-30/ghe.py
@@ -112,7 +112,7 @@ def download_cause_data(causes) -> pd.DataFrame:
     all_df = pd.concat(all_data)
     all_df = all_df.reset_index()
-    return repack_frame(all_df)
+    return repack_frame(all_df)  # type: ignore[reportArgumentType]
 
 
 if __name__ == "__main__":
diff --git a/tests/data_helpers/test_geo.py b/tests/data_helpers/test_geo.py
index 3f9aed78550..637cd6e6d38 100644
--- a/tests/data_helpers/test_geo.py
+++ b/tests/data_helpers/test_geo.py
@@ -5,11 +5,12 @@
 import json
 import unittest
 import warnings
+from typing import cast
 from unittest.mock import mock_open, patch
 
 import numpy as np
 import pandas as pd
-from owid.catalog import Table
+from owid.catalog import Dataset, Table
 from owid.datautils import dataframes
 from pytest import warns
 from structlog.testing import capture_logs
@@ -851,10 +852,12 @@ def __getitem__(self, name: str) -> Table:
             return mock_tb_income_groups
         elif name == "income_groups_latest":
             return mock_tb_income_groups_latest
+        else:
+            raise KeyError(f"Table {name} not found.")
 
 
-ds_regions = MockRegionsDataset()
-ds_income_groups = MockIncomeGroupsDataset()
+ds_regions = cast(Dataset, MockRegionsDataset())
+ds_income_groups = cast(Dataset, MockIncomeGroupsDataset())
 
 
 class TestAddRegionsToTable(unittest.TestCase):