From 3ce39a14252743178112ef0458009e02689fba5f Mon Sep 17 00:00:00 2001 From: Stefan Krawczyk Date: Mon, 19 Aug 2024 17:48:56 -0700 Subject: [PATCH] Completes the flake8-bugbear (B) rules (#1099) * Completes the Ruff flake8-bugbear (B) rules Refactors the codebase to take these rules into account; illustrative sketches of the recurring fix patterns appear inline below. --- .../skrawcz/customize_embeddings/__init__.py | 2 +- .../user/zilto/lancedb_vdb/__init__.py | 4 +-- .../zilto/nixtla_statsforecast/__init__.py | 4 +-- .../contrib/user/zilto/webscraper/__init__.py | 4 +-- .../user/zilto/xgboost_optuna/__init__.py | 4 ++- contrib/setup.py | 4 +-- .../LLM_Workflows/GraphRAG/ingest_fighters.py | 2 +- .../image_telephone/streamlit.py | 3 +- .../knowledge_retrieval/state.py | 2 +- .../knowledge_retrieval/summarize_text.py | 2 +- .../backend/server.py | 2 +- .../spark/doc_pipeline.py | 4 +-- .../spark/spark_pipeline.py | 4 ++- .../dagster/dagster_code/tutorial/assets.py | 2 +- .../tutorial/resources/__init__.py | 2 +- examples/dagster/hamilton_code/dataflow.py | 2 +- examples/dagster/hamilton_code/mock_api.py | 2 +- examples/decoupling_io/adapters.py | 4 +-- examples/dlt/slack/__init__.py | 28 +++++++++++-------- .../probability_estimation.py | 5 ++-- examples/people_data_labs/analysis.py | 2 +- examples/prefect/run.py | 4 +-- .../zone_features__spark_v1.py | 7 ++--- hamilton/cli/__main__.py | 6 ++-- hamilton/cli/logic.py | 10 +++---- hamilton/dataflows/__init__.py | 4 +-- hamilton/execution/executors.py | 2 +- hamilton/execution/state.py | 6 ++-- .../decorators/parameterize_frame.py | 2 +- hamilton/function_modifiers/adapters.py | 2 +- hamilton/function_modifiers/expanders.py | 14 +++++----- hamilton/function_modifiers/recursive.py | 14 +++++----- hamilton/graph.py | 2 +- hamilton/htypes.py | 2 +- hamilton/io/materialization.py | 2 +- hamilton/lifecycle/default.py | 2 +- hamilton/plugins/dask_extensions.py | 4 +-- hamilton/plugins/dlt_extensions.py | 2 +- hamilton/plugins/geopandas_extensions.py | 4 +-- hamilton/plugins/h_experiments/__main__.py | 4 +-- hamilton/plugins/h_schema.py | 4 +-- hamilton/plugins/h_spark.py | 8 +++--- hamilton/plugins/h_vaex.py | 6 ++-- hamilton/plugins/huggingface_extensions.py | 4 +-- hamilton/plugins/ibis_extensions.py | 4 +-- hamilton/plugins/lightgbm_extensions.py | 4 +-- hamilton/plugins/matplotlib_extensions.py | 4 +-- hamilton/plugins/mlflow_extensions.py | 12 ++++---- hamilton/plugins/numpy_extensions.py | 4 +-- hamilton/plugins/pandas_extensions.py | 4 +-- hamilton/plugins/plotly_extensions.py | 4 +-- hamilton/plugins/polars_extensions.py | 4 +-- .../plugins/polars_lazyframe_extensions.py | 4 +-- .../plugins/polars_pre_1_0_0_extension.py | 4 +-- hamilton/plugins/pyspark_pandas_extensions.py | 4 +-- hamilton/plugins/sklearn_plot_extensions.py | 4 +-- hamilton/plugins/spark_extensions.py | 4 +-- hamilton/plugins/vaex_extensions.py | 4 +-- hamilton/plugins/xgboost_extensions.py | 4 +-- hamilton/plugins/yaml_extensions.py | 4 +-- hamilton/registry.py | 2 +- hamilton/telemetry.py | 2 +- pyproject.toml | 2 +- setup.py | 2 +- tests/test_ad_hoc_utils.py | 2 +- tests/test_node.py | 2 +- .../tests/test_lifecycle/test_templates.py | 2 +- .../server/trackingserver_projects/api.py | 2 +- .../server/trackingserver_run_tracking/api.py | 6 ++-- 69 files changed, 151 insertions(+), 146 deletions(-) diff --git a/contrib/hamilton/contrib/user/skrawcz/customize_embeddings/__init__.py b/contrib/hamilton/contrib/user/skrawcz/customize_embeddings/__init__.py index 5054ebefa..fb301ea02 100644 ---
a/contrib/hamilton/contrib/user/skrawcz/customize_embeddings/__init__.py +++ b/contrib/hamilton/contrib/user/skrawcz/customize_embeddings/__init__.py @@ -262,7 +262,7 @@ def construct_df( negatives_per_positive: int = 1, random_seed: int = 123, ) -> pd.DataFrame: - f"""Return dataframe of {base_df} paris with negatives added.""" + """Return dataframe of {base_df} pairs with negatives added.""" return pd.concat( [ base_df, diff --git a/contrib/hamilton/contrib/user/zilto/lancedb_vdb/__init__.py b/contrib/hamilton/contrib/user/zilto/lancedb_vdb/__init__.py index cb85698d3..1367c03bd 100644 --- a/contrib/hamilton/contrib/user/zilto/lancedb_vdb/__init__.py +++ b/contrib/hamilton/contrib/user/zilto/lancedb_vdb/__init__.py @@ -59,9 +59,9 @@ def table_ref( try: table = client.open_table(table_name) - except FileNotFoundError: + except FileNotFoundError as e: if schema is None: - raise ValueError("`schema` must be provided to create table.") + raise ValueError("`schema` must be provided to create table.") from e table = _create_table( client=client, diff --git a/contrib/hamilton/contrib/user/zilto/nixtla_statsforecast/__init__.py b/contrib/hamilton/contrib/user/zilto/nixtla_statsforecast/__init__.py index c12ea651b..97b4d9b63 100644 --- a/contrib/hamilton/contrib/user/zilto/nixtla_statsforecast/__init__.py +++ b/contrib/hamilton/contrib/user/zilto/nixtla_statsforecast/__init__.py @@ -125,7 +125,7 @@ def best_model_per_series(cross_validation_evaluation: pd.DataFrame) -> pd.Serie def inference_predictions( forecaster: StatsForecast, inference_forecast_steps: int = 12, - inference_confidence_percentile: list[float] = [90.0], + inference_confidence_percentile: list[float] = [90.0], # noqa: B006 ) -> pd.DataFrame: """Infer values using the training harness. Fitted models aren't stored @@ -141,7 +141,7 @@ def plotting_config( plot_uids: Optional[list[str]] = None, plot_models: Optional[list[str]] = None, plot_anomalies: bool = False, - plot_confidence_percentile: list[float] = [90.0], + plot_confidence_percentile: list[float] = [90.0], # noqa: B006 plot_engine: str = "matplotlib", ) -> dict: """Configuration for plotting functions""" diff --git a/contrib/hamilton/contrib/user/zilto/webscraper/__init__.py b/contrib/hamilton/contrib/user/zilto/webscraper/__init__.py index 82be60952..6677221ef 100644 --- a/contrib/hamilton/contrib/user/zilto/webscraper/__init__.py +++ b/contrib/hamilton/contrib/user/zilto/webscraper/__init__.py @@ -54,8 +54,8 @@ def html_page(url: str) -> str: def parsed_html( url: str, html_page: str, - tags_to_extract: List[str] = ["p", "li", "div"], - tags_to_remove: List[str] = ["script", "style"], + tags_to_extract: List[str] = ["p", "li", "div"], # noqa: B006 + tags_to_remove: List[str] = ["script", "style"], # noqa: B006 ) -> ParsingResult: """Parse an HTML string using BeautifulSoup diff --git a/contrib/hamilton/contrib/user/zilto/xgboost_optuna/__init__.py b/contrib/hamilton/contrib/user/zilto/xgboost_optuna/__init__.py index 0740b92db..0ac4319ad 100644 --- a/contrib/hamilton/contrib/user/zilto/xgboost_optuna/__init__.py +++ b/contrib/hamilton/contrib/user/zilto/xgboost_optuna/__init__.py @@ -133,7 +133,7 @@ def cross_validation_folds( def study( higher_is_better: bool, - pruner: Optional[optuna.pruners.BasePruner] = optuna.pruners.MedianPruner(), + pruner: Optional[optuna.pruners.BasePruner] = None, sampler: Optional[optuna.samplers.BaseSampler] = None, study_storage: Optional[str] = None, study_name: Optional[str] = None, @@ -142,6 +142,8 @@ """Create an optuna
study; use the XGBoost + Optuna integration for pruning ref: https://github.com/optuna/optuna-examples/blob/main/xgboost/xgboost_integration.py """ + if pruner is None: + pruner = optuna.pruners.MedianPruner() return optuna.create_study( direction="maximize" if higher_is_better else "minimize", pruner=pruner, diff --git a/contrib/setup.py b/contrib/setup.py index a8632890b..b7c4fe9e5 100644 --- a/contrib/setup.py +++ b/contrib/setup.py @@ -10,8 +10,8 @@ try: with open("README.md") as readme_file: readme = readme_file.read() -except Exception: - warnings.warn("README.md not found") +except FileNotFoundError: + warnings.warn("README.md not found") # noqa readme = None REQUIREMENTS_FILES = ["requirements.txt"] diff --git a/examples/LLM_Workflows/GraphRAG/ingest_fighters.py b/examples/LLM_Workflows/GraphRAG/ingest_fighters.py index 0cefdf89a..389095205 100644 --- a/examples/LLM_Workflows/GraphRAG/ingest_fighters.py +++ b/examples/LLM_Workflows/GraphRAG/ingest_fighters.py @@ -17,7 +17,7 @@ def raw_fighter_details() -> pd.DataFrame: def fighter(raw_fighter_details: pd.DataFrame) -> Parallelizable[pd.Series]: """We then want to do something for each record. That's what this code sets up""" - for idx, row in raw_fighter_details.iterrows(): + for _, row in raw_fighter_details.iterrows(): yield row diff --git a/examples/LLM_Workflows/image_telephone/streamlit.py b/examples/LLM_Workflows/image_telephone/streamlit.py index a03405756..59fd40293 100644 --- a/examples/LLM_Workflows/image_telephone/streamlit.py +++ b/examples/LLM_Workflows/image_telephone/streamlit.py @@ -403,8 +403,7 @@ def explore_display(): image_urls_to_display = image_urls[0 : len(projection)] if len(image_urls_to_display) != len(projection): image_url_length = len(image_urls_to_display) - for i in range(len(projection) - len(image_urls_to_display)): - image_urls_to_display.append(image_urls[image_url_length - 1]) + image_urls_to_display.append(image_urls[image_url_length - 1]) embedding_path_plot(projection, image_urls_to_display, selected_entry, prompt_path) # highlight_point(projection, selected_entry) diff --git a/examples/LLM_Workflows/knowledge_retrieval/state.py b/examples/LLM_Workflows/knowledge_retrieval/state.py index a36bcad55..ad177d196 100644 --- a/examples/LLM_Workflows/knowledge_retrieval/state.py +++ b/examples/LLM_Workflows/knowledge_retrieval/state.py @@ -137,7 +137,7 @@ def call_arxiv_function(messages, full_message): return response except Exception as e: logger.error(type(e)) - raise Exception("Function chat request failed") + raise Exception("Function chat request failed") from e elif full_message["message"]["function_call"]["name"] == "read_article_and_summarize": parsed_output = json.loads(full_message["message"]["function_call"]["arguments"]) diff --git a/examples/LLM_Workflows/knowledge_retrieval/summarize_text.py b/examples/LLM_Workflows/knowledge_retrieval/summarize_text.py index e86d07b7c..580756a49 100644 --- a/examples/LLM_Workflows/knowledge_retrieval/summarize_text.py +++ b/examples/LLM_Workflows/knowledge_retrieval/summarize_text.py @@ -56,7 +56,7 @@ def pdf_text(pdf_path: pd.Series) -> pd.Series: :return: Series of strings of the PDFs' contents """ _pdf_text = [] - for i, file_path in pdf_path.items(): + for _i, file_path in pdf_path.items(): # creating a pdf reader object reader = PdfReader(file_path) text = "" diff --git a/examples/LLM_Workflows/retrieval_augmented_generation/backend/server.py b/examples/LLM_Workflows/retrieval_augmented_generation/backend/server.py index 54b90b593..5d1690fb9 
100644 --- a/examples/LLM_Workflows/retrieval_augmented_generation/backend/server.py +++ b/examples/LLM_Workflows/retrieval_augmented_generation/backend/server.py @@ -59,7 +59,7 @@ class SummaryResponse(pydantic.BaseModel): @app.post("/store_arxiv", tags=["Ingestion"]) -async def store_arxiv(arxiv_ids: list[str] = fastapi.Form(...)) -> JSONResponse: +async def store_arxiv(arxiv_ids: list[str] = fastapi.Form(...)) -> JSONResponse: # noqa: B008 """Retrieve PDF files of arxiv articles for arxiv_ids\n Read the PDF as text, create chunks, and embed them using OpenAI API\n Store chunks with embeddings in Weaviate. diff --git a/examples/LLM_Workflows/scraping_and_chunking/spark/doc_pipeline.py b/examples/LLM_Workflows/scraping_and_chunking/spark/doc_pipeline.py index b02f8cfdc..a71648795 100644 --- a/examples/LLM_Workflows/scraping_and_chunking/spark/doc_pipeline.py +++ b/examples/LLM_Workflows/scraping_and_chunking/spark/doc_pipeline.py @@ -27,8 +27,8 @@ def article_text(url: str, article_regex: str) -> str: """ try: html = requests.get(url) - except requests.exceptions.RequestException: - raise Exception(f"Failed to get URL: {url}") + except requests.exceptions.RequestException as e: + raise Exception(f"Failed to get URL: {url}") from e article = re.findall(article_regex, html.text, re.DOTALL) if not article: raise ValueError(f"No article found in {url}") diff --git a/examples/LLM_Workflows/scraping_and_chunking/spark/spark_pipeline.py b/examples/LLM_Workflows/scraping_and_chunking/spark/spark_pipeline.py index 9c1fe2212..fdb37a1c5 100644 --- a/examples/LLM_Workflows/scraping_and_chunking/spark/spark_pipeline.py +++ b/examples/LLM_Workflows/scraping_and_chunking/spark/spark_pipeline.py @@ -27,7 +27,9 @@ def sitemap_text(sitemap_url: str = "https://hamilton.dagworks.io/en/latest/site try: sitemap = requests.get(sitemap_url) except Exception as e: - raise RuntimeError(f"Failed to fetch sitemap from {sitemap_url}. Original error: {str(e)}") + raise RuntimeError( + f"Failed to fetch sitemap from {sitemap_url}. 
Original error: {str(e)}" + ) from e return sitemap.text diff --git a/examples/dagster/dagster_code/tutorial/assets.py b/examples/dagster/dagster_code/tutorial/assets.py index 7d38771c0..728a06c44 100644 --- a/examples/dagster/dagster_code/tutorial/assets.py +++ b/examples/dagster/dagster_code/tutorial/assets.py @@ -55,7 +55,7 @@ def most_frequent_words() -> MaterializeResult: for raw_title in topstories["title"]: title = raw_title.lower() for word in title.split(): - cleaned_word = word.strip(".,-!?:;()[]'\"-") + cleaned_word = word.strip(".,-!?:;()[]'\"-") # noqa if cleaned_word not in stopwords and len(cleaned_word) > 0: word_counts[cleaned_word] = word_counts.get(cleaned_word, 0) + 1 diff --git a/examples/dagster/dagster_code/tutorial/resources/__init__.py b/examples/dagster/dagster_code/tutorial/resources/__init__.py index 9cab9cb72..22c6e3591 100644 --- a/examples/dagster/dagster_code/tutorial/resources/__init__.py +++ b/examples/dagster/dagster_code/tutorial/resources/__init__.py @@ -93,7 +93,7 @@ def get_signups_for_date(self, date: datetime) -> Sequence[Signup]: signups = [] num_signups = self.random.randint(25, 100) - for i in range(num_signups): + for _ in range(num_signups): signup = self.generate_signup(date) signups.append(signup.to_dict()) diff --git a/examples/dagster/hamilton_code/dataflow.py b/examples/dagster/hamilton_code/dataflow.py index 29c7e45d0..3b4ec060e 100644 --- a/examples/dagster/hamilton_code/dataflow.py +++ b/examples/dagster/hamilton_code/dataflow.py @@ -31,7 +31,7 @@ def most_frequent_words(title: pd.Series) -> dict[str, int]: word_counts = {} for raw_title in title: for word in raw_title.lower().split(): - word = word.strip(".,-!?:;()[]'\"-") + word = word.strip(".,-!?:;()[]'\"-") # noqa if len(word) == 0: continue diff --git a/examples/dagster/hamilton_code/mock_api.py b/examples/dagster/hamilton_code/mock_api.py index c8a9a8fb4..7562d0dab 100644 --- a/examples/dagster/hamilton_code/mock_api.py +++ b/examples/dagster/hamilton_code/mock_api.py @@ -94,7 +94,7 @@ def get_signups_for_date(self, date: datetime) -> Sequence[Signup]: signups = [] num_signups = self.random.randint(25, 100) - for i in range(num_signups): + for _ in range(num_signups): signup = self.generate_signup(date) signups.append(signup.to_dict()) diff --git a/examples/decoupling_io/adapters.py b/examples/decoupling_io/adapters.py index 8e861356a..ccb6154d6 100644 --- a/examples/decoupling_io/adapters.py +++ b/examples/decoupling_io/adapters.py @@ -7,8 +7,8 @@ import sklearn.inspection import sklearn.metrics import sklearn.model_selection -except ImportError: - raise NotImplementedError("scikit-learn is not installed.") +except ImportError as e: + raise NotImplementedError("scikit-learn is not installed.") from e from hamilton import registry diff --git a/examples/dlt/slack/__init__.py b/examples/dlt/slack/__init__.py index e4fb90f70..3146ab5ae 100644 --- a/examples/dlt/slack/__init__.py +++ b/examples/dlt/slack/__init__.py @@ -168,12 +168,7 @@ def get_thread_replies(messages: List[Dict[str, Any]]) -> Iterable[TDataItem]: write_disposition=write_disposition, ) def messages_resource( - created_at: dlt.sources.incremental[DateTime] = dlt.sources.incremental( - "ts", - initial_value=start_dt, - end_value=end_dt, - allow_external_schedulers=True, - ), + created_at: dlt.sources.incremental[DateTime] = None, ) -> Iterable[TDataItem]: """ Yield all messages for a set of selected channels as a DLT resource. Keep blocks column without normalization. 
@@ -184,6 +179,13 @@ def messages_resource( Yields: Iterable[TDataItem]: A list of messages. """ + if created_at is None: + created_at = dlt.sources.incremental( + "ts", + initial_value=start_dt, + end_value=end_dt, + allow_external_schedulers=True, + ) start_date_ts = ensure_dt_type(created_at.last_value, to_ts=True) end_date_ts = ensure_dt_type(created_at.end_value, to_ts=True) for channel_data in fetched_selected_channels: @@ -191,12 +193,7 @@ def messages_resource( def per_table_messages_resource( channel_data: Dict[str, Any], - created_at: dlt.sources.incremental[DateTime] = dlt.sources.incremental( - "ts", - initial_value=start_dt, - end_value=end_dt, - allow_external_schedulers=True, - ), + created_at: dlt.sources.incremental[DateTime] = None, ) -> Iterable[TDataItem]: """Yield all messages for a given channel as a DLT resource. Keep blocks column without normalization. @@ -207,6 +204,13 @@ def per_table_messages_resource( Yields: Iterable[TDataItem]: A list of messages. """ + if created_at is None: + created_at = dlt.sources.incremental( + "ts", + initial_value=start_dt, + end_value=end_dt, + allow_external_schedulers=True, + ) start_date_ts = ensure_dt_type(created_at.last_value, to_ts=True) end_date_ts = ensure_dt_type(created_at.end_value, to_ts=True) yield from get_messages(channel_data, start_date_ts, end_date_ts) diff --git a/examples/due_date_probabilities/probability_estimation.py b/examples/due_date_probabilities/probability_estimation.py index 5a53f2ab3..5c673e5ed 100644 --- a/examples/due_date_probabilities/probability_estimation.py +++ b/examples/due_date_probabilities/probability_estimation.py @@ -125,10 +125,9 @@ def raw_probabilities(raw_data: str) -> pd.DataFrame: def resampled(raw_probabilities: pd.DataFrame) -> List[int]: sample_data = [] - for index, row in raw_probabilities.iterrows(): + for _idx, row in raw_probabilities.iterrows(): count = row.probability * 1000 - for i in range(int(count)): - sample_data.append(row.days) + sample_data.extend([row.days] * int(count)) return sample_data diff --git a/examples/people_data_labs/analysis.py b/examples/people_data_labs/analysis.py index 8e2b6163e..ef527d643 100644 --- a/examples/people_data_labs/analysis.py +++ b/examples/people_data_labs/analysis.py @@ -115,7 +115,7 @@ def stock_growth_rate_since_last_funding_round( df = pd.merge(left=stock_data, right=period_start, on="ticker", how="inner") stock_growth = dict() - for idx, row in df.iterrows(): + for _, row in df.iterrows(): history = pd.json_normalize(row["historical_price"]).astype({"date": "datetime64[ns]"}) # skip ticker if history is empty diff --git a/examples/prefect/run.py b/examples/prefect/run.py index 9de3b09a2..b1a98ab5e 100644 --- a/examples/prefect/run.py +++ b/examples/prefect/run.py @@ -72,7 +72,7 @@ def train_and_evaluate_model_task( ) def absenteeism_prediction_flow( raw_data_location: str = "./data/Absenteeism_at_work.csv", - feature_set: list[str] = [ + feature_set: list[str] = [ # noqa: B006 "age_zero_mean_unit_variance", "has_children", "has_pet", @@ -80,7 +80,7 @@ def absenteeism_prediction_flow( "service_time", ], label: str = "absenteeism_time_in_hours", - validation_user_ids: list[str] = [ + validation_user_ids: list[str] = [ # noqa: B006 "1", "2", "4", diff --git a/examples/spark/world_of_warcraft/zone_features__spark_v1.py b/examples/spark/world_of_warcraft/zone_features__spark_v1.py index 59c2437bf..93ea23886 100644 --- a/examples/spark/world_of_warcraft/zone_features__spark_v1.py +++ 
b/examples/spark/world_of_warcraft/zone_features__spark_v1.py @@ -12,10 +12,9 @@ def world_of_warcraft(spark_session: ps.SparkSession) -> ps.DataFrame: def zone_flags(world_of_warcraft: ps.DataFrame) -> ps.DataFrame: zone_flags = world_of_warcraft - for zone in ["durotar", "darkshore"]: - zone_flags = zone_flags.withColumn( - "darkshore_flag", sf.when(sf.col("zone") == " Darkshore", 1).otherwise(0) - ).withColumn("durotar_flag", sf.when(sf.col("zone") == " Durotar", 1).otherwise(0)) + zone_flags = zone_flags.withColumn( + "darkshore_flag", sf.when(sf.col("zone") == " Darkshore", 1).otherwise(0) + ).withColumn("durotar_flag", sf.when(sf.col("zone") == " Durotar", 1).otherwise(0)) return zone_flags diff --git a/hamilton/cli/__main__.py b/hamilton/cli/__main__.py index 85e143a38..ff28d8c30 100644 --- a/hamilton/cli/__main__.py +++ b/hamilton/cli/__main__.py @@ -127,7 +127,7 @@ def _try_command(cmd: Callable, **cmd_kwargs) -> Any: command=cmd_name, success=False, message={"error": str(type(e)), "details": str(e)} ) logger.error(dataclasses.asdict(response)) - raise typer.Exit(code=1) + raise typer.Exit(code=1) from e return result @@ -297,12 +297,12 @@ def ui( """Runs the Hamilton UI on sqllite in port 8241""" try: from hamilton_ui import commands - except ImportError: + except ImportError as e: logger.error( "hamilton[ui] not installed -- you have to install this to run the UI. " 'Run `pip install "sf-hamilton[ui]"` to install and get started with the UI!' ) - raise typer.Exit(code=1) + raise typer.Exit(code=1) from e ctx.invoke( commands.run, diff --git a/hamilton/cli/logic.py b/hamilton/cli/logic.py index 999d89858..2cd6da66f 100644 --- a/hamilton/cli/logic.py +++ b/hamilton/cli/logic.py @@ -27,8 +27,8 @@ def get_git_base_directory() -> str: else: print("Error:", result.stderr.strip()) raise OSError(f"{result.stderr.strip()}") - except FileNotFoundError: - raise FileNotFoundError("Git command not found. Please make sure Git is installed.") + except FileNotFoundError as e: + raise FileNotFoundError("Git command not found. Please make sure Git is installed.") from e def get_git_reference(git_relative_path: Union[str, Path], git_reference: str) -> str: @@ -51,8 +51,8 @@ def get_git_reference(git_relative_path: Union[str, Path], git_reference: str) - return else: return - except FileNotFoundError: - raise FileNotFoundError("Git command not found. Please make sure Git is installed.") + except FileNotFoundError as e: + raise FileNotFoundError("Git command not found. Please make sure Git is installed.") from e def version_hamilton_functions(module: ModuleType) -> Dict[str, str]: @@ -184,7 +184,7 @@ def diff_versions(current_map: Dict[str, str], reference_map: Dict[str, str]) -> if v1 != v2: edit.append(node_name) - for node_name, v2 in reference_map.items(): + for node_name, _ in reference_map.items(): v1 = current_map.get(node_name) if v1 is None: reference_only.append(node_name) diff --git a/hamilton/dataflows/__init__.py b/hamilton/dataflows/__init__.py index b13163b9f..4ee85685e 100644 --- a/hamilton/dataflows/__init__.py +++ b/hamilton/dataflows/__init__.py @@ -498,10 +498,10 @@ def are_py_dependencies_satisfied(dataflow, user=None, version="latest"): else: package_name = line required_version = None - required_version # here for now... + required_version # noqa here for now... try: installed_version = pkg_version(package_name) - installed_version # here for now.. + installed_version # noqa here for now.. 
except PackageNotFoundError: logger.info(f"Package '{package_name}' is not installed.") return False diff --git a/hamilton/execution/executors.py b/hamilton/execution/executors.py index b2dab90a2..7bfb500d7 100644 --- a/hamilton/execution/executors.py +++ b/hamilton/execution/executors.py @@ -99,7 +99,7 @@ def base_execute_task(task: TaskImplementation) -> Dict[str, Any]: for node_ in task.nodes: if not getattr(node_, "callable_modified", False): node_._callable = _modify_callable(node_.node_role, node_.callable) - setattr(node_, "callable_modified", True) + node_.callable_modified = True if task.adapter.does_hook("pre_task_execute", is_async=False): task.adapter.call_all_lifecycle_hooks_sync( "pre_task_execute", diff --git a/hamilton/execution/state.py b/hamilton/execution/state.py index c69002c61..f12182611 100644 --- a/hamilton/execution/state.py +++ b/hamilton/execution/state.py @@ -307,7 +307,7 @@ def realize_parameterized_group( for dependency in new_task.base_dependencies: new_dependencies[dependency] = [] if dependency in task_names_in_group: - for group_name, name_map in name_maps.items(): + for _group_name, name_map in name_maps.items(): new_dependencies[dependency].append(name_map[dependency]) else: new_dependencies[dependency].append(dependency) @@ -403,10 +403,10 @@ def update_task_state( tasks_to_enqueue = [] # not efficient, TODO -- use a reverse dependency map - for key, task in self.task_pool.items(): + for _key, task in self.task_pool.items(): if self.task_states[task.task_id] == TaskState.INITIALIZED: should_launch = True - for base_dep_name, realized_dep_list in task.realized_dependencies.items(): + for _base_dep_name, realized_dep_list in task.realized_dependencies.items(): for dep in realized_dep_list: if self.task_states[dep] != TaskState.SUCCESSFUL: should_launch = False diff --git a/hamilton/experimental/decorators/parameterize_frame.py b/hamilton/experimental/decorators/parameterize_frame.py index fb995e2b6..313a229d2 100644 --- a/hamilton/experimental/decorators/parameterize_frame.py +++ b/hamilton/experimental/decorators/parameterize_frame.py @@ -159,7 +159,7 @@ def my_func_parameterized_extract( print("running my_func_parameterized_extract") return pd.concat([input1 * input2 * input3, input1 + input2 + input3], axis=1) - setattr(my_func_parameterized_extract, "decorated", "false") + my_func_parameterized_extract.decorated = "false" # Test by running the @parameterized_extract decorator from hamilton.ad_hoc_utils import create_temporary_module diff --git a/hamilton/function_modifiers/adapters.py b/hamilton/function_modifiers/adapters.py index 76f55a5a7..82845dc9b 100644 --- a/hamilton/function_modifiers/adapters.py +++ b/hamilton/function_modifiers/adapters.py @@ -190,7 +190,7 @@ def load_data( __load_type: Type[Type] = load_type, __resolved_kwargs=resolved_kwargs, __dependencies=dependencies_inverted, - __optional_params=loader_cls.get_optional_arguments(), + __optional_params=loader_cls.get_optional_arguments(), # noqa: B008 **input_kwargs: Any, ) -> Tuple[load_type, Dict[str, Any]]: input_args_with_fixed_dependencies = { diff --git a/hamilton/function_modifiers/expanders.py b/hamilton/function_modifiers/expanders.py index a375e206e..1b5e726d3 100644 --- a/hamilton/function_modifiers/expanders.py +++ b/hamilton/function_modifiers/expanders.py @@ -113,8 +113,8 @@ def __init__( for key, value in parametrization.items() } bad_values = [] - for assigned_output, mapping in self.parameterization.items(): - for parameter, val in mapping.items(): + for 
_assigned_output, mapping in self.parameterization.items(): + for _parameter, val in mapping.items(): if not isinstance(val, ParametrizedDependency): bad_values.append(val) if bad_values: @@ -177,7 +177,7 @@ def replacement_function( literal_dependencies=literal_dependencies, grouped_list_dependencies=grouped_list_dependencies, grouped_dict_dependencies=grouped_dict_dependencies, - former_inputs=list(node_.input_types.keys()), + former_inputs=list(node_.input_types.keys()), # noqa **kwargs, ): """This function rewrites what is passed in kwargs to the right kwarg for the function. @@ -284,7 +284,7 @@ def validate(self, fn: Callable): signature = inspect.signature(fn) func_param_names = set(signature.parameters.keys()) try: - for output_name, mappings in self.parameterization.items(): + for output_name, _mappings in self.parameterization.items(): # TODO -- separate out into the two dependency-types if output_name == self.PLACEHOLDER_PARAM_NAME: output_name = fn.__name__ @@ -310,7 +310,7 @@ def validate(self, fn: Callable): f"Parametrization is invalid: the following parameters don't appear in the function itself: {', '.join(missing_parameters)}" ) type_hints = typing.get_type_hints(fn) - for output_name, mapping in self.parameterization.items(): + for _output_name, mapping in self.parameterization.items(): # TODO -- look a the origin type and determine that its a sequence # We can just use the GroupedListDependency to do this invalid_types = [] @@ -596,12 +596,12 @@ def validate_return_type(fn: Callable): output_type = typing.get_type_hints(fn).get("return") try: registry.get_column_type_from_df_type(output_type) - except NotImplementedError: + except NotImplementedError as e: raise base.InvalidDecoratorException( # TODO: capture was dataframe libraries are supported and print here. f"Error {fn} does not output a type we know about. Is it a dataframe type we " f"support? " - ) + ) from e def validate(self, fn: Callable): """A function is invalid if it does not output a dataframe. diff --git a/hamilton/function_modifiers/recursive.py b/hamilton/function_modifiers/recursive.py index 965cf0ac3..e46401008 100644 --- a/hamilton/function_modifiers/recursive.py +++ b/hamilton/function_modifiers/recursive.py @@ -320,12 +320,12 @@ def add_namespace( for node_ in nodes: new_name = assign_namespace(node_.name, namespace) new_name_map[node_.name] = new_name - for dep, value in inputs.items(): + for dep, _value in inputs.items(): # We create nodes for both namespace assignment and source assignment # Why? 
Cause we need unique parameter names, and with source() some can share params new_name_map[dep] = assign_namespace(dep, namespace) - for dep, value in config.items(): + for dep, _value in config.items(): new_name_map[dep] = assign_namespace(dep, namespace) # Reassign sources @@ -343,9 +343,9 @@ def add_namespace( # around as sources can potentially serve multiple destinations (with the source()) decorator def fn( _callabl=node_.callable, - _kwarg_mapping=dict(kwarg_mapping), + _kwarg_mapping=dict(kwarg_mapping), # noqa: B006 _new_name=new_name, - _new_name_map=dict(new_name_map), + _new_name_map=dict(new_name_map), # noqa: B006 **kwargs, ): new_kwargs = {_kwarg_mapping[kwarg]: value for kwarg, value in kwargs.items()} @@ -353,9 +353,9 @@ def fn( async def async_fn( _callabl=node_.callable, - _kwarg_mapping=dict(kwarg_mapping), + _kwarg_mapping=dict(kwarg_mapping), # noqa: B006 _new_name=new_name, - _new_name_map=dict(new_name_map), + _new_name_map=dict(new_name_map), # noqa: B006 **kwargs, ): new_kwargs = {_kwarg_mapping[kwarg]: value for kwarg, value in kwargs.items()} @@ -438,7 +438,7 @@ def generate_nodes(self, fn: Callable, configuration: Dict[str, Any]) -> Collect def _validate_parameterization(self): invalid_values = [] - for key, value in self.inputs.items(): + for _key, value in self.inputs.items(): if not isinstance(value, dependencies.ParametrizedDependency): invalid_values.append(value) if invalid_values: diff --git a/hamilton/graph.py b/hamilton/graph.py index 1adb53d81..c4a18b655 100644 --- a/hamilton/graph.py +++ b/hamilton/graph.py @@ -165,7 +165,7 @@ def create_function_graph( functions = sum([find_functions(module) for module in modules], []) # create non-input nodes -- easier to just create this in one loop - for func_name, f in functions: + for _func_name, f in functions: for n in fm_base.resolve_nodes(f, config): if n.name in config: continue # This makes sure we overwrite things if they're in the config... 
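# --- Illustrative sketch (not part of the patch): the B904 pattern applied throughout ---
# Inside an `except` handler, a bare `raise NewError(...)` discards the original
# cause; B904 wants `raise NewError(...) from e` (or `from None` to suppress the
# chain deliberately, as the trackingserver handlers do). The module name below
# is hypothetical and only exists to trigger the ImportError.
try:
    import optional_dep  # hypothetical module, assumed absent
except ImportError as e:
    raise NotImplementedError("optional_dep is not installed.") from e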
diff --git a/hamilton/htypes.py b/hamilton/htypes.py index 7f515c2e4..195bbe077 100644 --- a/hamilton/htypes.py +++ b/hamilton/htypes.py @@ -210,7 +210,7 @@ def _is_valid_series_type(candidate_type: Type[Type]) -> bool: :param candidate_type: Type to check :return: Whether it is a series (column) type that we have registered """ - for key, types in DF_TYPE_AND_COLUMN_TYPES.items(): + for _key, types in DF_TYPE_AND_COLUMN_TYPES.items(): if COLUMN_TYPE not in types: continue if issubclass(candidate_type, types[COLUMN_TYPE]): diff --git a/hamilton/io/materialization.py b/hamilton/io/materialization.py index d99ff1e48..303689ce3 100644 --- a/hamilton/io/materialization.py +++ b/hamilton/io/materialization.py @@ -390,7 +390,7 @@ def with_modified_signature( # Combining old and new parameters # Checking for position of **kwargs and insert new params before - for idx, param in enumerate(original_parameters): + for idx, param in enumerate(original_parameters): # noqa if param.kind == inspect.Parameter.VAR_KEYWORD: break else: diff --git a/hamilton/lifecycle/default.py b/hamilton/lifecycle/default.py index 18c38370c..333bc4e41 100644 --- a/hamilton/lifecycle/default.py +++ b/hamilton/lifecycle/default.py @@ -718,7 +718,7 @@ def run_to_execute_node( can_inject = can_inject and self.allow_injection if not can_inject: - for key, value in node_kwargs.items(): + for _key, value in node_kwargs.items(): if type(self.sentinel_value) is type(value): if self.sentinel_value == value: # == versus is return default_return diff --git a/hamilton/plugins/dask_extensions.py b/hamilton/plugins/dask_extensions.py index fe979c81e..6bf9e664c 100644 --- a/hamilton/plugins/dask_extensions.py +++ b/hamilton/plugins/dask_extensions.py @@ -2,8 +2,8 @@ try: import dask.dataframe as dd -except ImportError: - raise NotImplementedError("Dask is not installed.") +except ImportError as e: + raise NotImplementedError("Dask is not installed.") from e from hamilton import registry diff --git a/hamilton/plugins/dlt_extensions.py b/hamilton/plugins/dlt_extensions.py index 858423aed..dabcdee57 100644 --- a/hamilton/plugins/dlt_extensions.py +++ b/hamilton/plugins/dlt_extensions.py @@ -11,7 +11,7 @@ except ImportError as e: # raise import error first - raise ImportError(f"Failed to import the DLT library. {e}") + raise ImportError(f"Failed to import the DLT library. {e}") from e except Exception as e: # raise import error with custom message raise ImportError("Failed to import the DLT library.") from e diff --git a/hamilton/plugins/geopandas_extensions.py b/hamilton/plugins/geopandas_extensions.py index de6ee631b..70e7e0135 100644 --- a/hamilton/plugins/geopandas_extensions.py +++ b/hamilton/plugins/geopandas_extensions.py @@ -2,8 +2,8 @@ try: import geopandas as gpd -except ImportError: - raise NotImplementedError("geopandas is not installed.") +except ImportError as e: + raise NotImplementedError("geopandas is not installed.") from e from hamilton import registry diff --git a/hamilton/plugins/h_experiments/__main__.py b/hamilton/plugins/h_experiments/__main__.py index ad60aaec0..6bc5bba97 100644 --- a/hamilton/plugins/h_experiments/__main__.py +++ b/hamilton/plugins/h_experiments/__main__.py @@ -11,10 +11,10 @@ def main(): import fastapi # noqa: F401 import fastui # noqa: F401 import uvicorn - except ModuleNotFoundError: + except ModuleNotFoundError as e: raise ModuleNotFoundError( "Some dependencies are missing. 
Make sure to `pip install sf-hamilton[experiments]`" - ) + ) from e if telemetry.is_telemetry_enabled(): telemetry.create_and_send_expt_server_event("startup") parser = argparse.ArgumentParser(prog="hamilton-experiments") diff --git a/hamilton/plugins/h_schema.py b/hamilton/plugins/h_schema.py index 49a4eafaf..6e936dd03 100644 --- a/hamilton/plugins/h_schema.py +++ b/hamilton/plugins/h_schema.py @@ -313,8 +313,8 @@ def _spark_to_arrow(type_): else: try: arrow_type = _from_pyspark_dtypes[type(type_)] - except KeyError: - raise NotImplementedError(f"Can't convert {type_} to pyarrow type.") + except KeyError as e: + raise NotImplementedError(f"Can't convert {type_} to pyarrow type.") from e return arrow_type diff --git a/hamilton/plugins/h_spark.py b/hamilton/plugins/h_spark.py index ea23e7b6d..07353c4cf 100644 --- a/hamilton/plugins/h_spark.py +++ b/hamilton/plugins/h_spark.py @@ -13,8 +13,8 @@ import pyspark.pandas as ps from pyspark.sql import Column, DataFrame, dataframe, types from pyspark.sql.functions import column, lit, pandas_udf, udf -except ImportError: - raise NotImplementedError("Pyspark is not installed.") +except ImportError as e: + raise NotImplementedError("Pyspark is not installed.") from e from hamilton import base, htypes, node from hamilton.execution import graph_functions @@ -222,7 +222,7 @@ def get_spark_type(return_type: Any) -> types.DataType: return python_to_spark_type(return_type) elif return_type in _list: return types.ArrayType(python_to_spark_type(return_type.__args__[0])) - elif hasattr(return_type, "__module__") and getattr(return_type, "__module__") == "numpy": + elif hasattr(return_type, "__module__") and return_type.__module__ == "numpy": return numpy_to_spark_type(return_type) else: raise ValueError( @@ -272,7 +272,7 @@ def _determine_parameters_to_bind( """ params_from_df = {} bind_parameters = {} - for input_name, (type_, dep_type) in node_input_types.items(): + for input_name, (type_, dep_type) in node_input_types.items(): # noqa if input_name in df_columns: params_from_df[input_name] = column(input_name) elif input_name in actual_kwargs and not isinstance(actual_kwargs[input_name], DataFrame): diff --git a/hamilton/plugins/h_vaex.py b/hamilton/plugins/h_vaex.py index f9d017220..bfcec84ec 100644 --- a/hamilton/plugins/h_vaex.py +++ b/hamilton/plugins/h_vaex.py @@ -7,8 +7,8 @@ try: import vaex -except ImportError: - raise NotImplementedError("Vaex is not installed.") +except ImportError as e: + raise NotImplementedError("Vaex is not installed.") from e class VaexDataFrameResult(base.ResultMixin): @@ -86,7 +86,7 @@ def build_result( raise NotImplementedError( "VaexDataFrameResult supports only one-dimensional Expression results" ) - for name, a in arrays.items(): + for _name, a in arrays.items(): if a.shape != first_expression_shape: raise NotImplementedError( "VaexDataFrameResult supports Expression results with same dimension only" diff --git a/hamilton/plugins/huggingface_extensions.py b/hamilton/plugins/huggingface_extensions.py index 5c5691403..a8133f50e 100644 --- a/hamilton/plugins/huggingface_extensions.py +++ b/hamilton/plugins/huggingface_extensions.py @@ -28,8 +28,8 @@ load_dataset, ) from datasets.formatting.formatting import LazyBatch -except ImportError: - raise NotImplementedError("huggingface datasets library is not installed.") +except ImportError as e: + raise NotImplementedError("huggingface datasets library is not installed.") from e try: import lancedb diff --git a/hamilton/plugins/ibis_extensions.py 
b/hamilton/plugins/ibis_extensions.py index 3f1c4f0f5..861312600 100644 --- a/hamilton/plugins/ibis_extensions.py +++ b/hamilton/plugins/ibis_extensions.py @@ -5,8 +5,8 @@ try: import ibis import ibis.expr.types as ir -except ImportError: - raise NotImplementedError("Ibis is not installed.") +except ImportError as e: + raise NotImplementedError("Ibis is not installed.") from e from hamilton.data_quality import base, default_validators diff --git a/hamilton/plugins/lightgbm_extensions.py b/hamilton/plugins/lightgbm_extensions.py index 2553e487e..6e85cc12c 100644 --- a/hamilton/plugins/lightgbm_extensions.py +++ b/hamilton/plugins/lightgbm_extensions.py @@ -4,8 +4,8 @@ try: import lightgbm -except ImportError: - raise NotImplementedError("LightGBM is not installed.") +except ImportError as e: + raise NotImplementedError("LightGBM is not installed.") from e from hamilton import registry diff --git a/hamilton/plugins/matplotlib_extensions.py b/hamilton/plugins/matplotlib_extensions.py index ab1a4000d..ae8e58fdd 100644 --- a/hamilton/plugins/matplotlib_extensions.py +++ b/hamilton/plugins/matplotlib_extensions.py @@ -6,8 +6,8 @@ from matplotlib.artist import Artist from matplotlib.figure import Figure from matplotlib.transforms import Bbox -except ImportError: - raise NotImplementedError("Matplotlib is not installed.") +except ImportError as e: + raise NotImplementedError("Matplotlib is not installed.") from e from hamilton import registry from hamilton.io import utils diff --git a/hamilton/plugins/mlflow_extensions.py b/hamilton/plugins/mlflow_extensions.py index 64553a862..0610fd75b 100644 --- a/hamilton/plugins/mlflow_extensions.py +++ b/hamilton/plugins/mlflow_extensions.py @@ -5,8 +5,8 @@ try: import mlflow -except ImportError: - raise NotImplementedError("MLFlow is not installed.") +except ImportError as e: + raise NotImplementedError("MLFlow is not installed.") from e from hamilton import registry from hamilton.io.data_adapters import DataLoader, DataSaver @@ -56,8 +56,8 @@ def save_data(self, data) -> Dict[str, Any]: # retrieve the `mlflow.FLAVOR` submodule to `.log_model()` try: flavor_module = getattr(mlflow, flavor) - except ImportError: - raise ImportError(f"Flavor {flavor} is unsupported by MLFlow") + except ImportError as e: + raise ImportError(f"Flavor {flavor} is unsupported by MLFlow") from e # handle `run_id` and active run conflicts if mlflow.active_run() and self.run_id: @@ -177,8 +177,8 @@ def load_data(self, type_: Type) -> Tuple[Any, Dict[str, Any]]: # retrieve the `mlflow.FLAVOR` submodule to `.log_model()` try: flavor_module = getattr(mlflow, flavor) - except ImportError: - raise ImportError(f"Flavor {flavor} is unsupported by MLFlow") + except ImportError as e: + raise ImportError(f"Flavor {flavor} is unsupported by MLFlow") from e model = flavor_module.load_model(model_uri=self.model_uri, **self.mlflow_kwargs) return model, metadata diff --git a/hamilton/plugins/numpy_extensions.py b/hamilton/plugins/numpy_extensions.py index 0b089c9f7..588916f12 100644 --- a/hamilton/plugins/numpy_extensions.py +++ b/hamilton/plugins/numpy_extensions.py @@ -4,8 +4,8 @@ try: import numpy as np -except ImportError: - raise NotImplementedError("Numpy is not installed.") +except ImportError as e: + raise NotImplementedError("Numpy is not installed.") from e from typing import Literal diff --git a/hamilton/plugins/pandas_extensions.py b/hamilton/plugins/pandas_extensions.py index 003b0fa9d..e212e5df8 100644 --- a/hamilton/plugins/pandas_extensions.py +++ 
b/hamilton/plugins/pandas_extensions.py @@ -9,8 +9,8 @@ try: import pandas as pd -except ImportError: - raise NotImplementedError("Pandas is not installed.") +except ImportError as e: + raise NotImplementedError("Pandas is not installed.") from e from typing import Literal diff --git a/hamilton/plugins/plotly_extensions.py b/hamilton/plugins/plotly_extensions.py index 39c45fa6a..625939d79 100644 --- a/hamilton/plugins/plotly_extensions.py +++ b/hamilton/plugins/plotly_extensions.py @@ -4,8 +4,8 @@ try: import plotly.graph_objects -except ImportError: - raise NotImplementedError("Plotly is not installed.") +except ImportError as e: + raise NotImplementedError("Plotly is not installed.") from e from hamilton import registry from hamilton.io import utils diff --git a/hamilton/plugins/polars_extensions.py b/hamilton/plugins/polars_extensions.py index 6c46eb664..7fe500c7d 100644 --- a/hamilton/plugins/polars_extensions.py +++ b/hamilton/plugins/polars_extensions.py @@ -14,8 +14,8 @@ try: import polars as pl -except ImportError: - raise NotImplementedError("Polars is not installed.") +except ImportError as e: + raise NotImplementedError("Polars is not installed.") from e pl_version = version.Version(pl.__version__) if pl_version < version.Version("1.0.0"): diff --git a/hamilton/plugins/polars_lazyframe_extensions.py b/hamilton/plugins/polars_lazyframe_extensions.py index ffc5347c0..6130dfce8 100644 --- a/hamilton/plugins/polars_lazyframe_extensions.py +++ b/hamilton/plugins/polars_lazyframe_extensions.py @@ -18,8 +18,8 @@ try: import polars as pl -except ImportError: - raise NotImplementedError("Polars is not installed.") +except ImportError as e: + raise NotImplementedError("Polars is not installed.") from e # for polars <0.16.0 we need to determine whether type_aliases exist. diff --git a/hamilton/plugins/polars_pre_1_0_0_extension.py b/hamilton/plugins/polars_pre_1_0_0_extension.py index 6d6d6c9b4..39b75c262 100644 --- a/hamilton/plugins/polars_pre_1_0_0_extension.py +++ b/hamilton/plugins/polars_pre_1_0_0_extension.py @@ -24,8 +24,8 @@ try: import polars as pl -except ImportError: - raise NotImplementedError("Polars is not installed.") +except ImportError as e: + raise NotImplementedError("Polars is not installed.") from e # for polars <0.16.0 we need to determine whether type_aliases exist. 
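# --- Illustrative sketch (not part of the patch): the B006/B008 refactor used above ---
# A mutable literal (B006) or a call (B008) used as a default is evaluated once
# at definition time and shared across every call; binding None and resolving it
# inside the body gives each call a fresh value (as done for the optuna pruner
# and the dlt `created_at` arguments). Names here are illustrative only.
from typing import List, Optional

def parsed(tags: Optional[List[str]] = None) -> List[str]:
    if tags is None:  # resolve the default per call, not at import time
        tags = ["p", "li", "div"]
    return sorted(tags)

print(parsed())  # ['div', 'li', 'p'] -- a fresh list on every call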
diff --git a/hamilton/plugins/pyspark_pandas_extensions.py b/hamilton/plugins/pyspark_pandas_extensions.py index 274289015..bb15bef1b 100644 --- a/hamilton/plugins/pyspark_pandas_extensions.py +++ b/hamilton/plugins/pyspark_pandas_extensions.py @@ -2,8 +2,8 @@ try: import pyspark.pandas as ps -except ImportError: - raise NotImplementedError("Pyspark is not installed.") +except ImportError as e: + raise NotImplementedError("Pyspark is not installed.") from e from hamilton import registry diff --git a/hamilton/plugins/sklearn_plot_extensions.py b/hamilton/plugins/sklearn_plot_extensions.py index 9ddcacebc..52cfe7dd5 100644 --- a/hamilton/plugins/sklearn_plot_extensions.py +++ b/hamilton/plugins/sklearn_plot_extensions.py @@ -7,8 +7,8 @@ import sklearn.metrics import sklearn.model_selection from matplotlib import pyplot -except ImportError: - raise NotImplementedError("scikit-learn is not installed.") +except ImportError as e: + raise NotImplementedError("scikit-learn is not installed.") from e from hamilton import registry diff --git a/hamilton/plugins/spark_extensions.py b/hamilton/plugins/spark_extensions.py index 268ca7e3e..6ab8cfd55 100644 --- a/hamilton/plugins/spark_extensions.py +++ b/hamilton/plugins/spark_extensions.py @@ -4,8 +4,8 @@ try: import pyspark.sql as ps -except ImportError: - raise NotImplementedError("Pyspark is not installed.") +except ImportError as e: + raise NotImplementedError("Pyspark is not installed.") from e from pandas import DataFrame from pyspark.sql import SparkSession diff --git a/hamilton/plugins/vaex_extensions.py b/hamilton/plugins/vaex_extensions.py index 2081ed4ef..208ff022f 100644 --- a/hamilton/plugins/vaex_extensions.py +++ b/hamilton/plugins/vaex_extensions.py @@ -6,8 +6,8 @@ try: import vaex -except ImportError: - raise NotImplementedError("Vaex is not installed.") +except ImportError as e: + raise NotImplementedError("Vaex is not installed.") from e DATAFRAME_TYPE = vaex.dataframe.DataFrame COLUMN_TYPE = vaex.expression.Expression diff --git a/hamilton/plugins/xgboost_extensions.py b/hamilton/plugins/xgboost_extensions.py index f20666465..230dfa868 100644 --- a/hamilton/plugins/xgboost_extensions.py +++ b/hamilton/plugins/xgboost_extensions.py @@ -4,8 +4,8 @@ try: import xgboost -except ImportError: - raise NotImplementedError("XGBoost is not installed.") +except ImportError as e: + raise NotImplementedError("XGBoost is not installed.") from e from hamilton import registry diff --git a/hamilton/plugins/yaml_extensions.py b/hamilton/plugins/yaml_extensions.py index aea49a00e..c09df64ca 100644 --- a/hamilton/plugins/yaml_extensions.py +++ b/hamilton/plugins/yaml_extensions.py @@ -1,7 +1,7 @@ try: import yaml -except ImportError: - raise NotImplementedError("yaml is not installed and is needed for yaml hamilton plugin") +except ImportError as e: + raise NotImplementedError("yaml is not installed and is needed for yaml hamilton plugin") from e import dataclasses import pathlib diff --git a/hamilton/registry.py b/hamilton/registry.py index 20260483e..a3d24aec7 100644 --- a/hamilton/registry.py +++ b/hamilton/registry.py @@ -196,7 +196,7 @@ def get_column_type_from_df_type(dataframe_type: Type) -> Type: :return: the column type. :raises: NotImplementedError if we don't know what the column type is. 
""" - for extension, type_map in DF_TYPE_AND_COLUMN_TYPES.items(): + for _extension, type_map in DF_TYPE_AND_COLUMN_TYPES.items(): if dataframe_type == type_map[DATAFRAME_TYPE]: return type_map[COLUMN_TYPE] raise NotImplementedError( diff --git a/hamilton/telemetry.py b/hamilton/telemetry.py index d497b4dcb..0a9523900 100644 --- a/hamilton/telemetry.py +++ b/hamilton/telemetry.py @@ -486,7 +486,7 @@ def get_result_builder_name(adapter: lifecycle_base.LifecycleAdapterSet) -> str: class_to_inspect = result_builders[0] # all_adapters = adapter.adapters if hasattr(class_to_inspect, "result_builder"): - class_to_inspect = getattr(class_to_inspect, "result_builder") + class_to_inspect = class_to_inspect.result_builder # Go by class itself if isinstance(class_to_inspect, base.StrictIndexTypePandasDataFrameResult): result_builder_name = "hamilton.base.StrictIndexTypePandasDataFrameResult" diff --git a/pyproject.toml b/pyproject.toml index bdc0d164e..5434a7ce5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -183,7 +183,7 @@ exclude = [ [tool.ruff.lint] extend-select = [ -# "B", # flake8-bugbear rules + "B", # flake8-bugbear rules # "C4", # Helps you write better list/set/dict comprehensions. "E", # pycodestyle errors "F", # pyflakes diff --git a/setup.py b/setup.py index b86fb306c..96a364194 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ with open("README.md") as readme_file: readme = readme_file.read() except Exception: - warnings.warn("README.md not found") + warnings.warn("README.md not found") # noqa readme = None diff --git a/tests/test_ad_hoc_utils.py b/tests/test_ad_hoc_utils.py index dd37c26cf..7f3a4ebd7 100644 --- a/tests/test_ad_hoc_utils.py +++ b/tests/test_ad_hoc_utils.py @@ -98,4 +98,4 @@ def _baz(bar: int) -> int: try: inspect.getsource(module.bar) except OSError as e: - assert False, f"module improperly added to linecache. {e}" + assert False, f"module improperly added to linecache. 
{e}" # noqa diff --git a/tests/test_node.py b/tests/test_node.py index c2da8cde1..6d6758f0c 100644 --- a/tests/test_node.py +++ b/tests/test_node.py @@ -112,7 +112,7 @@ class BrokenEquals: def __eq__(self, other): raise ValueError("I'm broken") - def foo(b: BrokenEquals = BrokenEquals()): + def foo(b: BrokenEquals = BrokenEquals()): # noqa pass param = DependencyType.from_parameter(inspect.signature(foo).parameters["b"]) diff --git a/ui/backend/server/tests/test_lifecycle/test_templates.py b/ui/backend/server/tests/test_lifecycle/test_templates.py index be9fdbb02..dbb41b0de 100644 --- a/ui/backend/server/tests/test_lifecycle/test_templates.py +++ b/ui/backend/server/tests/test_lifecycle/test_templates.py @@ -134,7 +134,7 @@ async def test_create_and_get_all_project_dag_templates(async_client: AsyncClien project_id, *_ = await _setup_sample_project(async_client, username) num_dag_templates = 4 dag_templates_created = [] - for i in range(num_dag_templates): + for _i in range(num_dag_templates): dag_template_to_generate = _generate_sample_dag_template( *_generate_some_sample_nodes(10, 5) ) diff --git a/ui/backend/server/trackingserver_projects/api.py b/ui/backend/server/trackingserver_projects/api.py index 50a5a2509..4eaaf4b73 100644 --- a/ui/backend/server/trackingserver_projects/api.py +++ b/ui/backend/server/trackingserver_projects/api.py @@ -125,7 +125,7 @@ async def get_project_by_id( try: project = await Project.objects.aget(id=project_id) except Project.DoesNotExist: - raise HttpError(404, f"Could not find project with ID: {project_id}") + raise HttpError(404, f"Could not find project with ID: {project_id}") from None role = await user_project_visibility(request, project=project) project_out = await ProjectOut.from_model(project, role) attributes = [ diff --git a/ui/backend/server/trackingserver_run_tracking/api.py b/ui/backend/server/trackingserver_run_tracking/api.py index f4f5de22e..dbebfacc7 100644 --- a/ui/backend/server/trackingserver_run_tracking/api.py +++ b/ui/backend/server/trackingserver_run_tracking/api.py @@ -105,7 +105,7 @@ async def get_latest_dag_runs( async def get_dag_runs( request, dag_run_ids: str, - attrs: List[str] = Query(default=None, alias="attr"), + attrs: List[str] = Query(default=None, alias="attr"), # noqa ) -> List[DAGRunOutWithData]: """Queries a DAG run with all the data. Note that you must pass an attribute filter, indicating @@ -169,7 +169,7 @@ async def update_dag_run(request, dag_run_id: int, dag_run: DAGRunUpdate) -> DAG try: dag_run_in_db = await DAGRun.objects.aget(id=dag_run_id) except DAGRun.DoesNotExist: - raise HttpError(404, f"DAG run with ID {dag_run_id} does not exist.") + raise HttpError(404, f"DAG run with ID {dag_run_id} does not exist.") from None for attr, value in dag_run.dict(exclude_unset=True).items(): if attr == "upsert_tags": upsert_tags = {} if dag_run.upsert_tags is None else dag_run.upsert_tags @@ -258,7 +258,7 @@ async def bulk_log( try: dag_run = await DAGRun.objects.aget(id=dag_run_id) except DAGRun.DoesNotExist: - raise HttpError(404, f"DAG run with ID {dag_run_id} does not exist.") + raise HttpError(404, f"DAG run with ID {dag_run_id} does not exist.") from None task_updates_to_save = process_task_updates(node_run_updates, dag_run_id=dag_run.id) # TODO -- determine if we can do this in one pass