Use Ruff instead of Black+isort+Flake8 #1089

Merged: 10 commits, Aug 9, 2024
35 changes: 10 additions & 25 deletions .pre-commit-config.yaml
@@ -5,21 +5,16 @@
 # $ pre-commit install
 exclude: '^docs/code-comparisons/'  # skip the code comparisons directory
 repos:
-  - repo: https://github.com/ambv/black
-    rev: 24.8.0
-    hooks:
-      - id: black
-        args: [--line-length=100, --exclude=docs/*]
-  #- repo: https://github.com/astral-sh/ruff-pre-commit
-  #  # Ruff version.
-  #  rev: v0.5.6
-  #  hooks:
-  #    # Run the linter.
-  #    - id: ruff
-  #      args: [ --fix ]
-  #    # Run the formatter.
-  #    - id: ruff-format
-  #      args: [ --diff ]
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    # Ruff version.
+    rev: v0.5.7
+    hooks:
+      # Run the linter.
+      - id: ruff
+        args: [ --fix ]
+      # Run the formatter.
+      - id: ruff-format
+        # args: [ --diff ] # Use for previewing changes
   - repo: https://github.com/pre-commit/pre-commit-hooks
     rev: v4.6.0
     hooks:
@@ -30,16 +25,6 @@ repos:
       - id: requirements-txt-fixer
       # valid python file
      - id: check-ast
-  # isort python package import sorting
-  - repo: https://github.com/pycqa/isort
-    rev: 5.13.2
-    hooks:
-      - id: isort
-        args: ["--profile", "black",
-               "--line-length=100",
-               "--skip=docs/",
-               "--known-local-folder", "tests",
-               "-p", "hamilton"]
   - repo: https://github.com/pycqa/flake8
     rev: 7.1.1
     hooks:
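With this change, Black, isort, and the previously commented-out Ruff block collapse into the single ruff-pre-commit repo above. For reference, a sketch of how the new checks run locally, using standard pre-commit and Ruff CLI invocations:

```bash
# run every configured hook against the whole repo, as CI would
pre-commit run --all-files

# or call Ruff directly: lint with autofixes, then format
ruff check --fix .
ruff format .
```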
2 changes: 1 addition & 1 deletion README-DOCS.md
@@ -7,7 +7,7 @@ Instructions for managing documentation on read the docs.
 To build locally, you need to run the following -- make sure you're in the root of the repo:
 
 ```bash
-pip install -r requirements-docs.txt
+pip install .[docs]
 ```
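Since the docs dependencies now come from a pyproject extra rather than a separate requirements file, the install step becomes (a minimal sketch; quoting the argument just keeps shells like zsh from globbing the brackets):

```bash
pip install ".[docs]"
```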
@@ -152,7 +152,6 @@ def collect_chunked_url_text(url_result: Collect[dict]) -> list:
 if __name__ == "__main__":
     # code here for quickly testing the build of the code here.
     import __main__ as sphinx_doc_chunking
-
     from hamilton import driver
     from hamilton.execution import executors
 
@@ -8,11 +8,11 @@
 from hamilton import contrib
 
 with contrib.catch_import_errors(__name__, __file__, logger):
+    import litellm
     import tiktoken
     from pypdf import PdfReader
     from tenacity import retry, stop_after_attempt, wait_random_exponential
     from tqdm import tqdm
-    import litellm
 
 from hamilton.function_modifiers import config
 
@@ -257,7 +257,6 @@ def p_value(t_value: np.ndarray, dof: int) -> float:
 
 if __name__ == "__main__":
     import __main__ as analysis_flow
-
     from hamilton import base, driver
 
     # let's create a dictionary result -- since we want to get a few things from execution for inspection
@@ -43,10 +43,10 @@
 import plotly.express as px  # for plots
 import plotly.graph_objs as go  # for plot object type
 import requests
+import torch  # for matrix optimization
 from sklearn.model_selection import (
     train_test_split,
 )  # for splitting train & test data
-import torch  # for matrix optimization
 from tenacity import retry, stop_after_attempt, wait_random_exponential
 
 from hamilton.function_modifiers import (
@@ -134,7 +134,8 @@ def snli_dataset(download_path: str = "data") -> pd.DataFrame:
 @config.when(source="snli")
 @check_output(schema=processed_dataset_schema, importance="fail")
 def processed_local_dataset__snli(
-    snli_dataset: pd.DataFrame, num_pairs_to_embed: int = 1000  # 1000 is arbitrary
+    snli_dataset: pd.DataFrame,
+    num_pairs_to_embed: int = 1000,  # 1000 is arbitrary
 ) -> pd.DataFrame:
     """Processes a raw dataset into a dataframe of text pairs to embed; and check that it matches the schema.
 
8 changes: 4 additions & 4 deletions contrib/hamilton/contrib/user/skrawcz/fine_tuning/__init__.py
@@ -9,10 +9,11 @@
 
 with contrib.catch_import_errors(__name__, __file__, logger):
     import evaluate
-    from datasets.combine import DatasetType
-    from datasets import Dataset, concatenate_datasets
     import numpy as np
     import pandas as pd
+    import torch
+    from datasets import Dataset, concatenate_datasets
+    from datasets.combine import DatasetType
     from peft import (
         LoraConfig,
         PeftConfig,
@@ -22,15 +23,14 @@
         prepare_model_for_int8_training,
     )
     from sklearn.model_selection import train_test_split
-    import torch
     from tqdm import tqdm
     from transformers import (
         AutoModelForSeq2SeqLM,
         AutoTokenizer,
         DataCollatorForSeq2Seq,
+        PreTrainedTokenizerBase,
         Seq2SeqTrainer,
         Seq2SeqTrainingArguments,
-        PreTrainedTokenizerBase,
     )
 
 from hamilton.function_modifiers import extract_fields
@@ -7,10 +7,10 @@
 from hamilton import contrib
 
 with contrib.catch_import_errors(__name__, __file__, logger):
-    import pyarrow as pa
     import lancedb
     import numpy as np
     import pandas as pd
+    import pyarrow as pa
     from lancedb.pydantic import LanceModel
 
 from hamilton.function_modifiers import tag
2 changes: 1 addition & 1 deletion contrib/hamilton/contrib/user/zilto/webscraper/__init__.py
@@ -6,9 +6,9 @@
 from hamilton import contrib
 
 with contrib.catch_import_errors(__name__, __file__, logger):
-    from bs4 import BeautifulSoup
     import lxml  # noqa: F401
     import requests
+    from bs4 import BeautifulSoup
     from tenacity import retry, stop_after_attempt, wait_random_exponential
 
 import dataclasses
@@ -8,10 +8,10 @@
 
 with contrib.catch_import_errors(__name__, __file__, logger):
     import numpy as np
+    import optuna
     import pandas as pd
     import xgboost
-    import optuna
-    from optuna.distributions import IntDistribution, FloatDistribution
+    from optuna.distributions import FloatDistribution, IntDistribution
     from sklearn.metrics import accuracy_score, mean_squared_error
     from sklearn.model_selection import KFold, StratifiedKFold
 
2 changes: 1 addition & 1 deletion dev_tools/language_server/hamilton_lsp/server.py
@@ -2,7 +2,6 @@
 import re
 from typing import Type
 
-from hamilton_lsp import __version__
 from lsprotocol.types import (
     TEXT_DOCUMENT_COMPLETION,
     TEXT_DOCUMENT_DID_CHANGE,
@@ -28,6 +27,7 @@
 from hamilton import ad_hoc_utils
 from hamilton.graph import FunctionGraph, create_graphviz_graph
 from hamilton.graph_types import HamiltonGraph
+from hamilton_lsp import __version__
 
 
 def _type_to_string(type_: Type):
1 change: 1 addition & 0 deletions dev_tools/language_server/tests/conftest.py
@@ -17,6 +17,7 @@
 # limitations under the License.                                           #
 ############################################################################
 import pytest
+
 from hamilton_lsp.server import HamiltonLanguageServer, register_server_features
 
 from .ls_setup import NativeClientServer
5 changes: 1 addition & 4 deletions developer_setup.md
@@ -26,10 +26,7 @@ Install the project's dependencies in your preferred method for managing python
 python -m venv ./venv
 . ./venv/bin/activate
 
-pip install \
-    -r ./requirements.txt \
-    -r ./requirements-dev.txt \
-    -r ./requirements-test.txt
+pip install .[dev,test]
 ```
 
 Set up `pre-commit`, which will run some lightweight formatting and linting tasks on every commit.
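Putting the new extras together, a fresh development environment can be bootstrapped roughly as follows. This is a sketch: the editable `-e` flag is an assumption on my part, and it relies on the `dev` and `test` extras this PR points developers at.

```bash
python -m venv ./venv
. ./venv/bin/activate

# editable install with the dev and test extras; quotes guard against shell globbing
pip install -e ".[dev,test]"

# install the git hooks so Ruff lints and formats on every commit
pre-commit install
```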
@@ -159,7 +159,6 @@ def store(
 
 if __name__ == "__main__":
     import __main__ as doc_pipeline
-
     from hamilton import driver
 
     # create the driver
@@ -38,7 +38,6 @@ def joke_response__anthropic(llm_client: anthropic.Anthropic, joke_prompt: str)
 
 if __name__ == "__main__":
     import hamilton_invoke_anthropic
-
     from hamilton import driver
 
     dr = (
@@ -28,7 +28,6 @@ async def joke_response(llm_client: openai.AsyncOpenAI, joke_messages: List[dict
     import asyncio
 
     import hamilton_async
-
     from hamilton import base
     from hamilton.experimental import h_async
 
@@ -38,7 +38,6 @@ def joke_responses(joke_response: Collect[str]) -> List[str]:
 
 if __name__ == "__main__":
     import hamilton_batch
-
     from hamilton import driver
 
     dr = (
@@ -38,7 +38,6 @@ def joke_response__chat(llm_client: openai.OpenAI, joke_messages: List[dict]) ->
 
 if __name__ == "__main__":
     import hamilton_completion
-
     from hamilton import driver
 
     dr = (
@@ -1,5 +1,4 @@
 import hamilton_anthropic
-
 from hamilton import driver
 
 anthropic_driver = (
@@ -26,7 +26,6 @@ def joke_response(llm_client: openai.OpenAI, joke_messages: List[dict]) -> str:
 
 if __name__ == "__main__":
     import hamilton_invoke
-
     from hamilton import driver
 
     dr = driver.Builder().with_modules(hamilton_invoke).build()
@@ -1,6 +1,5 @@
 # run.py
 import hamilton_anthropic
-
 from hamilton import driver, lifecycle
 
 dr = (
@@ -28,7 +28,6 @@ def joke_response(llm_client: openai.OpenAI, joke_messages: List[dict]) -> Itera
 
 if __name__ == "__main__":
     import hamilton_streaming
-
     from hamilton import driver
 
     dr = driver.Builder().with_modules(hamilton_streaming).build()
1 change: 0 additions & 1 deletion examples/LLM_Workflows/modular_llm_stack/qdrant_module.py
@@ -11,7 +11,6 @@ def initialize_vector_db_indices(
     class_name: str,
     embedding_dimension: int,
 ) -> bool:
-
     if client_vector_db.collection_exists(class_name):
         client_vector_db.delete_collection(class_name)
 
1 change: 0 additions & 1 deletion examples/LLM_Workflows/scraping_and_chunking/run.py
@@ -13,7 +13,6 @@
 from hamilton.execution import executors
 
 if __name__ == "__main__":
-
     dr = (
         driver.Builder()
         .with_modules(doc_pipeline)
@@ -70,7 +70,6 @@ def chunked_url_text(urls_from_sitemap: ps.DataFrame) -> ps.DataFrame:
 
 
 if __name__ == "__main__":
-
     import spark_pipeline
 
     from hamilton import driver
2 changes: 1 addition & 1 deletion examples/airflow/dags/hamilton/hamilton_how_to_dag.py
@@ -1,4 +1,4 @@
-""" This file shows different usage pattern to integrate Hamilton with Apache Airflow
+"""This file shows different usage pattern to integrate Hamilton with Apache Airflow
 
 For the purpose of this example, we will read and write data from the Airflow
 installation location (${AIRFLOW_HOME}/plugins/data).
2 changes: 1 addition & 1 deletion examples/async/fastapi_example.py
@@ -5,9 +5,9 @@
 import async_module
 import fastapi
 from aiohttp import client_exceptions
-from hamilton_sdk import adapters
 
 from hamilton import async_driver
+from hamilton_sdk import adapters
 
 logger = logging.getLogger(__name__)
 
3 changes: 1 addition & 2 deletions examples/aws/glue/processing.py
@@ -4,12 +4,11 @@
 
 # awsglue is installed in the AWS Glue worker environment
 from awsglue.utils import getResolvedOptions
-from hamilton_functions import functions
 
 from hamilton import driver
+from hamilton_functions import functions
 
 if __name__ == "__main__":
-
     args = getResolvedOptions(sys.argv, ["input-table", "output-table"])
 
     df = pd.read_csv(args["input_table"])
1 change: 0 additions & 1 deletion examples/aws/lambda/app/lambda_handler.py
@@ -6,7 +6,6 @@
 
 
 def lambda_handler(event, context):
-
     df = pd.DataFrame(**event["body"])
 
     dr = driver.Driver({}, functions)
1 change: 0 additions & 1 deletion examples/aws/sagemaker/processing.py
@@ -4,7 +4,6 @@
 from hamilton import driver
 
 if __name__ == "__main__":
-
     df = pd.read_csv("/opt/ml/processing/input/data/input_table.csv")
 
     dr = driver.Driver({}, functions)
@@ -130,6 +130,7 @@ class DataGeneratorResource(ConfigurableResource):
 from dagster import Definitions, asset
 from dagster_data_generator import DataGeneratorResource, DataGeneratorConfig
 
+
 @asset
 def my_table(data_gen: DataGeneratorConfig):
     return data_gen.get_signups()
1 change: 1 addition & 0 deletions examples/dagster/hamilton_code/mock_api.py
@@ -131,6 +131,7 @@ class DataGeneratorResource(ConfigurableResource):
 from dagster import Definitions, asset
 from dagster_data_generator import DataGeneratorResource, DataGeneratorConfig
 
+
 @asset
 def my_table(data_gen: DataGeneratorConfig):
     return data_gen.get_signups()
6 changes: 4 additions & 2 deletions examples/dbt/python_transforms/feature_transforms.py
@@ -8,8 +8,10 @@
 import pandas as pd
 
 # from sklearn.preprocessing import OneHotEncoder
-from sklearn import impute  # import KNNImputer
-from sklearn import preprocessing
+from sklearn import (
+    impute,  # import KNNImputer
+    preprocessing,
+)
 
 from hamilton.function_modifiers import check_output, config
 
4 changes: 1 addition & 3 deletions examples/due_date_probabilities/probability_estimation.py
@@ -98,9 +98,7 @@ def raw_data() -> str:
 43 weeks, 3 days 99.8% 0.1% 0.1%
 43 weeks, 4 days 99.9% 0.1% < 0.1%
 43 weeks, 5 days 99.9% < 0.1% < 0.1%
-43 weeks, 6 days > 99.9% < 0.1%""".replace(
-    "weeks\t", "weeks, 0 days "
-)
+43 weeks, 6 days > 99.9% < 0.1%""".replace("weeks\t", "weeks, 0 days ")
 .replace("\t", " ")
 .split("\n")
 )
@@ -1,4 +1,4 @@
-""""
+""" "
 This is a simple example of a FastAPI server that uses Hamilton on the request
 path to transform the data into features, and then uses a fake model to make
 a prediction.