Fix so that Python docs examples are skipped from formatting
This will exclude those code snippets from being formatted,
because we need them to stay skinny.

This also upgrades the pre-commit hooks (black 24.1.1, isort 5.13.2,
flake8 7.0.0) and fixes the issues arising from the new versions. Most
of the churn below comes from Black 24's 2024 style, which adds a blank
line after module docstrings and parenthesizes conditional expressions
that it splits across lines.
skrawcz committed Feb 1, 2024
1 parent 175a726 commit 3ea0068
Showing 59 changed files with 326 additions and 121 deletions.
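For context, the mechanism this commit relies on is pre-commit's top-level `exclude` key: its value is a Python regular expression, and any file whose path matches it is skipped by every hook in the config. A minimal sketch of that matching, with hypothetical file paths (re.search semantics are my understanding of pre-commit's behavior):

import re

# Files whose path matches the top-level `exclude` regex are skipped by all hooks.
EXCLUDE = re.compile(r"^docs/code-comparisons/")

for path in [
    "docs/code-comparisons/snippet.py",  # hypothetical path: matched, so skipped
    "hamilton/base.py",                  # not matched, so still formatted and linted
]:
    print(path, "->", "skipped" if EXCLUDE.search(path) else "checked")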
10 changes: 5 additions & 5 deletions .pre-commit-config.yaml
@@ -3,10 +3,10 @@
# Then install the hooks within the repo:
# $ cd /PATH/TO/REPO
# $ pre-commit install

exclude: '^docs/code-comparisons/' # skip the code comparisons directory
repos:
- repo: https://github.com/ambv/black
-    rev: 23.11.0
+    rev: 24.1.1
hooks:
- id: black
args: [--line-length=100, --exclude=docs/*]
@@ -22,15 +22,15 @@ repos:
- id: check-ast
# isort python package import sorting
- repo: https://github.com/pycqa/isort
-    rev: '5.12.0'
+    rev: '5.13.2'
hooks:
- id: isort
args: ["--profile", "black",
"--line-length=100",
"--extend-skip=docs/*/*/*.py",
"--skip=docs/",
"--known-local-folder",
"tests", "-p", "hamilton"]
- repo: https://github.com/pycqa/flake8
-    rev: 6.1.0
+    rev: 7.0.0
hooks:
- id: flake8
1 change: 1 addition & 0 deletions contrib/docs/compile_docs.py
@@ -10,6 +10,7 @@
dataflow python files and information we have.
6. We then will trigger a build of the docs; the docs can serve the latest commit version!
"""
+
import json
import os
import shutil
(another changed file; filename not shown)
@@ -22,6 +22,7 @@
SOFTWARE.
----------------------------------------------------------------------------------------------
"""
+
import logging
import os
import pickle # for saving the embeddings cache
@@ -42,7 +43,9 @@
import plotly.express as px # for plots
import plotly.graph_objs as go # for plot object type
import requests
-from sklearn.model_selection import train_test_split  # for splitting train & test data
+from sklearn.model_selection import (
+    train_test_split,
+)  # for splitting train & test data
import torch # for matrix optimization
from tenacity import retry, stop_after_attempt, wait_random_exponential

@@ -243,8 +246,14 @@ def test_df_negatives(base_test_df: pd.DataFrame) -> pd.DataFrame:


@parameterize(
train_df={"base_df": source("base_train_df"), "df_negatives": source("train_df_negatives")},
test_df={"base_df": source("base_test_df"), "df_negatives": source("test_df_negatives")},
train_df={
"base_df": source("base_train_df"),
"df_negatives": source("train_df_negatives"),
},
test_df={
"base_df": source("base_test_df"),
"df_negatives": source("test_df_negatives"),
},
)
def construct_df(
base_df: pd.DataFrame,
@@ -631,7 +640,9 @@ def mse_loss(predictions, targets):
@inject(
optimization_result_matrices=group(*[source(k) for k in optimization_parameterization.keys()])
)
-def optimization_results(optimization_result_matrices: List[pd.DataFrame]) -> pd.DataFrame:
+def optimization_results(
+    optimization_result_matrices: List[pd.DataFrame],
+) -> pd.DataFrame:
"""Combine optimization results into one dataframe."""
return pd.concat(optimization_result_matrices)

@@ -685,7 +696,9 @@ def customized_embeddings_dataframe(
return embedded_data_set


-def customized_dataset_histogram(customized_embeddings_dataframe: pd.DataFrame) -> go.Figure:
+def customized_dataset_histogram(
+    customized_embeddings_dataframe: pd.DataFrame,
+) -> go.Figure:
"""Plot histogram of cosine similarities for the new customized embeddings.
The graphs show how much the overlap there is between the distribution of cosine similarities for similar and
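An aside for readers new to Hamilton: the `@parameterize` usage above stamps out one DAG node per dictionary key, so a single function definition yields both a `train_df` and a `test_df` node wired to different upstream sources. A rough, self-contained sketch of the pattern; the function body here is a toy stand-in, not the repo's actual logic:

import pandas as pd

from hamilton.function_modifiers import parameterize, source


@parameterize(
    train_df={"base_df": source("base_train_df"), "df_negatives": source("train_df_negatives")},
    test_df={"base_df": source("base_test_df"), "df_negatives": source("test_df_negatives")},
)
def construct_df(base_df: pd.DataFrame, df_negatives: pd.DataFrame) -> pd.DataFrame:
    # Toy body: concatenate the base rows with their negative examples.
    return pd.concat([base_df, df_negatives], ignore_index=True)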
28 changes: 16 additions & 12 deletions docs/data_adapters_extension.py
@@ -107,18 +107,22 @@ def from_loader(loader: Type[hamilton.io.data_adapters.DataLoader]) -> "AdapterI
key=loader.name(),
class_name=loader.__name__,
class_path=loader.__module__,
-            load_params=[
-                Param(name=p.name, type=get_class_repr(p.type), default=get_default(p))
-                for p in dataclasses.fields(loader)
-            ]
-            if issubclass(loader, hamilton.io.data_adapters.DataLoader)
-            else None,
-            save_params=[
-                Param(name=p.name, type=get_class_repr(p.type), default=get_default(p))
-                for p in dataclasses.fields(loader)
-            ]
-            if issubclass(loader, hamilton.io.data_adapters.DataSaver)
-            else None,
+            load_params=(
+                [
+                    Param(name=p.name, type=get_class_repr(p.type), default=get_default(p))
+                    for p in dataclasses.fields(loader)
+                ]
+                if issubclass(loader, hamilton.io.data_adapters.DataLoader)
+                else None
+            ),
+            save_params=(
+                [
+                    Param(name=p.name, type=get_class_repr(p.type), default=get_default(p))
+                    for p in dataclasses.fields(loader)
+                ]
+                if issubclass(loader, hamilton.io.data_adapters.DataSaver)
+                else None
+            ),
applicable_types=[get_class_repr(t) for t in loader.applicable_types()],
file_=inspect.getfile(loader),
line_nos=get_lines_for_class(loader),
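The load_params/save_params rewrite above is purely mechanical: Black 24's 2024 style wraps a conditional expression in its own parentheses whenever it has to split one across lines, where Black 23 left it bare. A tiny runnable illustration of the output shape, with toy names:

xs = range(3)
some_condition = True
value = (  # Black 24 adds these parentheses when splitting the conditional
    [x * x for x in xs]
    if some_condition
    else None
)
print(value)  # [0, 1, 4]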
1 change: 1 addition & 0 deletions examples/LLM_Workflows/knowledge_retrieval/functions.py
@@ -1,4 +1,5 @@
"""Module to house functions for an LLM agent to use."""
+
import logging

import arxiv_articles
1 change: 1 addition & 0 deletions examples/LLM_Workflows/knowledge_retrieval/state.py
@@ -2,6 +2,7 @@
Module that contains code to house state for an agent. The dialog
right now is hardcoded at the bottom of this file.
"""
+
import json
import logging
import sys
1 change: 1 addition & 0 deletions examples/airflow/plugins/function_modules/data_loaders.py
@@ -7,6 +7,7 @@
(2) instead of @config.when* we could instead move these functions into specific independent modules, and then in
the driver choose which one to use for the DAG. For the purposes of this example, we decided one file is simpler.
"""
+
from typing import List

import pandas as pd
1 change: 1 addition & 0 deletions examples/airflow/plugins/function_modules/feature_logic.py
@@ -13,6 +13,7 @@
integration - see `examples/data_quality/pandera` for an example.
"""
+
import numpy as np
import pandas as pd

1 change: 1 addition & 0 deletions examples/data_quality/pandera/data_loaders.py
@@ -9,6 +9,7 @@
(2) instead of @config.when* we could instead move these functions into specific independent modules, and then in
the driver choose which one to use for the DAG. For the purposes of this example, we decided one file is simpler.
"""
+
from typing import List

import pandas as pd
1 change: 1 addition & 0 deletions examples/data_quality/pandera/feature_logic.py
@@ -16,6 +16,7 @@
(4) If you require dataframe validation - see the examples here.
"""
+
import numpy as np
import pandas as pd
import pandera as pa
1 change: 1 addition & 0 deletions examples/data_quality/pandera/feature_logic_spark.py
@@ -8,6 +8,7 @@
2. The data type checks on the output of functions are different. E.g. float vs np.float64. Execution on spark
results in different data types.
"""
+
import numpy as np
import pandas as pd
import pandera as pa
1 change: 1 addition & 0 deletions examples/data_quality/pandera/run_ray.py
@@ -13,6 +13,7 @@
To run:
> python run_ray.py
"""
+
import logging
import sys

1 change: 1 addition & 0 deletions examples/data_quality/simple/data_loaders.py
@@ -7,6 +7,7 @@
(2) instead of @config.when* we could instead move these functions into specific independent modules, and then in
the driver choose which one to use for the DAG. For the purposes of this example, we decided one file is simpler.
"""
+
from typing import List

import pandas as pd
1 change: 1 addition & 0 deletions examples/data_quality/simple/feature_logic.py
@@ -13,6 +13,7 @@
integration - see `examples/data_quality/pandera` for an example.
"""
+
import numpy as np
import pandas as pd

1 change: 1 addition & 0 deletions examples/data_quality/simple/run_ray.py
@@ -13,6 +13,7 @@
To run:
> python run_ray.py
"""
+
import logging
import sys

1 change: 1 addition & 0 deletions examples/dbt/python_transforms/data_loader.py
@@ -1,6 +1,7 @@
"""
This module contains our data loading functions.
"""
+
from typing import List

import pandas as pd
1 change: 1 addition & 0 deletions examples/dbt/python_transforms/feature_transforms.py
@@ -1,6 +1,7 @@
"""
This is a module that contains our feature transforms.
"""
+
import pickle
from typing import Set

5 changes: 4 additions & 1 deletion examples/dbt/python_transforms/model_pipeline.py
@@ -1,6 +1,7 @@
"""
This is a module that contains our "model fitting and related" transforms.
"""
+
import pickle
from typing import Dict

@@ -43,7 +44,9 @@ def train_test_split(

@config.when(model_to_use="create_new")
def fit_model__create_new(
-    model_classifier: base.ClassifierMixin, train_set: pd.DataFrame, target_column_name: str
+    model_classifier: base.ClassifierMixin,
+    train_set: pd.DataFrame,
+    target_column_name: str,
) -> base.ClassifierMixin:
"""Fits a new model.
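For readers unfamiliar with `@config.when` (used above): Hamilton includes a decorated variant in the DAG only when the driver's config matches, and everything from the double underscore onward is stripped from the node name, so `fit_model__create_new` shows up as `fit_model`. A hedged sketch; the second variant, its config value, and the simplified types are hypothetical, not from this repo:

from hamilton.function_modifiers import config


@config.when(model_to_use="create_new")
def fit_model__create_new(train_set: dict, target_column_name: str) -> str:
    # Included as node `fit_model` when the driver config has model_to_use="create_new".
    return "fit a fresh model"


@config.when(model_to_use="reuse_existing")  # hypothetical alternative variant
def fit_model__reuse_existing(model_path: str) -> str:
    # Included as node `fit_model` instead when model_to_use="reuse_existing".
    return "load model from " + model_path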
1 change: 1 addition & 0 deletions examples/decoupling_io/components/feature_data.py
@@ -1,6 +1,7 @@
"""
This is a module that contains our feature transforms.
"""
+
from typing import Dict, List, Set

import pandas as pd
(another changed file; filename not shown)
@@ -5,6 +5,7 @@
Here we ONLY use Hamilton to create the features for your training set, with comment stubs for the rest of the ETL
that would normally be here.
"""
+
import features
import named_model_feature_sets
import offline_loader
(another changed file; filename not shown)
@@ -9,6 +9,7 @@
Note (2): we can tag the `aggregation` features with whatever key value pair makes sense
for us to discern/identify that we should not compute these features in an online setting.
"""
+
import pandas as pd
import pandera as pa

(another changed file; filename not shown)
@@ -17,6 +17,7 @@
for input to create features easily with Hamilton. Between these two options you should be able to find a solution
that works for you. If not, come ask us in slack.
"""
+
import features
import named_model_feature_sets
import offline_loader
(another changed file; filename not shown)
@@ -10,6 +10,7 @@
This means they need to be satisfied by either being passed in, or having another module define them.
We do the latter for this example, but having online_loader define them.
"""
+
import pandas as pd
import pandera as pa

(another changed file; filename not shown)
@@ -7,6 +7,7 @@
This will print out predictions as they are computed.
"""
+
import datetime
import logging
import pathlib
@@ -46,7 +47,8 @@ def hamilton_predict(payload: dict):
for int_key in ["client_id", "budget", "age"]:
payload[int_key] = int(float(payload[int_key]))
series_out = dr.execute(
["predictions"], inputs={"survey_event": payload, "execution_time": datetime.datetime.now()}
["predictions"],
inputs={"survey_event": payload, "execution_time": datetime.datetime.now()},
)["predictions"]
return {"prediction": series_out.values[0], "client_id": payload["client_id"]}

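The `dr.execute` call reformatted above is Hamilton's standard driver entry point: you request output nodes by name and pass runtime inputs. A minimal sketch of how such a driver is typically built; the module name and input values here are hypothetical:

import datetime

from hamilton import driver

import my_transforms  # hypothetical module that defines a `predictions` function

dr = driver.Builder().with_modules(my_transforms).build()
series_out = dr.execute(
    ["predictions"],
    inputs={"survey_event": {"client_id": 1}, "execution_time": datetime.datetime.now()},
)["predictions"]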
1 change: 1 addition & 0 deletions examples/lineage/lineage_script.py
@@ -2,6 +2,7 @@
It mirrors the code that was presented for the Lineage + Hamilton in 10 minutes blog post.
"""
+
import pprint

import data_loading
6 changes: 5 additions & 1 deletion examples/numpy/air-quality-analysis/analysis_flow.py
@@ -13,6 +13,7 @@
* In real life, data is generally not normally distributed. There are tests for such non-normal data like the
Wilcoxon test.
"""
+
import typing
from functools import partial

@@ -199,7 +200,10 @@ def after_lock(


def before_lock(
-    aqi_array: np.ndarray, datetime_index: np.ndarray, after_lock: np.ndarray, before_lock_date: str
+    aqi_array: np.ndarray,
+    datetime_index: np.ndarray,
+    after_lock: np.ndarray,
+    before_lock_date: str,
) -> np.ndarray:
"""Grab period before lock down."""
return aqi_array[np.where(datetime_index <= np.datetime64(before_lock_date))][
1 change: 1 addition & 0 deletions examples/spark/pyspark_udfs/pandas_udfs.py
@@ -16,6 +16,7 @@
5. You can have non-pandas_udf functions in the same file, and will be run as row based UDFs.
"""
+
import pandas as pd

from hamilton.htypes import column
1 change: 1 addition & 0 deletions hamilton/ad_hoc_utils.py
@@ -1,4 +1,5 @@
"""A suite of tools for ad-hoc use"""
+
import sys
import types
import uuid
1 change: 1 addition & 0 deletions hamilton/base.py
@@ -2,6 +2,7 @@
It should only import hamilton.node, numpy, pandas.
It cannot import hamilton.graph, or hamilton.driver.
"""
+
import abc
import collections
import logging
1 change: 1 addition & 0 deletions hamilton/contrib/__init__.py
@@ -2,6 +2,7 @@
It will get clobbered when sf-hamilton-contrib is installed, which is good.
"""
+
import logging
from contextlib import contextmanager

(remaining changed files not shown)