From 2b04f97ccaef7ff5c9c228623db1cd7d2d676d48 Mon Sep 17 00:00:00 2001 From: Siddharth Chaini <40721514+sidchaini@users.noreply.github.com> Date: Mon, 21 Oct 2024 11:43:07 -0400 Subject: [PATCH] lint: Fixed flake8 fail --- distclassipy/classifier.py | 4 +- distclassipy/robustness.py | 106 ++++++++++++++++++------------------- 2 files changed, 55 insertions(+), 55 deletions(-) diff --git a/distclassipy/classifier.py b/distclassipy/classifier.py index 12a0b40..2aeb7ae 100644 --- a/distclassipy/classifier.py +++ b/distclassipy/classifier.py @@ -28,10 +28,10 @@ import scipy from sklearn.base import BaseEstimator, ClassifierMixin -from sklearn.utils.multiclass import unique_labels -from sklearn.utils.validation import check_X_y, check_array, check_is_fitted from sklearn.metrics import accuracy_score from sklearn.model_selection import train_test_split +from sklearn.utils.multiclass import unique_labels +from sklearn.utils.validation import check_X_y, check_array, check_is_fitted from .distances import Distance, _ALL_METRICS diff --git a/distclassipy/robustness.py b/distclassipy/robustness.py index 37dd20b..34261c8 100644 --- a/distclassipy/robustness.py +++ b/distclassipy/robustness.py @@ -1,70 +1,70 @@ -from typing import Callable, Tuple +# from typing import Callable, Tuple -import numpy as np +# import numpy as np -import pandas as pd +# import pandas as pd -import scipy +# import scipy -from sklearn.base import BaseEstimator, ClassifierMixin -from sklearn.utils.multiclass import unique_labels -from sklearn.utils.validation import check_X_y, check_array, check_is_fitted -from sklearn.model_selection import train_test_split -from sklearn.metrics import accuracy_score +# from sklearn.base import BaseEstimator, ClassifierMixin +# from sklearn.utils.multiclass import unique_labels +# from sklearn.utils.validation import check_X_y, check_array, check_is_fitted +# from sklearn.model_selection import train_test_split +# from sklearn.metrics import accuracy_score -from .distances import Distance, _ALL_METRICS +# from .distances import Distance, _ALL_METRICS -def assemble_best_classifier( - clf: BaseEstimator, - X: np.ndarray, - y: np.ndarray, - feat_idx: int, - n_quantiles: int = 4, - metrics_to_consider: list = None, -) -> tuple: - X = check_array(X) - feature_labels = [f"Feature_{i}" for i in range(X.shape[1])] - feature_name = f"Feature_{feat_idx}" +# def assemble_best_classifier( +# clf: BaseEstimator, +# X: np.ndarray, +# y: np.ndarray, +# feat_idx: int, +# n_quantiles: int = 4, +# metrics_to_consider: list = None, +# ) -> tuple: +# X = check_array(X) +# feature_labels = [f"Feature_{i}" for i in range(X.shape[1])] +# feature_name = f"Feature_{feat_idx}" - if metrics_to_consider is None: - metrics_to_consider = _ALL_METRICS +# if metrics_to_consider is None: +# metrics_to_consider = _ALL_METRICS - X_df = pd.DataFrame(X, columns=feature_labels) - y_df = pd.DataFrame(y, columns=["Target"]) - quantiles = pd.qcut(X_df[feature_name], q=n_quantiles) +# X_df = pd.DataFrame(X, columns=feature_labels) +# y_df = pd.DataFrame(y, columns=["Target"]) +# quantiles = pd.qcut(X_df[feature_name], q=n_quantiles) - X_train, X_test, y_train, y_test = train_test_split( - X_df, y_df, test_size=0.33, stratify=quantiles - ) +# X_train, X_test, y_train, y_test = train_test_split( +# X_df, y_df, test_size=0.33, stratify=quantiles +# ) - clf.fit(X_train, y_train.to_numpy().ravel()) - grouped_test_data = X_test.groupby(quantiles, observed=False) +# clf.fit(X_train, y_train.to_numpy().ravel()) +# grouped_test_data = X_test.groupby(quantiles, observed=False) - quantile_scores = [] - for metric in metrics_to_consider: - scores_for_metric = [ - accuracy_score( - y_test.loc[subdf.index], clf.predict(subdf.to_numpy(), metric=metric) - ) - for _, subdf in grouped_test_data - ] - quantile_scores.append(scores_for_metric) +# quantile_scores = [] +# for metric in metrics_to_consider: +# scores_for_metric = [ +# accuracy_score( +# y_test.loc[subdf.index], clf.predict(subdf.to_numpy(), metric=metric) +# ) +# for _, subdf in grouped_test_data +# ] +# quantile_scores.append(scores_for_metric) - quantile_scores = np.array(quantile_scores) * 100 - quantile_scores_df = pd.DataFrame( - data=quantile_scores, - index=metrics_to_consider, - columns=[f"Quantile {i+1}" for i in range(n_quantiles)], - ) +# quantile_scores = np.array(quantile_scores) * 100 +# quantile_scores_df = pd.DataFrame( +# data=quantile_scores, +# index=metrics_to_consider, +# columns=[f"Quantile {i+1}" for i in range(n_quantiles)], +# ) - best_metrics_per_quantile = quantile_scores_df.idxmax() +# best_metrics_per_quantile = quantile_scores_df.idxmax() - # todo for pred during best: - # loop through each metric, merge quantiles for each metric - # pred on this +# # todo for pred during best: +# # loop through each metric, merge quantiles for each metric +# # pred on this - # alt, but slower: - # loop through each quantile, and append pred +# # alt, but slower: +# # loop through each quantile, and append pred - return quantile_scores_df, best_metrics_per_quantile +# return quantile_scores_df, best_metrics_per_quantile