
Commit 3ef9b40

Pass pandas DataFrame to estimator when input is a pandas DataFrame
ClaudioSalvatoreArcidiacono committed Jan 15, 2024
1 parent d6ee3f2 commit 3ef9b40
Showing 2 changed files with 36 additions and 10 deletions.
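
Before this change, fit() always sliced the input positionally (X[:, features]), so the wrapped
estimator only ever received NumPy arrays even when the caller passed a pandas DataFrame. After
the change, DataFrame inputs are subset by column name and forwarded as DataFrames, which lets
estimators that rely on column names (for example a Pipeline containing a ColumnTransformer) work
inside the selector. A minimal sketch of the usage this enables, mirroring the updated test further
below; the toy data, column names and ShuffleSplit settings are illustrative and not taken from the
repository:

import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import ShuffleSplit
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

from felimination.rfe import PermutationImportanceRFECV

# Toy data: 5 informative columns (x1..x5) plus 2 random ones, wrapped in a DataFrame.
X, y = make_classification(n_samples=200, n_features=5, n_informative=5,
                           n_redundant=0, random_state=42)
X = pd.DataFrame(X, columns=[f"x{i}" for i in range(1, 6)])
rng = np.random.default_rng(42)
X["rand_1"] = rng.normal(size=len(X))
X["rand_2"] = rng.normal(size=len(X))

# The ColumnTransformer refers to "x1" by name, so it only works if the selector
# forwards a DataFrame (and "x1" is expected to survive the elimination).
ct = ColumnTransformer([("scaler", StandardScaler(), ["x1"])], remainder="passthrough")
estimator = Pipeline([("ct", ct), ("lr", LogisticRegression(random_state=42))])

selector = PermutationImportanceRFECV(
    estimator,
    cv=ShuffleSplit(n_splits=3, test_size=0.3, random_state=42),
    n_features_to_select=5,
)
selector.fit(X, y)
print(selector.support_)  # expected to keep x1..x5 and drop the random columns
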
felimination/rfe.py (30 changes: 23 additions & 7 deletions)
@@ -334,6 +334,16 @@ def __init__(
             importance_getter=importance_getter,
         )
 
+    @staticmethod
+    def _select_X_with_remaining_features(X, support, n_features):
+        features = np.arange(n_features)[support]
+        if isinstance(X, pd.DataFrame):
+            feature_names = X.columns[support]
+            X_remaining_features = X[feature_names]
+        else:
+            X_remaining_features = X[:, features]
+        return X_remaining_features, features
+
     def fit(self, X, y, groups=None, **fit_params):
         """Fit the RFE model and then the underlying estimator on the selected features.
@@ -354,7 +364,7 @@ def fit(self, X, y, groups=None, **fit_params):
         """
         self._validate_params()
         tags = self._get_tags()
-        X, y = self._validate_data(
+        self._validate_data(
             X,
             y,
             accept_sparse="csc",
@@ -385,8 +395,9 @@ def fit(self, X, y, groups=None, **fit_params):
         # Elimination
         while current_number_of_features > n_features_to_select:
             # Select remaining features
-            features = np.arange(n_features)[support_]
-            X_remaining_features = X[:, features]
+            X_remaining_features, features = self._select_X_with_remaining_features(
+                X, support=support_, n_features=n_features
+            )
 
             if self.verbose > 0:
                 print(
@@ -456,8 +467,10 @@ def fit(self, X, y, groups=None, **fit_params):
         # Set final attributes
 
         # Estimate performances of final model
-        features = np.arange(n_features)[support_]
-        X_remaining_features = X[:, features]
+        X_remaining_features, features = self._select_X_with_remaining_features(
+            X, support=support_, n_features=n_features
+        )
+
         cv_scores = cross_validate(
             self.estimator,
             X_remaining_features,
@@ -482,9 +495,12 @@ def fit(self, X, y, groups=None, **fit_params):
             np.std(scores_per_fold)
         )
 
-        features = np.arange(n_features)[support_]
+        X_remaining_features, features = self._select_X_with_remaining_features(
+            X, support=support_, n_features=n_features
+        )
 
         self.estimator_ = clone(self.estimator)
-        self.estimator_.fit(X[:, features], y, **fit_params)
+        self.estimator_.fit(X_remaining_features, y, **fit_params)
 
         self.n_features_ = support_.sum()
         self.support_ = support_
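
The new _select_X_with_remaining_features helper is where the DataFrame handling lives: for a
DataFrame it subsets by column name, preserving the DataFrame type and its column labels; for
anything else it falls back to the original positional indexing. A standalone sketch of that
branch logic with made-up data (not code from the repository):

import numpy as np
import pandas as pd

support = np.array([True, False, True])
features = np.arange(support.size)[support]

X_df = pd.DataFrame({"x1": [1, 2], "x2": [3, 4], "x3": [5, 6]})
X_arr = X_df.to_numpy()

# DataFrame input: select the surviving columns by name, the result is still a DataFrame.
print(X_df[X_df.columns[support]])   # columns x1 and x3, names preserved

# ndarray input: positional indexing, as before this commit.
print(X_arr[:, features])

The companion change in fit() drops the "X, y = " assignment around self._validate_data: the
validation still runs, but since _validate_data returns NumPy-converted copies, reassigning its
result would have discarded the column names before the helper (and ultimately the estimator)
ever saw them.
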
tests/test_rfe.py (16 changes: 13 additions & 3 deletions)
@@ -1,11 +1,14 @@
 import numpy as np
+import pandas as pd
 import pytest
 from matplotlib.testing.compare import compare_images
+from sklearn.compose import ColumnTransformer
 from sklearn.datasets import make_classification, make_friedman1
+from sklearn.exceptions import NotFittedError
 from sklearn.linear_model import LinearRegression, LogisticRegression
 from sklearn.model_selection import ShuffleSplit
-from sklearn.exceptions import NotFittedError
+from sklearn.pipeline import Pipeline
+from sklearn.preprocessing import StandardScaler
 
 from felimination.rfe import PermutationImportanceRFECV


@@ -133,8 +136,15 @@ def test_perm_imp_rfecv_classification_base_case_pandas(
     random_state,
 ):
     X_with_rand, y = x_y_classification_with_rand_columns_pandas
+    ct = ColumnTransformer(
+        [
+            ("scaler", StandardScaler(), ["x1"]),
+        ],
+        remainder="passthrough",
+    )
+    estimator = Pipeline([("ct", ct), ("lr", LogisticRegression(random_state=42))])
     selector = PermutationImportanceRFECV(
-        LogisticRegression(random_state=random_state),
+        estimator,
         cv=cv,
         n_features_to_select=n_useful_features_classification,
     )
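
The updated pandas test wraps the logistic regression in a Pipeline whose ColumnTransformer
selects the column "x1" by name; that only succeeds when the selector forwards a DataFrame,
because scikit-learn rejects string column selectors for plain arrays. A quick illustration of
that constraint (toy data, not from the test suite):

import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler

ct = ColumnTransformer(
    [("scaler", StandardScaler(), ["x1"])],
    remainder="passthrough",
)

df = pd.DataFrame({"x1": [1.0, 2.0], "x2": [3.0, 4.0]})
print(ct.fit_transform(df))          # works: "x1" is resolved by column name

try:
    ct.fit_transform(df.to_numpy())  # expected to fail: string selectors need a DataFrame
except (TypeError, ValueError) as exc:
    print(exc)
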
