Implement CrossFitEstimator.score #63

Merged (2 commits) on Jul 22, 2024
6 changes: 6 additions & 0 deletions CHANGELOG.rst
@@ -7,6 +7,12 @@
Changelog
=========

0.8.0 (2024-07-xx)
------------------

* Implement :meth:`metalearners.cross_fit_estimator.CrossFitEstimator.score`.


0.7.0 (2024-07-12)
------------------

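As a quick illustration of the new changelog entry, here is a minimal usage sketch of CrossFitEstimator.score. It is hypothetical and not part of the PR: the data is synthetic, and the constructor arguments and the lightgbm estimator simply mirror the smoke test added further down in this PR.

import numpy as np
from lightgbm import LGBMRegressor

from metalearners.cross_fit_estimator import CrossFitEstimator

# Synthetic regression data, shaped like the smoke test in this PR.
rng = np.random.default_rng(0)
X = rng.standard_normal((1000, 3))
y = rng.standard_normal(1000)

# Positional arguments follow the smoke test: n_folds, estimator class, estimator params.
cfe = CrossFitEstimator(5, LGBMRegressor, {"n_estimators": 3})
cfe.fit(X, y)

# New in 0.8.0: R2 for a regressor (accuracy for a classifier), here on the training data.
print(cfe.score(X, y, is_oos=False))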
27 changes: 24 additions & 3 deletions metalearners/cross_fit_estimator.py
@@ -6,7 +6,8 @@
from functools import partial

import numpy as np
from sklearn.base import is_classifier
from sklearn.base import is_classifier, is_regressor
from sklearn.metrics import accuracy_score, r2_score
from sklearn.model_selection import (
    KFold,
    StratifiedKFold,
@@ -337,8 +338,28 @@
            oos_method=oos_method,
        )

    def score(self, X, y, sample_weight=None, **kwargs):
        raise NotImplementedError()
    def score(
        self,
        X: Matrix,
        y: Vector,
        is_oos: bool,
        oos_method: OosMethod | None = None,
        sample_weight: Vector | None = None,
    ) -> float:
        """Return the coefficient of determination of the prediction if the estimator is
        a regressor or the mean accuracy if it is a classifier."""
        if is_classifier(self):
            return accuracy_score(
                y, self.predict(X, is_oos, oos_method), sample_weight=sample_weight
            )
        elif is_regressor(self):
            return r2_score(
                y, self.predict(X, is_oos, oos_method), sample_weight=sample_weight
            )
        else:
            raise NotImplementedError(
                "score is not implemented for this type of estimator."
            )

    def set_params(self, **params):
        raise NotImplementedError()

Review comment attached to the r2_score call:

Collaborator: Any particular reason why you chose R2 rather than, e.g., the RMSE?

Contributor (author): Just because it looks like it is the convention in sklearn (see this and this). I think they do it because R2 has a known range, whereas the RMSE depends a lot on the data; I just followed their rule: R2 for regressors and accuracy for classifiers.

Codecov / codecov/patch check warning on metalearners/cross_fit_estimator.py#L360: added line #L360 was not covered by tests.
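Following up on the review thread above: if an RMSE-style metric is preferred over R2, it can be computed outside of score from the same predictions. The snippet below is a minimal sketch, not part of the PR; it assumes a regressor cfe fitted as in the earlier example and mirrors the internal predict call of score (np.sqrt over mean_squared_error is used instead of a dedicated RMSE helper to stay compatible with older scikit-learn versions).

import numpy as np
from sklearn.metrics import mean_squared_error, r2_score

# The same predictions that score() uses internally for a regressor:
# is_oos=False, oos_method=None, exactly as score(X, y, False) passes them on.
y_pred = cfe.predict(X, False, None)

r2 = r2_score(y, y_pred)  # what cfe.score(X, y, False) returns
rmse = float(np.sqrt(mean_squared_error(y, y_pred)))  # scale-dependent alternative
print(f"R2: {r2:.3f}, RMSE: {rmse:.3f}")

Keeping score itself at R2/accuracy matches scikit-learn's own estimator convention, while other metrics remain a short computation on top of predict.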
18 changes: 18 additions & 0 deletions tests/test_cross_fit_estimator.py
@@ -6,6 +6,7 @@
import numpy as np
import pytest
from lightgbm import LGBMClassifier, LGBMRegressor
from sklearn.base import is_classifier, is_regressor
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import accuracy_score, log_loss
from sklearn.model_selection import KFold
@@ -262,3 +263,20 @@ def test_validate_data_match(n_observations, test_indices, success):
        ValueError, match="rely on different numbers of observations"
    ):
        _validate_data_match_prior_split(n_observations, test_indices)


@pytest.mark.parametrize(
    "estimator",
    [LGBMClassifier, LGBMRegressor],
)
def test_score_smoke(estimator, rng):
    n_samples = 1000
    X = rng.standard_normal((n_samples, 3))
    if is_classifier(estimator):
        y = rng.integers(0, 4, n_samples)
    elif is_regressor(estimator):
        y = rng.standard_normal(n_samples)

    cfe = CrossFitEstimator(5, estimator, {"n_estimators": 3})
    cfe.fit(X, y)
    cfe.score(X, y, False)
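
A possible follow-up test, not part of this PR, could pin down the returned value rather than only smoke-testing the call, by recomputing the expected metric from predict exactly as the implementation does. The name test_score_matches_manual_metric is hypothetical; r2_score would additionally need to be imported in this module.

from sklearn.metrics import r2_score  # accuracy_score is already imported above


@pytest.mark.parametrize("estimator", [LGBMClassifier, LGBMRegressor])
def test_score_matches_manual_metric(estimator, rng):
    n_samples = 1000
    X = rng.standard_normal((n_samples, 3))
    if is_classifier(estimator):
        y = rng.integers(0, 4, n_samples)
    else:
        y = rng.standard_normal(n_samples)

    cfe = CrossFitEstimator(5, estimator, {"n_estimators": 3})
    cfe.fit(X, y)

    # Recompute the metric from the same predictions score() relies on.
    predictions = cfe.predict(X, False, None)
    if is_classifier(estimator):
        expected = accuracy_score(y, predictions)
    else:
        expected = r2_score(y, predictions)
    assert cfe.score(X, y, False) == pytest.approx(expected)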