microsoft · jameslamb · Feb 9, 2021 · Feb 9, 2021 · Feb 9, 2021 · Feb 9, 2021
@@ -19,7 +19,14 @@
 from .compat import (PANDAS_INSTALLED, pd_DataFrame, pd_Series, concat,
                      SKLEARN_INSTALLED, LGBMNotFittedError,
                      DASK_INSTALLED, dask_DataFrame, dask_Array, dask_Series, delayed, Client, default_client, get_worker, wait)
-from .sklearn import LGBMClassifier, LGBMModel, LGBMRegressor, LGBMRanker
+from .sklearn import (
+    _lgbmmodel_doc_fit,
+    _lgbmmodel_doc_predict,
+    LGBMClassifier,
+    LGBMModel,
+    LGBMRegressor,
+    LGBMRanker
+)
 
 _DaskCollection = Union[dask_Array, dask_DataFrame, dask_Series]
 _DaskMatrixLike = Union[dask_Array, dask_DataFrame]
@@ -578,13 +585,17 @@ def __init__(
 
     _base_doc = LGBMClassifier.__init__.__doc__
     _before_kwargs, _kwargs, _after_kwargs = _base_doc.partition('**kwargs')
-    __init__.__doc__ = (
+    _base_doc = (
         _before_kwargs
         + 'client : dask.distributed.Client or None, optional (default=None)\n'
         + ' ' * 12 + 'Dask client. If ``None``, ``distributed.default_client()`` will be used at runtime. The Dask client used by this class will not be saved if the model object is pickled.\n'
         + ' ' * 8 + _kwargs + _after_kwargs
     )
 
+    # the note on custom objective functions in LGBMModel.__init__ is not currently relevant for the
+    # Dask estimators; partition() (unlike slicing on find()) keeps the whole text if 'Note\n' is absent
+    __init__.__doc__ = _base_doc.partition('Note\n')[0]
+
     def __getstate__(self) -> Dict[Any, Any]:
         return self._lgb_getstate()
 
@@ -604,7 +615,23 @@ def fit(
             **kwargs
         )
 
-    fit.__doc__ = LGBMClassifier.fit.__doc__
+    _base_doc = _lgbmmodel_doc_fit.format(
+        X_shape="dask Array or dask DataFrame of shape = [n_samples, n_features]",
+        y_shape="dask Array, dask DataFrame or dask Series of shape = [n_samples]",
+        sample_weight_shape="dask Array, dask DataFrame, dask Series of shape = [n_samples] or None, optional (default=None)",
+        group_shape="dask Array, dask DataFrame, dask Series of shape = [n_samples] or None, optional (default=None)"
+    )
+
+    # DaskLGBMClassifier does not support init_score, evaluation data, or early stopping: cut 'init_score :' up to 'verbose :'
+    _base_doc = (_base_doc[:_base_doc.find('init_score :')]
+                 + _base_doc[_base_doc.find('verbose :'):])
+
+    # DaskLGBMClassifier support for callbacks and init_model is not tested: end the text at 'callbacks :' and append **kwargs
+    fit.__doc__ = (
+        _base_doc[:_base_doc.find('callbacks :')]
+        + '**kwargs\n'
+        + ' ' * 12 + 'Other parameters passed through to ``LGBMClassifier.fit()``\n'
+    )
 
     def predict(self, X: _DaskMatrixLike, **kwargs: Any) -> dask_Array:
         """Docstring is inherited from the lightgbm.LGBMClassifier.predict."""
@@ -615,7 +642,14 @@ def predict(self, X: _DaskMatrixLike, **kwargs: Any) -> dask_Array:
             **kwargs
         )
 
-    predict.__doc__ = LGBMClassifier.predict.__doc__
+    predict.__doc__ = _lgbmmodel_doc_predict.format(  # fill the predict docstring template imported from .sklearn
+        description="Return the predicted value for each sample.",
+        X_shape="dask Array or dask DataFrame of shape = [n_samples, n_features]",
+        output_name="predicted_result",
+        predicted_result_shape="dask Array of shape = [n_samples] or shape = [n_samples, n_classes]",
+        X_leaves_shape="dask Array of shape = [n_samples, n_trees] or shape = [n_samples, n_trees * n_classes]",
+        X_SHAP_values_shape="dask Array of shape = [n_samples, n_features + 1] or shape = [n_samples, (n_features + 1) * n_classes]"
+    )
 
     def predict_proba(self, X: _DaskMatrixLike, **kwargs: Any) -> dask_Array:
         """Docstring is inherited from the lightgbm.LGBMClassifier.predict_proba."""
@@ -626,7 +660,14 @@ def predict_proba(self, X: _DaskMatrixLike, **kwargs: Any) -> dask_Array:
             **kwargs
         )
 
-    predict_proba.__doc__ = LGBMClassifier.predict_proba.__doc__
+    predict_proba.__doc__ = _lgbmmodel_doc_predict.format(  # same .sklearn template as predict, probability wording
+        description="Return the predicted probability for each class for each sample.",
+        X_shape="dask Array or dask DataFrame of shape = [n_samples, n_features]",
+        output_name="predicted_probability",
+        predicted_result_shape="dask Array of shape = [n_samples, n_classes]",
+        X_leaves_shape="dask Array of shape = [n_samples, n_trees] or shape = [n_samples, n_trees * n_classes]",
+        X_SHAP_values_shape="dask Array of shape = [n_samples, n_features + 1] or shape = [n_samples, (n_features + 1) * n_classes]"
+    )
 
     def to_local(self) -> LGBMClassifier:
         """Create regular version of lightgbm.LGBMClassifier from the distributed version.
@@ -695,13 +736,17 @@ def __init__(
 
     _base_doc = LGBMRegressor.__init__.__doc__
     _before_kwargs, _kwargs, _after_kwargs = _base_doc.partition('**kwargs')
-    __init__.__doc__ = (
+    _base_doc = (
         _before_kwargs
         + 'client : dask.distributed.Client or None, optional (default=None)\n'
         + ' ' * 12 + 'Dask client. If ``None``, ``distributed.default_client()`` will be used at runtime. The Dask client used by this class will not be saved if the model object is pickled.\n'
         + ' ' * 8 + _kwargs + _after_kwargs
     )
 
+    # the note on custom objective functions in LGBMModel.__init__ is not currently relevant for the
+    # Dask estimators; partition() (unlike slicing on find()) keeps the whole text if 'Note\n' is absent
+    __init__.__doc__ = _base_doc.partition('Note\n')[0]
+
     def __getstate__(self) -> Dict[Any, Any]:
         return self._lgb_getstate()
 
@@ -721,7 +766,23 @@ def fit(
             **kwargs
         )
 
-    fit.__doc__ = LGBMRegressor.fit.__doc__
+    _base_doc = _lgbmmodel_doc_fit.format(
+        X_shape="dask Array or dask DataFrame of shape = [n_samples, n_features]",
+        y_shape="dask Array, dask DataFrame or dask Series of shape = [n_samples]",
+        sample_weight_shape="dask Array, dask DataFrame, dask Series of shape = [n_samples] or None, optional (default=None)",
+        group_shape="dask Array, dask DataFrame, dask Series of shape = [n_samples] or None, optional (default=None)"
+    )
+
+    # DaskLGBMRegressor does not support init_score, evaluation data, or early stopping: cut 'init_score :' up to 'verbose :'
+    _base_doc = (_base_doc[:_base_doc.find('init_score :')]
+                 + _base_doc[_base_doc.find('verbose :'):])
+
+    # DaskLGBMRegressor support for callbacks and init_model is not tested: end the text at 'callbacks :' and append **kwargs
+    fit.__doc__ = (
+        _base_doc[:_base_doc.find('callbacks :')]
+        + '**kwargs\n'
+        + ' ' * 12 + 'Other parameters passed through to ``LGBMRegressor.fit()``\n'
+    )
 
     def predict(self, X: _DaskMatrixLike, **kwargs) -> dask_Array:
         """Docstring is inherited from the lightgbm.LGBMRegressor.predict."""
@@ -731,7 +792,14 @@ def predict(self, X: _DaskMatrixLike, **kwargs) -> dask_Array:
             **kwargs
         )
 
-    predict.__doc__ = LGBMRegressor.predict.__doc__
+    predict.__doc__ = _lgbmmodel_doc_predict.format(  # fill the predict docstring template imported from .sklearn
+        description="Return the predicted value for each sample.",
+        X_shape="dask Array or dask DataFrame of shape = [n_samples, n_features]",
+        output_name="predicted_result",
+        predicted_result_shape="dask Array of shape = [n_samples]",
+        X_leaves_shape="dask Array of shape = [n_samples, n_trees]",
+        X_SHAP_values_shape="dask Array of shape = [n_samples, n_features + 1]"
+    )
 
     def to_local(self) -> LGBMRegressor:
         """Create regular version of lightgbm.LGBMRegressor from the distributed version.
@@ -800,13 +868,17 @@ def __init__(
 
     _base_doc = LGBMRanker.__init__.__doc__
     _before_kwargs, _kwargs, _after_kwargs = _base_doc.partition('**kwargs')
-    __init__.__doc__ = (
+    _base_doc = (
         _before_kwargs
         + 'client : dask.distributed.Client or None, optional (default=None)\n'
         + ' ' * 12 + 'Dask client. If ``None``, ``distributed.default_client()`` will be used at runtime. The Dask client used by this class will not be saved if the model object is pickled.\n'
         + ' ' * 8 + _kwargs + _after_kwargs
     )
 
+    # the note on custom objective functions in LGBMModel.__init__ is not currently relevant for the
+    # Dask estimators; partition() (unlike slicing on find()) keeps the whole text if 'Note\n' is absent
+    __init__.__doc__ = _base_doc.partition('Note\n')[0]
+
     def __getstate__(self) -> Dict[Any, Any]:
         return self._lgb_getstate()
 
@@ -832,13 +904,39 @@ def fit(
             **kwargs
         )
 
-    fit.__doc__ = LGBMRanker.fit.__doc__
+    _base_doc = _lgbmmodel_doc_fit.format(
+        X_shape="dask Array or dask DataFrame of shape = [n_samples, n_features]",
+        y_shape="dask Array, dask DataFrame or dask Series of shape = [n_samples]",
+        sample_weight_shape="dask Array, dask DataFrame, dask Series of shape = [n_samples] or None, optional (default=None)",
+        group_shape="dask Array, dask DataFrame, dask Series of shape = [n_samples] or None, optional (default=None)"
+    )
+
+    # DaskLGBMRanker does not support init_score: cut from 'init_score :' up to 'group :' so that group stays documented
+    _base_doc = (_base_doc[:_base_doc.find('init_score :')]
+                 + _base_doc[_base_doc.find('group :'):])
+
+    _base_doc = (_base_doc[:_base_doc.find('eval_set :')]  # evaluation data and early stopping are not supported either
+                 + _base_doc[_base_doc.find('verbose :'):])
+
+    # DaskLGBMRanker support for callbacks and init_model is not tested: end the text at 'callbacks :' and append **kwargs
+    fit.__doc__ = (
+        _base_doc[:_base_doc.find('callbacks :')]
+        + '**kwargs\n'
+        + ' ' * 12 + 'Other parameters passed through to ``LGBMRanker.fit()``\n'
+    )
 
     def predict(self, X: _DaskMatrixLike, **kwargs: Any) -> dask_Array:
         """Docstring is inherited from the lightgbm.LGBMRanker.predict."""
         return _predict(self.to_local(), X, **kwargs)
 
-    predict.__doc__ = LGBMRanker.predict.__doc__
+    predict.__doc__ = _lgbmmodel_doc_predict.format(  # fill the predict docstring template imported from .sklearn
+        description="Return the predicted value for each sample.",
+        X_shape="dask Array or dask DataFrame of shape = [n_samples, n_features]",
+        output_name="predicted_result",
+        predicted_result_shape="dask Array of shape = [n_samples]",
+        X_leaves_shape="dask Array of shape = [n_samples, n_trees]",
+        X_SHAP_values_shape="dask Array of shape = [n_samples, n_features + 1]"
+    )
 
     def to_local(self) -> LGBMRanker:
         """Create regular version of lightgbm.LGBMRanker from the distributed version.