diff --git a/python-package/lightgbm/dask.py b/python-package/lightgbm/dask.py index ee1daf7f6510..0d6511a520a0 100644 --- a/python-package/lightgbm/dask.py +++ b/python-package/lightgbm/dask.py @@ -105,12 +105,17 @@ def _train_part( else: group = None + if 'init_score' in list_of_parts[0]: + init_score = _concat([x['init_score'] for x in list_of_parts]) + else: + init_score = None + try: model = model_factory(**params) if is_ranker: - model.fit(data, label, sample_weight=weight, group=group, **kwargs) + model.fit(data, label, sample_weight=weight, init_score=init_score, group=group, **kwargs) else: - model.fit(data, label, sample_weight=weight, **kwargs) + model.fit(data, label, sample_weight=weight, init_score=init_score, **kwargs) finally: _safe_call(_LIB.LGBM_NetworkFree()) @@ -168,6 +173,7 @@ def _train( params: Dict[str, Any], model_factory: Type[LGBMModel], sample_weight: Optional[_DaskCollection] = None, + init_score: Optional[_DaskCollection] = None, group: Optional[_DaskCollection] = None, **kwargs: Any ) -> LGBMModel: @@ -187,6 +193,8 @@ def _train( Class of the local underlying model. sample_weight : Dask Array, Dask DataFrame, Dask Series of shape = [n_samples] or None, optional (default=None) Weights of training data. + init_score : Dask Array, Dask DataFrame, Dask Series of shape = [n_samples] or None, optional (default=None) + Init score of training data. group : Dask Array, Dask DataFrame, Dask Series of shape = [n_samples] or None, optional (default=None) Group/query data. Only used in the learning-to-rank task. 
@@ -289,6 +297,11 @@ def _train( for i in range(n_parts): parts[i]['group'] = group_parts[i] + if init_score is not None: + init_score_parts = _split_to_parts(data=init_score, is_matrix=False) + for i in range(n_parts): + parts[i]['init_score'] = init_score_parts[i] + # Start computation in the background parts = list(map(delayed, parts)) parts = client.compute(parts) @@ -540,6 +553,7 @@ def _lgb_dask_fit( X: _DaskMatrixLike, y: _DaskCollection, sample_weight: Optional[_DaskCollection] = None, + init_score: Optional[_DaskCollection] = None, group: Optional[_DaskCollection] = None, **kwargs: Any ) -> "_DaskLGBMModel": @@ -556,6 +570,7 @@ def _lgb_dask_fit( params=params, model_factory=model_factory, sample_weight=sample_weight, + init_score=init_score, group=group, **kwargs ) @@ -657,6 +672,7 @@ def fit( X: _DaskMatrixLike, y: _DaskCollection, sample_weight: Optional[_DaskCollection] = None, + init_score: Optional[_DaskCollection] = None, **kwargs: Any ) -> "DaskLGBMClassifier": """Docstring is inherited from the lightgbm.LGBMClassifier.fit.""" @@ -665,6 +681,7 @@ def fit( X=X, y=y, sample_weight=sample_weight, + init_score=init_score, **kwargs ) @@ -672,11 +689,12 @@ def fit( X_shape="Dask Array or Dask DataFrame of shape = [n_samples, n_features]", y_shape="Dask Array, Dask DataFrame or Dask Series of shape = [n_samples]", sample_weight_shape="Dask Array, Dask DataFrame, Dask Series of shape = [n_samples] or None, optional (default=None)", + init_score_shape="Dask Array, Dask DataFrame, Dask Series of shape = [n_samples] or None, optional (default=None)", group_shape="Dask Array, Dask DataFrame, Dask Series of shape = [n_samples] or None, optional (default=None)" ) - # DaskLGBMClassifier does not support init_score, evaluation data, or early stopping - _base_doc = (_base_doc[:_base_doc.find('init_score :')] + # DaskLGBMClassifier does not support evaluation data or early stopping + _base_doc = (_base_doc[:_base_doc.find('group :')] + 
_base_doc[_base_doc.find('verbose :'):]) # DaskLGBMClassifier support for callbacks and init_model is not tested @@ -808,6 +826,7 @@ def fit( X: _DaskMatrixLike, y: _DaskCollection, sample_weight: Optional[_DaskCollection] = None, + init_score: Optional[_DaskCollection] = None, **kwargs: Any ) -> "DaskLGBMRegressor": """Docstring is inherited from the lightgbm.LGBMRegressor.fit.""" @@ -816,6 +835,7 @@ def fit( X=X, y=y, sample_weight=sample_weight, + init_score=init_score, **kwargs ) @@ -823,11 +843,12 @@ def fit( X_shape="Dask Array or Dask DataFrame of shape = [n_samples, n_features]", y_shape="Dask Array, Dask DataFrame or Dask Series of shape = [n_samples]", sample_weight_shape="Dask Array, Dask DataFrame, Dask Series of shape = [n_samples] or None, optional (default=None)", + init_score_shape="Dask Array, Dask DataFrame, Dask Series of shape = [n_samples] or None, optional (default=None)", group_shape="Dask Array, Dask DataFrame, Dask Series of shape = [n_samples] or None, optional (default=None)" ) - # DaskLGBMRegressor does not support init_score, evaluation data, or early stopping - _base_doc = (_base_doc[:_base_doc.find('init_score :')] + # DaskLGBMRegressor does not support evaluation data or early stopping + _base_doc = (_base_doc[:_base_doc.find('group :')] + _base_doc[_base_doc.find('verbose :'):]) # DaskLGBMRegressor support for callbacks and init_model is not tested @@ -945,14 +966,12 @@ def fit( **kwargs: Any ) -> "DaskLGBMRanker": """Docstring is inherited from the lightgbm.LGBMRanker.fit.""" - if init_score is not None: - raise RuntimeError('init_score is not currently supported in lightgbm.dask') - return self._lgb_dask_fit( model_factory=LGBMRanker, X=X, y=y, sample_weight=sample_weight, + init_score=init_score, group=group, **kwargs ) @@ -961,13 +980,11 @@ def fit( X_shape="Dask Array or Dask DataFrame of shape = [n_samples, n_features]", y_shape="Dask Array, Dask DataFrame or Dask Series of shape = [n_samples]", sample_weight_shape="Dask 
Array, Dask DataFrame, Dask Series of shape = [n_samples] or None, optional (default=None)", + init_score_shape="Dask Array, Dask DataFrame, Dask Series of shape = [n_samples] or None, optional (default=None)", group_shape="Dask Array, Dask DataFrame, Dask Series of shape = [n_samples] or None, optional (default=None)" ) - # DaskLGBMRanker does not support init_score, evaluation data, or early stopping - _base_doc = (_base_doc[:_base_doc.find('init_score :')] - + _base_doc[_base_doc.find('init_score :'):]) - + # DaskLGBMRanker does not support evaluation data or early stopping _base_doc = (_base_doc[:_base_doc.find('eval_set :')] + _base_doc[_base_doc.find('verbose :'):]) diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py index d6b882114c6a..3c6fbe772863 100644 --- a/python-package/lightgbm/sklearn.py +++ b/python-package/lightgbm/sklearn.py @@ -189,7 +189,7 @@ def __call__(self, preds, dataset): The target values (class labels in classification, real numbers in regression). sample_weight : {sample_weight_shape} Weights of training data. - init_score : array-like of shape = [n_samples] or None, optional (default=None) + init_score : {init_score_shape} Init score of training data. group : {group_shape} Group/query data. 
@@ -706,6 +706,7 @@ def _get_meta_data(collection, name, i): X_shape="array-like or sparse matrix of shape = [n_samples, n_features]", y_shape="array-like of shape = [n_samples]", sample_weight_shape="array-like of shape = [n_samples] or None, optional (default=None)", + init_score_shape="array-like of shape = [n_samples] or None, optional (default=None)", group_shape="array-like or None, optional (default=None)" ) + "\n\n" + _lgbmmodel_doc_custom_eval_note diff --git a/tests/python_package_test/test_dask.py b/tests/python_package_test/test_dask.py index 5f7784190e4b..4599b080ab38 100644 --- a/tests/python_package_test/test_dask.py +++ b/tests/python_package_test/test_dask.py @@ -3,6 +3,7 @@ import inspect import pickle +import random import socket from itertools import groupby from os import getenv @@ -1228,6 +1229,50 @@ def test_training_succeeds_when_data_is_dataframe_and_label_is_column_array( client.close(timeout=CLIENT_CLOSE_TIMEOUT) +@pytest.mark.parametrize('task', tasks) +@pytest.mark.parametrize('output', data_output) +def test_init_score( + task, + output, + client): + if task == 'ranking' and output == 'scipy_csr_matrix': + pytest.skip('LGBMRanker is not currently tested on sparse matrices') + + if task == 'ranking': + _, _, _, _, dX, dy, dw, dg = _create_ranking_data( + output=output, + group=None + ) + model_factory = lgb.DaskLGBMRanker + else: + _, _, _, dX, dy, dw = _create_data( + objective=task, + output=output, + ) + dg = None + if task == 'classification': + model_factory = lgb.DaskLGBMClassifier + elif task == 'regression': + model_factory = lgb.DaskLGBMRegressor + + params = { + 'n_estimators': 1, + 'num_leaves': 2, + 'time_out': 5 + } + init_score = random.random() + if output.startswith('dataframe'): + init_scores = dy.map_partitions(lambda x: pd.Series([init_score] * x.size)) + else: + init_scores = da.full_like(dy, fill_value=init_score, dtype=np.float64) + model = model_factory(client=client, **params) + model.fit(dX, dy, 
sample_weight=dw, init_score=init_scores, group=dg) + # value of the root node is 0 when init_score is set + assert model.booster_.trees_to_dataframe()['value'][0] == 0 + + client.close(timeout=CLIENT_CLOSE_TIMEOUT) + + def sklearn_checks_to_run(): check_names = [ "check_estimator_get_tags_default_keys",