From 51b363ec3f2e78b8da1d10a48872c9a6cc010cbe Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Sat, 15 Feb 2025 18:06:25 -0500
Subject: [PATCH 1/8] ENH(string dtype): Implement cumsum for Python-backed
 strings

---
 doc/source/whatsnew/v2.3.0.rst           |  2 +-
 pandas/core/arrays/string_.py            | 94 ++++++++++++++++++++++++
 pandas/tests/apply/test_str.py           | 15 ++--
 pandas/tests/extension/test_string.py    |  6 +-
 pandas/tests/groupby/test_categorical.py | 12 +--
 pandas/tests/series/test_cumulative.py   |  6 +-
 6 files changed, 113 insertions(+), 22 deletions(-)

diff --git a/doc/source/whatsnew/v2.3.0.rst b/doc/source/whatsnew/v2.3.0.rst
index 8bdddb5b7f85d..1bd818a200928 100644
--- a/doc/source/whatsnew/v2.3.0.rst
+++ b/doc/source/whatsnew/v2.3.0.rst
@@ -37,7 +37,7 @@ Other enhancements
   updated to work correctly with NumPy >= 2 (:issue:`57739`)
 - :meth:`Series.str.decode` result now has ``StringDtype`` when ``future.infer_string`` is True (:issue:`60709`)
 - :meth:`~Series.to_hdf` and :meth:`~DataFrame.to_hdf` now round-trip with ``StringDtype``  (:issue:`60663`)
-- The :meth:`~Series.cumsum`, :meth:`~Series.cummin`, and :meth:`~Series.cummax` reductions are now implemented for ``StringDtype`` columns when backed by PyArrow (:issue:`60633`)
+- The :meth:`~Series.cumsum`, :meth:`~Series.cummin`, and :meth:`~Series.cummax` reductions are now implemented for ``StringDtype`` columns (:issue:`60633`, :issue:`???`)
 - The :meth:`~Series.sum` reduction is now implemented for ``StringDtype`` columns (:issue:`59853`)
 
 .. ---------------------------------------------------------------------------
diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
index 623a6a10c75b5..a472339998944 100644
--- a/pandas/core/arrays/string_.py
+++ b/pandas/core/arrays/string_.py
@@ -870,6 +870,100 @@ def _reduce(
 
         raise TypeError(f"Cannot perform reduction '{name}' with string dtype")
 
+    def _accumulate(self, name: str, *, skipna: bool = True, **kwargs) -> StringArray:
+        """
+        Return an ExtensionArray performing an accumulation operation.
+
+        The underlying data type might change.
+
+        Parameters
+        ----------
+        name : str
+            Name of the function, supported values are:
+            - cummin
+            - cummax
+            - cumsum
+            - cumprod
+        skipna : bool, default True
+            If True, skip NA values.
+        **kwargs
+            Additional keyword arguments passed to the accumulation function.
+            Currently, there is no supported kwarg.
+
+        Returns
+        -------
+        array
+
+        Raises
+        ------
+        NotImplementedError : subclass does not define accumulations
+        """
+        if is_string_dtype(self):
+            return self._str_accumulate(name=name, skipna=skipna, **kwargs)
+        return super()._accumulate(name=name, skipna=skipna, **kwargs)
+
+    def _str_accumulate(
+        self, name: str, *, skipna: bool = True, **kwargs
+    ) -> StringArray:
+        """
+        Accumulate implementation for strings, see `_accumulate` docstring for details.
+
+        pyarrow.compute does not implement these methods for strings.
+        """
+        if name == "cumprod":
+            msg = f"operation '{name}' not supported for dtype '{self.dtype}'"
+            raise TypeError(msg)
+
+        # We may need to strip out trailing NA values
+        tail: np.array | None = None
+        na_mask: np.array | None = None
+        ndarray = self._ndarray
+        np_func = {
+            "cumsum": np.cumsum,
+            "cummin": np.minimum.accumulate,
+            "cummax": np.maximum.accumulate,
+        }[name]
+
+        from pandas.core import missing
+
+        if self._hasna:
+            na_mask = isna(ndarray)
+            if np.all(na_mask):
+                return type(self)(ndarray)
+            if skipna:
+                if name == "cumsum":
+                    ndarray = np.where(na_mask, "", ndarray)
+                else:
+                    # We can retain the running min/max by forward/backward filling.
+                    ndarray = ndarray.copy()
+                    missing.pad_or_backfill_inplace(
+                        ndarray.T,
+                        method="pad",
+                        axis=0,
+                    )
+                    missing.pad_or_backfill_inplace(
+                        ndarray.T,
+                        method="backfill",
+                        axis=0,
+                    )
+            else:
+                # When not skipping NA values, the result should be null from
+                # the first NA value onward.
+                idx = np.argmax(na_mask)
+                tail = np.empty(len(ndarray) - idx, dtype="object")
+                tail[:] = np.nan
+                ndarray = ndarray[:idx]
+
+        np_result = np_func(ndarray)
+
+        if tail is not None:
+            np_result = np.hstack((np_result, tail))
+        elif na_mask is not None:
+            np_result = np.where(na_mask, np.nan, np_result)
+
+        result = type(self)(np_result)
+        return result
+
     def _wrap_reduction_result(self, axis: AxisInt | None, result) -> Any:
         if self.dtype.na_value is np.nan and result is libmissing.NA:
             # the masked_reductions use pd.NA -> convert to np.nan
diff --git a/pandas/tests/apply/test_str.py b/pandas/tests/apply/test_str.py
index ce71cfec535e4..b7c454817cb53 100644
--- a/pandas/tests/apply/test_str.py
+++ b/pandas/tests/apply/test_str.py
@@ -5,7 +5,6 @@
 import pytest
 
 from pandas.compat import (
-    HAS_PYARROW,
     WASM,
 )
 
@@ -166,13 +165,13 @@ def test_agg_cython_table_transform_series(request, series, func, expected):
     # GH21224
     # test transforming functions in
     # pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum)
-    if series.dtype == "string" and func == "cumsum" and not HAS_PYARROW:
-        request.applymarker(
-            pytest.mark.xfail(
-                raises=NotImplementedError,
-                reason="TODO(infer_string) cumsum not yet implemented for string",
-            )
-        )
+    # if series.dtype == "string" and func == "cumsum" and not HAS_PYARROW:
+    #     request.applymarker(
+    #         pytest.mark.xfail(
+    #             raises=NotImplementedError,
+    #             reason="TODO(infer_string) cumsum not yet implemented for string",
+    #         )
+    #     )
     warn = None if isinstance(func, str) else FutureWarning
     with tm.assert_produces_warning(warn, match="is currently using Series.*"):
         result = series.agg(func)
diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py
index 6ce48e434d329..25129111180d6 100644
--- a/pandas/tests/extension/test_string.py
+++ b/pandas/tests/extension/test_string.py
@@ -196,11 +196,7 @@ def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool:
 
     def _supports_accumulation(self, ser: pd.Series, op_name: str) -> bool:
         assert isinstance(ser.dtype, StorageExtensionDtype)
-        return ser.dtype.storage == "pyarrow" and op_name in [
-            "cummin",
-            "cummax",
-            "cumsum",
-        ]
+        return op_name in ["cummin", "cummax", "cumsum"]
 
     def _cast_pointwise_result(self, op_name: str, obj, other, pointwise_result):
         dtype = cast(StringDtype, tm.get_dtype(obj))
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index e49be8c00b426..6f110ee8f8f21 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -325,9 +325,9 @@ def test_observed(request, using_infer_string, observed):
     # gh-8138 (back-compat)
     # gh-8869
 
-    if using_infer_string and not observed:
-        # TODO(infer_string) this fails with filling the string column with 0
-        request.applymarker(pytest.mark.xfail(reason="TODO(infer_string)"))
+    # if using_infer_string and not observed:
+    #     # TODO(infer_string) this fails with filling the string column with 0
+    #     request.applymarker(pytest.mark.xfail(reason="TODO(infer_string)"))
 
     cat1 = Categorical(["a", "a", "b", "b"], categories=["a", "b", "z"], ordered=True)
     cat2 = Categorical(["c", "d", "c", "d"], categories=["c", "d", "y"], ordered=True)
@@ -355,10 +355,12 @@ def test_observed(request, using_infer_string, observed):
     )
     result = gb.sum()
     if not observed:
+        fill_value = "" if using_infer_string else 0
         expected = cartesian_product_for_groupers(
-            expected, [cat1, cat2], list("AB"), fill_value=0
+            expected, [cat1, cat2], list("AB"), fill_value=fill_value
         )
-
+    print(result)
+    print(expected)
     tm.assert_frame_equal(result, expected)
 
 
diff --git a/pandas/tests/series/test_cumulative.py b/pandas/tests/series/test_cumulative.py
index 89882d9d797c5..9357c1f278b75 100644
--- a/pandas/tests/series/test_cumulative.py
+++ b/pandas/tests/series/test_cumulative.py
@@ -266,12 +266,12 @@ def test_cumprod_timedelta(self):
         ],
     )
     def test_cum_methods_pyarrow_strings(
-        self, pyarrow_string_dtype, data, op, skipna, expected_data
+        self, string_dtype_no_object, data, op, skipna, expected_data
     ):
         # https://github.com/pandas-dev/pandas/pull/60633
-        ser = pd.Series(data, dtype=pyarrow_string_dtype)
+        ser = pd.Series(data, dtype=string_dtype_no_object)
         method = getattr(ser, op)
-        expected = pd.Series(expected_data, dtype=pyarrow_string_dtype)
+        expected = pd.Series(expected_data, dtype=string_dtype_no_object)
         result = method(skipna=skipna)
         tm.assert_series_equal(result, expected)
 

From 188f92e349faa457bdbb838f71ef3ee33e9113f8 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Sat, 15 Feb 2025 18:10:11 -0500
Subject: [PATCH 2/8] cleanups

---
 doc/source/whatsnew/v2.3.0.rst           |  2 +-
 pandas/core/arrays/string_.py            |  5 +----
 pandas/tests/apply/test_str.py           |  9 +--------
 pandas/tests/groupby/test_categorical.py | 11 ++++-------
 pandas/tests/series/test_cumulative.py   |  5 +++--
 5 files changed, 10 insertions(+), 22 deletions(-)

diff --git a/doc/source/whatsnew/v2.3.0.rst b/doc/source/whatsnew/v2.3.0.rst
index 1bd818a200928..c21f8e8c46a6d 100644
--- a/doc/source/whatsnew/v2.3.0.rst
+++ b/doc/source/whatsnew/v2.3.0.rst
@@ -37,7 +37,7 @@ Other enhancements
   updated to work correctly with NumPy >= 2 (:issue:`57739`)
 - :meth:`Series.str.decode` result now has ``StringDtype`` when ``future.infer_string`` is True (:issue:`60709`)
 - :meth:`~Series.to_hdf` and :meth:`~DataFrame.to_hdf` now round-trip with ``StringDtype``  (:issue:`60663`)
-- The :meth:`~Series.cumsum`, :meth:`~Series.cummin`, and :meth:`~Series.cummax` reductions are now implemented for ``StringDtype`` columns (:issue:`60633`, :issue:`???`)
+- The :meth:`~Series.cumsum`, :meth:`~Series.cummin`, and :meth:`~Series.cummax` reductions are now implemented for ``StringDtype`` columns (:issue:`60633`, :issue:`60633`)
 - The :meth:`~Series.sum` reduction is now implemented for ``StringDtype`` columns (:issue:`59853`)
 
 .. ---------------------------------------------------------------------------
diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
index a472339998944..10b2a62828ddb 100644
--- a/pandas/core/arrays/string_.py
+++ b/pandas/core/arrays/string_.py
@@ -49,6 +49,7 @@
 )
 
 from pandas.core import (
+    missing,
     nanops,
     ops,
 )
@@ -907,8 +908,6 @@ def _str_accumulate(
     ) -> StringArray:
         """
         Accumulate implementation for strings, see `_accumulate` docstring for details.
-
-        pyarrow.compute does not implement these methods for strings.
         """
         if name == "cumprod":
             msg = f"operation '{name}' not supported for dtype '{self.dtype}'"
@@ -924,8 +923,6 @@ def _str_accumulate(
             "cummax": np.maximum.accumulate,
         }[name]
 
-        from pandas.core import missing
-
         if self._hasna:
             na_mask = isna(ndarray)
             if np.all(na_mask):
diff --git a/pandas/tests/apply/test_str.py b/pandas/tests/apply/test_str.py
index b7c454817cb53..e5a9492630b13 100644
--- a/pandas/tests/apply/test_str.py
+++ b/pandas/tests/apply/test_str.py
@@ -161,17 +161,10 @@ def test_agg_cython_table_series(series, func, expected):
         ),
     ),
 )
-def test_agg_cython_table_transform_series(request, series, func, expected):
+def test_agg_cython_table_transform_series(series, func, expected):
     # GH21224
     # test transforming functions in
     # pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum)
-    # if series.dtype == "string" and func == "cumsum" and not HAS_PYARROW:
-    #     request.applymarker(
-    #         pytest.mark.xfail(
-    #             raises=NotImplementedError,
-    #             reason="TODO(infer_string) cumsum not yet implemented for string",
-    #         )
-    #     )
     warn = None if isinstance(func, str) else FutureWarning
     with tm.assert_produces_warning(warn, match="is currently using Series.*"):
         result = series.agg(func)
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index 6f110ee8f8f21..e050d475b744f 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -325,9 +325,9 @@ def test_observed(request, using_infer_string, observed):
     # gh-8138 (back-compat)
     # gh-8869
 
-    # if using_infer_string and not observed:
-    #     # TODO(infer_string) this fails with filling the string column with 0
-    #     request.applymarker(pytest.mark.xfail(reason="TODO(infer_string)"))
+    if using_infer_string and not observed:
+        # TODO(infer_string) this fails with filling the string column with 0
+        request.applymarker(pytest.mark.xfail(reason="TODO(infer_string)"))
 
     cat1 = Categorical(["a", "a", "b", "b"], categories=["a", "b", "z"], ordered=True)
     cat2 = Categorical(["c", "d", "c", "d"], categories=["c", "d", "y"], ordered=True)
@@ -355,12 +355,9 @@ def test_observed(request, using_infer_string, observed):
     )
     result = gb.sum()
     if not observed:
-        fill_value = "" if using_infer_string else 0
         expected = cartesian_product_for_groupers(
-            expected, [cat1, cat2], list("AB"), fill_value=fill_value
+            expected, [cat1, cat2], list("AB"), fill_value=0
         )
-    print(result)
-    print(expected)
     tm.assert_frame_equal(result, expected)
 
 
diff --git a/pandas/tests/series/test_cumulative.py b/pandas/tests/series/test_cumulative.py
index 9357c1f278b75..db83cf1112e74 100644
--- a/pandas/tests/series/test_cumulative.py
+++ b/pandas/tests/series/test_cumulative.py
@@ -265,10 +265,11 @@ def test_cumprod_timedelta(self):
             ([pd.NA, pd.NA, pd.NA], "cummax", False, [pd.NA, pd.NA, pd.NA]),
         ],
     )
-    def test_cum_methods_pyarrow_strings(
+    def test_cum_methods_ea_strings(
         self, string_dtype_no_object, data, op, skipna, expected_data
     ):
-        # https://github.com/pandas-dev/pandas/pull/60633
+        # https://github.com/pandas-dev/pandas/pull/60633 - pyarrow
+        # https://github.com/pandas-dev/pandas/pull/60938 - Python
         ser = pd.Series(data, dtype=string_dtype_no_object)
         method = getattr(ser, op)
         expected = pd.Series(expected_data, dtype=string_dtype_no_object)

From c04969e61ad4ac13a8ddb4f227571925612ba64f Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Sat, 15 Feb 2025 18:11:33 -0500
Subject: [PATCH 3/8] cleanups

---
 pandas/core/arrays/string_.py            | 10 ----------
 pandas/tests/groupby/test_categorical.py |  1 +
 2 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
index 10b2a62828ddb..69e3be87b2b9b 100644
--- a/pandas/core/arrays/string_.py
+++ b/pandas/core/arrays/string_.py
@@ -899,16 +899,6 @@ def _accumulate(self, name: str, *, skipna: bool = True, **kwargs) -> StringArra
         ------
         NotImplementedError : subclass does not define accumulations
         """
-        if is_string_dtype(self):
-            return self._str_accumulate(name=name, skipna=skipna, **kwargs)
-        return super()._accumulate(name=name, skipna=skipna, **kwargs)
-
-    def _str_accumulate(
-        self, name: str, *, skipna: bool = True, **kwargs
-    ) -> StringArray:
-        """
-        Accumulate implementation for strings, see `_accumulate` docstring for details.
-        """
         if name == "cumprod":
             msg = f"operation '{name}' not supported for dtype '{self.dtype}'"
             raise TypeError(msg)
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index e050d475b744f..e49be8c00b426 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -358,6 +358,7 @@ def test_observed(request, using_infer_string, observed):
         expected = cartesian_product_for_groupers(
             expected, [cat1, cat2], list("AB"), fill_value=0
         )
+
     tm.assert_frame_equal(result, expected)
 
 

From 7e116ef90e1cbd131f75bd699c46d2feb9ec9d7e Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Sun, 16 Feb 2025 12:46:31 -0500
Subject: [PATCH 4/8] type-hint fixup

---
 pandas/core/arrays/string_.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
index 69e3be87b2b9b..806db64d107ee 100644
--- a/pandas/core/arrays/string_.py
+++ b/pandas/core/arrays/string_.py
@@ -904,8 +904,8 @@ def _accumulate(self, name: str, *, skipna: bool = True, **kwargs) -> StringArra
             raise TypeError(msg)
 
         # We may need to strip out trailing NA values
-        tail: np.array | None = None
-        na_mask: np.array | None = None
+        tail: np.ndarray | None = None
+        na_mask: np.ndarray | None = None
         ndarray = self._ndarray
         np_func = {
             "cumsum": np.cumsum,

From 0629fcfcc2bc7b9eb422a960c2985af5d55ad1ba Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Sun, 16 Feb 2025 13:16:07 -0500
Subject: [PATCH 5/8] More type fixes

---
 pandas/core/arrays/string_.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
index 806db64d107ee..9c46f2c8c6b87 100644
--- a/pandas/core/arrays/string_.py
+++ b/pandas/core/arrays/string_.py
@@ -23,6 +23,7 @@
 )
 from pandas._libs.arrays import NDArrayBacked
 from pandas._libs.lib import ensure_string_array
+from pandas._typing import npt
 from pandas.compat import (
     HAS_PYARROW,
     pa_version_under10p1,
@@ -83,7 +84,6 @@
         NumpyValueArrayLike,
         Scalar,
         Self,
-        npt,
         type_t,
     )
 
@@ -914,7 +914,7 @@ def _accumulate(self, name: str, *, skipna: bool = True, **kwargs) -> StringArra
         }[name]
 
         if self._hasna:
-            na_mask = isna(ndarray)
+            na_mask = cast(npt.NDArray[np.bool_], isna(ndarray))
             if np.all(na_mask):
                 return type(self)(ndarray)
             if skipna:
@@ -941,7 +941,8 @@ def _accumulate(self, name: str, *, skipna: bool = True, **kwargs) -> StringArra
                 tail[:] = np.nan
                 ndarray = ndarray[:idx]
 
-        np_result = np_func(ndarray)
+        # mypy: Cannot call function of unknown type
+        np_result = np_func(ndarray)  # type: ignore[operator]
 
         if tail is not None:
             np_result = np.hstack((np_result, tail))

From d0f6673d94ac3e7cf63c62ef8ff4504d3cef0d01 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Sun, 16 Feb 2025 14:02:34 -0500
Subject: [PATCH 6/8] Use quotes for cast

---
 pandas/core/arrays/string_.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
index 9c46f2c8c6b87..c8cba904e802d 100644
--- a/pandas/core/arrays/string_.py
+++ b/pandas/core/arrays/string_.py
@@ -23,7 +23,6 @@
 )
 from pandas._libs.arrays import NDArrayBacked
 from pandas._libs.lib import ensure_string_array
-from pandas._typing import npt
 from pandas.compat import (
     HAS_PYARROW,
     pa_version_under10p1,
@@ -84,6 +83,7 @@
         NumpyValueArrayLike,
         Scalar,
         Self,
+        npt,
         type_t,
     )
 
@@ -914,7 +914,7 @@ def _accumulate(self, name: str, *, skipna: bool = True, **kwargs) -> StringArra
         }[name]
 
         if self._hasna:
-            na_mask = cast(npt.NDArray[np.bool_], isna(ndarray))
+            na_mask = cast("npt.NDArray[np.bool_]", isna(ndarray))
             if np.all(na_mask):
                 return type(self)(ndarray)
             if skipna:

From 7f9571de0f13eb37630ae2a2d3e9d65adb1e978f Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Mon, 17 Feb 2025 16:21:09 -0500
Subject: [PATCH 7/8] Refinements

---
 pandas/core/arrays/string_.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
index c8cba904e802d..6a57762b6c6bf 100644
--- a/pandas/core/arrays/string_.py
+++ b/pandas/core/arrays/string_.py
@@ -924,12 +924,12 @@ def _accumulate(self, name: str, *, skipna: bool = True, **kwargs) -> StringArra
                     # We can retain the running min/max by forward/backward filling.
                     ndarray = ndarray.copy()
                     missing.pad_or_backfill_inplace(
-                        ndarray.T,
+                        ndarray,
                         method="pad",
                         axis=0,
                     )
                     missing.pad_or_backfill_inplace(
-                        ndarray.T,
+                        ndarray,
                         method="backfill",
                         axis=0,
                     )
@@ -938,7 +938,7 @@ def _accumulate(self, name: str, *, skipna: bool = True, **kwargs) -> StringArra
                 # the first NA value onward.
                 idx = np.argmax(na_mask)
                 tail = np.empty(len(ndarray) - idx, dtype="object")
-                tail[:] = np.nan
+                tail[:] = self.dtype.na_value
                 ndarray = ndarray[:idx]
 
         # mypy: Cannot call function of unknown type
@@ -947,7 +947,7 @@ def _accumulate(self, name: str, *, skipna: bool = True, **kwargs) -> StringArra
         if tail is not None:
             np_result = np.hstack((np_result, tail))
         elif na_mask is not None:
-            np_result = np.where(na_mask, np.nan, np_result)
+            np_result = np.where(na_mask, self.dtype.na_value, np_result)
 
         result = type(self)(np_result)
         return result

From 2fd977937e12e271256b2fe073c848ae47e2068a Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Mon, 17 Feb 2025 17:30:13 -0500
Subject: [PATCH 8/8] type-ignore

---
 pandas/core/arrays/string_.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
index 6a57762b6c6bf..7227ea77ca433 100644
--- a/pandas/core/arrays/string_.py
+++ b/pandas/core/arrays/string_.py
@@ -947,7 +947,8 @@ def _accumulate(self, name: str, *, skipna: bool = True, **kwargs) -> StringArra
         if tail is not None:
             np_result = np.hstack((np_result, tail))
         elif na_mask is not None:
-            np_result = np.where(na_mask, self.dtype.na_value, np_result)
+            # Argument 2 to "where" has incompatible type "NAType | float"
+            np_result = np.where(na_mask, self.dtype.na_value, np_result)  # type: ignore[arg-type]
 
         result = type(self)(np_result)
         return result