diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index fcc50c5b6b20f..e0c93db0afb07 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -1644,7 +1644,11 @@ def _accumulate(
             else:
                 data_to_accum = data_to_accum.cast(pa.int64())
 
-        result = pyarrow_meth(data_to_accum, skip_nulls=skipna, **kwargs)
+        try:
+            result = pyarrow_meth(data_to_accum, skip_nulls=skipna, **kwargs)
+        except pa.ArrowNotImplementedError as err:
+            msg = f"operation '{name}' not supported for dtype '{self.dtype}'"
+            raise TypeError(msg) from err
 
         if convert_to_int:
             result = result.cast(pa_dtype)
diff --git a/pandas/tests/apply/test_invalid_arg.py b/pandas/tests/apply/test_invalid_arg.py
index e19c21f81b3e1..0503bf9166ec7 100644
--- a/pandas/tests/apply/test_invalid_arg.py
+++ b/pandas/tests/apply/test_invalid_arg.py
@@ -218,18 +218,12 @@ def transform(row):
 def test_agg_cython_table_raises_frame(df, func, expected, axis, using_infer_string):
     # GH 21224
     if using_infer_string:
-        if df.dtypes.iloc[0].storage == "pyarrow":
-            import pyarrow as pa
-
-            # TODO(infer_string)
-            # should raise a proper TypeError instead of propagating the pyarrow error
-
-            expected = (expected, pa.lib.ArrowNotImplementedError)
-        else:
-            expected = (expected, NotImplementedError)
+        expected = (expected, NotImplementedError)
 
     msg = (
-        "can't multiply sequence by non-int of type 'str'|has no kernel|cannot perform"
+        "can't multiply sequence by non-int of type 'str'"
+        "|cannot perform cumprod with type str"  # NotImplementedError python backend
+        "|operation 'cumprod' not supported for dtype 'str'"  # TypeError pyarrow
     )
     warn = None if isinstance(func, str) else FutureWarning
     with pytest.raises(expected, match=msg):
@@ -259,16 +253,12 @@ def test_agg_cython_table_raises_series(series, func, expected, using_infer_stri
     if func == "median" or func is np.nanmedian or func is np.median:
         msg = r"Cannot convert \['a' 'b' 'c'\] to numeric"
 
-    if using_infer_string:
-        if series.dtype.storage == "pyarrow":
-            import pyarrow as pa
-
-            # TODO(infer_string)
-            # should raise a proper TypeError instead of propagating the pyarrow error
-            expected = (expected, pa.lib.ArrowNotImplementedError)
-        else:
-            expected = (expected, NotImplementedError)
-    msg = msg + "|does not support|has no kernel|Cannot perform|cannot perform"
+    if using_infer_string and func == "cumprod":
+        expected = (expected, NotImplementedError)
+
+    msg = (
+        msg + "|does not support|has no kernel|Cannot perform|cannot perform|operation"
+    )
     warn = None if isinstance(func, str) else FutureWarning
 
     with pytest.raises(expected, match=msg):
diff --git a/pandas/tests/apply/test_str.py b/pandas/tests/apply/test_str.py
index 732652f24e2eb..c52168ae48ca8 100644
--- a/pandas/tests/apply/test_str.py
+++ b/pandas/tests/apply/test_str.py
@@ -4,8 +4,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 from pandas.compat import WASM
 
 from pandas.core.dtypes.common import is_number
@@ -81,7 +79,6 @@ def test_apply_np_transformer(float_frame, op, how):
     tm.assert_frame_equal(result, expected)
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
 @pytest.mark.parametrize(
     "series, func, expected",
     chain(
@@ -140,7 +137,6 @@ def test_agg_cython_table_series(series, func, expected):
         assert result == expected
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
 @pytest.mark.parametrize(
     "series, func, expected",
     chain(
@@ -163,10 +159,17 @@ def test_agg_cython_table_series(series, func, expected):
         ),
     ),
 )
-def test_agg_cython_table_transform_series(series, func, expected):
+def test_agg_cython_table_transform_series(request, series, func, expected):
     # GH21224
     # test transforming functions in
     # pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum)
+    if series.dtype == "string" and func == "cumsum":
+        request.applymarker(
+            pytest.mark.xfail(
+                raises=(TypeError, NotImplementedError),
+                reason="TODO(infer_string) cumsum not yet implemented for string",
+            )
+        )
     warn = None if isinstance(func, str) else FutureWarning
     with tm.assert_produces_warning(warn, match="is currently using Series.*"):
         result = series.agg(func)
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index f0ff11e5fa3f7..9defb97394635 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -441,7 +441,7 @@ def test_accumulate_series(self, data, all_numeric_accumulations, skipna, reques
             request.applymarker(
                 pytest.mark.xfail(
                     reason=f"{all_numeric_accumulations} not implemented for {pa_type}",
-                    raises=NotImplementedError,
+                    raises=TypeError,
                 )
             )
 
diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py
index 7f367ded39863..7936982e4a055 100644
--- a/pandas/tests/io/json/test_json_table_schema.py
+++ b/pandas/tests/io/json/test_json_table_schema.py
@@ -7,8 +7,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 from pandas.core.dtypes.dtypes import (
     CategoricalDtype,
     DatetimeTZDtype,
@@ -27,10 +25,6 @@
     set_default_names,
 )
 
-pytestmark = pytest.mark.xfail(
-    using_string_dtype(), reason="TODO(infer_string)", strict=False
-)
-
 
 @pytest.fixture
 def df_schema():
@@ -127,7 +121,7 @@ def test_multiindex(self, df_schema, using_infer_string):
             expected["fields"][0] = {
                 "name": "level_0",
                 "type": "any",
-                "extDtype": "string",
+                "extDtype": "str",
             }
             expected["fields"][3] = {"name": "B", "type": "any", "extDtype": "str"}
         assert result == expected
diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index d3328d1dfcaef..ad9dbf7554a8b 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -84,7 +84,7 @@ def datetime_frame(self):
         #  since that doesn't round-trip, see GH#33711
         df = DataFrame(
             np.random.default_rng(2).standard_normal((30, 4)),
-            columns=Index(list("ABCD"), dtype=object),
+            columns=Index(list("ABCD")),
             index=date_range("2000-01-01", periods=30, freq="B"),
         )
         df.index = df.index._with_freq(None)
@@ -184,7 +184,6 @@ def test_roundtrip_simple(self, orient, convert_axes, dtype, float_frame):
 
         assert_json_roundtrip_equal(result, expected, orient)
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
     @pytest.mark.parametrize("dtype", [False, np.int64])
     @pytest.mark.parametrize("convert_axes", [True, False])
     def test_roundtrip_intframe(self, orient, convert_axes, dtype, int_frame):
@@ -270,7 +269,6 @@ def test_roundtrip_empty(self, orient, convert_axes):
 
         tm.assert_frame_equal(result, expected)
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
     @pytest.mark.parametrize("convert_axes", [True, False])
     def test_roundtrip_timestamp(self, orient, convert_axes, datetime_frame):
         # TODO: improve coverage with date_format parameter
@@ -698,7 +696,6 @@ def test_series_roundtrip_simple(self, orient, string_series, using_infer_string
 
         tm.assert_series_equal(result, expected)
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
     @pytest.mark.parametrize("dtype", [False, None])
     def test_series_roundtrip_object(self, orient, dtype, object_series):
         data = StringIO(object_series.to_json(orient=orient))
@@ -710,6 +707,9 @@ def test_series_roundtrip_object(self, orient, dtype, object_series):
         if orient != "split":
             expected.name = None
 
+        if using_string_dtype():
+            expected = expected.astype("str")
+
         tm.assert_series_equal(result, expected)
 
     def test_series_roundtrip_empty(self, orient):
@@ -808,7 +808,6 @@ def test_path(self, float_frame, int_frame, datetime_frame):
                 df.to_json(path)
                 read_json(path)
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
     def test_axis_dates(self, datetime_series, datetime_frame):
         # frame
         json = StringIO(datetime_frame.to_json())
@@ -821,7 +820,6 @@ def test_axis_dates(self, datetime_series, datetime_frame):
         tm.assert_series_equal(result, datetime_series, check_names=False)
         assert result.name is None
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
     def test_convert_dates(self, datetime_series, datetime_frame):
         # frame
         df = datetime_frame
@@ -912,7 +910,6 @@ def test_convert_dates_infer(self, infer_word):
         result = read_json(StringIO(ujson_dumps(data)))[["id", infer_word]]
         tm.assert_frame_equal(result, expected)
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
     @pytest.mark.parametrize(
         "date,date_unit",
         [
@@ -973,7 +970,6 @@ def test_date_format_series_raises(self, datetime_series):
         with pytest.raises(ValueError, match=msg):
             ts.to_json(date_format="iso", date_unit="foo")
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
     def test_date_unit(self, unit, datetime_frame):
         df = datetime_frame
         df["date"] = Timestamp("20130101 20:43:42").as_unit("ns")
@@ -1114,7 +1110,6 @@ def test_round_trip_exception(self, datapath):
         res = res.fillna(np.nan)
         tm.assert_frame_equal(res, df)
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
     @pytest.mark.network
     @pytest.mark.single_cpu
     @pytest.mark.parametrize(
@@ -1555,7 +1550,6 @@ def test_data_frame_size_after_to_json(self):
 
         assert size_before == size_after
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
     @pytest.mark.parametrize(
         "index", [None, [1, 2], [1.0, 2.0], ["a", "b"], ["1", "2"], ["1.", "2."]]
     )
diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py
index 541cc39606047..b5e97314caf03 100644
--- a/pandas/tests/io/test_clipboard.py
+++ b/pandas/tests/io/test_clipboard.py
@@ -3,8 +3,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 from pandas.errors import (
     PyperclipException,
     PyperclipWindowsException,
@@ -26,10 +24,6 @@
     init_qt_clipboard,
 )
 
-pytestmark = pytest.mark.xfail(
-    using_string_dtype(), reason="TODO(infer_string)", strict=False
-)
-
 
 def build_kwargs(sep, excel):
     kwargs = {}
@@ -351,7 +345,7 @@ def test_raw_roundtrip(self, data):
 
     @pytest.mark.parametrize("engine", ["c", "python"])
     def test_read_clipboard_dtype_backend(
-        self, clipboard, string_storage, dtype_backend, engine
+        self, clipboard, string_storage, dtype_backend, engine, using_infer_string
     ):
         # GH#50502
         if dtype_backend == "pyarrow":
@@ -396,6 +390,11 @@ def test_read_clipboard_dtype_backend(
             )
             expected["g"] = ArrowExtensionArray(pa.array([None, None]))
 
+        if using_infer_string:
+            expected.columns = expected.columns.astype(
+                pd.StringDtype(string_storage, na_value=np.nan)
+            )
+
         tm.assert_frame_equal(result, expected)
 
     def test_invalid_dtype_backend(self):
diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py
index 4f3f613f71542..506276a25dad6 100644
--- a/pandas/tests/io/test_common.py
+++ b/pandas/tests/io/test_common.py
@@ -140,7 +140,6 @@ def test_bytesiowrapper_returns_correct_bytes(self):
             assert result == data.encode("utf-8")
 
     # Test that pyarrow can handle a file opened with get_handle
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
     def test_get_handle_pyarrow_compat(self):
         pa_csv = pytest.importorskip("pyarrow.csv")
 
@@ -155,6 +154,8 @@ def test_get_handle_pyarrow_compat(self):
         s = StringIO(data)
         with icom.get_handle(s, "rb", is_text=False) as handles:
             df = pa_csv.read_csv(handles.handle).to_pandas()
+            # TODO will have to update this when pyarrow' to_pandas() is fixed
+            expected = expected.astype("object")
             tm.assert_frame_equal(df, expected)
             assert not s.closed
 
@@ -338,7 +339,6 @@ def test_read_fspath_all(self, reader, module, path, datapath):
             ("to_stata", {"time_stamp": pd.to_datetime("2019-01-01 00:00")}, "os"),
         ],
     )
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
     def test_write_fspath_all(self, writer_name, writer_kwargs, module):
         if writer_name in ["to_latex"]:  # uses Styler implementation
             pytest.importorskip("jinja2")
@@ -365,7 +365,7 @@ def test_write_fspath_all(self, writer_name, writer_kwargs, module):
                     expected = f_path.read()
                     assert result == expected
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
+    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string) hdf support")
     def test_write_fspath_hdf5(self):
         # Same test as write_fspath_all, except HDF5 files aren't
         # necessarily byte-for-byte identical for a given dataframe, so we'll
@@ -438,14 +438,13 @@ def test_unknown_engine(self):
         with tm.ensure_clean() as path:
             df = pd.DataFrame(
                 1.1 * np.arange(120).reshape((30, 4)),
-                columns=pd.Index(list("ABCD"), dtype=object),
-                index=pd.Index([f"i-{i}" for i in range(30)], dtype=object),
+                columns=pd.Index(list("ABCD")),
+                index=pd.Index([f"i-{i}" for i in range(30)]),
             )
             df.to_csv(path)
             with pytest.raises(ValueError, match="Unknown engine"):
                 pd.read_csv(path, engine="pyt")
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
     def test_binary_mode(self):
         """
         'encoding' shouldn't be passed to 'open' in binary mode.
@@ -455,8 +454,8 @@ def test_binary_mode(self):
         with tm.ensure_clean() as path:
             df = pd.DataFrame(
                 1.1 * np.arange(120).reshape((30, 4)),
-                columns=pd.Index(list("ABCD"), dtype=object),
-                index=pd.Index([f"i-{i}" for i in range(30)], dtype=object),
+                columns=pd.Index(list("ABCD")),
+                index=pd.Index([f"i-{i}" for i in range(30)]),
             )
             df.to_csv(path, mode="w+b")
             tm.assert_frame_equal(df, pd.read_csv(path, index_col=0))
@@ -473,8 +472,8 @@ def test_warning_missing_utf_bom(self, encoding, compression_):
         """
         df = pd.DataFrame(
             1.1 * np.arange(120).reshape((30, 4)),
-            columns=pd.Index(list("ABCD"), dtype=object),
-            index=pd.Index([f"i-{i}" for i in range(30)], dtype=object),
+            columns=pd.Index(list("ABCD")),
+            index=pd.Index([f"i-{i}" for i in range(30)]),
         )
         with tm.ensure_clean() as path:
             with tm.assert_produces_warning(UnicodeWarning, match="byte order mark"):
@@ -504,15 +503,14 @@ def test_is_fsspec_url():
     assert icom.is_fsspec_url("RFC-3986+compliant.spec://something")
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
 @pytest.mark.parametrize("encoding", [None, "utf-8"])
 @pytest.mark.parametrize("format", ["csv", "json"])
 def test_codecs_encoding(encoding, format):
     # GH39247
     expected = pd.DataFrame(
         1.1 * np.arange(120).reshape((30, 4)),
-        columns=pd.Index(list("ABCD"), dtype=object),
-        index=pd.Index([f"i-{i}" for i in range(30)], dtype=object),
+        columns=pd.Index(list("ABCD")),
+        index=pd.Index([f"i-{i}" for i in range(30)]),
     )
     with tm.ensure_clean() as path:
         with codecs.open(path, mode="w", encoding=encoding) as handle:
@@ -525,13 +523,12 @@ def test_codecs_encoding(encoding, format):
     tm.assert_frame_equal(expected, df)
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
 def test_codecs_get_writer_reader():
     # GH39247
     expected = pd.DataFrame(
         1.1 * np.arange(120).reshape((30, 4)),
-        columns=pd.Index(list("ABCD"), dtype=object),
-        index=pd.Index([f"i-{i}" for i in range(30)], dtype=object),
+        columns=pd.Index(list("ABCD")),
+        index=pd.Index([f"i-{i}" for i in range(30)]),
     )
     with tm.ensure_clean() as path:
         with open(path, "wb") as handle:
@@ -556,8 +553,8 @@ def test_explicit_encoding(io_class, mode, msg):
     # wrong mode is requested
     expected = pd.DataFrame(
         1.1 * np.arange(120).reshape((30, 4)),
-        columns=pd.Index(list("ABCD"), dtype=object),
-        index=pd.Index([f"i-{i}" for i in range(30)], dtype=object),
+        columns=pd.Index(list("ABCD")),
+        index=pd.Index([f"i-{i}" for i in range(30)]),
     )
     with io_class() as buffer:
         with pytest.raises(TypeError, match=msg):
diff --git a/pandas/tests/io/test_compression.py b/pandas/tests/io/test_compression.py
index 5eb202dd5aa24..fd1e9b4fdf211 100644
--- a/pandas/tests/io/test_compression.py
+++ b/pandas/tests/io/test_compression.py
@@ -12,8 +12,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 from pandas.compat import is_platform_windows
 
 import pandas as pd
@@ -139,7 +137,6 @@ def test_compression_warning(compression_only):
                 df.to_csv(handles.handle, compression=compression_only)
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
 def test_compression_binary(compression_only):
     """
     Binary file handles support compression.
@@ -148,8 +145,8 @@ def test_compression_binary(compression_only):
     """
     df = pd.DataFrame(
         1.1 * np.arange(120).reshape((30, 4)),
-        columns=pd.Index(list("ABCD"), dtype=object),
-        index=pd.Index([f"i-{i}" for i in range(30)], dtype=object),
+        columns=pd.Index(list("ABCD")),
+        index=pd.Index([f"i-{i}" for i in range(30)]),
     )
 
     # with a file
@@ -180,8 +177,8 @@ def test_gzip_reproducibility_file_name():
     """
     df = pd.DataFrame(
         1.1 * np.arange(120).reshape((30, 4)),
-        columns=pd.Index(list("ABCD"), dtype=object),
-        index=pd.Index([f"i-{i}" for i in range(30)], dtype=object),
+        columns=pd.Index(list("ABCD")),
+        index=pd.Index([f"i-{i}" for i in range(30)]),
     )
     compression_options = {"method": "gzip", "mtime": 1}
 
@@ -203,8 +200,8 @@ def test_gzip_reproducibility_file_object():
     """
     df = pd.DataFrame(
         1.1 * np.arange(120).reshape((30, 4)),
-        columns=pd.Index(list("ABCD"), dtype=object),
-        index=pd.Index([f"i-{i}" for i in range(30)], dtype=object),
+        columns=pd.Index(list("ABCD")),
+        index=pd.Index([f"i-{i}" for i in range(30)]),
     )
     compression_options = {"method": "gzip", "mtime": 1}
 
diff --git a/pandas/tests/io/test_gcs.py b/pandas/tests/io/test_gcs.py
index a9e7b2da03a4d..48580003f6c5e 100644
--- a/pandas/tests/io/test_gcs.py
+++ b/pandas/tests/io/test_gcs.py
@@ -158,7 +158,6 @@ def assert_equal_zip_safe(result: bytes, expected: bytes, compression: str):
         assert result == expected
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
 @pytest.mark.parametrize("encoding", ["utf-8", "cp1251"])
 def test_to_csv_compression_encoding_gcs(
     gcs_buffer, compression_only, encoding, compression_to_extension
@@ -171,8 +170,8 @@ def test_to_csv_compression_encoding_gcs(
     """
     df = DataFrame(
         1.1 * np.arange(120).reshape((30, 4)),
-        columns=Index(list("ABCD"), dtype=object),
-        index=Index([f"i-{i}" for i in range(30)], dtype=object),
+        columns=Index(list("ABCD")),
+        index=Index([f"i-{i}" for i in range(30)]),
     )
 
     # reference of compressed and encoded file