From b9669867c8da4773db142c64c5f237deb98afc4f Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 19 Mar 2025 19:30:07 +0100 Subject: [PATCH 1/5] String dtype: more informative repr (keeping brief __str__) --- pandas/core/arrays/string_.py | 7 ++----- pandas/tests/arrays/string_/test_string.py | 12 ++++++++++++ 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 7227ea77ca433..91f59a8fd9605 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -198,11 +198,8 @@ def __init__( self._na_value = na_value def __repr__(self) -> str: - if self._na_value is libmissing.NA: - return f"{self.name}[{self.storage}]" - else: - # TODO add more informative repr - return self.name + storage = "" if self.storage == "pyarrow" else "storage='python', " + return f"<StringDtype({storage}na_value={self._na_value})>" def __eq__(self, other: object) -> bool: # we need to override the base class __eq__ because na_value (NA or NaN) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 336a0fef69170..5670fad7e2f4f 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -103,6 +103,18 @@ def test_repr(dtype): assert repr(df.A.array) == expected +def test_dtype_repr(dtype): + if dtype.storage == "pyarrow": + if dtype.na_value is pd.NA: + assert repr(dtype) == "<StringDtype(na_value=<NA>)>" + else: + assert repr(dtype) == "<StringDtype(na_value=nan)>" + elif dtype.na_value is pd.NA: + assert repr(dtype) == "<StringDtype(storage='python', na_value=<NA>)>" + else: + assert repr(dtype) == "<StringDtype(storage='python', na_value=nan)>" + + def test_none_to_nan(cls, dtype): a = cls._from_sequence(["a", None, "b"], dtype=dtype) assert a[1] is not None From 384eeddb2bb1fb9148934bd60c6e7ebde4c042b0 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 20 Mar 2025 08:17:11 +0100 Subject: [PATCH 2/5] fix display in series --- pandas/io/formats/format.py | 3 --- pandas/tests/io/formats/test_to_string.py | 6 +++--- 2 files changed, 3 insertions(+), 6 deletions(-) 
diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index b7fbc4e5e22b7..36bfc261e75c1 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -67,7 +67,6 @@ ExtensionArray, TimedeltaArray, ) -from pandas.core.arrays.string_ import StringDtype from pandas.core.base import PandasObject import pandas.core.common as com from pandas.core.indexes.api import ( @@ -1218,8 +1217,6 @@ def _format(x): return self.na_rep elif isinstance(x, PandasObject): return str(x) - elif isinstance(x, StringDtype): - return repr(x) else: # object dtype return str(formatter(x)) diff --git a/pandas/tests/io/formats/test_to_string.py b/pandas/tests/io/formats/test_to_string.py index 63c975fd831e7..0866581535c2f 100644 --- a/pandas/tests/io/formats/test_to_string.py +++ b/pandas/tests/io/formats/test_to_string.py @@ -777,9 +777,9 @@ def test_to_string_string_dtype(self): result = df.dtypes.to_string() expected = dedent( """\ - x string[pyarrow] - y string[python] - z int64[pyarrow]""" + x string + y string + z int64[pyarrow]""" ) assert result == expected From 78870e1e55b34e9359e297ee61b818ca8ac1d2ff Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 20 Mar 2025 08:20:23 +0100 Subject: [PATCH 3/5] update doctest --- pandas/core/generic.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0c3f535df9ce2..260fae663a4de 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6809,12 +6809,12 @@ def convert_dtypes( 2 3 z 20 200.0 >>> dfn.dtypes - a Int32 - b string[python] - c boolean - d string[python] - e Int64 - f Float64 + a Int32 + b string + c boolean + d string + e Int64 + f Float64 dtype: object Start with a Series of strings and missing data represented by ``np.nan``. 
From b710b239511af98a9d6d8c1a050a2a157093ffd8 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 20 Mar 2025 08:55:51 +0100 Subject: [PATCH 4/5] update docstring --- pandas/core/arrays/string_.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 91f59a8fd9605..987cb47d0d179 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -123,10 +123,10 @@ class StringDtype(StorageExtensionDtype): Examples -------- >>> pd.StringDtype() - string[python] + StringDtype(storage='python', na_value=<NA>)> >>> pd.StringDtype(storage="pyarrow") - string[pyarrow] + StringDtype(na_value=<NA>)> """ @property From 67b5964a58e96b8469af46965b37f15bdc0cd070 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 20 Mar 2025 09:33:55 +0100 Subject: [PATCH 5/5] fixup --- pandas/core/arrays/string_.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 987cb47d0d179..ac758d0ef093c 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -123,10 +123,10 @@ class StringDtype(StorageExtensionDtype): Examples -------- >>> pd.StringDtype() - StringDtype(storage='python', na_value=<NA>)> + <StringDtype(storage='python', na_value=<NA>)> >>> pd.StringDtype(storage="pyarrow") - StringDtype(na_value=<NA>)> + <StringDtype(na_value=<NA>)> """ @property