Skip to content

Commit

Permalink
ENH (string dtype): accept string_view in addition to string/large_st…
Browse files Browse the repository at this point in the history
…ring for ArrowStringArray input (#60222)
  • Loading branch information
jorisvandenbossche authored Nov 6, 2024
1 parent 4b04a2f commit c15d823
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 0 deletions.
7 changes: 7 additions & 0 deletions pandas/core/arrays/string_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from pandas.compat import (
pa_version_under10p1,
pa_version_under13p0,
pa_version_under16p0,
)
from pandas.util._exceptions import find_stack_level

Expand Down Expand Up @@ -71,6 +72,10 @@ def _chk_pyarrow_available() -> None:
raise ImportError(msg)


def _is_string_view(typ):
    """
    Check whether ``typ`` is a pyarrow ``string_view`` type.

    Returns False without consulting pyarrow when ``pa_version_under16p0``
    is set; otherwise returns the result of ``pa.types.is_string_view(typ)``.
    """
    # NOTE(review): presumably pa.types.is_string_view is unavailable in
    # pyarrow < 16, hence the version guard before the call -- confirm.
    if pa_version_under16p0:
        return False
    return pa.types.is_string_view(typ)


# TODO: Inherit directly from BaseStringArrayMethods. Currently we inherit from
# ObjectStringArrayMixin because we want to have the object-dtype based methods as
# fallback for the ones that pyarrow doesn't yet support
Expand Down Expand Up @@ -128,11 +133,13 @@ def __init__(self, values) -> None:
_chk_pyarrow_available()
if isinstance(values, (pa.Array, pa.ChunkedArray)) and (
pa.types.is_string(values.type)
or _is_string_view(values.type)
or (
pa.types.is_dictionary(values.type)
and (
pa.types.is_string(values.type.value_type)
or pa.types.is_large_string(values.type.value_type)
or _is_string_view(values.type.value_type)
)
)
):
Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/arrays/string_/test_string_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,20 @@ def test_constructor_valid_string_type_value_dictionary(string_type, chunked):
assert pa.types.is_large_string(arr._pa_array.type)


@pytest.mark.parametrize("chunked", [True, False])
def test_constructor_valid_string_view(chunked):
    # requires pyarrow>=18 for casting string_view to string
    pa = pytest.importorskip("pyarrow", minversion="18")

    # Build string_view input, optionally wrapped in a ChunkedArray so that
    # both accepted pyarrow container types are exercised.
    pa_values = pa.array(["1", "2", "3"], pa.string_view())
    if chunked:
        pa_values = pa.chunked_array(pa_values)

    result = ArrowStringArray(pa_values)
    # string_view input gets converted to a large string array
    assert pa.types.is_large_string(result._pa_array.type)


def test_constructor_from_list():
# GH#27673
pytest.importorskip("pyarrow")
Expand Down

0 comments on commit c15d823

Please sign in to comment.