From d3f1611959fa01a41913eda8b3fa9d2c6b5baeea Mon Sep 17 00:00:00 2001
From: Dongdong Tian <seisman.info@gmail.com>
Date: Sun, 10 Nov 2024 16:05:55 +0800
Subject: [PATCH 1/4] clib.conversion._to_numpy: Add tests for pandas.Series
 with pandas string dtypes

---
 pygmt/tests/test_clib_to_numpy.py | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/pygmt/tests/test_clib_to_numpy.py b/pygmt/tests/test_clib_to_numpy.py
index 3624ed2be8d..f5cc3a6d24c 100644
--- a/pygmt/tests/test_clib_to_numpy.py
+++ b/pygmt/tests/test_clib_to_numpy.py
@@ -10,6 +10,7 @@
 import pytest
 from packaging.version import Version
 from pygmt.clib.conversion import _to_numpy
+from pygmt.helpers.testing import skip_if_no
 
 try:
     import pyarrow as pa
@@ -161,6 +162,31 @@ def test_to_numpy_pandas_series_numpy_dtypes_numeric(dtype, expected_dtype):
     npt.assert_array_equal(result, series)
 
 
+@pytest.mark.parametrize(
+    "dtype",
+    [
+        None,
+        np.str_,
+        "U10",
+        "string[python]",
+        pytest.param("string[pyarrow]", marks=skip_if_no(package="pyarrow")),
+        pytest.param("string[pyarrow_numpy]", marks=skip_if_no(package="pyarrow")),
+    ],
+)
+def test_to_numpy_pandas_series_pandas_dtypes_string(dtype):
+    """
+    Test the _to_numpy function with pandas.Series holding string data.
+
+    In pandas, strings can use the default dtype, NumPy dtypes or pandas.StringDtype.
+
+    Reference: https://pandas.pydata.org/docs/reference/api/pandas.StringDtype.html
+    """
+    array = pd.Series(["abc", "defg", "12345"], dtype=dtype)
+    result = _to_numpy(array)
+    _check_result(result, np.str_)
+    npt.assert_array_equal(result, array)
+
+
 ########################################################################################
 # Test the _to_numpy function with PyArrow arrays.
 #

From 01ba31786ddf424027b3c3472339cd43d6fb49d5 Mon Sep 17 00:00:00 2001
From: Dongdong Tian <seisman.info@gmail.com>
Date: Sun, 10 Nov 2024 16:18:50 +0800
Subject: [PATCH 2/4] Map 'string' to np.str_ explicitly

---
 pygmt/clib/conversion.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pygmt/clib/conversion.py b/pygmt/clib/conversion.py
index af8eb3458d4..7b60b5e77e9 100644
--- a/pygmt/clib/conversion.py
+++ b/pygmt/clib/conversion.py
@@ -158,6 +158,9 @@ def _to_numpy(data: Any) -> np.ndarray:
     """
     # Mapping of unsupported dtypes to the expected NumPy dtype.
     dtypes: dict[str, type] = {
+        # pandas string dtypes ("string[python]", "string[pyarrow]" and
+        # "string[pyarrow_numpy]") are all reported as "string" by str(dtype).
+        "string": np.str_,
         "date32[day][pyarrow]": np.datetime64,
         "date64[ms][pyarrow]": np.datetime64,
     }

From dac7e8eda7632464f556eaae5f1ac8ac34c9b6bd Mon Sep 17 00:00:00 2001
From: Dongdong Tian <seisman.info@gmail.com>
Date: Sun, 10 Nov 2024 16:23:57 +0800
Subject: [PATCH 3/4] Try to convert np.object_ into string type

---
 pygmt/clib/conversion.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/pygmt/clib/conversion.py b/pygmt/clib/conversion.py
index 7b60b5e77e9..c4fa39ecdba 100644
--- a/pygmt/clib/conversion.py
+++ b/pygmt/clib/conversion.py
@@ -2,6 +2,7 @@
 Functions to convert data types into ctypes friendly formats.
 """
 
+import contextlib
 import ctypes as ctp
 import warnings
 from collections.abc import Sequence
@@ -178,6 +179,11 @@ def _to_numpy(data: Any) -> np.ndarray:
     else:
         vec_dtype = str(getattr(data, "dtype", ""))
         array = np.ascontiguousarray(data, dtype=dtypes.get(vec_dtype))
+
+    # Try to convert a np.object_ array to np.str_; keep it unchanged on failure.
+    if array.dtype == np.object_:
+        with contextlib.suppress(TypeError, ValueError):
+            return np.ascontiguousarray(array, dtype=np.str_)
     return array
 
 

From 4364e0d93bf70298925b2b20e49c04d97770b946 Mon Sep 17 00:00:00 2001
From: Dongdong Tian <seisman.info@gmail.com>
Date: Sun, 10 Nov 2024 16:46:13 +0800
Subject: [PATCH 4/4] Remove the workaround in PR #684

---
 pygmt/clib/session.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py
index 10c8770adaa..69922edc6dc 100644
--- a/pygmt/clib/session.py
+++ b/pygmt/clib/session.py
@@ -1475,7 +1475,7 @@ def virtualfile_from_vectors(
         # 2 columns contains coordinates like longitude, latitude, or datetime string
         # types.
         for col, array in enumerate(arrays[2:]):
-            if pd.api.types.is_string_dtype(array.dtype):
+            if np.issubdtype(array.dtype, np.str_):
                 columns = col + 2
                 break
 
@@ -1506,9 +1506,9 @@ def virtualfile_from_vectors(
                 strings = string_arrays[0]
             elif len(string_arrays) > 1:
                 strings = np.array(
-                    [" ".join(vals) for vals in zip(*string_arrays, strict=True)]
+                    [" ".join(vals) for vals in zip(*string_arrays, strict=True)],
+                    dtype=np.str_,
                 )
-            strings = np.asanyarray(a=strings, dtype=np.str_)
             self.put_strings(
                 dataset, family="GMT_IS_VECTOR|GMT_IS_DUPLICATE", strings=strings
             )