From 792e66ef47a6102e877765481b1fbdd93d3bb818 Mon Sep 17 00:00:00 2001
From: Simon Chen <1020359403@qq.com>
Date: Thu, 4 Jul 2024 07:14:57 +0000
Subject: [PATCH] Support pandas StringArray and ArrowStringArray

---
 clickhouse_driver/numpy/helpers.py |  7 +++++--
 tests/numpy/columns/test_string.py | 27 +++++++++++++++++++++++++++
 2 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/clickhouse_driver/numpy/helpers.py b/clickhouse_driver/numpy/helpers.py
index e1f813db..b80f8940 100644
--- a/clickhouse_driver/numpy/helpers.py
+++ b/clickhouse_driver/numpy/helpers.py
@@ -1,13 +1,16 @@
 import numpy as np
 import pandas as pd
+from pandas.core.arrays import ExtensionArray
 
 
 def column_chunks(columns, n):
     for column in columns:
-        if not isinstance(column, (np.ndarray, pd.DatetimeIndex)):
+        if not isinstance(
+            column, (np.ndarray, pd.DatetimeIndex, ExtensionArray)
+        ):
             raise TypeError(
                 'Unsupported column type: {}. '
-                'ndarray/DatetimeIndex is expected.'
+                'ndarray/DatetimeIndex/ExtensionArray is expected.'
                 .format(type(column))
             )
 
diff --git a/tests/numpy/columns/test_string.py b/tests/numpy/columns/test_string.py
index df241515..a5558e71 100644
--- a/tests/numpy/columns/test_string.py
+++ b/tests/numpy/columns/test_string.py
@@ -3,6 +3,11 @@
 except ImportError:
     np = None
 
+try:
+    import pandas as pd
+except ImportError:
+    pd = None
+
 from tests.numpy.testcase import NumpyBaseTestCase
 
 
@@ -40,6 +45,28 @@ def test_nullable(self):
             )
             self.assertEqual(inserted[0].dtype, object)
 
+    def test_insert_pandas_string(self):
+        with self.create_table('a String'):
+            df = pd.DataFrame({'a': ['a', 'b', 'c']}, dtype='string')
+            self.client.insert_dataframe(
+                'INSERT INTO test VALUES', dataframe=df
+            )
+
+            query = 'SELECT * FROM test'
+            inserted = self.emit_cli(query)
+            self.assertEqual(inserted, 'a\nb\nc\n')
+
+    def test_insert_pandas_pyarrow_string(self):
+        with self.create_table('a String'):
+            df = pd.DataFrame({'a': ['a', 'b', 'c']}, dtype='string[pyarrow]')
+            self.client.insert_dataframe(
+                'INSERT INTO test VALUES', dataframe=df
+            )
+
+            query = 'SELECT * FROM test'
+            inserted = self.emit_cli(query)
+            self.assertEqual(inserted, 'a\nb\nc\n')
+
 
 class ByteStringTestCase(NumpyBaseTestCase):
     client_kwargs = {'settings': {'strings_as_bytes': True, 'use_numpy': True}}