From babed36cfec7d97bb60a3dcef150bdf7bc939d65 Mon Sep 17 00:00:00 2001 From: KevsterAmp Date: Tue, 12 Nov 2024 20:40:25 +0800 Subject: [PATCH 1/5] add test func --- pandas/tests/io/test_common.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 4f3f613f71542..145a898f7e04d 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -5,6 +5,7 @@ import codecs import errno from functools import partial +import io from io import ( BytesIO, StringIO, @@ -674,3 +675,11 @@ def test_pickle_reader(reader): # GH 22265 with BytesIO() as buffer: pickle.dump(reader, buffer) + + +def test_pyarrow_read_csv_datetime_dtype(): + data = "date,id\n20/12/2025,a\n,b\n31/12/2020,c" + df = pd.read_csv( + io.StringIO(data), parse_dates=["date"], dayfirst=True, dtype_backend="pyarrow" + ) + assert (df["date"].dtype) == "datetime64[s]" From 574d129a502738b714f42608094e908396021564 Mon Sep 17 00:00:00 2001 From: KevsterAmp Date: Tue, 12 Nov 2024 20:43:43 +0800 Subject: [PATCH 2/5] remove io since StringIO is already imported --- pandas/tests/io/test_common.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 145a898f7e04d..0911aa580f87f 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -5,7 +5,6 @@ import codecs import errno from functools import partial -import io from io import ( BytesIO, StringIO, @@ -680,6 +679,6 @@ def test_pickle_reader(reader): def test_pyarrow_read_csv_datetime_dtype(): data = "date,id\n20/12/2025,a\n,b\n31/12/2020,c" df = pd.read_csv( - io.StringIO(data), parse_dates=["date"], dayfirst=True, dtype_backend="pyarrow" + StringIO(data), parse_dates=["date"], dayfirst=True, dtype_backend="pyarrow" ) assert (df["date"].dtype) == "datetime64[s]" From 5a9a935fd4be721c5e26d4752f95d5109c3df37b Mon Sep 17 00:00:00 2001 From: KevsterAmp Date: Tue, 12 Nov 2024 21:01:55 +0800 Subject: [PATCH 3/5] add td.skip_if_no("pyarrow") from CI errors --- pandas/tests/io/test_common.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 0911aa580f87f..7cd342fa27553 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -676,6 +676,7 @@ def test_pickle_reader(reader): pickle.dump(reader, buffer) +@td.skip_if_no("pyarrow") def test_pyarrow_read_csv_datetime_dtype(): data = "date,id\n20/12/2025,a\n,b\n31/12/2020,c" df = pd.read_csv( From 30b214944a4a9557d88ecadef0eb604a79e79273 Mon Sep 17 00:00:00 2001 From: KevsterAmp Date: Thu, 14 Nov 2024 10:29:58 +0800 Subject: [PATCH 4/5] fix test --- pandas/tests/io/test_common.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 7cd342fa27553..bf281bbd5993d 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -678,8 +678,15 @@ def test_pickle_reader(reader): @td.skip_if_no("pyarrow") def test_pyarrow_read_csv_datetime_dtype(): - data = "date,id\n20/12/2025,a\n,b\n31/12/2020,c" - df = pd.read_csv( + # GH 59904 + data = '"date"\n"20/12/2025"\n""\n"31/12/2020"' + result = pd.read_csv( StringIO(data), parse_dates=["date"], dayfirst=True, dtype_backend="pyarrow" ) - assert (df["date"].dtype) == "datetime64[s]" + expect_data = pd.Series( + pd.to_datetime(["20/12/2025", pd.NaT, "31/12/2020"], dayfirst=True) + ) + expect = pd.DataFrame({"date": expect_data}) + + assert (result["date"].dtype) == "datetime64[s]" + tm.assert_frame_equal(expect, result) From 4812f69b4899e32170b88605608ff32143959cf3 Mon Sep 17 00:00:00 2001 From: KevsterAmp Date: Fri, 15 Nov 2024 17:00:33 +0800 Subject: [PATCH 5/5] improve expect_data; remove assert on "date" column dtype --- pandas/tests/io/test_common.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index bf281bbd5993d..3e8488e6fadb6 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -683,10 +683,8 @@ def test_pyarrow_read_csv_datetime_dtype(): result = pd.read_csv( StringIO(data), parse_dates=["date"], dayfirst=True, dtype_backend="pyarrow" ) - expect_data = pd.Series( - pd.to_datetime(["20/12/2025", pd.NaT, "31/12/2020"], dayfirst=True) - ) + + expect_data = pd.to_datetime(["20/12/2025", pd.NaT, "31/12/2020"], dayfirst=True) expect = pd.DataFrame({"date": expect_data}) - assert (result["date"].dtype) == "datetime64[s]" tm.assert_frame_equal(expect, result)