Skip to content

Commit

Permalink
TST(string dtype): Resolve xfails in pytables (#60795)
Browse files Browse the repository at this point in the history
  • Loading branch information
rhshadrach authored Feb 10, 2025
1 parent e557039 commit 4511251
Show file tree
Hide file tree
Showing 13 changed files with 142 additions and 149 deletions.
3 changes: 3 additions & 0 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -5118,6 +5118,9 @@ def _maybe_convert_for_string_atom(
errors,
columns: list[str],
):
if isinstance(bvalues.dtype, StringDtype):
# "ndarray[Any, Any]" has no attribute "to_numpy"
bvalues = bvalues.to_numpy() # type: ignore[union-attr]
if bvalues.dtype != object:
return bvalues

Expand Down
56 changes: 30 additions & 26 deletions pandas/tests/io/pytables/test_append.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,7 @@
ensure_clean_store,
)

pytestmark = [
pytest.mark.single_cpu,
pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False),
]
pytestmark = [pytest.mark.single_cpu]

tables = pytest.importorskip("tables")

Expand All @@ -40,7 +37,7 @@ def test_append(setup_path):
# tables.NaturalNameWarning):
df = DataFrame(
np.random.default_rng(2).standard_normal((20, 4)),
columns=Index(list("ABCD"), dtype=object),
columns=Index(list("ABCD")),
index=date_range("2000-01-01", periods=20, freq="B"),
)
_maybe_remove(store, "df1")
Expand Down Expand Up @@ -203,7 +200,7 @@ def test_append_some_nans(setup_path):
tm.assert_frame_equal(store["df3"], df3, check_index_type=True)


def test_append_all_nans(setup_path):
def test_append_all_nans(setup_path, using_infer_string):
with ensure_clean_store(setup_path) as store:
df = DataFrame(
{
Expand Down Expand Up @@ -255,7 +252,13 @@ def test_append_all_nans(setup_path):
_maybe_remove(store, "df")
store.append("df", df[:10], dropna=True)
store.append("df", df[10:], dropna=True)
tm.assert_frame_equal(store["df"], df, check_index_type=True)
result = store["df"]
expected = df
if using_infer_string:
# TODO: Test is incorrect when not using_infer_string.
# Should take the last 4 rows unconditionally.
expected = expected[-4:]
tm.assert_frame_equal(result, expected, check_index_type=True)

_maybe_remove(store, "df2")
store.append("df2", df[:10], dropna=False)
Expand Down Expand Up @@ -294,7 +297,7 @@ def test_append_frame_column_oriented(setup_path, request):
# column oriented
df = DataFrame(
np.random.default_rng(2).standard_normal((10, 4)),
columns=Index(list("ABCD"), dtype=object),
columns=Index(list("ABCD")),
index=date_range("2000-01-01", periods=10, freq="B"),
)
df.index = df.index._with_freq(None) # freq doesn't round-trip
Expand Down Expand Up @@ -426,7 +429,7 @@ def check_col(key, name, size):
{
"A": [0.0, 1.0, 2.0, 3.0, 4.0],
"B": [0.0, 1.0, 0.0, 1.0, 0.0],
"C": Index(["foo1", "foo2", "foo3", "foo4", "foo5"], dtype=object),
"C": Index(["foo1", "foo2", "foo3", "foo4", "foo5"]),
"D": date_range("20130101", periods=5),
}
).set_index("C")
Expand All @@ -453,7 +456,7 @@ def check_col(key, name, size):
_maybe_remove(store, "df")
df = DataFrame(
np.random.default_rng(2).standard_normal((10, 4)),
columns=Index(list("ABCD"), dtype=object),
columns=Index(list("ABCD")),
index=date_range("2000-01-01", periods=10, freq="B"),
)
df["string"] = "foo"
Expand Down Expand Up @@ -513,11 +516,12 @@ def test_append_with_empty_string(setup_path):
tm.assert_frame_equal(store.select("df"), df)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_append_with_data_columns(setup_path):
with ensure_clean_store(setup_path) as store:
df = DataFrame(
np.random.default_rng(2).standard_normal((10, 4)),
columns=Index(list("ABCD"), dtype=object),
columns=Index(list("ABCD")),
index=date_range("2000-01-01", periods=10, freq="B"),
)
df.iloc[0, df.columns.get_loc("B")] = 1.0
Expand Down Expand Up @@ -693,8 +697,8 @@ def test_append_misc(setup_path):
with ensure_clean_store(setup_path) as store:
df = DataFrame(
1.1 * np.arange(120).reshape((30, 4)),
columns=Index(list("ABCD"), dtype=object),
index=Index([f"i-{i}" for i in range(30)], dtype=object),
columns=Index(list("ABCD")),
index=Index([f"i-{i}" for i in range(30)]),
)
store.append("df", df, chunksize=1)
result = store.select("df")
Expand All @@ -710,8 +714,8 @@ def test_append_misc_chunksize(setup_path, chunksize):
# more chunksize in append tests
df = DataFrame(
1.1 * np.arange(120).reshape((30, 4)),
columns=Index(list("ABCD"), dtype=object),
index=Index([f"i-{i}" for i in range(30)], dtype=object),
columns=Index(list("ABCD")),
index=Index([f"i-{i}" for i in range(30)]),
)
df["string"] = "foo"
df["float322"] = 1.0
Expand Down Expand Up @@ -747,15 +751,15 @@ def test_append_misc_empty_frame(setup_path):
tm.assert_frame_equal(store.select("df2"), df)


def test_append_raise(setup_path):
def test_append_raise(setup_path, using_infer_string):
with ensure_clean_store(setup_path) as store:
# test append with invalid input to get good error messages

# list in column
df = DataFrame(
1.1 * np.arange(120).reshape((30, 4)),
columns=Index(list("ABCD"), dtype=object),
index=Index([f"i-{i}" for i in range(30)], dtype=object),
columns=Index(list("ABCD")),
index=Index([f"i-{i}" for i in range(30)]),
)
df["invalid"] = [["a"]] * len(df)
assert df.dtypes["invalid"] == np.object_
Expand All @@ -775,8 +779,8 @@ def test_append_raise(setup_path):
# datetime with embedded nans as object
df = DataFrame(
1.1 * np.arange(120).reshape((30, 4)),
columns=Index(list("ABCD"), dtype=object),
index=Index([f"i-{i}" for i in range(30)], dtype=object),
columns=Index(list("ABCD")),
index=Index([f"i-{i}" for i in range(30)]),
)
s = Series(datetime.datetime(2001, 1, 2), index=df.index)
s = s.astype(object)
Expand All @@ -803,8 +807,8 @@ def test_append_raise(setup_path):
# appending an incompatible table
df = DataFrame(
1.1 * np.arange(120).reshape((30, 4)),
columns=Index(list("ABCD"), dtype=object),
index=Index([f"i-{i}" for i in range(30)], dtype=object),
columns=Index(list("ABCD")),
index=Index([f"i-{i}" for i in range(30)]),
)
store.append("df", df)

Expand Down Expand Up @@ -881,7 +885,7 @@ def test_append_with_timedelta(setup_path):
def test_append_to_multiple(setup_path):
df1 = DataFrame(
np.random.default_rng(2).standard_normal((10, 4)),
columns=Index(list("ABCD"), dtype=object),
columns=Index(list("ABCD")),
index=date_range("2000-01-01", periods=10, freq="B"),
)
df2 = df1.copy().rename(columns="{}_2".format)
Expand Down Expand Up @@ -918,12 +922,12 @@ def test_append_to_multiple(setup_path):
def test_append_to_multiple_dropna(setup_path):
df1 = DataFrame(
np.random.default_rng(2).standard_normal((10, 4)),
columns=Index(list("ABCD"), dtype=object),
columns=Index(list("ABCD")),
index=date_range("2000-01-01", periods=10, freq="B"),
)
df2 = DataFrame(
np.random.default_rng(2).standard_normal((10, 4)),
columns=Index(list("ABCD"), dtype=object),
columns=Index(list("ABCD")),
index=date_range("2000-01-01", periods=10, freq="B"),
).rename(columns="{}_2".format)
df1.iloc[1, df1.columns.get_indexer(["A", "B"])] = np.nan
Expand All @@ -943,7 +947,7 @@ def test_append_to_multiple_dropna(setup_path):
def test_append_to_multiple_dropna_false(setup_path):
df1 = DataFrame(
np.random.default_rng(2).standard_normal((10, 4)),
columns=Index(list("ABCD"), dtype=object),
columns=Index(list("ABCD")),
index=date_range("2000-01-01", periods=10, freq="B"),
)
df2 = df1.copy().rename(columns="{}_2".format)
Expand Down
6 changes: 2 additions & 4 deletions pandas/tests/io/pytables/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,7 @@
ensure_clean_store,
)

pytestmark = [
pytest.mark.single_cpu,
pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False),
]
pytestmark = [pytest.mark.single_cpu]


def test_categorical(setup_path):
Expand Down Expand Up @@ -143,6 +140,7 @@ def test_categorical(setup_path):
store.select("df3/meta/s/meta")


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_categorical_conversion(tmp_path, setup_path):
# GH13322
# Check that read_hdf with categorical columns doesn't return rows if
Expand Down
6 changes: 0 additions & 6 deletions pandas/tests/io/pytables/test_complex.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

import pandas as pd
from pandas import (
DataFrame,
Expand All @@ -13,10 +11,6 @@

from pandas.io.pytables import read_hdf

pytestmark = pytest.mark.xfail(
using_string_dtype(), reason="TODO(infer_string)", strict=False
)


def test_complex_fixed(tmp_path, setup_path):
df = DataFrame(
Expand Down
18 changes: 9 additions & 9 deletions pandas/tests/io/pytables/test_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas import (
CategoricalIndex,
DataFrame,
Expand All @@ -24,10 +22,7 @@
_maybe_adjust_name,
)

pytestmark = [
pytest.mark.single_cpu,
pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False),
]
pytestmark = [pytest.mark.single_cpu]


def test_pass_spec_to_storer(setup_path):
Expand Down Expand Up @@ -93,9 +88,14 @@ def test_unimplemented_dtypes_table_columns(setup_path):

with ensure_clean_store(setup_path) as store:
# this fails because we have a date in the object block......
msg = re.escape(
"""Cannot serialize the column [datetime1]
because its data contents are not [string] but [date] object dtype"""
msg = "|".join(
[
re.escape(
"Cannot serialize the column [datetime1]\nbecause its data "
"contents are not [string] but [date] object dtype"
),
re.escape("[date] is not implemented as a table column"),
]
)
with pytest.raises(TypeError, match=msg):
store.append("df_unimplemented", df)
Expand Down
10 changes: 2 additions & 8 deletions pandas/tests/io/pytables/test_file_handling.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas.compat import (
PY311,
is_ci_environment,
Expand Down Expand Up @@ -35,9 +33,7 @@
from pandas.io import pytables
from pandas.io.pytables import Term

pytestmark = [
pytest.mark.single_cpu,
]
pytestmark = [pytest.mark.single_cpu]


@pytest.mark.parametrize("mode", ["r", "r+", "a", "w"])
Expand Down Expand Up @@ -329,7 +325,6 @@ def test_complibs(tmp_path, lvl, lib, request):
assert node.filters.complib == lib


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.skipif(
not is_platform_little_endian(), reason="reason platform is not little endian"
)
Expand All @@ -347,7 +342,6 @@ def test_encoding(setup_path):
tm.assert_frame_equal(result, expected)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.parametrize(
"val",
[
Expand All @@ -362,7 +356,7 @@ def test_encoding(setup_path):
[b"A\xf8\xfc", np.nan, b"", b"b", b"c"],
],
)
@pytest.mark.parametrize("dtype", ["category", object])
@pytest.mark.parametrize("dtype", ["category", None])
def test_latin_encoding(tmp_path, setup_path, dtype, val):
enc = "latin-1"
nan_rep = ""
Expand Down
7 changes: 1 addition & 6 deletions pandas/tests/io/pytables/test_keys.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas import (
DataFrame,
HDFStore,
Expand All @@ -15,10 +13,7 @@
tables,
)

pytestmark = [
pytest.mark.single_cpu,
pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False),
]
pytestmark = [pytest.mark.single_cpu]


def test_keys(setup_path):
Expand Down
4 changes: 1 addition & 3 deletions pandas/tests/io/pytables/test_put.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,7 @@
)
from pandas.util import _test_decorators as td

pytestmark = [
pytest.mark.single_cpu,
]
pytestmark = [pytest.mark.single_cpu]


def test_format_type(tmp_path, setup_path):
Expand Down
16 changes: 10 additions & 6 deletions pandas/tests/io/pytables/test_read.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,7 @@

from pandas.io.pytables import TableIterator

pytestmark = [
pytest.mark.single_cpu,
pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False),
]
pytestmark = [pytest.mark.single_cpu]


def test_read_missing_key_close_store(tmp_path, setup_path):
Expand Down Expand Up @@ -75,10 +72,11 @@ def test_read_missing_key_opened_store(tmp_path, setup_path):
read_hdf(store, "k1")


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_read_column(setup_path):
df = DataFrame(
np.random.default_rng(2).standard_normal((10, 4)),
columns=Index(list("ABCD"), dtype=object),
columns=Index(list("ABCD")),
index=date_range("2000-01-01", periods=10, freq="B"),
)

Expand Down Expand Up @@ -175,7 +173,7 @@ def test_pytables_native2_read(datapath):
assert isinstance(d1, DataFrame)


def test_read_hdf_open_store(tmp_path, setup_path):
def test_read_hdf_open_store(tmp_path, setup_path, using_infer_string):
# GH10330
# No check for non-string path_or_buf, and no test of open store
df = DataFrame(
Expand All @@ -187,6 +185,12 @@ def test_read_hdf_open_store(tmp_path, setup_path):
df = df.set_index(keys="E", append=True)

path = tmp_path / setup_path
if using_infer_string:
# TODO(infer_string) make this work for string dtype
msg = "Saving a MultiIndex with an extension dtype is not supported."
with pytest.raises(NotImplementedError, match=msg):
df.to_hdf(path, key="df", mode="w")
return
df.to_hdf(path, key="df", mode="w")
direct = read_hdf(path, "df")
with HDFStore(path, mode="r") as store:
Expand Down
Loading

0 comments on commit 4511251

Please sign in to comment.