From 75545afdf3e4848a016fd1eced45b3c3415a95f0 Mon Sep 17 00:00:00 2001 From: Joseph Willard Date: Sat, 15 Jul 2023 14:05:38 -0400 Subject: [PATCH 1/3] Add mixed int string entry. --- pandas/conftest.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/conftest.py b/pandas/conftest.py index d7e8fbeb9336b..14a107d563d62 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -619,6 +619,7 @@ def _create_mi_with_dt64tz_level(): "tuples": MultiIndex.from_tuples(zip(["foo", "bar", "baz"], [1, 2, 3])), "mi-with-dt64tz-level": _create_mi_with_dt64tz_level(), "multi": _create_multiindex(), + "mixed-int-string": Index([0, "a", 1, "b", 2, "c"]), "repeats": Index([0, 0, 1, 1, 2, 2]), "nullable_int": Index(np.arange(100), dtype="Int64"), "nullable_uint": Index(np.arange(100), dtype="UInt16"), From 0bb557dab9cae3a04250c39ba1573d0abd8eeac0 Mon Sep 17 00:00:00 2001 From: Joseph Willard Date: Sun, 16 Jul 2023 11:05:05 -0400 Subject: [PATCH 2/3] Add catches for mixed indices in value_count tests. 
--- pandas/tests/base/test_value_counts.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py index 89f3c005c52f0..142ce8c4be37f 100644 --- a/pandas/tests/base/test_value_counts.py +++ b/pandas/tests/base/test_value_counts.py @@ -23,16 +23,21 @@ def test_value_counts(index_or_series_obj): obj = index_or_series_obj obj = np.repeat(obj, range(1, len(obj) + 1)) result = obj.value_counts() + present_types = set([type(i) for i in obj]) counter = collections.Counter(obj) expected = Series(dict(counter.most_common()), dtype=np.int64, name="count") + if len(present_types) > 1: + pytest.skip("Test doesn't make sense on data with different index types.") + if obj.dtype != np.float16: expected.index = expected.index.astype(obj.dtype) else: with pytest.raises(NotImplementedError, match="float16 indexes are not "): expected.index.astype(obj.dtype) return + if isinstance(expected.index, MultiIndex): expected.index.names = obj.names else: @@ -57,11 +62,14 @@ def test_value_counts(index_or_series_obj): def test_value_counts_null(null_obj, index_or_series_obj): orig = index_or_series_obj obj = orig.copy() + present_types = set([type(i) for i in obj]) if not allow_na_ops(obj): pytest.skip("type doesn't allow for NA operations") elif len(obj) < 1: pytest.skip("Test doesn't make sense on empty data") + elif len(present_types) > 1: + pytest.skip("Test doesn't make sense on data with different index types.") elif isinstance(orig, MultiIndex): pytest.skip(f"MultiIndex can't hold '{null_obj}'") From d6e25416a2964fdce3a8d211c3990d969f40bc6a Mon Sep 17 00:00:00 2001 From: Joseph Willard Date: Thu, 20 Jul 2023 07:48:20 -0400 Subject: [PATCH 3/3] Update more tests affected by mixed int string. 
--- pandas/tests/base/test_value_counts.py | 15 +++++++-------- pandas/tests/test_algos.py | 5 ++++- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py index 142ce8c4be37f..fc3743ffe150b 100644 --- a/pandas/tests/base/test_value_counts.py +++ b/pandas/tests/base/test_value_counts.py @@ -23,14 +23,10 @@ def test_value_counts(index_or_series_obj): obj = index_or_series_obj obj = np.repeat(obj, range(1, len(obj) + 1)) result = obj.value_counts() - present_types = set([type(i) for i in obj]) counter = collections.Counter(obj) expected = Series(dict(counter.most_common()), dtype=np.int64, name="count") - if len(present_types) > 1: - pytest.skip("Test doesn't make sense on data with different index types.") - if obj.dtype != np.float16: expected.index = expected.index.astype(obj.dtype) else: @@ -52,10 +48,13 @@ def test_value_counts(index_or_series_obj): # TODO(GH#32514): Order of entries with the same count is inconsistent # on CI (gh-32449) - if obj.duplicated().any(): - result = result.sort_index() - expected = expected.sort_index() - tm.assert_series_equal(result, expected) + try: + if obj.duplicated().any(): + result = result.sort_index() + expected = expected.sort_index() + tm.assert_series_equal(result, expected) + except TypeError: + pytest.xfail("dtypes not suitable for sorting.") @pytest.mark.parametrize("null_obj", [np.nan, None]) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 34a0bee877664..1fc326610d8d6 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -71,7 +71,10 @@ def test_factorize(self, index_or_series_obj, sort): expected_uniques = expected_uniques.astype(object) if sort: - expected_uniques = expected_uniques.sort_values() + try: + expected_uniques = expected_uniques.sort_values() + except TypeError: + pytest.xfail("dtypes not suitable for sorting.") # construct an integer ndarray so that # 
`expected_uniques.take(expected_codes)` is equal to `obj`