diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 2e2c5281e6752..4845acb3d12e0 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -288,6 +288,9 @@ Categorical - :meth:`Categorical.searchsorted` and :meth:`CategoricalIndex.searchsorted` now work on unordered categoricals also (:issue:`21667`) - Added test to assert roundtripping to parquet with :func:`DataFrame.to_parquet` or :func:`read_parquet` will preserve Categorical dtypes for string types (:issue:`27955`) - Changed the error message in :meth:`Categorical.remove_categories` to always show the invalid removals as a set (:issue:`28669`) +- Using date accessors on a categorical dtyped :class:`Series` of datetimes was not returning an object of the + same type as if one used the :meth:`.str.` / :meth:`.dt.` on a :class:`Series` of that type. E.g. when accessing :meth:`Series.dt.tz_localize` on a + :class:`Categorical` with duplicate entries, the accessor was skipping duplicates (:issue:`27952`) Datetimelike diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index cc8ecc0e64684..e8d2ba85e08a6 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -16,7 +16,6 @@ from pandas.core.dtypes.generic import ABCSeries from pandas.core.accessor import PandasDelegate, delegate_names -from pandas.core.algorithms import take_1d from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray from pandas.core.base import NoNewAttributesMixin, PandasObject from pandas.core.indexes.datetimes import DatetimeIndex @@ -75,9 +74,7 @@ def _delegate_property_get(self, name): result = np.asarray(result) - # blow up if we operate on categories if self.orig is not None: - result = take_1d(result, self.orig.cat.codes) index = self.orig.index else: index = self._parent.index @@ -324,7 +321,12 @@ def __new__(cls, data): orig = data if is_categorical_dtype(data) else None if orig is not None: - data = 
Series(orig.values.categories, name=orig.name, copy=False) + data = Series( + orig.array, + name=orig.name, + copy=False, + dtype=orig.values.categories.dtype, + ) if is_datetime64_dtype(data.dtype): return DatetimeProperties(data, orig) diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index 1346f2fd57f10..9304e1c4fc157 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -344,6 +344,39 @@ def test_dt_namespace_accessor_categorical(self): expected = Series([2017, 2017, 2018, 2018], name="foo") tm.assert_series_equal(result, expected) + def test_dt_tz_localize_categorical(self, tz_aware_fixture): + # GH 27952 + tz = tz_aware_fixture + datetimes = pd.Series( + ["2019-01-01", "2019-01-01", "2019-01-02"], dtype="datetime64[ns]" + ) + categorical = datetimes.astype("category") + result = categorical.dt.tz_localize(tz) + expected = datetimes.dt.tz_localize(tz) + tm.assert_series_equal(result, expected) + + def test_dt_tz_convert_categorical(self, tz_aware_fixture): + # GH 27952 + tz = tz_aware_fixture + datetimes = pd.Series( + ["2019-01-01", "2019-01-01", "2019-01-02"], dtype="datetime64[ns, MET]" + ) + categorical = datetimes.astype("category") + result = categorical.dt.tz_convert(tz) + expected = datetimes.dt.tz_convert(tz) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("accessor", ["year", "month", "day"]) + def test_dt_other_accessors_categorical(self, accessor): + # GH 27952 + datetimes = pd.Series( + ["2018-01-01", "2018-01-01", "2019-01-02"], dtype="datetime64[ns]" + ) + categorical = datetimes.astype("category") + result = getattr(categorical.dt, accessor) + expected = getattr(datetimes.dt, accessor) + tm.assert_series_equal(result, expected) + def test_dt_accessor_no_new_attributes(self): # https://github.com/pandas-dev/pandas/issues/10673 s = Series(date_range("20130101", periods=5, freq="D"))