diff --git a/python/cudf/cudf/core/_internals/where.py b/python/cudf/cudf/core/_internals/where.py index 9f36499586b..0c754317185 100644 --- a/python/cudf/cudf/core/_internals/where.py +++ b/python/cudf/cudf/core/_internals/where.py @@ -110,9 +110,7 @@ def _make_categorical_like(result, column): if isinstance(column, cudf.core.column.CategoricalColumn): result = cudf.core.column.build_categorical_column( categories=column.categories, - codes=cudf.core.column.NumericalColumn( - result.base_data, dtype=result.dtype - ), + codes=result, mask=result.base_mask, size=result.size, offset=result.offset, diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index d25983842f9..66aed38bffd 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -659,10 +659,7 @@ def slice(self, start: int, stop: int, stride: int | None = None) -> Self: Self, cudf.core.column.build_categorical_column( categories=self.categories, - codes=cudf.core.column.NumericalColumn( - codes.base_data, # type: ignore[arg-type] - dtype=codes.dtype, - ), + codes=codes, mask=codes.base_mask, ordered=self.ordered, size=codes.size, @@ -734,10 +731,7 @@ def sort_values( codes = self.codes.sort_values(ascending, na_position) col = column.build_categorical_column( categories=self.dtype.categories._values, - codes=cudf.core.column.NumericalColumn( - codes.base_data, # type: ignore[arg-type] - dtype=codes.dtype, - ), + codes=codes, mask=codes.base_mask, size=codes.size, ordered=self.dtype.ordered, @@ -845,10 +839,7 @@ def unique(self) -> CategoricalColumn: codes = self.codes.unique() return column.build_categorical_column( categories=self.categories, - codes=cudf.core.column.NumericalColumn( - codes.base_data, # type: ignore[arg-type] - dtype=codes.dtype, - ), + codes=codes, mask=codes.base_mask, offset=codes.offset, size=codes.size, @@ -986,9 +977,7 @@ def find_and_replace( result = column.build_categorical_column( categories=new_cats["cats"], - codes=cudf.core.column.NumericalColumn( - output.base_data, dtype=output.dtype - ), + codes=output, mask=output.base_mask, offset=output.offset, size=output.size, @@ -1184,10 +1173,7 @@ def _concat( return column.build_categorical_column( categories=column.as_column(cats), - codes=cudf.core.column.NumericalColumn( - codes_col.base_data, # type: ignore[arg-type] - dtype=codes_col.dtype, - ), + codes=codes_col, mask=codes_col.base_mask, size=codes_col.size, offset=codes_col.offset, @@ -1199,10 +1185,7 @@ def _with_type_metadata( if isinstance(dtype, CategoricalDtype): return column.build_categorical_column( categories=dtype.categories._values, - codes=cudf.core.column.NumericalColumn( - self.codes.base_data, # type: ignore[arg-type] - dtype=self.codes.dtype, - ), + codes=self.codes, mask=self.codes.base_mask, ordered=dtype.ordered, size=self.codes.size, @@ -1345,9 +1328,7 @@ def _set_categories( Self, column.build_categorical_column( categories=new_cats, - codes=cudf.core.column.NumericalColumn( - new_codes.base_data, dtype=new_codes.dtype - ), + codes=new_codes, mask=new_codes.base_mask, size=new_codes.size, offset=new_codes.offset, @@ -1478,9 +1459,7 @@ def pandas_categorical_as_column( return column.build_categorical_column( categories=categorical.categories, - codes=cudf.core.column.NumericalColumn( - codes.base_data, dtype=codes.dtype - ), + codes=codes, size=codes.size, mask=mask, ordered=categorical.ordered, diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index b0e33e8b9ce..090c02da990 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -1513,6 +1513,7 @@ def column_empty( * cudf.dtype(libcudf.types.size_type_dtype).itemsize ) ), + size=None, dtype=libcudf.types.size_type_dtype, ), ) diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index 16e78ef35ef..ac36813202a 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -654,6 +654,7 @@ def _with_type_metadata(self: ColumnBase, dtype: Dtype) -> ColumnBase: categories=dtype.categories._values, codes=cudf.core.column.NumericalColumn( self.base_data, # type: ignore[arg-type] + self.size, dtype=self.dtype, ), mask=self.base_mask, diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 3033abd53f5..f935217f4f9 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -46,7 +46,6 @@ from cudf.core.column import ( CategoricalColumn, ColumnBase, - NumericalColumn, StructColumn, as_column, build_categorical_column, @@ -8541,9 +8540,7 @@ def _reassign_categories(categories, cols, col_idxs): if idx in categories: cols[name] = build_categorical_column( categories=categories[idx], - codes=NumericalColumn( - cols[name].base_data, dtype=cols[name].dtype - ), + codes=cols[name], mask=cols[name].base_mask, offset=cols[name].offset, size=cols[name].size, diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index d02633a97fa..ee2f0317f8d 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -2501,6 +2501,7 @@ def _get_dt_field(self, field: str) -> Index: out_column = self._column.get_dt_field(field) out_column = NumericalColumn( data=out_column.base_data, + size=out_column.size, dtype=out_column.dtype, mask=out_column.base_mask, offset=out_column.offset,