rapidsai · rapids-bot · Jan 29, 2025 · Dec 19, 2024 · Jan 24, 2025 · Jan 24, 2025
diff --git a/docs/cudf/source/cudf_pandas/faq.md b/docs/cudf/source/cudf_pandas/faq.md
@@ -142,6 +142,30 @@ cuDF (learn more in [this
 blog](https://medium.com/rapids-ai/easy-cpu-gpu-arrays-and-dataframes-run-your-dask-code-where-youd-like-e349d92351d)) and the [RAPIDS Accelerator for Apache Spark](https://nvidia.github.io/spark-rapids/)
 provides a similar configuration-based plugin for Spark.
 
+
+## How can I access the underlying GPU or CPU objects?
+
+When working with `cudf.pandas` proxy objects, it is sometimes necessary to get the underlying `cudf` or `pandas` objects, which reside on the GPU or CPU, respectively.
+For example, this lets GPU-aware libraries that support both `cudf` and `pandas` use their `cudf`-optimized code paths, which keep data on the GPU, when processing `cudf.pandas` objects.
+Otherwise, such a library might fall back to its less-optimized CPU code path because it treats the `cudf.pandas` object as a plain `pandas` DataFrame.
+
+The following methods can be used to retrieve the actual `cudf` or `pandas` objects:
+
+- `as_gpu_object()`: This method returns the `cudf` object from the proxy.
+- `as_cpu_object()`: This method returns the `pandas` object from the proxy.
+
+Here is an example of how to use these methods:
+
+```python
+# Assuming `proxy_obj` is a cudf.pandas proxy object
+cudf_obj = proxy_obj.as_gpu_object()
+pandas_obj = proxy_obj.as_cpu_object()
+
+# Now you can use `cudf_obj` and `pandas_obj` with libraries that are cudf or pandas aware
+```
+
+Be aware that once `cudf.pandas` objects are converted to their underlying `cudf` or `pandas` types, the returned objects are no longer managed by the `cudf.pandas` proxy. This means that automatic conversion between GPU and CPU types and automatic fallback from GPU to CPU will not occur for them.
+
 (are-there-any-known-limitations)=
 ## Are there any known limitations?
 

@@ -1251,7 +1251,7 @@ def as_categorical_column(self, dtype) -> ColumnBase:
             )
 
         # Categories must be unique and sorted in ascending order.
-        cats = self.unique().sort_values().astype(self.dtype)
+        cats = self.unique().sort_values()
         label_dtype = min_unsigned_type(len(cats))
         labels = self._label_encoding(
             cats=cats, dtype=label_dtype, na_sentinel=cudf.Scalar(1)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
@@ -96,6 +96,7 @@
 from cudf.utils.utils import (
     GetAttrGetItemMixin,
     _external_only_api,
+    _extract_from_proxy,
     _is_null_host_scalar,
 )
 
@@ -707,9 +708,23 @@ def __init__(
         if copy is not None:
             raise NotImplementedError("copy is not currently implemented.")
         super().__init__({}, index=cudf.Index([]))
+
         if nan_as_null is no_default:
             nan_as_null = not cudf.get_option("mode.pandas_compatible")
 
+        if cudf.get_option("mode.pandas_compatible"):
+            data, data_extracted = _extract_from_proxy(data)
+            index, index_extracted = _extract_from_proxy(index)
+            columns, columns_extracted = _extract_from_proxy(
+                columns, fast=False
+            )
+            if (
+                (data is None or data_extracted)
+                and (index is None or index_extracted)
+                and (columns is None or columns_extracted)
+            ) and (dtype is None and copy is None):
+                self.__dict__.update(data.__dict__)
+                return
         if isinstance(columns, (Series, cudf.BaseIndex)):
             columns = columns.to_pandas()
 

diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
@@ -57,7 +57,11 @@
     is_mixed_with_object_dtype,
 )
 from cudf.utils.performance_tracking import _performance_tracking
-from cudf.utils.utils import _warn_no_dask_cudf, search_range
+from cudf.utils.utils import (
+    _extract_from_proxy,
+    _warn_no_dask_cudf,
+    search_range,
+)
 
 if TYPE_CHECKING:
     from collections.abc import Generator, Iterable
@@ -1067,6 +1071,12 @@ class Index(SingleColumnFrame, BaseIndex, metaclass=IndexMeta):
     @_performance_tracking
     def __init__(self, data, **kwargs):
         name = _getdefault_name(data, name=kwargs.get("name"))
+        if cudf.get_option("mode.pandas_compatible"):
+            data, data_extracted = _extract_from_proxy(data)
+            if data_extracted and len(kwargs) == 0:
+                self.__dict__.update(data.__dict__)
+                return
+
         super().__init__({name: data})
 
     @_performance_tracking

diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
@@ -68,6 +68,9 @@
     to_cudf_compatible_scalar,
 )
 from cudf.utils.performance_tracking import _performance_tracking
+from cudf.utils.utils import (
+    _extract_from_proxy,
+)
 
 if TYPE_CHECKING:
     from collections.abc import MutableMapping
@@ -626,6 +629,20 @@ def __init__(
     ):
         if nan_as_null is no_default:
             nan_as_null = not cudf.get_option("mode.pandas_compatible")
+
+        if cudf.get_option("mode.pandas_compatible"):
+            data, data_extracted = _extract_from_proxy(data)
+            index, _ = _extract_from_proxy(index)
+
+            if (
+                data_extracted
+                and index is None
+                and dtype is None
+                and name is None
+                and copy is False
+            ):
+                self.__dict__.update(data.__dict__)
+                return
         index_from_data = None
         name_from_data = None
         if data is None:

@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES.
 # All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 import abc
@@ -266,6 +266,12 @@ def custom_repr_html(obj):
     html_formatter.for_type(DataFrame, custom_repr_html)
 
 
+def _Series_dtype(self):
+    # Fast path: read dtype off the wrapped object, no slow<->fast round trip.
+    # NOTE(review): registered under "dtype" without property() -- confirm.
+    return self._fsproxy_wrapped.dtype
+
+
 Series = make_final_proxy_type(
     "Series",
     cudf.Series,
@@ -285,6 +291,7 @@ def custom_repr_html(obj):
         "_constructor": _FastSlowAttribute("_constructor"),
         "_constructor_expanddim": _FastSlowAttribute("_constructor_expanddim"),
         "_accessors": set(),
+        "dtype": _Series_dtype,
     },
 )
 

@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES.
 # All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 
@@ -204,6 +204,12 @@ def _fsproxy_fast_to_slow(self):
             return fast_to_slow(self._fsproxy_wrapped)
         return self._fsproxy_wrapped
 
+    def as_gpu_object(self):  # public alias of _fsproxy_slow_to_fast()
+        return self._fsproxy_slow_to_fast()
+
+    def as_cpu_object(self):  # public alias of _fsproxy_fast_to_slow()
+        return self._fsproxy_fast_to_slow()
+
     @property  # type: ignore
     def _fsproxy_state(self) -> _State:
         return (
@@ -221,6 +227,8 @@ def _fsproxy_state(self) -> _State:
         "_fsproxy_slow_type": slow_type,
         "_fsproxy_slow_to_fast": _fsproxy_slow_to_fast,
         "_fsproxy_fast_to_slow": _fsproxy_fast_to_slow,
+        "as_gpu_object": as_gpu_object,
+        "as_cpu_object": as_cpu_object,
         "_fsproxy_state": _fsproxy_state,
     }
 

diff --git a/python/cudf/cudf/utils/utils.py b/python/cudf/cudf/utils/utils.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
 from __future__ import annotations
 
 import decimal
@@ -451,3 +451,17 @@ def _datetime_timedelta_find_and_replace(
     except TypeError:
         result_col = original_column.copy(deep=True)
     return result_col  # type: ignore
+
+
+def _extract_from_proxy(proxy, fast=True):
+    """Return ``(unwrapped, True)`` for a proxy, else ``(proxy, False)``.
+
+    ``fast=True`` unwraps via ``as_gpu_object()``, else ``as_cpu_object()``.
+    """
+    try:
+        unwrap = proxy.as_gpu_object if fast else proxy.as_cpu_object
+    except AttributeError:
+        return (proxy, False)  # no accessor => not a proxy
+    # Called outside ``try`` so an AttributeError raised while unwrapping
+    # propagates instead of being mistaken for "not a proxy".
+    return (unwrap(), True)
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES.
 # All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 
@@ -1885,3 +1885,9 @@ def test_dataframe_setitem():
     new_df = df + 1
     df[df.columns] = new_df
     tm.assert_equal(df, new_df)
+
+
+def test_dataframe_get_fast_slow_methods():
+    proxy_df = xpd.DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})
+    assert isinstance(proxy_df.as_gpu_object(), cudf.DataFrame)
+    assert isinstance(proxy_df.as_cpu_object(), pd.DataFrame)