From b5eea1f8ed033c2d0c79a68f1f39b34aa165ec9b Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Thu, 19 Dec 2024 01:21:25 -0800 Subject: [PATCH] Add a public api to get fast slow objects --- docs/cudf/source/cudf_pandas/faq.md | 18 ++++++++++++++++++ python/cudf/cudf/pandas/fast_slow_proxy.py | 8 ++++++++ .../cudf/cudf_pandas_tests/test_cudf_pandas.py | 6 ++++++ 3 files changed, 32 insertions(+) diff --git a/docs/cudf/source/cudf_pandas/faq.md b/docs/cudf/source/cudf_pandas/faq.md index 5024747227e..4638397738d 100644 --- a/docs/cudf/source/cudf_pandas/faq.md +++ b/docs/cudf/source/cudf_pandas/faq.md @@ -142,6 +142,24 @@ cuDF (learn more in [this blog](https://medium.com/rapids-ai/easy-cpu-gpu-arrays-and-dataframes-run-your-dask-code-where-youd-like-e349d92351d)) and the [RAPIDS Accelerator for Apache Spark](https://nvidia.github.io/spark-rapids/) provides a similar configuration-based plugin for Spark. + +## Recommendations for libraries that are type-aware + +When passing `cudf.pandas` proxy objects to libraries that are `cudf`- or `pandas`-aware, it is important to hand over the real underlying objects to ensure compatibility. You can use the following methods to retrieve the actual `cudf` or `pandas` objects: + +- `get_cudf_pandas_fast_object()`: Returns the fast (`cudf`) object from the proxy. +- `get_cudf_pandas_slow_object()`: Returns the slow (`pandas`) object from the proxy. + +Here is an example of how to use these methods: + +```python +# Assuming `proxy_obj` is a cudf.pandas proxy object +fast_obj = proxy_obj.get_cudf_pandas_fast_object() +slow_obj = proxy_obj.get_cudf_pandas_slow_object() + +# Now you can use `fast_obj` and `slow_obj` with libraries that are cudf- or pandas-aware +``` + (are-there-any-known-limitations)= ## Are there any known limitations?
diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py
index d32d388b975..09e6a3457c3 100644
--- a/python/cudf/cudf/pandas/fast_slow_proxy.py
+++ b/python/cudf/cudf/pandas/fast_slow_proxy.py
@@ -204,6 +204,20 @@ def _fsproxy_fast_to_slow(self):
             return fast_to_slow(self._fsproxy_wrapped)
         return self._fsproxy_wrapped
 
+    def get_cudf_pandas_fast_object(self):
+        """Return the fast (``cudf``) object for this proxy.
+
+        Public wrapper around ``_fsproxy_slow_to_fast``.
+        """
+        return self._fsproxy_slow_to_fast()
+
+    def get_cudf_pandas_slow_object(self):
+        """Return the slow (``pandas``) object for this proxy.
+
+        Public wrapper around ``_fsproxy_fast_to_slow``.
+        """
+        return self._fsproxy_fast_to_slow()
+
     @property  # type: ignore
     def _fsproxy_state(self) -> _State:
         return (
@@ -221,6 +235,8 @@ def _fsproxy_state(self) -> _State:
         "_fsproxy_slow_type": slow_type,
         "_fsproxy_slow_to_fast": _fsproxy_slow_to_fast,
         "_fsproxy_fast_to_slow": _fsproxy_fast_to_slow,
+        "get_cudf_pandas_fast_object": get_cudf_pandas_fast_object,
+        "get_cudf_pandas_slow_object": get_cudf_pandas_slow_object,
         "_fsproxy_state": _fsproxy_state,
     }
 
diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py
index d494e157a18..948a8324604 100644
--- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py
+++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py
@@ -1885,3 +1885,9 @@ def test_dataframe_setitem():
     new_df = df + 1
     df[df.columns] = new_df
     tm.assert_equal(df, new_df)
+
+
+def test_dataframe_get_fast_slow_methods():
+    df = xpd.DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})
+    assert isinstance(df.get_cudf_pandas_fast_object(), cudf.DataFrame)
+    assert isinstance(df.get_cudf_pandas_slow_object(), pd.DataFrame)