From 4467066c952111c0131383784d3eb6bf3248f0ac Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Mon, 18 Sep 2023 12:51:53 -0500 Subject: [PATCH] Restrict iterables of `DataFrame`'s as input to `DataFrame` constructor (#14118) Fixes: #14094 This PR raises an error when an iterable of `DataFrame`s is detected in the `DataFrame` constructor. Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Matthew Roeschke (https://github.com/mroeschke) URL: https://github.com/rapidsai/cudf/pull/14118 --- python/cudf/cudf/core/dataframe.py | 11 ++++++----- python/cudf/cudf/tests/test_dataframe.py | 6 ++++++ 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 4fc175512a0..84c16b71997 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -852,12 +852,13 @@ def _init_from_list_like(self, data, index=None, columns=None): elif len(data) > 0 and isinstance(data[0], pd._libs.interval.Interval): data = DataFrame.from_pandas(pd.DataFrame(data)) self._data = data._data + elif any( + not isinstance(col, (abc.Iterable, abc.Sequence)) for col in data + ): + raise TypeError("Inputs should be an iterable or sequence.") + elif len(data) > 0 and not can_convert_to_column(data[0]): + raise ValueError("Must pass 2-d input.") else: - if any( - not isinstance(col, (abc.Iterable, abc.Sequence)) - for col in data - ): - raise TypeError("Inputs should be an iterable or sequence.") if ( len(data) > 0 and columns is None diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 652bdbbee45..cbef9bfa2d8 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -10260,6 +10260,12 @@ def __getitem__(self, key): cudf.DataFrame({"a": A()}) +def test_dataframe_constructor_dataframe_list(): + df = cudf.DataFrame(range(2)) + with pytest.raises(ValueError): + cudf.DataFrame([df]) 
+ + def test_dataframe_constructor_from_namedtuple(): Point1 = namedtuple("Point1", ["a", "b", "c"]) Point2 = namedtuple("Point1", ["x", "y"])