From 8e081c015417c5a8d2a99f9db6bbc9a2c438e477 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Mon, 18 Sep 2023 12:51:08 -0500 Subject: [PATCH] Add support for nested dict in `DataFrame` constructor (#14119) Fixes: #14096 This PR enables nested dict initialization support in `DataFrame` constructor. Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Matthew Roeschke (https://github.com/mroeschke) URL: https://github.com/rapidsai/cudf/pull/14119 --- python/cudf/cudf/core/dataframe.py | 4 ++-- python/cudf/cudf/tests/test_dataframe.py | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 5a3d25a08a7..4fc175512a0 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -977,7 +977,7 @@ def _align_input_series_indices(data, index): input_series = [ Series(val) for val in data.values() - if isinstance(val, (pd.Series, Series)) + if isinstance(val, (pd.Series, Series, dict)) ] if input_series: @@ -994,7 +994,7 @@ def _align_input_series_indices(data, index): index = aligned_input_series[0].index for name, val in data.items(): - if isinstance(val, (pd.Series, Series)): + if isinstance(val, (pd.Series, Series, dict)): data[name] = aligned_input_series.pop(0) return data, index diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 61372bab3ad..652bdbbee45 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -10349,3 +10349,22 @@ def test_dataframe_round_builtin(digits): actual = round(gdf, digits) assert_eq(expected, actual) + + +def test_dataframe_init_from_nested_dict(): + ordered_dict = OrderedDict( + [ + ("one", OrderedDict([("col_a", "foo1"), ("col_b", "bar1")])), + ("two", OrderedDict([("col_a", "foo2"), ("col_b", "bar2")])), + ("three", OrderedDict([("col_a", "foo3"), ("col_b", "bar3")])), + ] + ) + pdf = pd.DataFrame(ordered_dict) + gdf = cudf.DataFrame(ordered_dict) + + assert_eq(pdf, gdf) + regular_dict = {key: dict(value) for key, value in ordered_dict.items()} + + pdf = pd.DataFrame(regular_dict) + gdf = cudf.DataFrame(regular_dict) + assert_eq(pdf, gdf)