Skip to content

Commit

Permalink
TYP: to_arrays, BUG: from_records empty dtypes (pandas-dev#40121)
Browse files: browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored Mar 1, 2021
1 parent 80b3e8d commit cbbaf20
Show file tree
Hide file tree
Showing 5 changed files with 21 additions and 13 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,7 @@ Conversion
^^^^^^^^^^
- Bug in :meth:`Series.to_dict` with ``orient='records'`` now returns Python native types (:issue:`25969`)
- Bug in :meth:`Series.view` and :meth:`Index.view` when converting between datetime-like (``datetime64[ns]``, ``datetime64[ns, tz]``, ``timedelta64``, ``period``) dtypes (:issue:`39788`)
-
- Bug in creating a :class:`DataFrame` from an empty ``np.recarray`` not retaining the original dtypes (:issue:`40121`)
-

Strings
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -605,6 +605,8 @@ def __init__(
if is_dataclass(data[0]):
data = dataclasses_to_dicts(data)
if treat_as_nested(data):
if columns is not None:
columns = ensure_index(columns)
arrays, columns, index = nested_data_to_arrays(
data, columns, index, dtype
)
Expand Down
10 changes: 7 additions & 3 deletions pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -602,7 +602,9 @@ def dataclasses_to_dicts(data):
# Conversion of Inputs to Arrays


def to_arrays(data, columns: Optional[Index], dtype: Optional[DtypeObj] = None):
def to_arrays(
data, columns: Optional[Index], dtype: Optional[DtypeObj] = None
) -> Tuple[List[ArrayLike], Index]:
"""
Return list of arrays, columns.
"""
Expand All @@ -623,8 +625,10 @@ def to_arrays(data, columns: Optional[Index], dtype: Optional[DtypeObj] = None):
if isinstance(data, np.ndarray):
columns = data.dtype.names
if columns is not None:
return [[]] * len(columns), columns
return [], [] # columns if columns is not None else []
# i.e. numpy structured array
arrays = [data[name] for name in columns]
return arrays, ensure_index(columns)
return [], ensure_index([])

elif isinstance(data[0], Categorical):
if columns is None:
Expand Down
17 changes: 9 additions & 8 deletions pandas/tests/frame/constructors/test_from_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
CategoricalIndex,
DataFrame,
Index,
Int64Index,
Interval,
RangeIndex,
Series,
Expand Down Expand Up @@ -437,11 +438,11 @@ def test_from_records_empty(self):
def test_from_records_empty_with_nonempty_fields_gh3682(self):
a = np.array([(1, 2)], dtype=[("id", np.int64), ("value", np.int64)])
df = DataFrame.from_records(a, index="id")
tm.assert_index_equal(df.index, Index([1], name="id"))
assert df.index.name == "id"
tm.assert_index_equal(df.columns, Index(["value"]))

b = np.array([], dtype=[("id", np.int64), ("value", np.int64)])
df = DataFrame.from_records(b, index="id")
tm.assert_index_equal(df.index, Index([], name="id"))
assert df.index.name == "id"

ex_index = Int64Index([1], name="id")
expected = DataFrame({"value": [2]}, index=ex_index, columns=["value"])
tm.assert_frame_equal(df, expected)

b = a[:0]
df2 = DataFrame.from_records(b, index="id")
tm.assert_frame_equal(df2, df.iloc[:0])
3 changes: 2 additions & 1 deletion pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1170,7 +1170,8 @@ def test_constructor_unequal_length_nested_list_column(self):
# GH 32173
arrays = [list("abcd"), list("cde")]

msg = "Length of columns passed for MultiIndex columns is different"
# exception raised inside MultiIndex constructor
msg = "all arrays must be same length"
with pytest.raises(ValueError, match=msg):
DataFrame([[1, 2, 3, 4], [4, 5, 6, 7]], columns=arrays)

Expand Down

0 comments on commit cbbaf20

Please sign in to comment.