
Commit

Merge branch 'main' into fix-doc-issue-60366
mingjji authored Nov 23, 2024
2 parents d409d9d + ee0902a commit 62c10af
Showing 21 changed files with 276 additions and 42 deletions.
7 changes: 0 additions & 7 deletions ci/code_checks.sh
@@ -73,7 +73,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.Period.freq GL08" \
-i "pandas.Period.ordinal GL08" \
-i "pandas.RangeIndex.from_range PR01,SA01" \
-i "pandas.Series.dt.freq GL08" \
-i "pandas.Series.dt.unit GL08" \
-i "pandas.Series.pad PR01,SA01" \
-i "pandas.Timedelta.max PR02" \
@@ -92,15 +91,11 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.core.groupby.DataFrameGroupBy.boxplot PR07,RT03,SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.get_group RT03,SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.indices SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.nth PR02" \
-i "pandas.core.groupby.DataFrameGroupBy.nunique SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.plot PR02" \
-i "pandas.core.groupby.DataFrameGroupBy.sem SA01" \
-i "pandas.core.groupby.SeriesGroupBy.get_group RT03,SA01" \
-i "pandas.core.groupby.SeriesGroupBy.indices SA01" \
-i "pandas.core.groupby.SeriesGroupBy.is_monotonic_decreasing SA01" \
-i "pandas.core.groupby.SeriesGroupBy.is_monotonic_increasing SA01" \
-i "pandas.core.groupby.SeriesGroupBy.nth PR02" \
-i "pandas.core.groupby.SeriesGroupBy.plot PR02" \
-i "pandas.core.groupby.SeriesGroupBy.sem SA01" \
-i "pandas.core.resample.Resampler.get_group RT03,SA01" \
Expand All @@ -114,8 +109,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.core.resample.Resampler.std SA01" \
-i "pandas.core.resample.Resampler.transform PR01,RT03,SA01" \
-i "pandas.core.resample.Resampler.var SA01" \
-i "pandas.errors.AttributeConflictWarning SA01" \
-i "pandas.errors.ChainedAssignmentError SA01" \
-i "pandas.errors.DuplicateLabelError SA01" \
-i "pandas.errors.IntCastingNaNError SA01" \
-i "pandas.errors.InvalidIndexError SA01" \
5 changes: 4 additions & 1 deletion doc/source/whatsnew/v3.0.0.rst
@@ -54,6 +54,7 @@ Other enhancements
- :meth:`Series.cummin` and :meth:`Series.cummax` now support :class:`CategoricalDtype` (:issue:`52335`)
- :meth:`Series.plot` now correctly handles the ``ylabel`` parameter for pie charts, allowing explicit control over the y-axis label (:issue:`58239`)
- :meth:`DataFrame.plot.scatter` argument ``c`` now accepts a column of strings, where rows with the same string are colored identically (:issue:`16827` and :issue:`16485`)
- :func:`read_parquet` accepts ``to_pandas_kwargs``, which is forwarded to :meth:`pyarrow.Table.to_pandas`; this enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as Python dictionaries (:issue:`56842`)
- :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, and :meth:`DataFrame.apply` with ``engine="numba"`` now support positional arguments passed as kwargs (:issue:`58995`)
- :meth:`Series.map` can now accept ``kwargs`` to pass on to ``func``, as shown in the sketch after this list (:issue:`59814`)
- :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`)
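A minimal sketch of the new :meth:`Series.map` pass-through (the mapper function and keyword below are illustrative, assuming extra keywords are forwarded to ``func`` as described):

>>> import pandas as pd
>>> def add(x, amount=0):  # hypothetical mapper taking an extra keyword
...     return x + amount
>>> pd.Series([1, 2, 3]).map(add, amount=10)
0    11
1    12
2    13
dtype: int64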
@@ -626,6 +627,7 @@ Datetimelike
- Bug in :meth:`Series.dt.microsecond` producing incorrect results for pyarrow-backed :class:`Series` (:issue:`59154`)
- Bug in :meth:`to_datetime` not respecting ``dayfirst`` if an uncommon date string was passed (:issue:`58859`)
- Bug in :meth:`to_datetime` reporting an incorrect index in case of any failure scenario (:issue:`58298`)
- Bug in :meth:`to_datetime` wrongly converting ``arg`` when it is a ``np.datetime64`` object with unit ``ps`` (:issue:`60341`)
- Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`)

Timedelta
@@ -688,6 +690,7 @@ I/O
- Bug in :meth:`DataFrame.from_records` where ``columns`` parameter with numpy structured array was not reordering and filtering out the columns (:issue:`59717`)
- Bug in :meth:`DataFrame.to_dict` raises unnecessary ``UserWarning`` when columns are not unique and ``orient='tight'``. (:issue:`58281`)
- Bug in :meth:`DataFrame.to_excel` when writing empty :class:`DataFrame` with :class:`MultiIndex` on both axes (:issue:`57696`)
- Bug in :meth:`DataFrame.to_excel` where a :class:`MultiIndex` level containing :class:`Period` values was written as text rather than as a date (:issue:`60099`)
- Bug in :meth:`DataFrame.to_stata` when writing a :class:`DataFrame` with ``byteorder="big"`` (:issue:`58969`)
- Bug in :meth:`DataFrame.to_stata` when writing more than 32,000 value labels. (:issue:`60107`)
- Bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)
@@ -762,7 +765,7 @@ ExtensionArray

Styler
^^^^^^
-
- Bug in :meth:`Styler.to_latex` where styling column headers did not apply correctly when combined with a hidden index or hidden index levels (:issue:`52218`)

Other
^^^^^
11 changes: 6 additions & 5 deletions pandas/_libs/src/vendored/numpy/datetime/np_datetime.c
@@ -660,11 +660,12 @@ void pandas_datetime_to_datetimestruct(npy_datetime dt, NPY_DATETIMEUNIT base,
perday = 24LL * 60 * 60 * 1000 * 1000 * 1000 * 1000;

set_datetimestruct_days(extract_unit(&dt, perday), out);
out->hour = (npy_int32)extract_unit(&dt, 1000LL * 1000 * 1000 * 60 * 60);
out->min = (npy_int32)extract_unit(&dt, 1000LL * 1000 * 1000 * 60);
out->sec = (npy_int32)extract_unit(&dt, 1000LL * 1000 * 1000);
out->us = (npy_int32)extract_unit(&dt, 1000LL);
out->ps = (npy_int32)(dt * 1000);
out->hour =
(npy_int32)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * 60 * 60);
out->min = (npy_int32)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * 60);
out->sec = (npy_int32)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000);
out->us = (npy_int32)extract_unit(&dt, 1000LL * 1000);
out->ps = (npy_int32)(dt);
break;

case NPY_FR_fs:
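A short sketch of the behavior this hunk corrects (GH 60341): the picosecond branch previously extracted the hour, minute, and second fields with nanosecond-scale divisors. Assuming a build containing the fix, a ``ps``-unit value now converts cleanly (the repr shown is illustrative):

>>> import numpy as np
>>> import pandas as pd
>>> pd.to_datetime(np.datetime64("1970-01-01T00:00:01.000000001", "ps"))
Timestamp('1970-01-01 00:00:01.000000001')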
3 changes: 2 additions & 1 deletion pandas/core/frame.py
@@ -4742,7 +4742,8 @@ def eval(self, expr: str, *, inplace: bool = False, **kwargs) -> Any | None:
3 4 4 7 8 0
4 5 2 6 7 3
For columns with spaces in their name, you can use backtick quoting.
For columns with spaces or other disallowed characters in their name, you can
use backtick quoting.
>>> df.eval("B * `C&C`")
0 100
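For context, a minimal setup matching the doctest above might look like this (the data values are assumptions for illustration):

>>> import pandas as pd
>>> df = pd.DataFrame({"B": [10, 20], "C&C": [10, 5]})
>>> df.eval("B * `C&C`")  # backticks quote the otherwise disallowed '&'
0    100
1    100
dtype: int64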
10 changes: 10 additions & 0 deletions pandas/core/groupby/generic.py
@@ -1443,6 +1443,11 @@ def is_monotonic_increasing(self) -> Series:
-------
Series
See Also
--------
SeriesGroupBy.is_monotonic_decreasing : Return whether each group's values
are monotonically decreasing.
Examples
--------
>>> s = pd.Series([2, 1, 3, 4], index=["Falcon", "Falcon", "Parrot", "Parrot"])
@@ -1462,6 +1467,11 @@ def is_monotonic_decreasing(self) -> Series:
-------
Series
See Also
--------
SeriesGroupBy.is_monotonic_increasing : Return whether each group's values
are monotonically increasing.
Examples
--------
>>> s = pd.Series([2, 1, 3, 4], index=["Falcon", "Falcon", "Parrot", "Parrot"])
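The truncated doctests continue along these lines (same data as above):

>>> import pandas as pd
>>> s = pd.Series([2, 1, 3, 4], index=["Falcon", "Falcon", "Parrot", "Parrot"])
>>> s.groupby(level=0).is_monotonic_increasing
Falcon    False
Parrot     True
dtype: bool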
13 changes: 0 additions & 13 deletions pandas/core/groupby/groupby.py
@@ -3983,19 +3983,6 @@ def nth(self) -> GroupByNthSelector:
'all' or 'any'; this is equivalent to calling dropna(how=dropna)
before the groupby.
Parameters
----------
n : int, slice or list of ints and slices
A single nth value for the row or a list of nth values or slices.
.. versionchanged:: 1.4.0
Added slice and lists containing slices.
Added index notation.
dropna : {'any', 'all', None}, default None
Apply the specified dropna operation before counting which row is
the nth row. Only supported if n is an int.
Returns
-------
Series or DataFrame
22 changes: 22 additions & 0 deletions pandas/core/indexes/accessors.py
@@ -373,6 +373,28 @@ def to_pydatetime(self) -> Series:

@property
def freq(self):
"""
Tries to return a string representing a frequency generated by infer_freq.
Returns None if it can't autodetect the frequency.
See Also
--------
Series.dt.to_period : Cast to PeriodArray/PeriodIndex at a particular
frequency.
Examples
--------
>>> ser = pd.Series(["2024-01-01", "2024-01-02", "2024-01-03", "2024-01-04"])
>>> ser = pd.to_datetime(ser)
>>> ser.dt.freq
'D'
>>> ser = pd.Series(["2022-01-01", "2024-01-01", "2026-01-01", "2028-01-01"])
>>> ser = pd.to_datetime(ser)
>>> ser.dt.freq
'2YS-JAN'
"""
return self._get_values().inferred_freq

def isocalendar(self) -> DataFrame:
8 changes: 4 additions & 4 deletions pandas/core/series.py
@@ -567,7 +567,7 @@ def __arrow_c_stream__(self, requested_schema=None):
Export the pandas Series as an Arrow C stream PyCapsule.
This relies on pyarrow to convert the pandas Series to the Arrow
format (and follows the default behaviour of ``pyarrow.Array.from_pandas``
format (and follows the default behavior of ``pyarrow.Array.from_pandas``
in its handling of the index, i.e. to ignore it).
This conversion is not necessarily zero-copy.
@@ -2226,7 +2226,7 @@ def drop_duplicates(
5 hippo
Name: animal, dtype: object
With the 'keep' parameter, the selection behaviour of duplicated values
With the 'keep' parameter, the selection behavior of duplicated values
can be changed. The value 'first' keeps the first occurrence for each
set of duplicated entries. The default value of keep is 'first'.
@@ -3451,7 +3451,7 @@ def sort_values(
4 5.0
dtype: float64
Sort values ascending order (default behaviour)
Sort values ascending order (default behavior)
>>> s.sort_values(ascending=True)
1 1.0
@@ -4098,7 +4098,7 @@ def swaplevel(
In the following example, we will swap the levels of the indices.
Here, we will swap the levels column-wise, but levels can be swapped row-wise
in a similar manner. Note that column-wise is the default behaviour.
in a similar manner. Note that column-wise is the default behavior.
By not supplying any arguments for i and j, we swap the last and second to
last indices.
11 changes: 11 additions & 0 deletions pandas/errors/__init__.py
@@ -487,6 +487,11 @@ class ChainedAssignmentError(Warning):
For more information on Copy-on-Write,
see :ref:`the user guide<copy_on_write>`.
See Also
--------
options.mode.copy_on_write : Global setting for enabling or disabling
Copy-on-Write behavior.
Examples
--------
>>> pd.options.mode.copy_on_write = True
@@ -672,6 +677,12 @@ class AttributeConflictWarning(Warning):
name than the existing index on an HDFStore or attempting to append an index with a
different frequency than the existing index on an HDFStore.
See Also
--------
HDFStore : Dict-like IO interface for storing pandas objects in PyTables.
DataFrame.to_hdf : Write the contained data to an HDF5 file using HDFStore.
read_hdf : Read from an HDF5 file into a DataFrame.
Examples
--------
>>> idx1 = pd.Index(["a", "b"], name="name1")
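A minimal sketch extending the truncated ``ChainedAssignmentError`` doctest (assumes Copy-on-Write is enabled, as in the example's first line):

>>> import pandas as pd
>>> pd.options.mode.copy_on_write = True
>>> df = pd.DataFrame({"a": [1, 2, 3]})
>>> df["a"][0] = 10  # doctest: +SKIP
>>> # the chained assignment above targets an intermediate copy under
>>> # Copy-on-Write, so pandas emits a ChainedAssignmentError warning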
5 changes: 4 additions & 1 deletion pandas/io/_util.py
@@ -60,9 +60,12 @@ def arrow_table_to_pandas(
table: pyarrow.Table,
dtype_backend: DtypeBackend | Literal["numpy"] | lib.NoDefault = lib.no_default,
null_to_int64: bool = False,
to_pandas_kwargs: dict | None = None,
) -> pd.DataFrame:
pa = import_optional_dependency("pyarrow")

to_pandas_kwargs = {} if to_pandas_kwargs is None else to_pandas_kwargs

types_mapper: type[pd.ArrowDtype] | None | Callable
if dtype_backend == "numpy_nullable":
mapping = _arrow_dtype_mapping()
@@ -80,5 +83,5 @@
else:
raise NotImplementedError

df = table.to_pandas(types_mapper=types_mapper)
df = table.to_pandas(types_mapper=types_mapper, **to_pandas_kwargs)
return df
8 changes: 8 additions & 0 deletions pandas/io/formats/excel.py
@@ -37,6 +37,7 @@
DataFrame,
Index,
MultiIndex,
Period,
PeriodIndex,
)
import pandas.core.common as com
@@ -803,6 +804,9 @@ def _format_hierarchical_rows(self) -> Iterable[ExcelCell]:
allow_fill=levels._can_hold_na,
fill_value=levels._na_value,
)
# GH#60099
if isinstance(values[0], Period):
values = values.to_timestamp()

for i, span_val in spans.items():
mergestart, mergeend = None, None
@@ -827,6 +831,10 @@ def _format_hierarchical_rows(self) -> Iterable[ExcelCell]:
# Format hierarchical rows with non-merged values.
for indexcolvals in zip(*self.df.index):
for idx, indexcolval in enumerate(indexcolvals):
# GH#60099
if isinstance(indexcolval, Period):
indexcolval = indexcolval.to_timestamp()

yield CssExcelCell(
row=self.rowcounter + idx,
col=gcolidx,
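A sketch of the case GH#60099 addresses (the file name and data are illustrative; writing requires an Excel engine such as openpyxl):

>>> import pandas as pd
>>> idx = pd.MultiIndex.from_arrays(
...     [pd.period_range("2024-01", periods=2, freq="M"), ["a", "b"]]
... )
>>> df = pd.DataFrame({"x": [1, 2]}, index=idx)
>>> # the Period level is now converted via to_timestamp() before being
>>> # written, so Excel stores dates instead of stringified Period objects
>>> df.to_excel("periods.xlsx")  # doctest: +SKIP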
3 changes: 2 additions & 1 deletion pandas/io/formats/style_render.py
@@ -868,7 +868,8 @@ def _translate_latex(self, d: dict, clines: str | None) -> None:
or multirow sparsification (so that \multirow and \multicol work correctly).
"""
index_levels = self.index.nlevels
visible_index_level_n = index_levels - sum(self.hide_index_)
# GH 52218
visible_index_level_n = max(1, index_levels - sum(self.hide_index_))
d["head"] = [
[
{**col, "cellstyle": self.ctx_columns[r, c - visible_index_level_n]}
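A sketch of the scenario the ``max(1, ...)`` guard handles (GH 52218): with all index levels hidden, the visible-level count previously reached zero and misaligned the column-style lookup for styled headers. The styling call below is illustrative:

>>> import pandas as pd
>>> df = pd.DataFrame({"A": [1], "B": [2]})
>>> styler = df.style.hide(axis="index").map_index(
...     lambda v: "font-weight: bold;", axis="columns"
... )
>>> print(styler.to_latex(convert_css=True))  # doctest: +SKIP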
22 changes: 20 additions & 2 deletions pandas/io/parquet.py
@@ -242,6 +242,7 @@ def read(
dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
storage_options: StorageOptions | None = None,
filesystem=None,
to_pandas_kwargs: dict[str, Any] | None = None,
**kwargs,
) -> DataFrame:
kwargs["use_pandas_metadata"] = True
@@ -266,7 +267,11 @@
"make_block is deprecated",
DeprecationWarning,
)
result = arrow_table_to_pandas(pa_table, dtype_backend=dtype_backend)
result = arrow_table_to_pandas(
pa_table,
dtype_backend=dtype_backend,
to_pandas_kwargs=to_pandas_kwargs,
)

if pa_table.schema.metadata:
if b"PANDAS_ATTRS" in pa_table.schema.metadata:
@@ -347,6 +352,7 @@ def read(
filters=None,
storage_options: StorageOptions | None = None,
filesystem=None,
to_pandas_kwargs: dict | None = None,
**kwargs,
) -> DataFrame:
parquet_kwargs: dict[str, Any] = {}
@@ -362,6 +368,10 @@
raise NotImplementedError(
"filesystem is not implemented for the fastparquet engine."
)
if to_pandas_kwargs is not None:
raise NotImplementedError(
"to_pandas_kwargs is not implemented for the fastparquet engine."
)
path = stringify_path(path)
handles = None
if is_fsspec_url(path):
@@ -452,7 +462,7 @@ def to_parquet(
.. versionadded:: 2.1.0
kwargs
Additional keyword arguments passed to the engine
Additional keyword arguments passed to the engine.
Returns
-------
@@ -491,6 +501,7 @@ def read_parquet(
dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
filesystem: Any = None,
filters: list[tuple] | list[list[tuple]] | None = None,
to_pandas_kwargs: dict | None = None,
**kwargs,
) -> DataFrame:
"""
@@ -564,6 +575,12 @@
.. versionadded:: 2.1.0
to_pandas_kwargs : dict | None, default None
Keyword arguments to pass through to :func:`pyarrow.Table.to_pandas`
when ``engine="pyarrow"``.
.. versionadded:: 3.0.0
**kwargs
Any additional kwargs are passed to the engine.
@@ -636,5 +653,6 @@
storage_options=storage_options,
dtype_backend=dtype_backend,
filesystem=filesystem,
to_pandas_kwargs=to_pandas_kwargs,
**kwargs,
)
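A usage sketch of the new pass-through (the path and the map column it implies are illustrative; requires the pyarrow engine):

>>> import pandas as pd
>>> df = pd.read_parquet(  # doctest: +SKIP
...     "data.parquet",
...     engine="pyarrow",
...     # forwarded verbatim to pyarrow.Table.to_pandas; "strict" converts
...     # Parquet map columns to Python dicts and raises on duplicate keys
...     to_pandas_kwargs={"maps_as_pydicts": "strict"},
... )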