Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix several deprecation warnings in pandas 2.1 which became actual errors in 2.2 #158

Merged
merged 2 commits into from
Mar 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AUTHORS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,4 @@ Contributions
- `Anton Ian Sipos <https://github.com/aisipos>`_
- `Chuan-Jhe Hwong <https://github.com/CJHwong>`_
- `Thomas Grainger <https://github.com/graingert/>`_
- `Ryan Smith <https://github.com/bixbyr/>`_
4 changes: 2 additions & 2 deletions django_pandas/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def read_frame(qs, fieldnames=(), index_col=None, coerce_float=False,
"""

if fieldnames:
fieldnames = pd.unique(fieldnames)
fieldnames = pd.unique(pd.Series(fieldnames))
if index_col is not None and index_col not in fieldnames:
# Add it to the field names if not already there
fieldnames = tuple(fieldnames) + (index_col,)
Expand Down Expand Up @@ -151,7 +151,7 @@ def read_frame(qs, fieldnames=(), index_col=None, coerce_float=False,
df.set_index(index_col, inplace=True)

if datetime_index:
df.index = pd.to_datetime(df.index, errors="ignore")
df.index = pd.to_datetime(df.index)
return df


Expand Down
6 changes: 3 additions & 3 deletions django_pandas/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,9 +241,9 @@ def to_timeseries(self, fieldnames=(), verbose=True,

if freq is not None:
if agg_kwargs is None:
agg_kwargs=dict()
agg_kwargs = dict()
if agg_args is None:
agg_args=[]
agg_args = []
df = df.resample(freq, **rs_kwargs).agg(*agg_args, **agg_kwargs)

return df
Expand All @@ -253,7 +253,7 @@ def to_dataframe(self, fieldnames=(), verbose=True, index=None,
"""
Returns a DataFrame from the queryset

Paramaters
Parameters
-----------

fieldnames: The model field names(columns) to utilise in creating
Expand Down
58 changes: 44 additions & 14 deletions django_pandas/tests/test_manager.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
from datetime import datetime

from django.test import TestCase
import pandas as pd
import numpy as np
import pickle
import django
from pandas.core.indexes.datetimes import bdate_range

from .models import (
DataFrame, WideTimeSeries, WideTimeSeriesDateField,
LongTimeSeries, PivotData, Dude, Car, Spot
Expand Down Expand Up @@ -68,8 +72,28 @@ def unpivot(self, frame):
'date': np.tile(np.array(frame.index), K)}
return pd.DataFrame(data, columns=['date', 'variable', 'value'])

def _makeTimeDataFrame(self, n_rows: int) -> pd.DataFrame:
    """Return a frame with columns A, B, C, D indexed by business days.

    Beginning in 2.2 pandas._testing.makeTimeDataFrame was removed, however
    all that is required for the tests in this module is a dataframe with
    columns A, B, C, D of values indexed by a DatetimeIndex.

    Parameters
    ----------
    n_rows : int
        Number of business-day rows to generate, starting from 2000-01-01.

    Returns
    -------
    pd.DataFrame
        One float column per name in ``['A', 'B', 'C', 'D']``.
    """
    # The start date does not depend on the column, so compute it once.
    start = datetime(2000, 1, 1)

    data = {}
    for c in ['A', 'B', 'C', 'D']:
        # bdate_range already yields a DatetimeIndex carrying ``name=c``;
        # no additional wrapping is needed before using it as the index.
        dr = pd.bdate_range(start, periods=n_rows, freq='B', name=c)

        # NOTE: a fresh generator seeded with 2 is created for every column,
        # so all four columns contain the same sequence of values.
        data[c] = pd.Series(
            np.random.default_rng(2).standard_normal(n_rows),
            index=dr,
            name=c,
        )
    return pd.DataFrame(data)

def setUp(self):
self.ts = tm.makeTimeDataFrame(100)
if PANDAS_VERSIONINFO >= '2.2.0':
self.ts = self._makeTimeDataFrame(100)
else:
self.ts = tm.makeTimeDataFrame(100)

self.ts2 = self.unpivot(self.ts).set_index('date')
self.ts.columns = ['col1', 'col2', 'col3', 'col4']
create_list = []
Expand All @@ -87,9 +111,9 @@ def setUp(self):
col4=cols['col4']))
WideTimeSeriesDateField.objects.bulk_create(create_list)

create_list = [LongTimeSeries(date_ix=r[0], series_name=r[1][0],
value=r[1][1])
for r in self.ts2.iterrows()]
create_list = [LongTimeSeries(date_ix=timestamp, series_name=s.iloc[0],
value=s.iloc[1])
for timestamp, s in self.ts2.iterrows()]

LongTimeSeries.objects.bulk_create(create_list)

Expand Down Expand Up @@ -125,18 +149,24 @@ def test_longstorage(self):

def test_resampling(self):
qs = LongTimeSeries.objects.all()
rs_kwargs = {'kind': 'period'}
agg_args = None
agg_kwargs = None
if PANDAS_VERSIONINFO >= '0.25.0':
agg_kwargs = {'func': 'sum'}
else:
agg_args= ['sum']
agg_args = ['sum']

if PANDAS_VERSIONINFO >= '2.2.0':
freq = 'ME'
else:
freq = 'M'

df = qs.to_timeseries(index='date_ix', pivot_columns='series_name',
values='value', storage='long',
freq='M', rs_kwargs=rs_kwargs,
freq=freq,
agg_args=agg_args,
agg_kwargs=agg_kwargs)
df.index = pd.PeriodIndex(df.index)

self.assertEqual([d.month for d in qs.dates('date_ix', 'month')],
df.index.month.tolist())
Expand All @@ -147,9 +177,10 @@ def test_resampling(self):
qs2 = WideTimeSeries.objects.all()

df1 = qs2.to_timeseries(index='date_ix', storage='wide',
freq='M', rs_kwargs=rs_kwargs,
freq=freq,
agg_args=agg_args,
agg_kwargs = agg_kwargs)
agg_kwargs=agg_kwargs)
df1.index = pd.PeriodIndex(df1.index)

self.assertEqual([d.month for d in qs.dates('date_ix', 'month')],
df1.index.month.tolist())
Expand Down Expand Up @@ -222,11 +253,10 @@ def setUp(self):
'value_col_d': np.random.randn(11),
'value_col_e': np.random.randn(11),
'value_col_f': np.random.randn(11)})

create_list = [PivotData(row_col_a=r[1][0], row_col_b=r[1][1],
row_col_c=r[1][2], value_col_d=r[1][3],
value_col_e=r[1][4], value_col_f=r[1][5])
for r in self.data.iterrows()]
create_list = [PivotData(row_col_a=r.iloc[0], row_col_b=r.iloc[1],
row_col_c=r.iloc[2], value_col_d=r.iloc[3],
value_col_e=r.iloc[4], value_col_f=r.iloc[5])
for _, r in self.data.iterrows()]

PivotData.objects.bulk_create(create_list)

Expand Down
3 changes: 1 addition & 2 deletions django_pandas/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,7 @@ def get_cache_key_from_pk(pk):

def inner(pk_series):
pk_series = pk_series.astype(object).where(pk_series.notnull(), None)
cache_keys = pk_series.apply(
get_cache_key_from_pk, convert_dtype=False)
cache_keys = pk_series.apply(get_cache_key_from_pk)
unique_cache_keys = list(filter(None, cache_keys.unique()))

if not unique_cache_keys:
Expand Down