STYLE: Apply black formatting
jorisvandenbossche committed Jul 4, 2019
1 parent 4199c98 commit bb61358
Showing 748 changed files with 126,206 additions and 97,282 deletions.
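Every hunk below is a mechanical rewrite produced by black, the opinionated Python formatter named in the commit title. As a hedged summary (inferred from the diff itself, using an illustrative snippet rather than a real file from the commit), three patterns recur: single quotes become double quotes, literals that exceed the line-length limit are exploded one item per line with a trailing comma, and the black release used here puts spaces around the ** operator.

import numpy as np

# Before black (illustrative, in the style of the old benchmark code):
N = 10**5
params = ['int', 'uint']
data = {'int': np.arange(N),
        'uint': np.arange(N).astype(np.uint64)}

# After black, as applied in this commit:
N = 10 ** 5  # spaces around the power operator
params = ["int", "uint"]  # double quotes preferred
data = {
    "int": np.arange(N),
    "uint": np.arange(N).astype(np.uint64),  # one item per line, trailing comma
}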
123 changes: 69 additions & 54 deletions asv_bench/benchmarks/algorithms.py
@@ -5,7 +5,7 @@
import pandas as pd
from pandas.util import testing as tm

for imp in ['pandas.util', 'pandas.tools.hashing']:
for imp in ["pandas.util", "pandas.tools.hashing"]:
try:
hashing = import_module(imp)
break
@@ -15,15 +15,17 @@

class Factorize:

params = [[True, False], ['int', 'uint', 'float', 'string']]
param_names = ['sort', 'dtype']
params = [[True, False], ["int", "uint", "float", "string"]]
param_names = ["sort", "dtype"]

def setup(self, sort, dtype):
N = 10**5
data = {'int': pd.Int64Index(np.arange(N).repeat(5)),
'uint': pd.UInt64Index(np.arange(N).repeat(5)),
'float': pd.Float64Index(np.random.randn(N).repeat(5)),
'string': tm.makeStringIndex(N).repeat(5)}
N = 10 ** 5
data = {
"int": pd.Int64Index(np.arange(N).repeat(5)),
"uint": pd.UInt64Index(np.arange(N).repeat(5)),
"float": pd.Float64Index(np.random.randn(N).repeat(5)),
"string": tm.makeStringIndex(N).repeat(5),
}
self.idx = data[dtype]

def time_factorize(self, sort, dtype):
@@ -32,15 +34,17 @@ def time_factorize(self, sort, dtype):

class FactorizeUnique:

params = [[True, False], ['int', 'uint', 'float', 'string']]
param_names = ['sort', 'dtype']
params = [[True, False], ["int", "uint", "float", "string"]]
param_names = ["sort", "dtype"]

def setup(self, sort, dtype):
N = 10**5
data = {'int': pd.Int64Index(np.arange(N)),
'uint': pd.UInt64Index(np.arange(N)),
'float': pd.Float64Index(np.arange(N)),
'string': tm.makeStringIndex(N)}
N = 10 ** 5
data = {
"int": pd.Int64Index(np.arange(N)),
"uint": pd.UInt64Index(np.arange(N)),
"float": pd.Float64Index(np.arange(N)),
"string": tm.makeStringIndex(N),
}
self.idx = data[dtype]
assert self.idx.is_unique

@@ -50,15 +54,17 @@ def time_factorize(self, sort, dtype):

class Duplicated:

params = [['first', 'last', False], ['int', 'uint', 'float', 'string']]
param_names = ['keep', 'dtype']
params = [["first", "last", False], ["int", "uint", "float", "string"]]
param_names = ["keep", "dtype"]

def setup(self, keep, dtype):
N = 10**5
data = {'int': pd.Int64Index(np.arange(N).repeat(5)),
'uint': pd.UInt64Index(np.arange(N).repeat(5)),
'float': pd.Float64Index(np.random.randn(N).repeat(5)),
'string': tm.makeStringIndex(N).repeat(5)}
N = 10 ** 5
data = {
"int": pd.Int64Index(np.arange(N).repeat(5)),
"uint": pd.UInt64Index(np.arange(N).repeat(5)),
"float": pd.Float64Index(np.random.randn(N).repeat(5)),
"string": tm.makeStringIndex(N).repeat(5),
}
self.idx = data[dtype]
# cache is_unique
self.idx.is_unique
@@ -69,15 +75,17 @@ def time_duplicated(self, keep, dtype):

class DuplicatedUniqueIndex:

params = ['int', 'uint', 'float', 'string']
param_names = ['dtype']
params = ["int", "uint", "float", "string"]
param_names = ["dtype"]

def setup(self, dtype):
N = 10**5
data = {'int': pd.Int64Index(np.arange(N)),
'uint': pd.UInt64Index(np.arange(N)),
'float': pd.Float64Index(np.random.randn(N)),
'string': tm.makeStringIndex(N)}
N = 10 ** 5
data = {
"int": pd.Int64Index(np.arange(N)),
"uint": pd.UInt64Index(np.arange(N)),
"float": pd.Float64Index(np.random.randn(N)),
"string": tm.makeStringIndex(N),
}
self.idx = data[dtype]
# cache is_unique
self.idx.is_unique
@@ -87,67 +95,74 @@ def time_duplicated_unique(self, dtype):


class Hashing:

def setup_cache(self):
N = 10**5
N = 10 ** 5

df = pd.DataFrame(
{'strings': pd.Series(tm.makeStringIndex(10000).take(
np.random.randint(0, 10000, size=N))),
'floats': np.random.randn(N),
'ints': np.arange(N),
'dates': pd.date_range('20110101', freq='s', periods=N),
'timedeltas': pd.timedelta_range('1 day', freq='s', periods=N)})
df['categories'] = df['strings'].astype('category')
{
"strings": pd.Series(
tm.makeStringIndex(10000).take(np.random.randint(0, 10000, size=N))
),
"floats": np.random.randn(N),
"ints": np.arange(N),
"dates": pd.date_range("20110101", freq="s", periods=N),
"timedeltas": pd.timedelta_range("1 day", freq="s", periods=N),
}
)
df["categories"] = df["strings"].astype("category")
df.iloc[10:20] = np.nan
return df

def time_frame(self, df):
hashing.hash_pandas_object(df)

def time_series_int(self, df):
hashing.hash_pandas_object(df['ints'])
hashing.hash_pandas_object(df["ints"])

def time_series_string(self, df):
hashing.hash_pandas_object(df['strings'])
hashing.hash_pandas_object(df["strings"])

def time_series_float(self, df):
hashing.hash_pandas_object(df['floats'])
hashing.hash_pandas_object(df["floats"])

def time_series_categorical(self, df):
hashing.hash_pandas_object(df['categories'])
hashing.hash_pandas_object(df["categories"])

def time_series_timedeltas(self, df):
hashing.hash_pandas_object(df['timedeltas'])
hashing.hash_pandas_object(df["timedeltas"])

def time_series_dates(self, df):
hashing.hash_pandas_object(df['dates'])
hashing.hash_pandas_object(df["dates"])


class Quantile:
params = [[0, 0.5, 1],
['linear', 'nearest', 'lower', 'higher', 'midpoint'],
['float', 'int', 'uint']]
param_names = ['quantile', 'interpolation', 'dtype']
params = [
[0, 0.5, 1],
["linear", "nearest", "lower", "higher", "midpoint"],
["float", "int", "uint"],
]
param_names = ["quantile", "interpolation", "dtype"]

def setup(self, quantile, interpolation, dtype):
N = 10**5
data = {'int': np.arange(N),
'uint': np.arange(N).astype(np.uint64),
'float': np.random.randn(N)}
N = 10 ** 5
data = {
"int": np.arange(N),
"uint": np.arange(N).astype(np.uint64),
"float": np.random.randn(N),
}
self.idx = pd.Series(data[dtype].repeat(5))

def time_quantile(self, quantile, interpolation, dtype):
self.idx.quantile(quantile, interpolation=interpolation)


class SortIntegerArray:
params = [10**3, 10**5]
params = [10 ** 3, 10 ** 5]

def setup(self, N):
data = np.arange(N, dtype=float)
data[40] = np.nan
self.array = pd.array(data, dtype='Int64')
self.array = pd.array(data, dtype="Int64")

def time_argsort(self, N):
self.array.argsort()
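An aside on the shape these files share: this is the standard airspeed velocity (asv) benchmark convention, which the page itself does not spell out, so treat the details here as background. asv builds a grid from params and param_names, calls setup() with one parameter combination before timing each method whose name starts with time_, and runs setup_cache() once per class, passing its return value as the first argument of every timed method; that is why time_frame(self, df) in Hashing above receives the DataFrame. A minimal hypothetical class in that style:

import numpy as np
import pandas as pd


class UniqueSketch:
    # asv times each time_* method once per entry in the parameter grid.
    params = [["int", "float"]]
    param_names = ["dtype"]

    def setup(self, dtype):
        # Runs before each timed call with the current parameter value.
        N = 10 ** 5
        data = {"int": np.arange(N), "float": np.random.randn(N)}
        self.s = pd.Series(data[dtype].repeat(5))

    def time_unique(self, dtype):
        self.s.unique()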
6 changes: 2 additions & 4 deletions asv_bench/benchmarks/attrs_caching.py
@@ -1,13 +1,13 @@
import numpy as np
from pandas import DataFrame

try:
from pandas.util import cache_readonly
except ImportError:
from pandas.util.decorators import cache_readonly


class DataFrameAttributes:

def setup(self):
self.df = DataFrame(np.random.randn(10, 6))
self.cur_index = self.df.index
@@ -20,14 +20,12 @@ def time_set_index(self):


class CacheReadonly:

def setup(self):

class Foo:

@cache_readonly
def prop(self):
return 5

self.obj = Foo()

def time_cache_readonly(self):
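The CacheReadonly class above benchmarks pandas' cache_readonly decorator. A minimal sketch of how such a compute-once attribute can be built, assuming the usual non-data-descriptor trick (this is an illustration, not pandas' implementation): the first access calls the wrapped function and stores the result in the instance __dict__, which shadows the descriptor on every later lookup, so repeated reads cost only a plain attribute access.

class cached_readonly:
    # Hypothetical compute-once property, not pandas' code.
    def __init__(self, func):
        self.func = func
        self.name = func.__name__

    def __get__(self, obj, owner=None):
        if obj is None:  # accessed on the class, not an instance
            return self
        value = self.func(obj)
        # No __set__ is defined, so this instance attribute now shadows
        # the descriptor and later reads never reach __get__ again.
        obj.__dict__[self.name] = value
        return value


class Foo:
    @cached_readonly
    def prop(self):
        return 5

With f = Foo(), the first f.prop call runs the function; the second is an ordinary dictionary lookup.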
51 changes: 27 additions & 24 deletions asv_bench/benchmarks/binary_ops.py
@@ -1,6 +1,7 @@
import numpy as np
from pandas import DataFrame, Series, date_range
from pandas.core.algorithms import checked_add_with_arr

try:
import pandas.core.computation.expressions as expr
except ImportError:
@@ -9,14 +10,14 @@

class Ops:

params = [[True, False], ['default', 1]]
param_names = ['use_numexpr', 'threads']
params = [[True, False], ["default", 1]]
param_names = ["use_numexpr", "threads"]

def setup(self, use_numexpr, threads):
self.df = DataFrame(np.random.randn(20000, 100))
self.df2 = DataFrame(np.random.randn(20000, 100))

if threads != 'default':
if threads != "default":
expr.set_numexpr_threads(threads)
if not use_numexpr:
expr.set_use_numexpr(False)
@@ -39,18 +40,21 @@ def teardown(self, use_numexpr, threads):


class Ops2:

def setup(self):
N = 10**3
N = 10 ** 3
self.df = DataFrame(np.random.randn(N, N))
self.df2 = DataFrame(np.random.randn(N, N))

self.df_int = DataFrame(np.random.randint(np.iinfo(np.int16).min,
np.iinfo(np.int16).max,
size=(N, N)))
self.df2_int = DataFrame(np.random.randint(np.iinfo(np.int16).min,
np.iinfo(np.int16).max,
size=(N, N)))
self.df_int = DataFrame(
np.random.randint(
np.iinfo(np.int16).min, np.iinfo(np.int16).max, size=(N, N)
)
)
self.df2_int = DataFrame(
np.random.randint(
np.iinfo(np.int16).min, np.iinfo(np.int16).max, size=(N, N)
)
)

self.s = Series(np.random.randn(N))

@@ -90,16 +94,16 @@ def time_frame_series_dot(self):

class Timeseries:

params = [None, 'US/Eastern']
param_names = ['tz']
params = [None, "US/Eastern"]
param_names = ["tz"]

def setup(self, tz):
N = 10**6
N = 10 ** 6
halfway = (N // 2) - 1
self.s = Series(date_range('20010101', periods=N, freq='T', tz=tz))
self.s = Series(date_range("20010101", periods=N, freq="T", tz=tz))
self.ts = self.s[halfway]

self.s2 = Series(date_range('20010101', periods=N, freq='s', tz=tz))
self.s2 = Series(date_range("20010101", periods=N, freq="s", tz=tz))

def time_series_timestamp_compare(self, tz):
self.s <= self.ts
@@ -117,20 +121,19 @@ def time_timestamp_ops_diff_with_shift(self):
class AddOverflowScalar:

params = [1, -1, 0]
param_names = ['scalar']
param_names = ["scalar"]

def setup(self, scalar):
N = 10**6
N = 10 ** 6
self.arr = np.arange(N)

def time_add_overflow_scalar(self, scalar):
checked_add_with_arr(self.arr, scalar)


class AddOverflowArray:

def setup(self):
N = 10**6
N = 10 ** 6
self.arr = np.arange(N)
self.arr_rev = np.arange(-N, 0)
self.arr_mixed = np.array([1, -1]).repeat(N / 2)
Expand All @@ -144,12 +147,12 @@ def time_add_overflow_arr_mask_nan(self):
checked_add_with_arr(self.arr, self.arr_mixed, arr_mask=self.arr_nan_1)

def time_add_overflow_b_mask_nan(self):
checked_add_with_arr(self.arr, self.arr_mixed,
b_mask=self.arr_nan_1)
checked_add_with_arr(self.arr, self.arr_mixed, b_mask=self.arr_nan_1)

def time_add_overflow_both_arg_nan(self):
checked_add_with_arr(self.arr, self.arr_mixed, arr_mask=self.arr_nan_1,
b_mask=self.arr_nan_2)
checked_add_with_arr(
self.arr, self.arr_mixed, arr_mask=self.arr_nan_1, b_mask=self.arr_nan_2
)


from .pandas_vb_common import setup # noqa: F401
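The AddOverflow* benchmarks above time checked_add_with_arr, a pandas-internal addition meant to raise rather than silently wrap on int64 overflow. As a hedged sketch of the scalar case only (my own illustration of the overflow test, with a hypothetical name, not pandas' implementation): a positive scalar can only overflow where the array is already near the int64 maximum, and symmetrically for a negative scalar, so one min/max scan decides safety before the add.

import numpy as np


def checked_add_scalar(arr, scalar):
    # Illustrative overflow-checked add; assumes a non-empty int64 array
    # and a Python int scalar.
    info = np.iinfo(np.int64)
    if scalar > 0 and arr.max() > info.max - scalar:
        raise OverflowError("int64 addition would overflow")
    if scalar < 0 and arr.min() < info.min - scalar:
        raise OverflowError("int64 addition would overflow")
    return arr + scalar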
(diff truncated: the remaining 745 of 748 changed files are not shown)
