From fced5b1475e9efd1cd2e26a7cf5795d9cc63cec5 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 6 Nov 2019 10:11:03 -0800 Subject: [PATCH] CLN: assorted, mostly typing (#29419) --- pandas/_libs/algos.pyx | 71 ++++++++++++++++++++++++ pandas/_libs/algos_common_helper.pxi.in | 71 ------------------------ pandas/_libs/missing.pyx | 6 +-- pandas/_libs/window.pyx | 4 +- pandas/core/base.py | 8 +-- pandas/core/groupby/categorical.py | 4 +- pandas/core/groupby/generic.py | 72 +++++++++++++------------ pandas/core/groupby/groupby.py | 56 +++++++++++-------- pandas/core/groupby/ops.py | 46 +++++++++------- pandas/core/internals/concat.py | 14 ++--- pandas/core/reshape/concat.py | 27 +++++----- pandas/core/reshape/melt.py | 8 +-- pandas/core/reshape/merge.py | 26 +++++---- pandas/core/reshape/reshape.py | 2 +- pandas/core/window/common.py | 2 +- pandas/core/window/rolling.py | 19 ++++--- pandas/tseries/offsets.py | 8 +-- 17 files changed, 238 insertions(+), 206 deletions(-) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index a08ae66865e20..2d6c8e1008ce1 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -1150,6 +1150,77 @@ def rank_2d(rank_t[:, :] in_arr, axis=0, ties_method='average', return ranks +ctypedef fused diff_t: + float64_t + float32_t + int8_t + int16_t + int32_t + int64_t + +ctypedef fused out_t: + float32_t + float64_t + + +@cython.boundscheck(False) +@cython.wraparound(False) +def diff_2d(ndarray[diff_t, ndim=2] arr, + ndarray[out_t, ndim=2] out, + Py_ssize_t periods, int axis): + cdef: + Py_ssize_t i, j, sx, sy, start, stop + bint f_contig = arr.flags.f_contiguous + + # Disable for unsupported dtype combinations, + # see https://github.com/cython/cython/issues/2646 + if (out_t is float32_t + and not (diff_t is float32_t or diff_t is int8_t or diff_t is int16_t)): + raise NotImplementedError + elif (out_t is float64_t + and (diff_t is float32_t or diff_t is int8_t or diff_t is int16_t)): + raise NotImplementedError + else: + # We put this inside an indented else block to avoid cython build + # warnings about unreachable code + sx, sy = (arr).shape + with nogil: + if f_contig: + if axis == 0: + if periods >= 0: + start, stop = periods, sx + else: + start, stop = 0, sx + periods + for j in range(sy): + for i in range(start, stop): + out[i, j] = arr[i, j] - arr[i - periods, j] + else: + if periods >= 0: + start, stop = periods, sy + else: + start, stop = 0, sy + periods + for j in range(start, stop): + for i in range(sx): + out[i, j] = arr[i, j] - arr[i, j - periods] + else: + if axis == 0: + if periods >= 0: + start, stop = periods, sx + else: + start, stop = 0, sx + periods + for i in range(start, stop): + for j in range(sy): + out[i, j] = arr[i, j] - arr[i - periods, j] + else: + if periods >= 0: + start, stop = periods, sy + else: + start, stop = 0, sy + periods + for i in range(sx): + for j in range(start, stop): + out[i, j] = arr[i, j] - arr[i, j - periods] + + # generated from template include "algos_common_helper.pxi" include "algos_take_helper.pxi" diff --git a/pandas/_libs/algos_common_helper.pxi.in b/pandas/_libs/algos_common_helper.pxi.in index ea05c4afc8fce..5bfc594602dd8 100644 --- a/pandas/_libs/algos_common_helper.pxi.in +++ b/pandas/_libs/algos_common_helper.pxi.in @@ -4,77 +4,6 @@ Template for each `dtype` helper function using 1-d template WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in """ -ctypedef fused diff_t: - float64_t - float32_t - int8_t - int16_t - int32_t - int64_t - -ctypedef fused out_t: - 
float32_t - float64_t - - -@cython.boundscheck(False) -@cython.wraparound(False) -def diff_2d(ndarray[diff_t, ndim=2] arr, - ndarray[out_t, ndim=2] out, - Py_ssize_t periods, int axis): - cdef: - Py_ssize_t i, j, sx, sy, start, stop - bint f_contig = arr.flags.f_contiguous - - # Disable for unsupported dtype combinations, - # see https://github.com/cython/cython/issues/2646 - if (out_t is float32_t - and not (diff_t is float32_t or diff_t is int8_t or diff_t is int16_t)): - raise NotImplementedError - elif (out_t is float64_t - and (diff_t is float32_t or diff_t is int8_t or diff_t is int16_t)): - raise NotImplementedError - else: - # We put this inside an indented else block to avoid cython build - # warnings about unreachable code - sx, sy = (arr).shape - with nogil: - if f_contig: - if axis == 0: - if periods >= 0: - start, stop = periods, sx - else: - start, stop = 0, sx + periods - for j in range(sy): - for i in range(start, stop): - out[i, j] = arr[i, j] - arr[i - periods, j] - else: - if periods >= 0: - start, stop = periods, sy - else: - start, stop = 0, sy + periods - for j in range(start, stop): - for i in range(sx): - out[i, j] = arr[i, j] - arr[i, j - periods] - else: - if axis == 0: - if periods >= 0: - start, stop = periods, sx - else: - start, stop = 0, sx + periods - for i in range(start, stop): - for j in range(sy): - out[i, j] = arr[i, j] - arr[i - periods, j] - else: - if periods >= 0: - start, stop = periods, sy - else: - start, stop = 0, sy + periods - for i in range(sx): - for j in range(start, stop): - out[i, j] = arr[i, j] - arr[i, j - periods] - - # ---------------------------------------------------------------------- # ensure_dtype # ---------------------------------------------------------------------- diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index 052b081988c9e..9568ddb7fe53f 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -121,7 +121,7 @@ cpdef ndarray[uint8_t] isnaobj(ndarray arr): @cython.wraparound(False) @cython.boundscheck(False) -def isnaobj_old(ndarray arr): +def isnaobj_old(arr: ndarray) -> ndarray: """ Return boolean mask denoting which elements of a 1-D array are na-like, defined as being any of: @@ -156,7 +156,7 @@ def isnaobj_old(ndarray arr): @cython.wraparound(False) @cython.boundscheck(False) -def isnaobj2d(ndarray arr): +def isnaobj2d(arr: ndarray) -> ndarray: """ Return boolean mask denoting which elements of a 2-D array are na-like, according to the criteria defined in `checknull`: @@ -198,7 +198,7 @@ def isnaobj2d(ndarray arr): @cython.wraparound(False) @cython.boundscheck(False) -def isnaobj2d_old(ndarray arr): +def isnaobj2d_old(arr: ndarray) -> ndarray: """ Return boolean mask denoting which elements of a 2-D array are na-like, according to the criteria defined in `checknull_old`: diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index d1adc7789a7a3..b51d61d05ce98 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -69,8 +69,8 @@ def _check_minp(win, minp, N, floor=None) -> int: if not util.is_integer_object(minp): raise ValueError("min_periods must be an integer") if minp > win: - raise ValueError("min_periods (%d) must be <= " - "window (%d)" % (minp, win)) + raise ValueError("min_periods (minp) must be <= " + "window (win)".format(minp=minp, win=win)) elif minp > N: minp = N + 1 elif minp < 0: diff --git a/pandas/core/base.py b/pandas/core/base.py index 1a2f906f97152..0e088a381e964 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -207,7 +207,7 
@@ def _selected_obj(self): return self.obj[self._selection] @cache_readonly - def ndim(self): + def ndim(self) -> int: return self._selected_obj.ndim @cache_readonly @@ -339,7 +339,7 @@ def _aggregate(self, arg, *args, **kwargs): obj = self._selected_obj - def nested_renaming_depr(level=4): + def nested_renaming_depr(level: int = 4): # deprecation of nested renaming # GH 15931 msg = textwrap.dedent( @@ -488,11 +488,11 @@ def _agg(arg, func): # combine results - def is_any_series(): + def is_any_series() -> bool: # return a boolean if we have *any* nested series return any(isinstance(r, ABCSeries) for r in result.values()) - def is_any_frame(): + def is_any_frame() -> bool: # return a boolean if we have *any* nested series return any(isinstance(r, ABCDataFrame) for r in result.values()) diff --git a/pandas/core/groupby/categorical.py b/pandas/core/groupby/categorical.py index fcf52ecfcbbcd..399ed9ddc9ba1 100644 --- a/pandas/core/groupby/categorical.py +++ b/pandas/core/groupby/categorical.py @@ -8,7 +8,7 @@ ) -def recode_for_groupby(c, sort, observed): +def recode_for_groupby(c: Categorical, sort: bool, observed: bool): """ Code the categories to ensure we can groupby for categoricals. @@ -74,7 +74,7 @@ def recode_for_groupby(c, sort, observed): return c.reorder_categories(cat.categories), None -def recode_from_groupby(c, sort, ci): +def recode_from_groupby(c: Categorical, sort: bool, ci): """ Reverse the codes_to_groupby to account for sort / observed. diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 1e38dde2096ba..8512b6c3ae530 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -21,6 +21,7 @@ Tuple, Type, Union, + cast, ) import warnings @@ -369,7 +370,7 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False): # GH #6265 return Series([], name=self._selection_name, index=keys) - def _get_index(): + def _get_index() -> Index: if self.grouper.nkeys > 1: index = MultiIndex.from_tuples(keys, names=self.grouper.names) else: @@ -462,7 +463,7 @@ def transform(self, func, *args, **kwargs): result.index = self._selected_obj.index return result - def _transform_fast(self, func, func_nm): + def _transform_fast(self, func, func_nm) -> Series: """ fast version of transform, only applicable to builtin/cythonizable functions @@ -512,7 +513,7 @@ def filter(self, func, dropna=True, *args, **kwargs): wrapper = lambda x: func(x, *args, **kwargs) # Interpret np.nan as False. - def true_and_notna(x, *args, **kwargs): + def true_and_notna(x, *args, **kwargs) -> bool: b = wrapper(x, *args, **kwargs) return b and notna(b) @@ -526,7 +527,7 @@ def true_and_notna(x, *args, **kwargs): filtered = self._apply_filter(indices, dropna) return filtered - def nunique(self, dropna=True): + def nunique(self, dropna: bool = True) -> Series: """ Return number of unique elements in the group. @@ -719,7 +720,7 @@ def value_counts( out = ensure_int64(out) return Series(out, index=mi, name=self._selection_name) - def count(self): + def count(self) -> Series: """ Compute count of group, excluding missing values. 
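The `true_and_notna` helper annotated above is what lets `SeriesGroupBy.filter` treat a
predicate result of NaN as False instead of letting it propagate. A minimal usage sketch
(illustrative only, not part of the patch; the toy Series is made up):

    import numpy as np
    import pandas as pd

    s = pd.Series([1.0, 2.0, np.nan, 4.0], index=["a", "a", "b", "b"])
    # groups whose predicate evaluates to False (or NaN) are dropped
    out = s.groupby(level=0).filter(lambda x: x.mean() > 1.5)
    # group "a" has mean 1.5 and is dropped; only the "b" rows survive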
@@ -768,8 +769,6 @@ class DataFrameGroupBy(GroupBy): _apply_whitelist = base.dataframe_apply_whitelist - _block_agg_axis = 1 - _agg_see_also_doc = dedent( """ See Also @@ -944,19 +943,21 @@ def _iterate_slices(self) -> Iterable[Tuple[Optional[Hashable], Series]]: yield label, values - def _cython_agg_general(self, how, alt=None, numeric_only=True, min_count=-1): + def _cython_agg_general( + self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1 + ): new_items, new_blocks = self._cython_agg_blocks( how, alt=alt, numeric_only=numeric_only, min_count=min_count ) return self._wrap_agged_blocks(new_items, new_blocks) - _block_agg_axis = 0 - - def _cython_agg_blocks(self, how, alt=None, numeric_only=True, min_count=-1): + def _cython_agg_blocks( + self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1 + ): # TODO: the actual managing of mgr_locs is a PITA # here, it should happen via BlockManager.combine - data, agg_axis = self._get_data_to_aggregate() + data = self._get_data_to_aggregate() if numeric_only: data = data.get_numeric_data(copy=False) @@ -971,7 +972,7 @@ def _cython_agg_blocks(self, how, alt=None, numeric_only=True, min_count=-1): locs = block.mgr_locs.as_array try: result, _ = self.grouper.aggregate( - block.values, how, axis=agg_axis, min_count=min_count + block.values, how, axis=1, min_count=min_count ) except NotImplementedError: # generally if we have numeric_only=False @@ -1000,12 +1001,13 @@ def _cython_agg_blocks(self, how, alt=None, numeric_only=True, min_count=-1): # continue and exclude the block deleted_items.append(locs) continue - - # unwrap DataFrame to get array - assert len(result._data.blocks) == 1 - result = result._data.blocks[0].values - if result.ndim == 1 and isinstance(result, np.ndarray): - result = result.reshape(1, -1) + else: + result = cast(DataFrame, result) + # unwrap DataFrame to get array + assert len(result._data.blocks) == 1 + result = result._data.blocks[0].values + if isinstance(result, np.ndarray) and result.ndim == 1: + result = result.reshape(1, -1) finally: assert not isinstance(result, DataFrame) @@ -1081,11 +1083,11 @@ def _aggregate_frame(self, func, *args, **kwargs): return self._wrap_frame_output(result, obj) - def _aggregate_item_by_item(self, func, *args, **kwargs): + def _aggregate_item_by_item(self, func, *args, **kwargs) -> DataFrame: # only for axis==0 obj = self._obj_with_exclusions - result = OrderedDict() + result = OrderedDict() # type: dict cannot_agg = [] errors = None for item in obj: @@ -1291,12 +1293,12 @@ def first_not_none(values): # values are not series or array-like but scalars else: # only coerce dates if we find at least 1 datetime - coerce = any(isinstance(x, Timestamp) for x in values) + should_coerce = any(isinstance(x, Timestamp) for x in values) # self._selection_name not passed through to Series as the # result should not take the name of original selection # of columns return Series(values, index=key_index)._convert( - datetime=True, coerce=coerce + datetime=True, coerce=should_coerce ) else: @@ -1391,7 +1393,7 @@ def transform(self, func, *args, **kwargs): return self._transform_fast(result, obj, func) - def _transform_fast(self, result, obj, func_nm): + def _transform_fast(self, result: DataFrame, obj: DataFrame, func_nm) -> DataFrame: """ Fast transform path for aggregations """ @@ -1451,7 +1453,7 @@ def _choose_path(self, fast_path, slow_path, group): return path, res - def _transform_item_by_item(self, obj, wrapper): + def _transform_item_by_item(self, obj: DataFrame, 
wrapper) -> DataFrame: # iterate through columns output = {} inds = [] @@ -1536,7 +1538,7 @@ def filter(self, func, dropna=True, *args, **kwargs): return self._apply_filter(indices, dropna) - def _gotitem(self, key, ndim, subset=None): + def _gotitem(self, key, ndim: int, subset=None): """ sub-classes to define return a sliced object @@ -1571,7 +1573,7 @@ def _gotitem(self, key, ndim, subset=None): raise AssertionError("invalid ndim for _gotitem") - def _wrap_frame_output(self, result, obj): + def _wrap_frame_output(self, result, obj) -> DataFrame: result_index = self.grouper.levels[0] if self.axis == 0: @@ -1582,9 +1584,9 @@ def _wrap_frame_output(self, result, obj): def _get_data_to_aggregate(self): obj = self._obj_with_exclusions if self.axis == 1: - return obj.T._data, 1 + return obj.T._data else: - return obj._data, 1 + return obj._data def _insert_inaxis_grouper_inplace(self, result): # zip in reverse so we can always insert at loc 0 @@ -1622,7 +1624,7 @@ def _wrap_aggregated_output(self, output, names=None): return self._reindex_output(result)._convert(datetime=True) - def _wrap_transformed_output(self, output, names=None): + def _wrap_transformed_output(self, output, names=None) -> DataFrame: return DataFrame(output, index=self.obj.index) def _wrap_agged_blocks(self, items, blocks): @@ -1670,7 +1672,7 @@ def count(self): DataFrame Count of values within each group. """ - data, _ = self._get_data_to_aggregate() + data = self._get_data_to_aggregate() ids, _, ngroups = self.grouper.group_info mask = ids != -1 @@ -1687,7 +1689,7 @@ def count(self): return self._wrap_agged_blocks(data.items, list(blk)) - def nunique(self, dropna=True): + def nunique(self, dropna: bool = True): """ Return DataFrame with number of distinct observations per group for each column. @@ -1756,7 +1758,7 @@ def groupby_series(obj, col=None): boxplot = boxplot_frame_groupby -def _is_multi_agg_with_relabel(**kwargs): +def _is_multi_agg_with_relabel(**kwargs) -> bool: """ Check whether kwargs passed to .agg look like multi-agg with relabeling. @@ -1778,7 +1780,9 @@ def _is_multi_agg_with_relabel(**kwargs): >>> _is_multi_agg_with_relabel() False """ - return all(isinstance(v, tuple) and len(v) == 2 for v in kwargs.values()) and kwargs + return all(isinstance(v, tuple) and len(v) == 2 for v in kwargs.values()) and ( + len(kwargs) > 0 + ) def _normalize_keyword_aggregation(kwargs): diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 59b118431cfc9..873a31e658625 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -756,7 +756,7 @@ def _iterate_slices(self) -> Iterable[Tuple[Optional[Hashable], Series]]: def transform(self, func, *args, **kwargs): raise AbstractMethodError(self) - def _cumcount_array(self, ascending=True): + def _cumcount_array(self, ascending: bool = True): """ Parameters ---------- @@ -788,7 +788,7 @@ def _cumcount_array(self, ascending=True): rev[sorter] = np.arange(count, dtype=np.intp) return out[rev].astype(np.int64, copy=False) - def _try_cast(self, result, obj, numeric_only=False): + def _try_cast(self, result, obj, numeric_only: bool = False): """ Try to cast the result to our obj original type, we may have roundtripped through object in the mean-time. 
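The `_is_multi_agg_with_relabel` hunk above is a truthiness fix as much as a typing one:
`all(...) and kwargs` evaluates to the `kwargs` dict itself when no keywords are passed,
so the helper could return `{}` where the new `-> bool` annotation promises a bool. A
standalone restatement of the new behaviour (outside pandas, for illustration):

    def is_multi_agg_with_relabel(**kwargs) -> bool:
        return all(
            isinstance(v, tuple) and len(v) == 2 for v in kwargs.values()
        ) and len(kwargs) > 0

    assert is_multi_agg_with_relabel(height=("height", "max")) is True
    assert is_multi_agg_with_relabel() is False   # the old form returned {} here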
@@ -828,7 +828,7 @@ def _try_cast(self, result, obj, numeric_only=False): return result - def _transform_should_cast(self, func_nm): + def _transform_should_cast(self, func_nm: str) -> bool: """ Parameters ---------- @@ -844,8 +844,8 @@ def _transform_should_cast(self, func_nm): func_nm not in base.cython_cast_blacklist ) - def _cython_transform(self, how, numeric_only=True, **kwargs): - output = collections.OrderedDict() + def _cython_transform(self, how: str, numeric_only: bool = True, **kwargs): + output = collections.OrderedDict() # type: dict for name, obj in self._iterate_slices(): is_numeric = is_numeric_dtype(obj.dtype) if numeric_only and not is_numeric: @@ -871,10 +871,12 @@ def _wrap_aggregated_output(self, output, names=None): def _wrap_transformed_output(self, output, names=None): raise AbstractMethodError(self) - def _wrap_applied_output(self, keys, values, not_indexed_same=False): + def _wrap_applied_output(self, keys, values, not_indexed_same: bool = False): raise AbstractMethodError(self) - def _cython_agg_general(self, how, alt=None, numeric_only=True, min_count=-1): + def _cython_agg_general( + self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1 + ): output = {} for name, obj in self._iterate_slices(): is_numeric = is_numeric_dtype(obj.dtype) @@ -920,7 +922,7 @@ def _python_agg_general(self, func, *args, **kwargs): return self._wrap_aggregated_output(output) - def _concat_objects(self, keys, values, not_indexed_same=False): + def _concat_objects(self, keys, values, not_indexed_same: bool = False): from pandas.core.reshape.concat import concat def reset_identity(values): @@ -980,10 +982,7 @@ def reset_identity(values): values = reset_identity(values) result = concat(values, axis=self.axis) - if ( - isinstance(result, Series) - and getattr(self, "_selection_name", None) is not None - ): + if isinstance(result, Series) and self._selection_name is not None: result.name = self._selection_name @@ -1104,7 +1103,7 @@ def result_to_bool(result: np.ndarray, inference: Type) -> np.ndarray: @Substitution(name="groupby") @Appender(_common_see_also) - def any(self, skipna=True): + def any(self, skipna: bool = True): """ Return True if any value in the group is truthful, else False. @@ -1121,7 +1120,7 @@ def any(self, skipna=True): @Substitution(name="groupby") @Appender(_common_see_also) - def all(self, skipna=True): + def all(self, skipna: bool = True): """ Return True if all values in the group are truthful, else False. @@ -1221,7 +1220,7 @@ def median(self, **kwargs): @Substitution(name="groupby") @Appender(_common_see_also) - def std(self, ddof=1, *args, **kwargs): + def std(self, ddof: int = 1, *args, **kwargs): """ Compute standard deviation of groups, excluding missing values. @@ -1244,7 +1243,7 @@ def std(self, ddof=1, *args, **kwargs): @Substitution(name="groupby") @Appender(_common_see_also) - def var(self, ddof=1, *args, **kwargs): + def var(self, ddof: int = 1, *args, **kwargs): """ Compute variance of groups, excluding missing values. @@ -1272,7 +1271,7 @@ def var(self, ddof=1, *args, **kwargs): @Substitution(name="groupby") @Appender(_common_see_also) - def sem(self, ddof=1): + def sem(self, ddof: int = 1): """ Compute standard error of the mean of groups, excluding missing values. @@ -1313,7 +1312,13 @@ def _add_numeric_operations(cls): Add numeric operations to the GroupBy generically. 
""" - def groupby_function(name, alias, npfunc, numeric_only=True, min_count=-1): + def groupby_function( + name: str, + alias: str, + npfunc, + numeric_only: bool = True, + min_count: int = -1, + ): _local_template = """ Compute %(f)s of group values. @@ -1403,7 +1408,7 @@ def last(x): @Substitution(name="groupby") @Appender(_common_see_also) - def ohlc(self): + def ohlc(self) -> DataFrame: """ Compute sum of values, excluding missing values. @@ -1815,7 +1820,7 @@ def nth(self, n: Union[int, List[int]], dropna: Optional[str] = None) -> DataFra return result - def quantile(self, q=0.5, interpolation="linear"): + def quantile(self, q=0.5, interpolation: str = "linear"): """ Return group values at the given quantile, a la numpy.percentile. @@ -1928,7 +1933,7 @@ def post_processor(vals: np.ndarray, inference: Optional[Type]) -> np.ndarray: return result.take(indices) @Substitution(name="groupby") - def ngroup(self, ascending=True): + def ngroup(self, ascending: bool = True): """ Number each group from 0 to the number of groups - 1. @@ -1997,7 +2002,7 @@ def ngroup(self, ascending=True): return result @Substitution(name="groupby") - def cumcount(self, ascending=True): + def cumcount(self, ascending: bool = True): """ Number each item in each group from 0 to the length of that group - 1. @@ -2058,7 +2063,12 @@ def cumcount(self, ascending=True): @Substitution(name="groupby") @Appender(_common_see_also) def rank( - self, method="average", ascending=True, na_option="keep", pct=False, axis=0 + self, + method: str = "average", + ascending: bool = True, + na_option: str = "keep", + pct: bool = False, + axis: int = 0, ): """ Provide the rank of values within each group. diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 9bbe73c1851b5..2cc0e5fde2290 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -7,7 +7,7 @@ """ import collections -from typing import List, Optional +from typing import List, Optional, Type import numpy as np @@ -96,7 +96,7 @@ def __iter__(self): return iter(self.indices) @property - def nkeys(self): + def nkeys(self) -> int: return len(self.groupings) def get_iterator(self, data, axis=0): @@ -135,7 +135,7 @@ def _get_group_keys(self): # provide "flattened" iterator for multi-group setting return get_flattened_iterator(comp_ids, ngroups, self.levels, self.labels) - def apply(self, f, data, axis=0): + def apply(self, f, data, axis: int = 0): mutated = self.mutated splitter = self._get_splitter(data, axis=axis) group_keys = self._get_group_keys() @@ -220,7 +220,7 @@ def levels(self): def names(self): return [ping.name for ping in self.groupings] - def size(self): + def size(self) -> Series: """ Compute group sizes @@ -244,7 +244,7 @@ def groups(self): return self.axis.groupby(to_groupby) @cache_readonly - def is_monotonic(self): + def is_monotonic(self) -> bool: # return if my group orderings are monotonic return Index(self.group_info[0]).is_monotonic @@ -275,7 +275,7 @@ def _get_compressed_labels(self): return ping.labels, np.arange(len(ping.group_index)) @cache_readonly - def ngroups(self): + def ngroups(self) -> int: return len(self.result_index) @property @@ -345,7 +345,7 @@ def _is_builtin_func(self, arg): """ return SelectionMixin._builtin_table.get(arg, arg) - def _get_cython_function(self, kind, how, values, is_numeric): + def _get_cython_function(self, kind: str, how: str, values, is_numeric: bool): dtype_str = values.dtype.name @@ -386,7 +386,9 @@ def get_func(fname): return func - def _cython_operation(self, kind: str, 
values, how, axis, min_count=-1, **kwargs): + def _cython_operation( + self, kind: str, values, how: str, axis: int, min_count: int = -1, **kwargs + ): assert kind in ["transform", "aggregate"] orig_values = values @@ -530,16 +532,23 @@ def _cython_operation(self, kind: str, values, how, axis, min_count=-1, **kwargs return result, names - def aggregate(self, values, how, axis=0, min_count=-1): + def aggregate(self, values, how: str, axis: int = 0, min_count: int = -1): return self._cython_operation( "aggregate", values, how, axis, min_count=min_count ) - def transform(self, values, how, axis=0, **kwargs): + def transform(self, values, how: str, axis: int = 0, **kwargs): return self._cython_operation("transform", values, how, axis, **kwargs) def _aggregate( - self, result, counts, values, comp_ids, agg_func, is_datetimelike, min_count=-1 + self, + result, + counts, + values, + comp_ids, + agg_func, + is_datetimelike: bool, + min_count: int = -1, ): if values.ndim > 2: # punting for now @@ -554,7 +563,7 @@ def _aggregate( return result def _transform( - self, result, values, comp_ids, transform_func, is_datetimelike, **kwargs + self, result, values, comp_ids, transform_func, is_datetimelike: bool, **kwargs ): comp_ids, _, ngroups = self.group_info @@ -566,7 +575,7 @@ def _transform( return result - def agg_series(self, obj, func): + def agg_series(self, obj: Series, func): if is_extension_array_dtype(obj.dtype) and obj.dtype.kind != "M": # _aggregate_series_fast would raise TypeError when # calling libreduction.Slider @@ -684,7 +693,7 @@ def groups(self): return result @property - def nkeys(self): + def nkeys(self) -> int: return 1 def _get_grouper(self): @@ -771,7 +780,7 @@ def groupings(self): for lvl, name in zip(self.levels, self.names) ] - def agg_series(self, obj, func): + def agg_series(self, obj: Series, func): dummy = obj[:0] grouper = libreduction.SeriesBinGrouper(obj, func, self.bins, dummy) return grouper.get_result() @@ -863,10 +872,11 @@ def _chop(self, sdata, slice_obj: slice): return sdata._slice(slice_obj, axis=1) -def get_splitter(data, *args, **kwargs): +def get_splitter(data: NDFrame, *args, **kwargs): if isinstance(data, Series): - klass = SeriesSplitter - elif isinstance(data, DataFrame): + klass = SeriesSplitter # type: Type[DataSplitter] + else: + # i.e. DataFrame klass = FrameSplitter return klass(data, *args, **kwargs) diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 36e1b06230d7e..4ba485c85d8ba 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -244,7 +244,7 @@ def concatenate_join_units(join_units, concat_axis, copy): # Concatenating join units along ax0 is handled in _merge_blocks. raise AssertionError("Concatenating join units along axis0") - empty_dtype, upcasted_na = get_empty_dtype_and_na(join_units) + empty_dtype, upcasted_na = _get_empty_dtype_and_na(join_units) to_concat = [ ju.get_reindexed_values(empty_dtype=empty_dtype, upcasted_na=upcasted_na) @@ -268,7 +268,7 @@ def concatenate_join_units(join_units, concat_axis, copy): return concat_values -def get_empty_dtype_and_na(join_units): +def _get_empty_dtype_and_na(join_units): """ Return dtype and N/A values to use when concatenating specified units. 
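Several hunks above annotate locals with mypy type comments (`# type: dict`,
`# type: Type[DataSplitter]`) so that both branches of a dispatch unify under one type.
A self-contained sketch of the `get_splitter` pattern, using stand-in classes and a
hypothetical DataFrame check (the real function dispatches on `isinstance(data, Series)`):

    from typing import Type

    class DataSplitter:
        def __init__(self, data, *args, **kwargs):
            self.data = data

    class SeriesSplitter(DataSplitter): ...
    class FrameSplitter(DataSplitter): ...

    def get_splitter(data, *args, **kwargs) -> DataSplitter:
        if hasattr(data, "columns"):
            # DataFrame-like input
            klass = FrameSplitter  # type: Type[DataSplitter]
        else:
            # Series-like input
            klass = SeriesSplitter
        return klass(data, *args, **kwargs)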
@@ -284,7 +284,7 @@ def get_empty_dtype_and_na(join_units): if blk is None: return np.float64, np.nan - if is_uniform_reindex(join_units): + if _is_uniform_reindex(join_units): # FIXME: integrate property empty_dtype = join_units[0].block.dtype upcasted_na = join_units[0].block.fill_value @@ -398,7 +398,7 @@ def is_uniform_join_units(join_units): ) -def is_uniform_reindex(join_units): +def _is_uniform_reindex(join_units) -> bool: return ( # TODO: should this be ju.block._can_hold_na? all(ju.block and ju.block.is_extension for ju in join_units) @@ -406,7 +406,7 @@ def is_uniform_reindex(join_units): ) -def trim_join_unit(join_unit, length): +def _trim_join_unit(join_unit, length): """ Reduce join_unit's shape along item axis to length. @@ -486,9 +486,9 @@ def _next_or_none(seq): for i, (plc, unit) in enumerate(next_items): yielded_units[i] = unit if len(plc) > min_len: - # trim_join_unit updates unit in place, so only + # _trim_join_unit updates unit in place, so only # placement needs to be sliced to skip min_len. - next_items[i] = (plc[min_len:], trim_join_unit(unit, min_len)) + next_items[i] = (plc[min_len:], _trim_join_unit(unit, min_len)) else: yielded_placement = plc next_items[i] = _next_or_none(plans[i]) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 39e00047ea968..772ac1cd93059 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -29,15 +29,15 @@ def concat( objs, axis=0, - join="outer", + join: str = "outer", join_axes=None, - ignore_index=False, + ignore_index: bool = False, keys=None, levels=None, names=None, - verify_integrity=False, + verify_integrity: bool = False, sort=None, - copy=True, + copy: bool = True, ): """ Concatenate pandas objects along a particular axis with optional set logic @@ -265,14 +265,14 @@ def __init__( self, objs, axis=0, - join="outer", + join: str = "outer", join_axes=None, keys=None, levels=None, names=None, - ignore_index=False, - verify_integrity=False, - copy=True, + ignore_index: bool = False, + verify_integrity: bool = False, + copy: bool = True, sort=False, ): if isinstance(objs, (NDFrame, str)): @@ -324,8 +324,8 @@ def __init__( for obj in objs: if not isinstance(obj, (Series, DataFrame)): msg = ( - "cannot concatenate object of type '{}';" - " only Series and DataFrame objs are valid".format(type(obj)) + "cannot concatenate object of type '{typ}';" + " only Series and DataFrame objs are valid".format(typ=type(obj)) ) raise TypeError(msg) @@ -580,7 +580,7 @@ def _get_concat_axis(self): return concat_axis - def _maybe_check_integrity(self, concat_index): + def _maybe_check_integrity(self, concat_index: Index): if self.verify_integrity: if not concat_index.is_unique: overlap = concat_index[concat_index.duplicated()].unique() @@ -590,11 +590,11 @@ def _maybe_check_integrity(self, concat_index): ) -def _concat_indexes(indexes): +def _concat_indexes(indexes) -> Index: return indexes[0].append(indexes[1:]) -def _make_concat_multiindex(indexes, keys, levels=None, names=None): +def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiIndex: if (levels is None and isinstance(keys[0], tuple)) or ( levels is not None and len(levels) > 1 @@ -715,7 +715,6 @@ def _get_series_result_type(result, objs=None): """ # TODO: See if we can just inline with _constructor_expanddim # now that sparse is removed. 
- from pandas import DataFrame # concat Series with axis 1 if isinstance(result, dict): diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index c85050bc4232b..98fee491e0a73 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -188,7 +188,7 @@ def lreshape(data, groups, dropna=True, label=None): return data._constructor(mdata, columns=id_cols + pivot_cols) -def wide_to_long(df, stubnames, i, j, sep="", suffix=r"\d+"): +def wide_to_long(df, stubnames, i, j, sep: str = "", suffix: str = r"\d+"): r""" Wide panel to long format. Less flexible but more user-friendly than melt. @@ -419,7 +419,7 @@ def get_var_names(df, stub, sep, suffix): pattern = re.compile(regex) return [col for col in df.columns if pattern.match(col)] - def melt_stub(df, stub, i, j, value_vars, sep): + def melt_stub(df, stub, i, j, value_vars, sep: str): newdf = melt( df, id_vars=i, @@ -456,8 +456,8 @@ def melt_stub(df, stub, i, j, value_vars, sep): value_vars_flattened = [e for sublist in value_vars for e in sublist] id_vars = list(set(df.columns.tolist()).difference(value_vars_flattened)) - melted = [melt_stub(df, s, i, j, v, sep) for s, v in zip(stubnames, value_vars)] - melted = melted[0].join(melted[1:], how="outer") + _melted = [melt_stub(df, s, i, j, v, sep) for s, v in zip(stubnames, value_vars)] + melted = _melted[0].join(_melted[1:], how="outer") if len(i) == 1: new = df[id_vars].set_index(i).join(melted) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 6ef13a62ee366..a189b2cd1ab84 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -10,7 +10,7 @@ import numpy as np -from pandas._libs import hashtable as libhashtable, lib +from pandas._libs import Timedelta, hashtable as libhashtable, lib import pandas._libs.join as libjoin from pandas.errors import MergeError from pandas.util._decorators import Appender, Substitution @@ -36,9 +36,10 @@ is_object_dtype, needs_i8_conversion, ) -from pandas.core.dtypes.missing import isnull, na_value_for_dtype +from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries +from pandas.core.dtypes.missing import isna, na_value_for_dtype -from pandas import Categorical, DataFrame, Index, MultiIndex, Series, Timedelta +from pandas import Categorical, Index, MultiIndex import pandas.core.algorithms as algos from pandas.core.arrays.categorical import _recode_for_categories import pandas.core.common as com @@ -1204,7 +1205,7 @@ def _validate_specification(self): if len(self.right_on) != len(self.left_on): raise ValueError("len(right_on) must equal len(left_on)") - def _validate(self, validate): + def _validate(self, validate: str): # Check uniqueness of each if self.left_index: @@ -1300,7 +1301,12 @@ def _get_join_indexers(left_keys, right_keys, sort=False, how="inner", **kwargs) def _restore_dropped_levels_multijoin( - left, right, dropped_level_names, join_index, lindexer, rindexer + left: MultiIndex, + right: MultiIndex, + dropped_level_names, + join_index, + lindexer, + rindexer, ): """ *this is an internal non-public method* @@ -1338,7 +1344,7 @@ def _restore_dropped_levels_multijoin( """ - def _convert_to_mulitindex(index): + def _convert_to_mulitindex(index) -> MultiIndex: if isinstance(index, MultiIndex): return index else: @@ -1686,13 +1692,13 @@ def flip(xs): msg_missings = "Merge keys contain null values on {side} side" if not Index(left_values).is_monotonic: - if isnull(left_values).any(): + if isna(left_values).any(): raise ValueError(msg_missings.format(side="left")) else: 
raise ValueError(msg_sorted.format(side="left")) if not Index(right_values).is_monotonic: - if isnull(right_values).any(): + if isna(right_values).any(): raise ValueError(msg_missings.format(side="right")) else: raise ValueError(msg_sorted.format(side="right")) @@ -1959,9 +1965,9 @@ def _any(x) -> bool: def validate_operand(obj): - if isinstance(obj, DataFrame): + if isinstance(obj, ABCDataFrame): return obj - elif isinstance(obj, Series): + elif isinstance(obj, ABCSeries): if obj.name is None: raise ValueError("Cannot merge a Series without a name") else: diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 7537dd0ac2065..a8dcc995e48da 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -958,7 +958,7 @@ def _get_dummies_1d( if is_object_dtype(dtype): raise ValueError("dtype=object is not a valid dtype for get_dummies") - def get_empty_frame(data): + def get_empty_frame(data) -> DataFrame: if isinstance(data, Series): index = data.index else: diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index 0f2920b3558c9..2ad5a1eb6faed 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -32,7 +32,7 @@ class _GroupByMixin(GroupByMixin): """ def __init__(self, obj, *args, **kwargs): - parent = kwargs.pop("parent", None) # noqa + kwargs.pop("parent", None) groupby = kwargs.pop("groupby", None) if groupby is None: groupby, obj = obj, obj.obj diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 68eb1f630bfc3..f6d27de132ad9 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1642,17 +1642,18 @@ def _get_corr(a, b): class Rolling(_Rolling_and_Expanding): @cache_readonly - def is_datetimelike(self): + def is_datetimelike(self) -> bool: return isinstance( self._on, (ABCDatetimeIndex, ABCTimedeltaIndex, ABCPeriodIndex) ) @cache_readonly - def _on(self): + def _on(self) -> Index: if self.on is None: if self.axis == 0: return self.obj.index - elif self.axis == 1: + else: + # i.e. 
self.axis == 1 return self.obj.columns elif isinstance(self.on, Index): return self.on @@ -1660,9 +1661,9 @@ def _on(self): return Index(self.obj[self.on]) else: raise ValueError( - "invalid on specified as {0}, " + "invalid on specified as {on}, " "must be a column (of DataFrame), an Index " - "or None".format(self.on) + "or None".format(on=self.on) ) def validate(self): @@ -1711,7 +1712,9 @@ def _validate_monotonic(self): formatted = self.on if self.on is None: formatted = "index" - raise ValueError("{0} must be monotonic".format(formatted)) + raise ValueError( + "{formatted} must be monotonic".format(formatted=formatted) + ) def _validate_freq(self): """ @@ -1723,9 +1726,9 @@ def _validate_freq(self): return to_offset(self.window) except (TypeError, ValueError): raise ValueError( - "passed window {0} is not " + "passed window {window} is not " "compatible with a datetimelike " - "index".format(self.window) + "index".format(window=self.window) ) _agg_see_also_doc = dedent( diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 1e3f5c1ed870e..f5e40e712642e 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -36,8 +36,6 @@ from pandas.core.dtypes.inference import is_list_like -from pandas.core.tools.datetimes import to_datetime - __all__ = [ "Day", "BusinessDay", @@ -2752,8 +2750,10 @@ def generate_range(start=None, end=None, periods=None, offset=BDay()): offset = to_offset(offset) - start = to_datetime(start) - end = to_datetime(end) + start = Timestamp(start) + start = start if start is not NaT else None + end = Timestamp(end) + end = end if end is not NaT else None if start and not offset.onOffset(start): start = offset.rollforward(start)
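The offsets.py change above drops the module-level import of pandas.core.tools.datetimes
and builds the endpoints with `Timestamp` directly; since `Timestamp(None)` is `NaT`, the
extra assignment maps `NaT` back to `None` so the later `if start ...` / `if end ...`
checks keep working. A small sketch of that normalisation (the helper name is illustrative):

    from pandas import NaT, Timestamp

    def _normalize_endpoint(value):
        # Timestamp(None) returns NaT; map it back to None
        ts = Timestamp(value)
        return ts if ts is not NaT else None

    assert _normalize_endpoint(None) is None
    assert _normalize_endpoint("2019-11-06") == Timestamp("2019-11-06")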