From 5652362b2c7b0a25adcf9a5239e7e7124840d080 Mon Sep 17 00:00:00 2001 From: fulpm <8397318+fulpm@users.noreply.github.com> Date: Wed, 16 Oct 2024 11:52:34 -0400 Subject: [PATCH] 21860: Improved type hinting (#309) - Updates modules to bring them up to the latest typing standards - Fixes feature conviction in the scikit package --- howso/client/base.py | 10 +- howso/client/feature_flags.py | 8 +- howso/client/pandas/client.py | 6 +- howso/client/schemas/reaction.py | 47 +++--- howso/direct/_utilities.py | 7 +- howso/engine/trainee.py | 62 +++---- howso/scikit/scikit.py | 153 ++++++++++-------- howso/utilities/feature_attributes/base.py | 125 +++++++------- howso/utilities/feature_attributes/pandas.py | 2 +- .../utilities/feature_attributes/protocols.py | 81 +++++----- .../feature_attributes/relational.py | 77 ++++----- .../feature_attributes/time_series.py | 63 ++++---- howso/utilities/installation_verification.py | 10 +- howso/utilities/internals.py | 76 ++++----- howso/utilities/json_wrapper.py | 15 +- howso/utilities/posix.py | 10 +- howso/utilities/testing.py | 9 +- howso/utilities/utilities.py | 40 +++-- 18 files changed, 424 insertions(+), 377 deletions(-) diff --git a/howso/client/base.py b/howso/client/base.py index f0b0493f..b02ab8a6 100644 --- a/howso/client/base.py +++ b/howso/client/base.py @@ -1830,7 +1830,7 @@ def react( # noqa: C901 If set to True, will scale influence weights by each case's `weight_feature` weight. If unspecified, case weights will be used if the Trainee has them. - case_indices : Iterable of Sequence[Union[str, int]], defaults to None + case_indices : Iterable of Sequence[str | int], defaults to None An Iterable of Sequences, of session id and index, where index is the original 0-based index of the case as it was trained into the session. If this case does not exist, discriminative react @@ -1938,7 +1938,7 @@ def react( # noqa: C901 action -> pandas.DataFrame A data frame of action values. - details -> Dict or List + details -> dict or list An aggregated list of any requested details. Raises @@ -2677,7 +2677,7 @@ def react_series( # noqa: C901 action -> pandas.DataFrame A data frame of action values. - details -> Dict or List + details -> dict or list An aggregated list of any requested details. Raises @@ -4041,10 +4041,10 @@ def set_auto_ablation_params( residual_prediction_features : Optional[List[str]], optional For each of the features specified, will ablate a case if abs(prediction - case value) / prediction <= feature residual. - tolerance_prediction_threshold_map : Optional[Dict[str, Tuple[float, float]]], optional + tolerance_prediction_threshold_map : Optional[dict[str, tuple[float, float]]], optional For each of the features specified, will ablate a case if the prediction >= (case value - MIN) and the prediction <= (case value + MAX). - relative_prediction_threshold_map : Optional[Dict[str, float]], optional + relative_prediction_threshold_map : Optional[dict[str, float]], optional For each of the features specified, will ablate a case if abs(prediction - case value) / prediction <= relative threshold conviction_lower_threshold : Optional[float], optional diff --git a/howso/client/feature_flags.py b/howso/client/feature_flags.py index ad1e3557..b06f82c9 100644 --- a/howso/client/feature_flags.py +++ b/howso/client/feature_flags.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import typing as t import warnings @@ -13,9 +15,9 @@ class FeatureFlags: """ # Define obsolete flags here to raise a warning when defined - _obsolete_flags: t.Union[t.Set[str], None] = None + _obsolete_flags: set[str] | None = None - def __init__(self, flags: t.Optional[t.Dict[str, t.Any]]): + def __init__(self, flags: t.Optional[dict[str, t.Any]]): self._store = dict() if flags is not None: obsolete = set() @@ -55,7 +57,7 @@ def parse_flag(cls, flag: str) -> str: """Parse the flag name.""" return flag.replace('-', '_').lower() - def __iter__(self) -> t.Generator[t.Tuple[str, bool], None, None]: + def __iter__(self) -> t.Generator[tuple[str, bool], None, None]: """Iterate over flags.""" return ((key, value) for key, value in self._store.items()) diff --git a/howso/client/pandas/client.py b/howso/client/pandas/client.py index 17245ad0..0274c43b 100644 --- a/howso/client/pandas/client.py +++ b/howso/client/pandas/client.py @@ -1,7 +1,7 @@ from __future__ import annotations -from collections.abc import Iterable -from typing import Optional +from collections.abc import Collection +import typing as t import pandas as pd from pandas import DataFrame, Index @@ -88,7 +88,7 @@ def get_extreme_cases( trainee_id: str, num: int, sort_feature: str, - features: Optional[Iterable[str]] = None + features: t.Optional[Collection[str]] = None ) -> DataFrame: """ Base: :func:`howso.client.AbstractHowsoClient.get_extreme_cases`. diff --git a/howso/client/schemas/reaction.py b/howso/client/schemas/reaction.py index 3aaf2b36..b4948d70 100644 --- a/howso/client/schemas/reaction.py +++ b/howso/client/schemas/reaction.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from collections import abc from functools import singledispatchmethod from pprint import pformat @@ -29,11 +31,11 @@ class Reaction(abc.MutableMapping): Parameters ---------- - action : Union[pandas.DataFrame, list, dict], default None + action : pandas.DataFrame or list or dict, default None (Optional) A DataFrame with columns representing the requested features of ``react`` or ``react_series`` cases. - details : List or None + details : list or None (Optional) The details of results from ``react`` or ``react_series`` when providing a ``details`` parameter. """ @@ -60,8 +62,8 @@ class Reaction(abc.MutableMapping): } def __init__(self, - action: t.Optional[t.Union[pd.DataFrame, list, dict]] = None, - details: t.Optional[t.MutableMapping[str, t.Any]] = None + action: t.Optional[pd.DataFrame | list | dict] = None, + details: t.Optional[abc.MutableMapping[str, t.Any]] = None ): """Initialize the dictionary with the allowed keys.""" self._data = { @@ -79,7 +81,7 @@ def __init__(self, self._reorganized_details = None - def _validate_key(self, key) -> str: + def _validate_key(self, key: str) -> str: """ Raise KeyError if key is not one of the allowed keys. @@ -115,18 +117,18 @@ def _validate_key(self, key) -> str: return key - def __getitem__(self, key): + def __getitem__(self, key: str): """Get an item by key if the key is allowed.""" key = self._validate_key(key) return self._data[key] - def __setitem__(self, key, value): + def __setitem__(self, key: str, value: t.Any): """Set an item by key if the key is allowed.""" key = self._validate_key(key) self._reorganized_details = None self._data[key] = value - def __delitem__(self, key): + def __delitem__(self, key: str): """Delete an item by key if the key is allowed.""" key = self._validate_key(key) self._reorganized_details = None @@ -148,7 +150,7 @@ def __repr__(self) -> str: @singledispatchmethod def add_reaction(self, action: pd.DataFrame, - details: t.MutableMapping[str, t.Any]): + details: abc.MutableMapping[str, t.Any]): """ Add more data to the instance. @@ -201,18 +203,18 @@ def add_reaction(self, action: pd.DataFrame, self._reorganized_details = None @add_reaction.register - def _(self, action: dict, details: t.MutableMapping[str, t.Any]): - """Add Dict[List, Dict] to Reaction.""" + def _(self, action: dict, details: abc.MutableMapping[str, t.Any]): + """Add dict[list, dict] to Reaction.""" action_df = pd.DataFrame.from_dict(action) return self.add_reaction(action_df, details) @add_reaction.register - def _(self, action: list, details: t.MutableMapping[str, t.Any]): - """Add list[Dict] to Reaction.""" + def _(self, action: list, details: abc.MutableMapping[str, t.Any]): + """Add list[dict] to Reaction.""" action_df = pd.DataFrame(action) return self.add_reaction(action_df, details) - def gen_cases(self) -> t.Generator[t.Dict, None, None]: + def gen_cases(self) -> t.Generator[dict, None, None]: """ Yield dict containing DetailedCase items for a single case. @@ -240,8 +242,7 @@ def reorganized_details(self): return self._reorganized_details @classmethod - def _reorganize_details(cls, details: t.MutableMapping[str, t.List] - ) -> t.List[t.Dict]: + def _reorganize_details(cls, details: abc.MutableMapping[str, list]) -> list[dict]: """ Re-organize `details` to be a list of dicts. One dict per case. @@ -261,11 +262,15 @@ def _reorganize_details(cls, details: t.MutableMapping[str, t.List] {k1: v1m, k2: v2m, ... kn: vnm} ] - Parameters: - details : Dict of Lists + Parameters + ---------- + details : dict of list + The reaction details. - Returns: - List of Dicts, one Dict per case + Returns + ------- + List of dicts + One dict per case. """ if isinstance(details, list): return details @@ -282,7 +287,7 @@ def _reorganize_details(cls, details: t.MutableMapping[str, t.List] k: v for k, v in details.items() if k in cls.KNOWN_KEYS and v } - # Transform Dict[List] -> List[Dict] + # Transform dict[list] -> list[dict] per_case_details = [ dict(zip([key for key in cleaned_details.keys()], values)) for values in zip(*cleaned_details.values()) diff --git a/howso/direct/_utilities.py b/howso/direct/_utilities.py index 081fd3de..90c2cf84 100644 --- a/howso/direct/_utilities.py +++ b/howso/direct/_utilities.py @@ -1,10 +1,11 @@ +from __future__ import annotations + from importlib import metadata from pathlib import Path import sysconfig -from typing import Union -def get_file_in_distribution(file_path) -> Union[Path, None]: +def get_file_in_distribution(file_path: str) -> Path | None: """ Locate the LICENSE.txt file in the distribution of this package. @@ -20,6 +21,8 @@ def get_file_in_distribution(file_path) -> Union[Path, None]: """ purelib_path = sysconfig.get_path('purelib') dist = metadata.distribution('howso-engine') + if dist.files is None: + raise AssertionError("The package howso-engine is not installed correctly, please reinstall.") for fp in dist.files: if fp.name == file_path: return Path(purelib_path, fp) diff --git a/howso/engine/trainee.py b/howso/engine/trainee.py index cc2c19ec..f5aa9c9e 100644 --- a/howso/engine/trainee.py +++ b/howso/engine/trainee.py @@ -294,7 +294,7 @@ def needs_analyze(self) -> bool: return self._needs_analyze @property - def calculated_matrices(self) -> t.Optional[dict[str, DataFrame]]: + def calculated_matrices(self) -> dict[str, DataFrame] | None: """ The calculated matrices. @@ -2610,7 +2610,7 @@ def react_group( use_case_weights: t.Optional[bool] = None, features: t.Optional[Collection[str]] = None, weight_feature: t.Optional[str] = None, - ) -> DataFrame | dict: + ) -> DataFrame: """ Computes specified data for a **set** of cases. @@ -2661,23 +2661,26 @@ def react_group( Returns ------- - DataFrame or dict + DataFrame The conviction of grouped cases. """ - return self.client.react_group( - trainee_id=self.id, - new_cases=new_cases, - features=features, - familiarity_conviction_addition=familiarity_conviction_addition, - familiarity_conviction_removal=familiarity_conviction_removal, - kl_divergence_addition=kl_divergence_addition, - kl_divergence_removal=kl_divergence_removal, - p_value_of_addition=p_value_of_addition, - p_value_of_removal=p_value_of_removal, - distance_contributions=distance_contributions, - use_case_weights=use_case_weights, - weight_feature=weight_feature, - ) + if isinstance(self.client, HowsoPandasClientMixin): + return self.client.react_group( + trainee_id=self.id, + new_cases=new_cases, + features=features, + familiarity_conviction_addition=familiarity_conviction_addition, + familiarity_conviction_removal=familiarity_conviction_removal, + kl_divergence_addition=kl_divergence_addition, + kl_divergence_removal=kl_divergence_removal, + p_value_of_addition=p_value_of_addition, + p_value_of_removal=p_value_of_removal, + distance_contributions=distance_contributions, + use_case_weights=use_case_weights, + weight_feature=weight_feature, + ) + else: + raise AssertionError("Client must have the 'react_group' method.") def get_feature_conviction( self, @@ -2688,7 +2691,7 @@ def get_feature_conviction( action_features: t.Optional[Collection[str]] = None, features: t.Optional[Collection[str]] = None, weight_feature: t.Optional[str] = None, - ) -> DataFrame | dict: + ) -> DataFrame: """ Get familiarity conviction for features in the model. @@ -2720,19 +2723,22 @@ def get_feature_conviction( Returns ------- - DataFrame or dict + DataFrame A DataFrame containing the familiarity conviction rows to feature columns. """ - return self.client.get_feature_conviction( - trainee_id=self.id, - action_features=action_features, - familiarity_conviction_addition=familiarity_conviction_addition, - familiarity_conviction_removal=familiarity_conviction_removal, - features=features, - use_case_weights=use_case_weights, - weight_feature=weight_feature, - ) + if isinstance(self.client, HowsoPandasClientMixin): + return self.client.get_feature_conviction( + trainee_id=self.id, + action_features=action_features, + familiarity_conviction_addition=familiarity_conviction_addition, + familiarity_conviction_removal=familiarity_conviction_removal, + features=features, + use_case_weights=use_case_weights, + weight_feature=weight_feature, + ) + else: + raise AssertionError("Client must have the 'get_feature_conviction' method.") def get_marginal_stats( self, *, diff --git a/howso/scikit/scikit.py b/howso/scikit/scikit.py index ff4938d4..d3a4bd10 100644 --- a/howso/scikit/scikit.py +++ b/howso/scikit/scikit.py @@ -1,5 +1,8 @@ +from __future__ import annotations + +from collections.abc import Mapping import inspect -from typing import Any, Dict, List, Optional, Union +import typing as t import uuid import numpy as np @@ -10,6 +13,8 @@ from howso import engine from howso.client import AbstractHowsoClient, HowsoPandasClient from howso.client.exceptions import HowsoApiError, HowsoError, HowsoNotUniqueError +from howso.client.schemas import Reaction +from howso.client.typing import Precision import howso.utilities as utils from howso.utilities.feature_attributes import infer_feature_attributes @@ -31,7 +36,7 @@ class HowsoEstimator(BaseEstimator): Parameters ---------- - features : dict of str: dict, default None + features : Mapping of str: Mapping, default None The features that will predict the targets(s). Will be generated automatically if not specified. @@ -48,7 +53,7 @@ class HowsoEstimator(BaseEstimator): "class": { "type" : "nominal" } } - targets : dict of str: dict, default None + targets : Mapping of str: Mapping, default None The target(s) to be predicted. Will be generated automatically if not specified. @@ -73,9 +78,9 @@ class HowsoEstimator(BaseEstimator): ttl : int, in milliseconds The maximum time a server should maintain a connection open for a trainee when processing requests. - client_params : dict, default None + client_params : Mapping, default None The parameters with which to instantiate the client. - trainee_params : dict, default None + trainee_params : Mapping, default None The parameters with which to instantiate the trainee. Examples @@ -108,12 +113,18 @@ class HowsoEstimator(BaseEstimator): 0.9666666666666667 """ - def __init__(self, client: Optional[AbstractHowsoClient] = None, - features: Optional[Dict] = None, targets: Dict = None, - method: Optional[str] = None, verbose: bool = False, - debug: bool = False, ttl: int = DEFAULT_TTL, - trainee_params: Optional[Dict] = None, - client_params: Optional[Dict] = None): + def __init__( + self, + client: t.Optional[AbstractHowsoClient] = None, + features: t.Optional[Mapping] = None, + targets: t.Optional[Mapping] = None, + method: t.Optional[str] = None, + verbose: bool = False, + debug: bool = False, + ttl: int = DEFAULT_TTL, + trainee_params: t.Optional[Mapping] = None, + client_params: t.Optional[Mapping] = None + ): """Initialize HowsoEstimator.""" if method not in [CLASSIFICATION, REGRESSION]: raise ValueError(f'Unsupported method {method}') @@ -230,7 +241,7 @@ def release_resources(self): # noqa: C901 elif getattr(self, 'verbose', False): print("The Howso estimator has no trainees to delete.") - def _get_trainee_params(self) -> Dict: + def _get_trainee_params(self) -> dict: """ Gets the initial parameters of `self.trainee`. @@ -251,7 +262,7 @@ def _get_trainee_params(self) -> Dict: trainee_params["args"] = {p: getattr(self.trainee_id, p, None) for p in parameters} return trainee_params - def _get_client_params(self) -> Dict: + def _get_client_params(self) -> dict: """ Get the initial parameters of `self.client`. @@ -273,7 +284,7 @@ def _get_client_params(self) -> Dict: return client_params @property - def trainee_id(self) -> Union[str, None]: + def trainee_id(self) -> str | None: """Return the trainee's ID, if possible.""" try: return self.trainee.id @@ -281,7 +292,7 @@ def trainee_id(self) -> Union[str, None]: return None @property - def trainee_name(self) -> Union[str, None]: + def trainee_name(self) -> str | None: """Return the trainee name (getter).""" return self.trainee.name @@ -322,7 +333,7 @@ def trainee_name(self, name: str = ''): print(f'The trainee name was successfully set ' f'to "{self.trainee.name}".') - def get_params(self, deep=True) -> Dict[str, Any]: + def get_params(self, deep: bool = True) -> dict[str, t.Any]: """ Get parameters for this estimator. @@ -357,7 +368,7 @@ def get_params(self, deep=True) -> Dict[str, Any]: out[key] = value return out - def fit(self, X, y, analyze=True) -> "HowsoEstimator": + def fit(self, X: np.ndarray, y: np.ndarray, analyze: bool = True) -> "HowsoEstimator": """ Fit a model with Howso. @@ -414,7 +425,7 @@ def fit(self, X, y, analyze=True) -> "HowsoEstimator": return self - def partial_fit(self, X, y): + def partial_fit(self, X: np.ndarray, y: np.ndarray): """ Add data to an existing Howso model. @@ -433,7 +444,7 @@ def partial_fit(self, X, y): X, y = utils.align_data(X, y) self._train(X, y) - def predict(self, X) -> np.ndarray: + def predict(self, X: np.ndarray) -> np.ndarray: """ Make predictions using Howso. @@ -447,7 +458,7 @@ def predict(self, X) -> np.ndarray: numpy.ndarray, shape (n_samples,) The predicted values based on the feature values provided. """ - X = utils.align_data(X) + X = t.cast(np.ndarray, utils.align_data(X)) cases = X.tolist() cases = utils.replace_nan_with_none(cases) @@ -467,7 +478,7 @@ def predict(self, X) -> np.ndarray: print('Server returned NaN with predictions.') return out - def score(self, X, y) -> float: + def score(self, X: np.ndarray, y: np.ndarray) -> float: """ Score Howso. @@ -568,7 +579,7 @@ def react_into_features( weight_feature=weight_feature, ) - def describe_prediction(self, X, details=None) -> Dict: + def describe_prediction(self, X: np.ndarray, details: t.Optional[Mapping] = None) -> Reaction: """ Describe a prediction in detail. @@ -923,7 +934,7 @@ def describe_prediction(self, X, details=None) -> Dict: audit_data['action'] = utils.replace_none_with_nan(audit_data['action']) return audit_data - def get_feature_conviction(self, features=None) -> Dict: + def get_feature_conviction(self, features: t.Optional[str | list[str]] = None) -> dict: """ Gets the conviction of the features in a model. @@ -935,28 +946,24 @@ def get_feature_conviction(self, features=None) -> Dict: Returns ------- dict - A map of feature convictions and contributions. + A map of feature name to conviction. """ ret = self.trainee.get_feature_conviction( features=self.feature_names, action_features=self.target_names, ) - - feature_conviction = ret.to_dict() + feature_conviction = ret.loc["familiarity_conviction_addition"].to_dict() if features is not None: if isinstance(features, str): features = [features] + feature_conviction = { + k: v for k, v in feature_conviction.items() if k in features + } - filtered = {} - for k, v in feature_conviction.items(): - v = {fkey: fval for fkey, fval in v.items() if fkey in features} - filtered[k] = v - feature_conviction = filtered - - return feature_conviction["familiarity_conviction_addition"] + return feature_conviction - def get_case_conviction(self, X, features=None) -> List: + def get_case_conviction(self, X: np.ndarray | list, features: t.Optional[str | list[str]] = None) -> list: """ Return case conviction. @@ -996,7 +1003,7 @@ def get_case_conviction(self, X, features=None) -> List: new_case_groups, features=features ) - return [case['familiarity_conviction_addition'] for case in ret] + return [case for case in ret['familiarity_conviction_addition']] def _set_random_name(self, retries: int = RENAME_RETRIES): """ @@ -1022,7 +1029,7 @@ def _set_random_name(self, retries: int = RENAME_RETRIES): if last_exception: raise last_exception - def __getstate__(self) -> Dict: + def __getstate__(self) -> dict: """ Returns the state of this object (self.__dict__). @@ -1052,7 +1059,7 @@ def __getstate__(self) -> Dict: self.trainee.persist() return self.__dict__ - def __setstate__(self, state: Dict): + def __setstate__(self, state: Mapping): """ Receives the state of the object when unpickling. @@ -1061,7 +1068,7 @@ def __setstate__(self, state: Dict): Parameters ---------- - state : dict + state : Mapping The state of the HowsoEstimator. """ for attr in state: @@ -1174,8 +1181,8 @@ def analyze(self, seed=None, **kwargs): self.trainee.analyze(**kwargs) - def partial_unfit(self, precision: str, num_cases: int, - criteria: Optional[Dict] = None): + def partial_unfit(self, precision: t.Optional[Precision], num_cases: int, + criteria: t.Optional[Mapping] = None): """ Remove a training case from a trainee. @@ -1196,10 +1203,10 @@ def partial_unfit(self, precision: str, num_cases: int, have the feature) | a value (must match exactly) | an array of two values (a range, feature values must be between) """ - self.trainee.remove_cases(precision, num_cases, criteria) + self.trainee.remove_cases(num_cases, precision=precision, condition=criteria) - def feature_add(self, feature: str = None, - value: Union[float, int, str, None] = None): + def feature_add(self, feature: t.Optional[str] = None, + value: t.Optional[float | int | str] = None): """ Add a feature to a trainee. @@ -1219,7 +1226,7 @@ def feature_add(self, feature: str = None, self.trainee.add_feature(feature, feature_value=value) - def feature_remove(self, feature: Optional[str] = None): + def feature_remove(self, feature: t.Optional[str] = None): """ Remove a feature from a trainee. @@ -1272,7 +1279,7 @@ def _generate_new_feature_name(self): return name -def build_cases(X: np.ndarray, y: np.ndarray) -> List: +def build_cases(X: np.ndarray, y: np.ndarray) -> list: """ Transform the cases from the feature and target ndarrays to a list of case values. @@ -1285,8 +1292,8 @@ def build_cases(X: np.ndarray, y: np.ndarray) -> List: Returns ------- - List - A multi-dimensional List. + list + A multi-dimensional list. """ if len(y.shape) == 1: y = np.expand_dims(y, axis=1) @@ -1303,7 +1310,7 @@ class HowsoClassifier(HowsoEstimator): Parameters ---------- - features : dict of str: dict, default None + features : Mapping of str: Mapping, default None The features that will predict the targets(s). Will be generated automatically if not specified. @@ -1320,7 +1327,7 @@ class HowsoClassifier(HowsoEstimator): "class": { "type" : "nominal" } } - targets : dict of str: dict, default None + targets : Mapping of str: Mapping, default None The target(s) to be predicted. Will be generated automatically if not specified. @@ -1343,19 +1350,23 @@ class HowsoClassifier(HowsoEstimator): ttl : int, in milliseconds The maximum time a server should maintain a connection open for a trainee when processing requests. - client_params : dict, default None + client_params : Mapping, default None The parameters with which to instantiate the client. - trainee_params : dict, default None + trainee_params : Mapping, default None The parameters with which to instantiate the client. Intended for use by `HowsoEstimator.get_params`. """ - def __init__(self, client: Optional[AbstractHowsoClient] = None, - features: Optional[Dict] = None, - targets: Optional[Dict] = None, - verbose: bool = False, - debug: bool = False, ttl: int = DEFAULT_TTL, - client_params: Optional[Dict] = None, - trainee_params: Optional[Dict] = None): + def __init__( + self, + client: t.Optional[AbstractHowsoClient] = None, + features: t.Optional[Mapping] = None, + targets: t.Optional[Mapping] = None, + verbose: bool = False, + debug: bool = False, + ttl: int = DEFAULT_TTL, + client_params: t.Optional[Mapping] = None, + trainee_params: t.Optional[Mapping] = None + ): """Initialize HowsoClassifier.""" super(HowsoClassifier, self).__init__(client=client, features=features, @@ -1487,7 +1498,7 @@ class HowsoRegressor(HowsoEstimator): Parameters ---------- - features : dict of str: dict, default None + features : Mapping of str: Mapping, default None The features that will predict the targets(s). Will be generated automatically if not specified. @@ -1504,7 +1515,7 @@ class HowsoRegressor(HowsoEstimator): "class": { "type" : "nominal" } } - targets : dict of str: dict, default None + targets : Mapping of str: Mapping, default None The target(s) to be predicted. Will be generated automatically if not specified. @@ -1527,20 +1538,24 @@ class HowsoRegressor(HowsoEstimator): ttl : int, in milliseconds The maximum time a server should maintain a connection open for a trainee when processing requests. - client_params : dict, default None + client_params : Mapping, default None The parameters with which to instantiate the client. - trainee_params : dict, default None + trainee_params : Mapping, default None The parameters with which to instantiate the client. Intended for use by `HowsoEstimator.get_params`. """ - def __init__(self, client=None, features: Optional[Dict] = None, - targets: Optional[Dict] = None, - verbose: bool = False, - debug: bool = False, - ttl: int = DEFAULT_TTL, - client_params: Optional[Dict] = None, - trainee_params: Optional[Dict] = None): + def __init__( + self, + client=None, + features: t.Optional[Mapping] = None, + targets: t.Optional[Mapping] = None, + verbose: bool = False, + debug: bool = False, + ttl: int = DEFAULT_TTL, + client_params: t.Optional[Mapping] = None, + trainee_params: t.Optional[Mapping] = None + ): """Initialize a HowsoRegressor.""" super(HowsoRegressor, self).__init__(client=client, features=features, diff --git a/howso/utilities/feature_attributes/base.py b/howso/utilities/feature_attributes/base.py index 5203f7d9..85b2ae6d 100644 --- a/howso/utilities/feature_attributes/base.py +++ b/howso/utilities/feature_attributes/base.py @@ -1,5 +1,7 @@ +from __future__ import annotations + from abc import ABC, abstractmethod -from collections.abc import Container +from collections.abc import Collection, Container, Iterable, Mapping from copy import deepcopy from functools import singledispatchmethod import json @@ -7,9 +9,7 @@ import math import numbers import platform -from typing import ( - Any, Collection, Dict, Iterable, List, Mapping, Optional, Tuple, Union -) +import typing as t import warnings from dateutil.parser import isoparse @@ -34,22 +34,22 @@ class FeatureAttributesBase(dict): """Provides accessor methods for and dict-like access to inferred feature attributes.""" - def __init__(self, feature_attributes: Mapping, params: Dict = {}, unsupported: List[str] = []): + def __init__(self, feature_attributes: Mapping, params: dict = {}, unsupported: list[str] = []): """ Instantiate this FeatureAttributesBase object. Parameters ---------- - feature_attributes : Dict + feature_attributes : dict The feature attributes dictionary to be wrapped by this object. - params : Dict + params : dict (Optional) The parameters used in the call to infer_feature_attributes. - unsupported : List of str + unsupported : list of str (Optional) A list of features that contain data that is unsupported by the engine. """ if not isinstance(feature_attributes, Mapping): - raise TypeError('Provided feature attributes must be a Dict.') + raise TypeError('Provided feature attributes must be a Mapping.') self.params = params self.update(feature_attributes) self.unsupported = unsupported @@ -68,7 +68,7 @@ def get_parameters(self) -> dict: Returns ------- - Dict + dict A dictionary containing the kwargs used in the call to `infer_feature_attributes`. """ @@ -85,9 +85,9 @@ def to_json(self) -> str: """ return json.dumps(self) - def get_names(self, *, types: Union[str, Container, None] = None, - without: Union[Iterable[str], None] = None, - ) -> List[str]: + def get_names(self, *, types: t.Optional[str | Container] = None, + without: t.Optional[Iterable[str]] = None, + ) -> list[str]: """ Get feature names associated with this FeatureAttributes object. @@ -101,7 +101,7 @@ def get_names(self, *, types: Union[str, Container, None] = None, Returns ------- - List of String + list of str A list of feature names. """ if without: @@ -125,7 +125,7 @@ def get_names(self, *, types: Union[str, Container, None] = None, ] def _validate_bounds(self, data: pd.DataFrame, feature: str, - attributes: Dict) -> List[str]: # noqa: C901 + attributes: dict) -> list[str]: # noqa: C901 """Validate the feature bounds of the provided DataFrame.""" # Import here to avoid circular import from howso.utilities import date_to_epoch @@ -177,8 +177,8 @@ def _validate_bounds(self, data: pd.DataFrame, feature: str, return errors def _validate_dtype(self, data: pd.DataFrame, feature: str, # noqa: C901 - expected_dtype: Union[str, pd.CategoricalDtype], coerced_df: pd.DataFrame, - coerce: bool = False, localize_datetimes=True) -> List[str]: + expected_dtype: str | pd.CategoricalDtype, coerced_df: pd.DataFrame, + coerce: bool = False, localize_datetimes: bool = True) -> list[str]: """Validate the data type of a feature and optionally attempt to coerce.""" errors = [] series = coerced_df[feature] @@ -239,12 +239,12 @@ def _validate_dtype(self, data: pd.DataFrame, feature: str, # noqa: C901 return errors @staticmethod - def _allows_null(attributes: Dict) -> bool: + def _allows_null(attributes: dict) -> bool: """Return whether the given attributes indicates the allowance of null values.""" return 'bounds' in attributes and attributes['bounds'].get('allow_null', False) def _validate_df(self, data: pd.DataFrame, coerce: bool = False, # noqa: C901 - raise_errors: bool = False, table_name: str = None, validate_bounds=True, + raise_errors: bool = False, table_name: t.Optional[str] = None, validate_bounds=True, allow_missing_features: bool = False, localize_datetimes=True): errors = [] coerced_df = data.copy(deep=True) @@ -350,8 +350,8 @@ def _validate_df(self, data: pd.DataFrame, coerce: bool = False, # noqa: C901 return coerced_df @abstractmethod - def validate(self, data: Any, coerce=False, raise_errors=False, validate_bounds=True, - allow_missing_features=False, localize_datetimes=True): + def validate(self, data: t.Any, coerce: bool = False, raise_errors: bool = False, validate_bounds: bool = True, + allow_missing_features: bool = False, localize_datetimes: bool = True): """ Validate the given data against this FeatureAttributes object. @@ -391,7 +391,7 @@ class SingleTableFeatureAttributes(FeatureAttributesBase): """A dict-like object containing feature attributes for a single table or DataFrame.""" @singledispatchmethod - def validate(data: Any, **kwargs): + def validate(data: t.Any, **kwargs): """ Validate the given single table data against this FeatureAttributes object. @@ -420,7 +420,7 @@ def validate(data: Any, **kwargs): raise NotImplementedError("'data' is an unsupported type") @validate.register - def _(self, data: pd.DataFrame, coerce=False, raise_errors=False, validate_bounds=True, + def _(self, data: pd.DataFrame, coerce: bool = False, raise_errors: bool = False, validate_bounds: bool = True, allow_missing_features=False, localize_datetimes=True): return self._validate_df(data, coerce=coerce, raise_errors=raise_errors, validate_bounds=validate_bounds, @@ -522,20 +522,20 @@ class InferFeatureAttributesBase(ABC): """ def _process(self, # noqa: C901 - features: Optional[Dict[str, Dict]] = None, + features: t.Optional[dict[str, dict]] = None, infer_bounds: bool = True, - tight_bounds: Optional[Iterable[str]] = None, - mode_bound_features: Optional[Iterable[str]] = None, - id_feature_name: Optional[Union[str, Iterable[str]]] = None, + tight_bounds: t.Optional[Iterable[str]] = None, + mode_bound_features: t.Optional[Iterable[str]] = None, + id_feature_name: t.Optional[str | Iterable[str]] = None, attempt_infer_extended_nominals: bool = False, - nominal_substitution_config: Optional[Dict[str, Dict]] = None, - include_extended_nominal_probabilities: Optional[bool] = False, - datetime_feature_formats: Optional[Dict] = None, - ordinal_feature_values: Optional[Dict[str, List[str]]] = None, - dependent_features: Optional[Dict[str, List[str]]] = None, + nominal_substitution_config: t.Optional[dict[str, dict]] = None, + include_extended_nominal_probabilities: t.Optional[bool] = False, + datetime_feature_formats: t.Optional[dict] = None, + ordinal_feature_values: t.Optional[dict[str, list[str]]] = None, + dependent_features: t.Optional[dict[str, list[str]]] = None, include_sample: bool = False, - max_workers: Optional[int] = None, - ) -> Dict: + max_workers: t.Optional[int] = None, + ) -> dict: """ Get inferred feature attributes for the parameters. @@ -557,7 +557,7 @@ def _process(self, # noqa: C901 ) if features: - feature_attributes: Dict = serialize_models(features) + feature_attributes: dict = serialize_models(features) else: feature_attributes = dict() @@ -805,39 +805,39 @@ def __call__(self) -> FeatureAttributesBase: """Process and return the feature attributes.""" @abstractmethod - def _infer_floating_point_attributes(self, feature_name: str) -> Dict: + def _infer_floating_point_attributes(self, feature_name: str) -> dict: """Get inferred attributes for the given floating-point column.""" @abstractmethod - def _infer_datetime_attributes(self, feature_name: str) -> Dict: + def _infer_datetime_attributes(self, feature_name: str) -> dict: """Get inferred attributes for the given date-time column.""" @abstractmethod - def _infer_date_attributes(self, feature_name: str) -> Dict: + def _infer_date_attributes(self, feature_name: str) -> dict: """Get inferred attributes for the given date only column.""" @abstractmethod - def _infer_time_attributes(self, feature_name: str) -> Dict: + def _infer_time_attributes(self, feature_name: str) -> dict: """Get inferred attributes for the given time column.""" @abstractmethod - def _infer_timedelta_attributes(self, feature_name: str) -> Dict: + def _infer_timedelta_attributes(self, feature_name: str) -> dict: """Get inferred attributes for the given timedelta column.""" @abstractmethod - def _infer_boolean_attributes(self, feature_name: str) -> Dict: + def _infer_boolean_attributes(self, feature_name: str) -> dict: """Get inferred attributes for the given boolean column.""" @abstractmethod - def _infer_integer_attributes(self, feature_name: str) -> Dict: + def _infer_integer_attributes(self, feature_name: str) -> dict: """Get inferred attributes for the given integer column.""" @abstractmethod - def _infer_string_attributes(self, feature_name: str) -> Dict: + def _infer_string_attributes(self, feature_name: str) -> dict: """Get inferred attributes for the given string column.""" @abstractmethod - def _infer_unknown_attributes(self, feature_name: str) -> Dict: + def _infer_unknown_attributes(self, feature_name: str) -> dict: """Get inferred attributes for the given unknown-type column.""" @abstractmethod @@ -845,9 +845,9 @@ def _infer_feature_bounds( self, feature_attributes: Mapping[str, Mapping], feature_name: str, - tight_bounds: Optional[Iterable[str]] = None, - mode_bound_features: Optional[Iterable[str]] = None, - ) -> Optional[Dict]: + tight_bounds: t.Optional[Iterable[str]] = None, + mode_bound_features: t.Optional[Iterable[str]] = None, + ) -> dict | None: """ Return inferred bounds for the given column. @@ -857,8 +857,6 @@ def _infer_feature_bounds( Parameters ---------- - data : Any - Input data feature_attributes : dict A dictionary of feature names to a dictionary of parameters. feature_name : str @@ -878,9 +876,9 @@ def _infer_feature_bounds( """ @staticmethod - def infer_loose_feature_bounds(min_bound: Union[int, float], - max_bound: Union[int, float] - ) -> Tuple[float, float]: + def infer_loose_feature_bounds(min_bound: int | float, + max_bound: int | float + ) -> tuple[float, float]: """ Infer the loose bound values given a tight min and max bound value. @@ -895,8 +893,9 @@ def infer_loose_feature_bounds(min_bound: Union[int, float], Returns ------- - Tuple of (min_bound, max_bound) of loose bounds around the provided tight - min and max_bound bounds + tuple + Tuple (min_bound, max_bound) of loose bounds around the provided tight + min and max_bound bounds """ # NOTE: It was considered to use a smoother bounds-expansion function that # looked like max_loose_bounds = exp(ln(max_bounds) + 0.5), but this @@ -934,7 +933,7 @@ def _get_datetime_max(): return WIN_DT_MAX return LINUX_DT_MAX - def _check_unsupported_data(self, feature_attributes: Dict) -> None: + def _check_unsupported_data(self, feature_attributes: dict) -> None: """ Determine whether any features contain data that is unsupported by the core. @@ -988,7 +987,7 @@ def _check_unsupported_data(self, feature_attributes: Dict) -> None: self.unsupported.append(feature_name) @staticmethod - def _is_datetime(string): + def _is_datetime(string: str): """ Return True if string can be interpreted as a date. @@ -1131,9 +1130,7 @@ def _is_yaml_feature(self, feature: str) -> bool: return True @staticmethod - def _add_id_attribute(feature_attributes: Mapping, - id_feature_name: str - ) -> None: + def _add_id_attribute(feature_attributes: Mapping, id_feature_name: str) -> None: """Update the given feature_attributes in-place for id_features.""" if id_feature_name in feature_attributes: feature_attributes[id_feature_name]['id_feature'] = True @@ -1149,7 +1146,7 @@ def _add_id_attribute(feature_attributes: Mapping, def _get_min_max_number_size_bounds( cls, feature_attributes: Mapping, feature_name: str - ) -> Tuple[Optional[numbers.Number], Optional[numbers.Number]]: + ) -> tuple[numbers.Number | None, numbers.Number | None]: """ Get the minimum and maximum size bounds for a numeric feature. @@ -1214,7 +1211,7 @@ def _get_min_max_number_size_bounds( @abstractmethod def _get_feature_type(self, feature_name: str - ) -> Tuple[Optional[FeatureType], Optional[Dict]]: + ) -> tuple[FeatureType | None, dict | None]: """ Return the type information for a given feature. @@ -1233,7 +1230,7 @@ def _get_feature_type(self, feature_name: str """ @abstractmethod - def _get_random_value(self, feature_name: str, no_nulls: bool = False) -> Any: + def _get_random_value(self, feature_name: str, no_nulls: bool = False) -> t.Any: """Retrieve a random value from the data.""" @abstractmethod @@ -1241,7 +1238,7 @@ def _has_unique_constraint(self, feature_name: str) -> bool: """Return whether this feature has a unique constraint.""" @abstractmethod - def _get_first_non_null(self, feature_name: str) -> Optional[Any]: + def _get_first_non_null(self, feature_name: str) -> t.Any: """ Get the first non-null value in the given column. @@ -1258,5 +1255,5 @@ def _get_num_cases(self) -> int: """Get the number of cases/rows in the data.""" @abstractmethod - def _get_feature_names(self) -> List[str]: + def _get_feature_names(self) -> list[str]: """Get the names of the features/columns of the data.""" diff --git a/howso/utilities/feature_attributes/pandas.py b/howso/utilities/feature_attributes/pandas.py index 80427221..7730e166 100644 --- a/howso/utilities/feature_attributes/pandas.py +++ b/howso/utilities/feature_attributes/pandas.py @@ -137,7 +137,7 @@ def _has_unique_constraint(self, feature_name: str) -> bool: return False def _get_feature_type(self, feature_name: str # noqa: C901 - ) -> tuple[t.Optional[FeatureType], t.Optional[dict]]: + ) -> tuple[FeatureType | None, dict | None]: # Import this here to avoid circular import from howso.client.exceptions import HowsoError feature = self.data[feature_name] diff --git a/howso/utilities/feature_attributes/protocols.py b/howso/utilities/feature_attributes/protocols.py index 0e43ea31..24d9660b 100644 --- a/howso/utilities/feature_attributes/protocols.py +++ b/howso/utilities/feature_attributes/protocols.py @@ -1,28 +1,29 @@ +from __future__ import annotations + from abc import abstractmethod -from typing import ( - Any, Dict, Generator, Iterable, List, Optional, Protocol, runtime_checkable, Tuple, Union -) +from collections.abc import Generator, Iterable +import typing as t import pandas as pd -class TableNameProtocol(Protocol): +class TableNameProtocol(t.Protocol): """Protocol for a database table name object.""" schema: str table: str -class SQLTableProtocol(Protocol): +class SQLTableProtocol(t.Protocol): """Protocol for a SQL table object.""" - c: Dict - columns: Dict + c: dict + columns: dict name: str schema: str -class SessionProtocol(Protocol): +class SessionProtocol(t.Protocol): """Protocol for a sqlalchemy Session object.""" @abstractmethod @@ -41,7 +42,7 @@ def rollback(self): raise NotImplementedError -class AbstractDataProtocol(Protocol): +class AbstractDataProtocol(t.Protocol): """Protocol for an abstract data file object.""" @abstractmethod @@ -55,12 +56,12 @@ def get_dataframe(self) -> pd.DataFrame: raise NotImplementedError @abstractmethod - def get_group_map(self, column_name: str) -> Dict[Any, int]: + def get_group_map(self, column_name: str) -> dict[t.Any, int]: """Get the group map.""" raise NotImplementedError @abstractmethod - def get_n_random_rows(self, samples: int, seed: Optional[int]) -> pd.DataFrame: + def get_n_random_rows(self, samples: int, seed: t.Optional[int]) -> pd.DataFrame: """Get a specified number of random rows.""" raise NotImplementedError @@ -73,15 +74,15 @@ def write_chunk(self, chunk: pd.DataFrame, *, @abstractmethod def yield_chunk(self, chunk_size: int = 5000, *, - max_chunks: Optional[int] = None, - skip_chunks: Optional[int] = None, + max_chunks: t.Optional[int] = None, + skip_chunks: t.Optional[int] = None, ) -> Generator[pd.DataFrame, None, None]: """Provide a chunk generator.""" raise NotImplementedError @abstractmethod def yield_grouped_chunk(self, column_name: str, - groups: Iterable[Iterable[Any]] + groups: Iterable[Iterable[t.Any]] ) -> Generator[pd.DataFrame, None, None]: """Provide a grouped chunk generator.""" raise NotImplementedError @@ -92,28 +93,28 @@ def map_keys(self, chunk: pd.DataFrame) -> pd.DataFrame: raise NotImplementedError -class RelationshipProtocol(Protocol): +class RelationshipProtocol(t.Protocol): """Protocol for an object representing a relationship in a database.""" source: TableNameProtocol - source_columns: Tuple[str] + source_columns: tuple[str] destination: TableNameProtocol - destination_columns: Tuple[str] + destination_columns: tuple[str] -class ComponentProtocol(Protocol): - """Protocl for an object representing an independent collection of dataframes.""" +class ComponentProtocol(t.Protocol): + """Protocol for an object representing an independent collection of DataFrame.""" - datastore: Any - graph: Any + datastore: t.Any + graph: t.Any -@runtime_checkable -class DatastoreProtocol(Protocol): +@t.runtime_checkable +class DatastoreProtocol(t.Protocol): """Protocol for a datastore object.""" @abstractmethod - def items(self) -> Generator[Tuple[TableNameProtocol, AbstractDataProtocol], None, None]: + def items(self) -> Generator[tuple[TableNameProtocol, AbstractDataProtocol], None, None]: """Get items in the datastore.""" raise NotImplementedError @@ -135,14 +136,12 @@ def components(self) -> Generator[ComponentProtocol, None, None]: raise NotImplementedError @abstractmethod - def pre_synth_check(self, related_datastore: Any, - **kwargs) -> bool: + def pre_synth_check(self, related_datastore: t.Any, **kwargs) -> bool: """Attempt a pre-synth check.""" raise NotImplementedError @abstractmethod - def reflect(self, source: Any, - drop_existing: bool = False) -> None: + def reflect(self, source: t.Any, drop_existing: bool = False) -> None: """Do a reflection.""" raise NotImplementedError @@ -152,7 +151,7 @@ def has_feature_support(self, feature_key: str) -> bool: raise NotImplementedError @abstractmethod - def get_row_count(self, table_name: TableNameProtocol) -> Optional[int]: + def get_row_count(self, table_name: TableNameProtocol) -> int | None: """Get the number of rows in the specified table.""" raise NotImplementedError @@ -169,34 +168,34 @@ def set_data(self, table_name, data: AbstractDataProtocol): @abstractmethod def get_values(self, table_name: TableNameProtocol, - primary_key_columns: Union[List[str], str], - primary_key_values: Union[List[List[Any]], List[Any]], - column_name: str) -> List[Any]: + primary_key_columns: list[str] | str, + primary_key_values: list[list[t.Any]] | list[t.Any], + column_name: str) -> list[t.Any]: """Get the column values in a specified table.""" raise NotImplementedError @abstractmethod def replace_values(self, table_name: TableNameProtocol, - primary_key_columns: Union[List[str], str], - primary_key_values: Union[List[Any], Any], + primary_key_columns: list[str] | str, + primary_key_values: list[t.Any] | t.Any, column_name: str, - replace_values: List[Any], + replace_values: list[t.Any], return_old: bool = False - ) -> Optional[List[Any]]: + ) -> list[t.Any] | None: """Replace the column values in a specified table.""" raise NotImplementedError -@runtime_checkable -class RelationalDatastoreProtocol(DatastoreProtocol, Protocol): +@t.runtime_checkable +class RelationalDatastoreProtocol(DatastoreProtocol, t.Protocol): """Protocol for a relational datastore object.""" - graph: Any + graph: t.Any -@runtime_checkable -class SQLRelationalDatastoreProtocol(DatastoreProtocol, Protocol): +@t.runtime_checkable +class SQLRelationalDatastoreProtocol(DatastoreProtocol, t.Protocol): """Protocol for a SQL relational datastore object.""" engine: int diff --git a/howso/utilities/feature_attributes/relational.py b/howso/utilities/feature_attributes/relational.py index 2ba0abcd..da5ab92b 100644 --- a/howso/utilities/feature_attributes/relational.py +++ b/howso/utilities/feature_attributes/relational.py @@ -1,3 +1,6 @@ +from __future__ import annotations + +from collections.abc import Iterable, Mapping from contextlib import contextmanager import datetime from datetime import time, timedelta @@ -5,9 +8,7 @@ import logging from math import ceil, isnan, log import re -from typing import ( - Any, Dict, Iterable, List, Mapping, Optional, Tuple -) +import typing as t import warnings import numpy as np @@ -131,7 +132,7 @@ def __init__(self, dialect) -> None: """Initialize this DatastoreColumnTypes class and set the dialect.""" self._dialect = dialect - def _get_data_types(self, key: str) -> List[str]: + def _get_data_types(self, key: str) -> list[str]: """ Get data types by key. @@ -151,12 +152,12 @@ def _get_data_types(self, key: str) -> List[str]: return self.DEFAULTS[key] @property - def variable_size_numbers(self) -> List[str]: + def variable_size_numbers(self) -> list[str]: """Get variable size number column types.""" return self._get_data_types('variable_size_numbers') @property - def exact_size_numbers(self) -> Dict[str, int]: + def exact_size_numbers(self) -> dict[str, int]: """Get exact size number column types.""" try: overrides = self.DIALECT_OVERRIDES[ @@ -169,54 +170,54 @@ def exact_size_numbers(self) -> Dict[str, int]: } @property - def floating_point_types(self) -> List[str]: + def floating_point_types(self) -> list[str]: """Get floating point column types.""" return self._get_data_types('floating_point') @property - def integer_types(self) -> List[str]: + def integer_types(self) -> list[str]: """Get integer column types.""" return self._get_data_types('integer') @property - def boolean_types(self) -> List[str]: + def boolean_types(self) -> list[str]: """Get boolean column types.""" return self._get_data_types('boolean') @property - def tz_aware_date_time_types(self) -> List[str]: + def tz_aware_date_time_types(self) -> list[str]: """Get date time column types that are timezone aware.""" return self._get_data_types('tz_aware_date_time_types') @property - def datetime_types(self) -> List[str]: + def datetime_types(self) -> list[str]: """Get date-time column types.""" return self._get_data_types('datetime') @property - def date_types(self) -> List[str]: + def date_types(self) -> list[str]: """Get date column types.""" return self._get_data_types('date') @property - def time_types(self) -> List[str]: + def time_types(self) -> list[str]: """Get time column types.""" return self._get_data_types('time') @property - def all_date_time_types(self) -> List[str]: + def all_date_time_types(self) -> list[str]: """Get all date and time types.""" return (self._get_data_types('datetime') + self._get_data_types('date') + self._get_data_types('time')) @property - def timedelta_types(self) -> List[str]: + def timedelta_types(self) -> list[str]: """Return timedelta column types.""" return self._get_data_types('timedelta') @property - def string_types(self) -> List[str]: + def string_types(self) -> list[str]: """Return string column types.""" return self._get_data_types('string') @@ -293,13 +294,13 @@ def _is_foreign_key(self, feature_name: str) -> bool: return False - def _get_first_non_null(self, feature_name: str) -> Optional[Any]: + def _get_first_non_null(self, feature_name: str) -> t.Any | None: with session_scope(self.session_cls) as session: first_non_null = session.query(self.data.c[feature_name]).filter( self.data.c[feature_name].is_not(None)).first() return first_non_null - def _get_random_value(self, feature_name: str, no_nulls: bool = False) -> Optional[Any]: + def _get_random_value(self, feature_name: str, no_nulls: bool = False) -> t.Any | None: """ Return a random sample from the given table column. @@ -369,7 +370,7 @@ def _has_unique_constraint(self, feature_name: str) -> bool: return any([c['column_names'] == [feature_name] for c in uniques]) - def _get_unique_values(self, feature_name: str) -> List[Any]: + def _get_unique_values(self, feature_name: str) -> list[t.Any]: """Get a list of all the unique values for a column.""" with session_scope(self.session_cls) as session: distinct_values = ( @@ -380,7 +381,7 @@ def _get_unique_values(self, feature_name: str) -> List[Any]: return distinct_values @classmethod - def _value_to_number(cls, value: Any) -> Any: + def _value_to_number(cls, value: t.Any) -> t.Any: """Convert value to a number.""" if pd.isna(value): return float('nan') @@ -393,11 +394,11 @@ def _value_to_number(cls, value: Any) -> Any: else: return value - def _get_min_max_values(self, feature_name: str) -> Tuple[Any, Any]: + def _get_min_max_values(self, feature_name: str) -> tuple[t.Any, t.Any]: """ Get the smallest and largest values for the given table column. - The return type within the Tuple is determined by the column type. + The return type within the tuple is determined by the column type. Smallness and largeness is determined by the SQLAlchemy functions `min()` and `max()`. """ @@ -409,7 +410,7 @@ def _get_min_max_values(self, feature_name: str) -> Tuple[Any, Any]: return results.min_value, results.max_value - def _get_mode(self, feature_name: str) -> List[Tuple[Any, int]]: + def _get_mode(self, feature_name: str) -> list[tuple[t.Any, int]]: """ Get the most common value in the given feature/column. @@ -450,11 +451,11 @@ def _get_num_cases(self) -> int: num_rows = session.query(self.data).count() return num_rows - def _get_feature_names(self) -> List[str]: + def _get_feature_names(self) -> list[str]: return [c.name for c in self.data.columns] def _get_feature_type(self, feature_name: str # noqa: C901 - ) -> Tuple[Optional[FeatureType], Optional[Dict]]: + ) -> tuple[FeatureType | None, dict | None]: # Place here to avoid circular import from howso.client.exceptions import HowsoError for column in self.data.columns: @@ -548,7 +549,7 @@ def _get_feature_type(self, feature_name: str # noqa: C901 return None, None - def _infer_floating_point_attributes(self, feature_name: str) -> Dict: + def _infer_floating_point_attributes(self, feature_name: str) -> dict: if ( self._is_primary_key(feature_name) or self._is_foreign_key(feature_name) @@ -600,7 +601,7 @@ def _infer_floating_point_attributes(self, feature_name: str) -> Dict: return attributes - def _infer_datetime_attributes(self, feature_name: str) -> Dict: + def _infer_datetime_attributes(self, feature_name: str) -> dict: # Although rare, it is plausible that a datetime field could be a # primary- or foreign-key. if ( @@ -627,7 +628,7 @@ def _infer_datetime_attributes(self, feature_name: str) -> Dict: 'date_time_format': dt_format, } - def _infer_date_attributes(self, feature_name: str) -> Dict: + def _infer_date_attributes(self, feature_name: str) -> dict: # Although rare, it is plausible that a date field could be a # primary- or foreign-key. if ( @@ -644,13 +645,13 @@ def _infer_date_attributes(self, feature_name: str) -> Dict: 'date_time_format': ISO_8601_DATE_FORMAT, } - def _infer_time_attributes(self, feature_name: str) -> Dict: + def _infer_time_attributes(self, feature_name: str) -> dict: return { 'type': 'continuous', 'data_type': 'number', } - def _infer_timedelta_attributes(self, feature_name: str) -> Dict: + def _infer_timedelta_attributes(self, feature_name: str) -> dict: # Although rare, it is plausible that a timedelta field could be a # primary- or foreign-key. if ( @@ -666,13 +667,13 @@ def _infer_timedelta_attributes(self, feature_name: str) -> Dict: 'data_type': 'number', } - def _infer_boolean_attributes(self, feature_name: str) -> Dict: + def _infer_boolean_attributes(self, feature_name: str) -> dict: return { 'type': 'nominal', 'data_type': 'boolean', } - def _infer_integer_attributes(self, feature_name: str) -> Dict: + def _infer_integer_attributes(self, feature_name: str) -> dict: # Most primary keys will be integer types (but not all). These are # always treated as nominals. if ( @@ -728,7 +729,7 @@ def _infer_integer_attributes(self, feature_name: str) -> Dict: return attributes - def _infer_string_attributes(self, feature_name: str) -> Dict: + def _infer_string_attributes(self, feature_name: str) -> dict: if ( self._is_primary_key(feature_name) or self._is_foreign_key(feature_name) @@ -751,7 +752,7 @@ def _infer_string_attributes(self, feature_name: str) -> Dict: else: return self._infer_unknown_attributes(feature_name) - def _infer_unknown_attributes(self, feature_name: str) -> Dict: + def _infer_unknown_attributes(self, feature_name: str) -> dict: return { 'type': 'nominal' } @@ -759,9 +760,9 @@ def _infer_unknown_attributes(self, feature_name: str) -> Dict: def _infer_feature_bounds(self, # noqa: C901 feature_attributes: Mapping[str, Mapping], feature_name: str, - tight_bounds: Optional[Iterable[str]] = None, - mode_bound_features: Optional[List[str]] = None, - ) -> Optional[Dict]: + tight_bounds: t.Optional[Iterable[str]] = None, + mode_bound_features: t.Optional[list[str]] = None, + ) -> dict | None: output = None allow_null = True original_type = feature_attributes[feature_name]['original_type'] @@ -902,7 +903,7 @@ def _infer_feature_bounds(self, # noqa: C901 return output - def _parse_column_type(self, full_type_str: str) -> Tuple[str, dict]: + def _parse_column_type(self, full_type_str: str) -> tuple[str, dict]: """ Determine column type from schema description of column. diff --git a/howso/utilities/feature_attributes/time_series.py b/howso/utilities/feature_attributes/time_series.py index fa95d88e..bc7da387 100644 --- a/howso/utilities/feature_attributes/time_series.py +++ b/howso/utilities/feature_attributes/time_series.py @@ -1,3 +1,6 @@ +from __future__ import annotations + +from collections.abc import Iterable from concurrent.futures import ( as_completed, Future, @@ -7,9 +10,7 @@ from math import e, isnan import multiprocessing as mp import os -from typing import ( - Dict, Iterable, Optional, Union -) +import typing as t import warnings import numpy as np @@ -42,13 +43,13 @@ def __init__(self, data: pd.DataFrame, time_feature_name: str): def _infer_delta_min_and_max( # noqa: C901 self, - features: Optional[Dict] = None, - datetime_feature_formats: Optional[Dict] = None, - id_feature_name: Optional[Union[str, Iterable[str]]] = None, - orders_of_derivatives: Optional[Dict] = None, - derived_orders: Optional[Dict] = None, - max_workers: Optional[int] = None - ) -> Dict: + features: t.Optional[dict] = None, + datetime_feature_formats: t.Optional[dict] = None, + id_feature_name: t.Optional[str | Iterable[str]] = None, + orders_of_derivatives: t.Optional[dict] = None, + derived_orders: t.Optional[dict] = None, + max_workers: t.Optional[int] = None + ) -> dict: """ Infer continuous feature delta_min, delta_max for each feature. @@ -276,7 +277,7 @@ def _infer_delta_min_and_max( # noqa: C901 return features - def _set_rate_delta_bounds(self, btype: str, bounds: Dict, features: Dict): + def _set_rate_delta_bounds(self, btype: str, bounds: dict, features: dict): """Set optinally-specified rate/delta bounds in the features dict.""" for feature in bounds.keys(): # Check for any problems @@ -304,29 +305,29 @@ def _set_rate_delta_bounds(self, btype: str, bounds: Dict, features: Dict): def _process( # noqa: C901 self, - features: Optional[Dict] = None, + features: t.Optional[dict] = None, infer_bounds: bool = True, - id_feature_name: Optional[Union[str, Iterable[str]]] = None, - time_invariant_features: Optional[Iterable[str]] = None, - datetime_feature_formats: Optional[Dict] = None, - dependent_features: Optional[dict] = None, - tight_bounds: Optional[Iterable[str]] = None, + id_feature_name: t.Optional[str | Iterable[str]] = None, + time_invariant_features: t.Optional[Iterable[str]] = None, + datetime_feature_formats: t.Optional[dict] = None, + dependent_features: t.Optional[dict] = None, + tight_bounds: t.Optional[Iterable[str]] = None, attempt_infer_extended_nominals: bool = False, - nominal_substitution_config: Optional[Dict[str, Dict]] = None, - include_extended_nominal_probabilities: Optional[bool] = False, + nominal_substitution_config: t.Optional[dict[str, dict]] = None, + include_extended_nominal_probabilities: t.Optional[bool] = False, include_sample: bool = False, - time_feature_is_universal: Optional[bool] = None, - time_series_type_default: Optional[str] = 'rate', - time_series_types_override: Optional[Dict] = None, - orders_of_derivatives: Optional[Dict] = None, - derived_orders: Optional[Dict] = None, - mode_bound_features: Optional[Iterable[str]] = None, - lags: Optional[Union[list, dict]] = None, - num_lags: Optional[Union[int, dict]] = None, - rate_boundaries: Optional[Dict] = None, - delta_boundaries: Optional[Dict] = None, - max_workers: Optional[int] = None, - ) -> Dict: + time_feature_is_universal: t.Optional[bool] = None, + time_series_type_default: t.Optional[str] = 'rate', + time_series_types_override: t.Optional[dict] = None, + orders_of_derivatives: t.Optional[dict] = None, + derived_orders: t.Optional[dict] = None, + mode_bound_features: t.Optional[Iterable[str]] = None, + lags: t.Optional[list | dict] = None, + num_lags: t.Optional[int | dict] = None, + rate_boundaries: t.Optional[dict] = None, + delta_boundaries: t.Optional[dict] = None, + max_workers: t.Optional[int] = None, + ) -> dict: """ Infer time series attributes. diff --git a/howso/utilities/installation_verification.py b/howso/utilities/installation_verification.py index e5405997..bc6bd724 100644 --- a/howso/utilities/installation_verification.py +++ b/howso/utilities/installation_verification.py @@ -1,5 +1,6 @@ from __future__ import annotations +from collections.abc import Callable, Iterable from dataclasses import dataclass from datetime import datetime, timedelta from enum import IntEnum @@ -27,6 +28,7 @@ ConnectionError = None from rich import print as rich_print from rich.progress import BarColumn, Progress, TaskProgressColumn, TextColumn, TimeElapsedColumn +from typing_extensions import TypeAlias try: from howso import engine @@ -91,7 +93,7 @@ class Status(IntEnum): OK = 4 -Requirements = t.Iterable[t.Union[type, object]] +Requirements: TypeAlias = Iterable[type | object] @dataclass @@ -99,7 +101,7 @@ class Check: """Store the specification of a single check.""" name: str - fn: t.Callable + fn: Callable client_required: t.Optional[str] = None other_requirements: t.Optional[Requirements] = None @@ -130,7 +132,7 @@ def __init__(self): ) def add_check(self, name: str, - fn: t.Callable, + fn: Callable, client_required: t.Optional[str] = None, other_requirements: t.Optional[Requirements] = None ): @@ -153,7 +155,7 @@ def add_check(self, name: str, """ if ( other_requirements and - not isinstance(other_requirements, t.Iterable) + not isinstance(other_requirements, Iterable) ): other_requirements = [other_requirements] self._checks.append( diff --git a/howso/utilities/internals.py b/howso/utilities/internals.py index c73fe051..18d4549c 100644 --- a/howso/utilities/internals.py +++ b/howso/utilities/internals.py @@ -7,7 +7,7 @@ from __future__ import annotations from collections import OrderedDict -from collections.abc import Iterable +from collections.abc import Generator, Iterable, Mapping from copy import deepcopy import datetime import decimal @@ -18,7 +18,7 @@ from pathlib import Path import random import re -from typing import Any, Generator, List, Mapping, NamedTuple, Optional, Tuple, TYPE_CHECKING, Union +import typing as t import unicodedata import uuid import warnings @@ -30,14 +30,14 @@ logger = logging.getLogger(__name__) -if TYPE_CHECKING: +if t.TYPE_CHECKING: from .monitors import ProgressTimer def deserialize_to_dataframe( - data: Iterable[Iterable[Any]] | Iterable[Mapping[str, Any]] | None, - columns: Optional[Iterable[str]] = None, - index: Optional[Iterable[Any]] = None + data: Iterable[Iterable[t.Any]] | Iterable[Mapping[str, t.Any]] | None, + columns: t.Optional[Iterable[str]] = None, + index: t.Optional[Iterable[t.Any]] = None ) -> pd.DataFrame: """ Deserialize data into a DataFrame. @@ -67,10 +67,10 @@ def deserialize_to_dataframe( def get_features_from_data( - data: Any, *, - data_parameter: Optional[str] = 'cases', - features_parameter: Optional[str] = 'features' -) -> List[str]: + data: t.Any, *, + data_parameter: t.Optional[str] = 'cases', + features_parameter: t.Optional[str] = 'features' +) -> list[str]: """ Retrieve feature names from dataframe columns. @@ -112,7 +112,7 @@ def get_features_from_data( f"`{data_parameter}` are not provided as a DataFrame.") -def serialize_models(obj: Any, *, exclude_null: bool = False) -> Any: +def serialize_models(obj: t.Any, *, exclude_null: bool = False) -> t.Any: """ Serialize client model instances. @@ -257,7 +257,7 @@ def preprocess_feature_attributes(features: Mapping | None) -> dict | None: return feature_attributes -def format_react_response(response, single_action=False): +def format_react_response(response: dict, single_action: bool = False): """ Reformat the react response into a dict of action and details. @@ -299,7 +299,7 @@ def format_react_response(response, single_action=False): return {'action': action, 'details': response} -def accumulate_react_result(accumulated_result, result): +def accumulate_react_result(accumulated_result: dict, result: dict) -> dict: """ Accumulate the results from multiple reacts responses. @@ -354,7 +354,7 @@ def random_handle() -> str: return uuid.uuid4().hex[-12:] -def slugify(value, allow_unicode=False): +def slugify(value: t.Any, allow_unicode: bool = False): """ Slugify a value. @@ -390,7 +390,7 @@ def slugify(value, allow_unicode=False): return re.sub(r"[-\s]+", "-", value).strip("-_") -def generate_cache_file_name(identifier, ext="txt"): +def generate_cache_file_name(identifier: str, ext: str = "txt"): """ Generate a unique cache file name. @@ -447,7 +447,7 @@ def insufficient_generation_check( return False -def sanitize_for_json(obj: Any): # noqa: C901 +def sanitize_for_json(obj: t.Any): # noqa: C901 """ Sanitizes data for JSON serialization. @@ -569,7 +569,7 @@ def sanitize_for_json(obj: Any): # noqa: C901 def readable_timedelta(delta: datetime.timedelta, *, microsecond_places: int = 2, - precision: Union[bool, int] = True, + precision: bool | int = True, ) -> str: """ Format timedelta to a readable string. @@ -656,25 +656,25 @@ class BatchScalingManager: # Threshold by which batch sizes will be increased/decreased until # request-response time falls between these two times - time_threshold: Tuple[datetime.timedelta, datetime.timedelta] = ( + time_threshold: tuple[datetime.timedelta, datetime.timedelta] = ( datetime.timedelta(seconds=60), datetime.timedelta(seconds=75)) # The batch size min and max (respectively) - size_limits: Tuple[int, Optional[int]] = (1, None) + size_limits: tuple[int, t.Optional[int]] = (1, None) # Limit by memory usage of request or response size (respectively) # In bytes, zero means no limit. - memory_limits: Tuple[int, int] = (10_000_000, 10_000_000) # 10MB + memory_limits: tuple[int, int] = (10_000_000, 10_000_000) # 10MB # Prevent raising batch size when the size of the request or response # (respectively) is within this range of the limit. - memory_limit_thresholds: Tuple[float, float] = (0.1, 0.1) # 10% + memory_limit_thresholds: tuple[float, float] = (0.1, 0.1) # 10% # The rate at which batches are scaled up and down (respectively) # See: https://en.wikipedia.org/wiki/Golden_ratio - size_multiplier: Tuple[float, float] = (1.618, 0.809) + size_multiplier: tuple[float, float] = (1.618, 0.809) - class SendOptions(NamedTuple): + class SendOptions(t.NamedTuple): """Options that can be passed to the generator.""" - tick_duration: Optional[datetime.timedelta] - memory_sizes: Optional[Tuple[int, int]] + tick_duration: t.Optional[datetime.timedelta] + memory_sizes: t.Optional[tuple[int, int]] def __init__(self, starting_size: int, progress_monitor: "ProgressTimer"): """Initialize a new BatchScalingManager instance.""" @@ -683,16 +683,16 @@ def __init__(self, starting_size: int, progress_monitor: "ProgressTimer"): @staticmethod def send( - gen: Generator[int, Optional["SendOptions"], None], + gen: Generator[int, t.Optional["SendOptions"], None], options: SendOptions - ) -> Union[int, None]: + ) -> int | None: """Helper to send to generator and return None when stopped.""" try: return gen.send(options) except StopIteration: return None - def gen_batch_size(self) -> Generator[int, Optional["SendOptions"], None]: + def gen_batch_size(self) -> Generator[int, t.Optional["SendOptions"], None]: """ Returns a generator to get the next batch size. @@ -725,8 +725,8 @@ def gen_batch_size(self) -> Generator[int, Optional["SendOptions"], None]: def scale( self, batch_size: int, - batch_duration: Optional[datetime.timedelta], - memory_sizes: Optional[Tuple[int, int]] + batch_duration: t.Optional[datetime.timedelta], + memory_sizes: t.Optional[tuple[int, int]] ) -> int: """ Scale batch size based on duration or memory size of the batch. @@ -823,7 +823,7 @@ def clamp(self, batch_size: int, batch_offset: int, total: int) -> int: return batch_size -def show_core_warnings(core_warnings): +def show_core_warnings(core_warnings: Iterable[str | dict]): """Warns the user for each warning returned from the core.""" # Import here to avoid circular import from ..client.exceptions import HowsoWarning @@ -835,7 +835,7 @@ def show_core_warnings(core_warnings): warnings.warn(msg, category=HowsoWarning) -def to_pandas_datetime_format(f): +def to_pandas_datetime_format(f: str): """ Normalize the pandas datetime format. @@ -877,19 +877,19 @@ class IgnoreWarnings: def __init__( self, - warning_types: Union[type[Warning], Iterable[type[Warning]]] + warning_types: type[Warning] | Iterable[type[Warning]] ): """Initialize a new `catch_warnings` instance.""" self._catch_warnings = warnings.catch_warnings() - self._warning_types = warning_types - - if not isinstance(self._warning_types, Iterable): - self._warning_types = [self._warning_types] + if not isinstance(warning_types, Iterable): + self._warning_types = [warning_types] + else: + self._warning_types = warning_types for warning_type in self._warning_types: self._check_warning_class(warning_type) @staticmethod - def _check_warning_class(warning_type): + def _check_warning_class(warning_type: type[Warning]): """Check correct warning type.""" if not issubclass(warning_type, Warning): warnings.warn( diff --git a/howso/utilities/json_wrapper.py b/howso/utilities/json_wrapper.py index ae882390..60bf398d 100644 --- a/howso/utilities/json_wrapper.py +++ b/howso/utilities/json_wrapper.py @@ -7,8 +7,11 @@ The `pysimdjson` package must be installed separately. """ +from __future__ import annotations + +from collections.abc import Callable import json -from typing import Any +import typing as t try: import simdjson # noqa @@ -16,7 +19,11 @@ simdjson = None -def detect_encoding(b): +if t.TYPE_CHECKING: + from _typeshed import SupportsRead + + +def detect_encoding(b: bytes | bytearray): """ Detect encoring. @@ -45,7 +52,7 @@ def dumps(*args, **kwargs): return json.dumps(*args, **kwargs) -def load(fp, *, object_hook=None, **kwargs) -> Any: +def load(fp: SupportsRead[str | bytes], *, object_hook: t.Optional[Callable[[dict], t.Any]] = None, **kwargs) -> t.Any: """ Use the fastest available `load` for JSON based on kwargs given. @@ -60,7 +67,7 @@ def load(fp, *, object_hook=None, **kwargs) -> Any: return json.load(fp, object_hook=object_hook, **kwargs) -def loads(s, *, object_hook=None, **kwargs) -> Any: +def loads(s: str | bytes | bytearray, *, object_hook: t.Optional[Callable[[dict], t.Any]] = None, **kwargs) -> t.Any: """ Use the fastest available `loads` for JSON based on kwargs given. diff --git a/howso/utilities/posix.py b/howso/utilities/posix.py index 69034139..26197547 100644 --- a/howso/utilities/posix.py +++ b/howso/utilities/posix.py @@ -1,10 +1,11 @@ +from __future__ import annotations + from ctypes import ( byref, c_int32, c_int64, c_uint, cast, CDLL, create_string_buffer, POINTER, ) from ctypes.util import find_library import sys -from typing import Union _LIBC = None @@ -18,8 +19,8 @@ class CLibError(Exception): """Problem instantiating the C Library.""" -def sysctl_by_name(name: Union[str, bytes], output_type: str = 'raw', - encoding: str = "UTF-8") -> Union[bytes, int, str]: +def sysctl_by_name(name: str | bytes, output_type: str = 'raw', + encoding: str = "UTF-8") -> bytes | int | str | None: """ Call `sysclt` with the provided key and return the result. @@ -38,7 +39,7 @@ def sysctl_by_name(name: Union[str, bytes], output_type: str = 'raw', Returns ------- - bytes, int or str + bytes, int, str or None The result of the `sysctl` call, possibly modified by the given `output_type`. @@ -77,6 +78,7 @@ def sysctl_by_name(name: Union[str, bytes], output_type: str = 'raw', return cast(buf, POINTER(c_int32)).contents.value if size.value == 8: return cast(buf, POINTER(c_int64)).contents.value + return None elif output_type == 'str': return buf.value.decode(encoding=encoding) else: # return_type == 'raw' diff --git a/howso/utilities/testing.py b/howso/utilities/testing.py index b9c38e6b..5955961a 100644 --- a/howso/utilities/testing.py +++ b/howso/utilities/testing.py @@ -1,9 +1,10 @@ """Utilities to aide in testing `howso-engine`.""" +from __future__ import annotations + +from collections.abc import Callable import os -from typing import Callable, Union from unittest.mock import patch -from howso.client import HowsoClient from howso.client.base import AbstractHowsoClient from howso.direct import HowsoDirectClient @@ -33,9 +34,9 @@ def get_test_options(): def get_configurationless_test_client( - client_class: Union[AbstractHowsoClient, Callable] = HowsoDirectClient, + client_class: type[AbstractHowsoClient] | Callable = HowsoDirectClient, **kwargs -) -> HowsoClient: +) -> AbstractHowsoClient: """ Return a client for use within testing. diff --git a/howso/utilities/utilities.py b/howso/utilities/utilities.py index 68e7efaf..dd7c1e5a 100644 --- a/howso/utilities/utilities.py +++ b/howso/utilities/utilities.py @@ -183,7 +183,7 @@ def replace_none_with_nan(dat: Mapping) -> list[dict]: ] -def replace_nan_with_none(dat): +def replace_nan_with_none(dat: list[list]): """ Replace None values with NaN values. @@ -228,7 +228,7 @@ def reshape_data(x: np.ndarray, y: np.ndarray) -> tuple[np.ndarray, np.ndarray]: return x, y -def align_data(x, y=None): +def align_data(x: np.ndarray, y: t.Optional[np.ndarray] = None) -> np.ndarray | tuple[np.ndarray, np.ndarray]: """ Check and fix type problems with the data and reshape it. @@ -243,7 +243,8 @@ def align_data(x, y=None): Returns ------- - numpy.ndarray, numpy.ndarray or numpy.ndarray + numpy.ndarray or tuple of numpy.ndarray + The aligned data. """ if x.dtype == object: x = x.astype(float) @@ -282,7 +283,7 @@ def replace_doublemax_with_infinity(dat: t.Any) -> t.Any: return dat -def dprint(debug, *argc, **kwargs): +def dprint(debug: bool | int, *argc, **kwargs): """ Print based on debug levels. @@ -541,7 +542,7 @@ def validate_features(features: Mapping[str, Mapping], f"feature type - '{f_type}'") -def validate_datetime_iso8061(datetime_value, feature): +def validate_datetime_iso8061(datetime_value: str, feature: str): """ Check that the passed in datetime value adheres to the ISO 8601 format. @@ -721,7 +722,7 @@ def serialize_datetimes(cases: list[list], columns: Iterable[str], # noqa: C901 case[i] = dt_value -def is_valid_uuid(value, version=4): +def is_valid_uuid(value: str | uuid.UUID, version: int = 4) -> bool: """ Check if a given string is a valid uuid. @@ -861,11 +862,11 @@ class UserFriendlyExit: If True, emit more information """ - def __init__(self, verbose=False): + def __init__(self, verbose: bool = False): """Construct a UserFriendlyExit instance.""" self.verbose = verbose - def __call__(self, msg="An unexpected exit occurred.", exception=None): + def __call__(self, msg: str = "An unexpected exit occurred.", exception: t.Optional[Exception] = None): """ Exit, but print the exception first. @@ -888,7 +889,11 @@ def __call__(self, msg="An unexpected exit occurred.", exception=None): raise StopExecution -def get_kwargs(kwargs, descriptors, warn_on_extra=False): # noqa: C901 +def get_kwargs( # noqa: C901 + kwargs: dict, + descriptors: Iterable[Mapping | Collection | str], + warn_on_extra: bool = False +): """ Decompose kwargs into a tuple of return values. @@ -900,7 +905,7 @@ def get_kwargs(kwargs, descriptors, warn_on_extra=False): # noqa: C901 kwargs : dict Mapping of keys and values (kwargs) - descriptors : + descriptors : Iterable of {Mapping or Collection or str} An iterable of descriptors for how to handle each item in kwargs. Each descriptor can be a mapping, another iterable, or a single string. @@ -1074,7 +1079,7 @@ def check_feature_names(features: Mapping, return ret_value -def build_react_series_df(react_series_response, series_index=None): +def build_react_series_df(react_series_response: Mapping, series_index: t.Optional[str] = None): """ Build a DataFrame from the response from react_series. @@ -1083,9 +1088,9 @@ def build_react_series_df(react_series_response, series_index=None): Parameters ---------- - react_series_response : Dictionary + react_series_response : Mapping The response dictionary from a call to react_series. - series_index : String + series_index : str The name of the series index feature, which will index each series in the form 'series_', e.g., series_1, series_1, ..., series_n. If None, does not include the series index feature in the returned @@ -1123,7 +1128,7 @@ def build_react_series_df(react_series_response, series_index=None): return pd.DataFrame(data, columns=columns) -def date_format_is_iso(f): +def date_format_is_iso(f: str) -> bool: """ Check if datetime format is ISO8601. @@ -1148,7 +1153,7 @@ def date_format_is_iso(f): return False -def deep_update(base, updates): +def deep_update(base: dict | None, updates: dict): """ Update dict `base` with updates from dict `updates` in a "deep" fashion. @@ -1164,9 +1169,10 @@ def deep_update(base, updates): Returns ------- - dict : The updated dictionary. + dict + The updated dictionary. """ - if all((isinstance(d, dict) for d in (base, updates))): + if isinstance(base, dict) and isinstance(updates, dict): for k, v in updates.items(): base[k] = deep_update(base.get(k), v) return base