diff --git a/Backtesting.py b/mlframe/Backtesting.py similarity index 100% rename from Backtesting.py rename to mlframe/Backtesting.py diff --git a/Data.py b/mlframe/Data.py similarity index 100% rename from Data.py rename to mlframe/Data.py diff --git a/FeatureEngineering.py b/mlframe/FeatureEngineering.py similarity index 100% rename from FeatureEngineering.py rename to mlframe/FeatureEngineering.py diff --git a/Features.py b/mlframe/Features.py similarity index 100% rename from Features.py rename to mlframe/Features.py diff --git a/LICENSE b/mlframe/LICENSE similarity index 100% rename from LICENSE rename to mlframe/LICENSE diff --git a/Models.py b/mlframe/Models.py similarity index 100% rename from Models.py rename to mlframe/Models.py diff --git a/OldEnsembling.py b/mlframe/OldEnsembling.py similarity index 100% rename from OldEnsembling.py rename to mlframe/OldEnsembling.py diff --git a/__init__.py b/mlframe/__init__.py similarity index 100% rename from __init__.py rename to mlframe/__init__.py diff --git a/arrays.py b/mlframe/arrays.py similarity index 100% rename from arrays.py rename to mlframe/arrays.py diff --git a/baselines.py b/mlframe/baselines.py similarity index 100% rename from baselines.py rename to mlframe/baselines.py diff --git a/boruta_shap.py b/mlframe/boruta_shap.py similarity index 100% rename from boruta_shap.py rename to mlframe/boruta_shap.py diff --git a/calibration.py b/mlframe/calibration.py similarity index 100% rename from calibration.py rename to mlframe/calibration.py diff --git a/cluster.py b/mlframe/cluster.py similarity index 100% rename from cluster.py rename to mlframe/cluster.py diff --git a/config.py b/mlframe/config.py similarity index 100% rename from config.py rename to mlframe/config.py diff --git a/core.py b/mlframe/core.py similarity index 100% rename from core.py rename to mlframe/core.py diff --git a/custom_estimators.py b/mlframe/custom_estimators.py similarity index 100% rename from custom_estimators.py rename to mlframe/custom_estimators.py diff --git a/eda.py b/mlframe/eda.py similarity index 100% rename from eda.py rename to mlframe/eda.py diff --git a/ensembling.py b/mlframe/ensembling.py similarity index 100% rename from ensembling.py rename to mlframe/ensembling.py diff --git a/estimators.py b/mlframe/estimators.py similarity index 100% rename from estimators.py rename to mlframe/estimators.py diff --git a/evaluation.py b/mlframe/evaluation.py similarity index 100% rename from evaluation.py rename to mlframe/evaluation.py diff --git a/ewma.py b/mlframe/ewma.py similarity index 100% rename from ewma.py rename to mlframe/ewma.py diff --git a/experiments.py b/mlframe/experiments.py similarity index 100% rename from experiments.py rename to mlframe/experiments.py diff --git a/explainability.py b/mlframe/explainability.py similarity index 100% rename from explainability.py rename to mlframe/explainability.py diff --git a/feature_cleaning.py b/mlframe/feature_cleaning.py similarity index 100% rename from feature_cleaning.py rename to mlframe/feature_cleaning.py diff --git a/mlframe/feature_engineering/__init__.py b/mlframe/feature_engineering/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/mlframe/feature_engineering/basic.py b/mlframe/feature_engineering/basic.py new file mode 100644 index 0000000..c3b995e --- /dev/null +++ b/mlframe/feature_engineering/basic.py @@ -0,0 +1,57 @@ +"""Basic feature engineering for ML.""" + +# pylint: 
disable=wrong-import-order,wrong-import-position,unidiomatic-typecheck,pointless-string-statement
+
+# ----------------------------------------------------------------------------------------------------------------------------
+# LOGGING
+# ----------------------------------------------------------------------------------------------------------------------------
+
+import logging
+
+logger = logging.getLogger(__name__)
+
+# ----------------------------------------------------------------------------------------------------------------------------
+# Packages
+# ----------------------------------------------------------------------------------------------------------------------------
+
+from pyutilz.pythonlib import ensure_installed  # lint: disable=ungrouped-imports,disable=wrong-import-order
+
+# ensure_installed("numpy pandas")
+
+# ----------------------------------------------------------------------------------------------------------------------------
+# Normal Imports
+# ----------------------------------------------------------------------------------------------------------------------------
+
+from typing import *
+
+import warnings
+
+warnings.simplefilter(action="ignore", category=FutureWarning)
+import pandas as pd, numpy as np
+
+
+def create_date_features(
+    df: pd.DataFrame,
+    cols: list,
+    delete_original_cols: bool = True,
+    bulk: bool = False,
+    methods: dict = {"day": np.int8, "weekday": np.int8, "month": np.int8},  # "week": np.int8, #, "quarter": np.int8 # , "year": np.int16
+) -> pd.DataFrame:
+    """Add integer calendar components (day, weekday, month, ...) of the given datetime columns as new features."""
+    if len(cols) == 0:
+        return df
+
+    if bulk:
+        tmp = {}
+    else:
+        tmp = df
+    for col in cols:
+        for method, dtype in methods.items():
+            tmp[col + "_" + method] = getattr(df[col].dt, method).astype(dtype)
+
+    if delete_original_cols:
+        df.drop(columns=cols, inplace=True)
+
+    if bulk:
+        df = pd.concat([df, pd.DataFrame(tmp)], axis=1)
+
+    return df
diff --git a/mlframe/feature_engineering/categorical.py b/mlframe/feature_engineering/categorical.py
new file mode 100644
index 0000000..d9355e0
--- /dev/null
+++ b/mlframe/feature_engineering/categorical.py
@@ -0,0 +1,126 @@
+"""Categorical feature engineering for ML.
Optimized & rich set of aggregates for 1d vectors.""" + +# pylint: disable=wrong-import-order,wrong-import-position,unidiomatic-typecheck,pointless-string-statement + +# ---------------------------------------------------------------------------------------------------------------------------- +# LOGGING +# ---------------------------------------------------------------------------------------------------------------------------- + +import logging + +logger = logging.getLogger(__name__) + +# ---------------------------------------------------------------------------------------------------------------------------- +# Packages +# ---------------------------------------------------------------------------------------------------------------------------- + +from pyutilz.pythonlib import ( + ensure_installed, +) # lint: disable=ungrouped-imports,disable=wrong-import-order + +# ensure_installed("numpy pandas scipy") + +# ---------------------------------------------------------------------------------------------------------------------------- +# Normal Imports +# ---------------------------------------------------------------------------------------------------------------------------- + +from typing import * + +import warnings + +warnings.simplefilter(action="ignore", category=FutureWarning) + +from antropy import * +import pandas as pd, numpy as np +from scipy.stats import entropy +from .numerical import compute_numaggs, get_numaggs_names + +import warnings + +warnings.filterwarnings("ignore", message="nperseg =") + +numaggs_names = get_numaggs_names() +directional_numaggs_names = get_numaggs_names(directional_only=True) + + +def compute_countaggs( + arr: pd.Series, + counts_normalize: bool = True, # use relative or absolute counts + counts_compute_numaggs: bool = True, # compute numerical aggregates over counts data or not + counts_top_n: int = 1, # return that many highest/lowest value counts + counts_return_top_counts: bool = True, # return top counts + counts_return_top_values: bool = True, # return top values + counts_compute_values_numaggs: bool = False, # if all values are in fact numerical, compute numaggs for them rather than their counts (ordered only, in order of their counts) + numerical_kwargs: dict = dict(return_unsorted_stats=False), +): + """For some variables, especially with many repeated values, or categorical, we can do value_counts(normalize=True or False). 
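    For instance (an illustrative sketch only), pd.Series(["a", "a", "a", "b"]).value_counts(normalize=True) yields the counts [0.75, 0.25]
    for the values ["a", "b"]; it is these counts (and, optionally, the top values themselves) that the aggregates below are computed over.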
Further we can return + 1) Top N highest/lowest values along with their counts (missing are padded with NaNs) + 2) numaggs over counts data + 3) if variable is numeric, numaggs(timeseries_features=True) for values series sorted by counts (timeseries_features=True leaves only aggregates depending on the order of values, + 'cause otherwise it's simply a duplication of num_aggs over regular series) + """ + value_counts = arr.value_counts(normalize=counts_normalize) + value_counts=value_counts[value_counts > 0] + values = value_counts.index.values + counts = value_counts.values + + res = [] + + if counts_compute_numaggs: + res.extend(compute_numaggs(arr=counts, **numerical_kwargs)) + + if counts_top_n: + + if len(counts) >= counts_top_n: + extra = [] + else: + extra = [np.nan] * (counts_top_n - len(counts)) + + if counts_return_top_counts: + res.extend(counts[:counts_top_n].tolist() + extra) + res.extend(extra + counts[-counts_top_n:].tolist()) + if counts_return_top_values: + res.extend(values[:counts_top_n].tolist() + extra) + res.extend(extra + values[-counts_top_n:].tolist()) + + if counts_compute_values_numaggs: + if pd.api.types.is_numeric_dtype(values): + processed_numerical_kwargs = numerical_kwargs.copy() + processed_numerical_kwargs["directional_only"] = True + res.extend(compute_numaggs(arr=values, **processed_numerical_kwargs)) + else: + res.extend([np.nan] * len(directional_numaggs_names)) + + return res + + +def get_countaggs_names( + counts_normalize: bool = True, # use relative or absolute counts + counts_compute_numaggs: bool = True, # compute numerical aggregates over counts data or not + counts_top_n: int = 1, # return that many highest/lowest value counts + counts_return_top_counts: bool = True, # return top counts + counts_return_top_values: bool = True, # return top values + counts_compute_values_numaggs: bool = True, # if all values are in fact numerical, compute numaggs for them rather than their counts (ordered only, in order of their counts) + numerical_kwargs: dict = dict(return_unsorted_stats=False), +) -> list: + + res = [] + + if counts_compute_numaggs: + res.extend([feat + "_" + ("cntnrm" if counts_normalize else "cnt") for feat in get_numaggs_names(**numerical_kwargs)]) + + if counts_top_n: + if counts_return_top_counts: + res.extend(["top_" + str(i + 1) + "_vcnt" for i in range(counts_top_n)]) + res.extend(["btm_" + str(counts_top_n - i) + "_vcnt" for i in range(counts_top_n)]) + if counts_return_top_values: + res.extend(["top_" + str(i + 1) + "_vval" for i in range(counts_top_n)]) + res.extend(["btm_" + str(counts_top_n - i) + "_vval" for i in range(counts_top_n)]) + + if counts_compute_values_numaggs: + # if pd.api.types.is_numeric_dtype(values): + processed_numerical_kwargs = numerical_kwargs.copy() + processed_numerical_kwargs["directional_only"] = True + res.extend([feat + "_vvls" for feat in directional_numaggs_names]) + + return res diff --git a/mlframe/feature_engineering/hurst.py b/mlframe/feature_engineering/hurst.py new file mode 100644 index 0000000..ad65efd --- /dev/null +++ b/mlframe/feature_engineering/hurst.py @@ -0,0 +1,124 @@ +"""Compute the Hurst Exponent of an 1D array by the means of R/S analisys: + + https://en.wikipedia.org/wiki/Hurst_exponent +""" + +# pylint: disable=wrong-import-order,wrong-import-position,unidiomatic-typecheck,pointless-string-statement + +# ---------------------------------------------------------------------------------------------------------------------------- +# LOGGING +# 
----------------------------------------------------------------------------------------------------------------------------
+
+import logging
+
+logger = logging.getLogger(__name__)
+
+# ----------------------------------------------------------------------------------------------------------------------------
+# Packages
+# ----------------------------------------------------------------------------------------------------------------------------
+
+from pyutilz.pythonlib import (
+    ensure_installed,
+)  # lint: disable=ungrouped-imports,disable=wrong-import-order
+
+ensure_installed("numpy pandas numba scipy sklearn antropy")
+
+# ----------------------------------------------------------------------------------------------------------------------------
+# Normal Imports
+# ----------------------------------------------------------------------------------------------------------------------------
+
+from typing import *
+import numpy as np
+from numba import njit
+
+# ----------------------------------------------------------------------------------------------------------------------------
+# Inits
+# ----------------------------------------------------------------------------------------------------------------------------
+
+fastmath = False
+
+# ----------------------------------------------------------------------------------------------------------------------------
+# Core funcs
+# ----------------------------------------------------------------------------------------------------------------------------
+
+
+@njit(fastmath=fastmath)
+def compute_hurst_rs(arr: np.ndarray, agg_func: object = np.mean):
+    """Computes the R/S stat for a single window."""
+
+    mean = agg_func(arr)
+
+    deviations = arr - mean
+    Z = np.cumsum(deviations)
+    R = np.max(Z) - np.min(Z)
+    S = np.std(arr)  # , ddof=1
+
+    if R == 0 or S == 0:
+        return 0.0  # to skip this interval due to the undefined R/S ratio
+
+    return R / S
+
+
+@njit(fastmath=fastmath)
+def precompute_hurst_exponent(
+    arr: np.ndarray, min_window: int = 5, max_window: int = None, windows_log_step: float = 0.25, take_diffs: bool = True, agg_func: object = np.mean
+):
+    """Computes the aggregated R/S stat for a log-spaced grid of window sizes (inputs for the final log-log fit)."""
+
+    # Get diffs, if needed
+
+    if take_diffs:
+        arr = arr[1:] - arr[:-1]
+
+    L = len(arr)
+
+    # Split parent array several times into a number of equal chunks, increasing the chunk length
+
+    max_window = max_window or (L - 1)
+    window_sizes = (10 ** np.arange(np.log10(min_window), np.log10(max_window), windows_log_step)).astype(np.int32)
+    # window_sizes.append(L)
+
+    RS = []
+    used_window_sizes = []
+    for w in window_sizes:
+        rs = []
+        for start in range(0, L, w):
+            if (start + w) >= L:
+                break
+            partial_rs = compute_hurst_rs(arr[start : start + w])  # , agg_func=agg_func)
+            if partial_rs:
+                rs.append(partial_rs)
+        if rs:
+            RS.append(agg_func(np.array(rs)))
+            used_window_sizes.append(w)
+
+    return used_window_sizes, RS
+
+
+def compute_hurst_exponent(arr: np.ndarray, min_window: int = 5, max_window: int = None, windows_log_step: float = 0.25, take_diffs: bool = False) -> tuple:
+    """Main entrypoint to compute the Hurst Exponent (and the constant) of a numerical array."""
+    if len(arr) < min_window:
+        return np.nan, np.nan
+    window_sizes, rs = precompute_hurst_exponent(
+        arr=arr, min_window=min_window, max_window=max_window, windows_log_step=windows_log_step, take_diffs=take_diffs
+    )
+    x = np.vstack([np.log10(window_sizes), np.ones(len(rs))]).T
+    h, c = np.linalg.lstsq(x, np.log10(rs), rcond=-1)[0]
+    c = 10**c
+    return h, c
+
+
+def 
hurst_testing(): + + # pip install hurst + + from hurst import random_walk + + brownian = random_walk(1000, proba=0.5) + print(compute_hurst_exponent(np.array(brownian))) + + persistent = random_walk(1000, proba=0.7) + print(compute_hurst_exponent(np.array(persistent))) + + antipersistent = random_walk(1000, proba=0.3) + print(compute_hurst_exponent(np.array(antipersistent))) diff --git a/mlframe/feature_engineering/numerical.py b/mlframe/feature_engineering/numerical.py new file mode 100644 index 0000000..4e61f6e --- /dev/null +++ b/mlframe/feature_engineering/numerical.py @@ -0,0 +1,1032 @@ +"""Numerical feature engineering for ML. Optimized & rich set of aggregates for 1d vectors.""" + +# pylint: disable=wrong-import-order,wrong-import-position,unidiomatic-typecheck,pointless-string-statement + +# ---------------------------------------------------------------------------------------------------------------------------- +# LOGGING +# ---------------------------------------------------------------------------------------------------------------------------- + +import logging + +logger = logging.getLogger(__name__) + +# ---------------------------------------------------------------------------------------------------------------------------- +# Packages +# ---------------------------------------------------------------------------------------------------------------------------- + +from pyutilz.pythonlib import ( + ensure_installed, +) # lint: disable=ungrouped-imports,disable=wrong-import-order + +ensure_installed("numpy numba sklearn antropy astropy entropy_estimators") # npeet? + +# ---------------------------------------------------------------------------------------------------------------------------- +# Normal Imports +# ---------------------------------------------------------------------------------------------------------------------------- + +from typing import * + +import numba +import numpy as np, pandas as pd +from antropy import * +from astropy.stats import histogram +from entropy_estimators import continuous +from sklearn.feature_selection import mutual_info_regression +from mlframe.feature_engineering.hurst import compute_hurst_exponent +from pyutilz.parallel import parallel_run + +from scipy.stats import entropy, kstest +from scipy import stats + +import warnings + +warnings.filterwarnings("ignore", message="nperseg =") +warnings.simplefilter(action="ignore", category=FutureWarning) +warnings.simplefilter(action="ignore", category=RuntimeWarning) + +# ---------------------------------------------------------------------------------------------------------------------------- +# Inits +# ---------------------------------------------------------------------------------------------------------------------------- + +fastmath = False +empty_float32_array = np.array([], dtype=np.float32) + + +def cont_entropy(arr: np.ndarray, bins: str = "scott") -> float: + """Entropy of a continuous distribution""" + try: + hist, bin_edges = histogram(arr, bins=bins) # np.histogram(arr, bins=bins, density=True) + ent = -(hist * np.log(hist + 1e-60)).sum() + except Exception as e: + return np.nan + return ent + + +entropy_funcs = (cont_entropy, continuous.get_h, app_entropy, svd_entropy, sample_entropy, petrosian_fd, perm_entropy, katz_fd, detrended_fluctuation) # +entropy_funcs_names = [f.__name__ for f in entropy_funcs] + +distributions = (stats.levy_l, stats.logistic, stats.pareto) # stats.logistic, stats.pareto +default_dist_responses = dict(levy_l=[np.nan, np.nan], logistic=[np.nan, 
np.nan], pareto=[np.nan, np.nan, np.nan])
+
+LARGE_CONST = 1e3
+
+
+def get_distributions_features_names() -> list:
+    distributions_features_names = []
+    for dist in distributions:
+        for i in range(len(default_dist_responses[dist.name])):
+            distributions_features_names.append(dist.name + str(i + 1))
+        distributions_features_names.append(dist.name + "_kss")
+        distributions_features_names.append(dist.name + "_kspval")
+    return distributions_features_names
+
+
+distributions_features_names = get_distributions_features_names()
+
+default_quantiles: list = [0.1, 0.25, 0.5, 0.75, 0.9]  # list vs ndarray gives advantage 125 µs ± 2.79 µs per loop vs 140 µs ± 8.11 µs per loop
+
+
+@numba.njit(fastmath=fastmath)
+def compute_simple_stats_numba(arr: np.ndarray) -> tuple:
+    minval, maxval, argmin, argmax = arr[0], arr[0], 0, 0
+    size = 0
+    total, std_val = 0.0, 0.0
+
+    for i, next_value in enumerate(arr):
+        if np.isfinite(next_value):
+            size += 1
+            total += next_value
+            if next_value < minval:
+                minval = next_value
+                argmin = i
+            elif next_value > maxval:
+                maxval = next_value
+                argmax = i
+
+    if size == 0:
+        size = len(arr)
+
+    if size:
+        mean_value = total / size
+    else:
+        mean_value = 0.0
+        minval, maxval = 0.0, 0.0
+
+    for i, next_value in enumerate(arr):
+        if np.isfinite(next_value):
+            d = next_value - mean_value
+            summand = d * d
+            std_val = std_val + summand
+    if size:
+        std_val = np.sqrt(std_val / size)
+    else:
+        std_val = 0.0
+    return minval, maxval, argmin, argmax, mean_value, std_val
+
+
+def get_simple_stats_names() -> list:
+    return "min,max,argmin,argmax,mean,std".split(",")
+
+
+@numba.njit(fastmath=fastmath)
+def compute_numerical_aggregates_numba(
+    arr: np.ndarray,
+    weights: np.ndarray = None,
+    geomean_log_mode: bool = False,
+    directional_only: bool = False,
+    whiten_means: bool = True,
+    return_drawdown_stats: bool = False,
+    return_profit_factor: bool = False,
+    return_n_zer_pos_int: bool = True,
+    return_exotic_means: bool = True,
+    return_unsorted_stats: bool = True,
+) -> list:
+    """Compute statistical aggregates over 1d array of float32 values.
+    E mid2(abs(x-mid1(X))) where mid1, mid2=averages of any kind
+    E Error functions can sometimes make great features themselves...
+    V What happens first: min or max? Add relative percentage of min/max indices
+    V Add absolute values of min/max indices?
+    V Add the number of crossings of the mean and median values, and of the slope line? (trend reversions)
+        Although this can also be obtained by calling the stats on a normalized or detrended series (x-X_avg) or (x-(slope*x+x[0]))
+    V remove gaps. That is a second-order statistic and should be computed separately, either from differences or from ratios.
+    V compute weighted stats in a separate call (and not only the arithmetic mean, but ALL of the means).
+    Add:
+    V cubic mean,
+    V entropy
+    V hurst
+    V R2
+    E? winsorized mean (https://ru.wikipedia.org/wiki/%D0%92%D0%B8%D0%BD%D0%B7%D0%BE%D1%80%D0%B8%D0%B7%D0%BE%D0%B2%D0%B0%D0%BD%D0%BD%D0%BE%D0%B5_%D1%81%D1%80%D0%B5%D0%B4%D0%BD%D0%B5%D0%B5).
+    E? trimmed (truncated) mean,
+    E? tukey mean
+    V fit variable to a number of known distributions!! 
their params become new features + V drawdowns, negative drawdowns (for shorts), dd duration (%) + V Number of MAX/MIN refreshers during period + V numpeaks + """ + + size = len(arr) + + first = arr[0] + last = arr[-1] + if first: + last_to_first = last / first + else: + last_to_first = LARGE_CONST * np.sign(last) + + if directional_only: + arithmetic_mean = np.mean(arr) + return [arithmetic_mean, last_to_first] + + ninteger, npositive, cnt_nonzero = 0, 0, 0 + sum_positive, sum_negative = 0.0, 0.0 + + if not geomean_log_mode: + geometric_mean = 1.0 + else: + geometric_mean = 0.0 + + arithmetic_mean, quadratic_mean, qubic_mean, harmonic_mean = 0.0, 0.0, 0.0, 0.0 + weighted_arithmetic_mean = 0.0 + if weights is not None: + weighted_geometric_mean, weighted_arithmetic_mean, weighted_quadratic_mean, weighted_qubic_mean, weighted_harmonic_mean = ( + geometric_mean, + arithmetic_mean, + quadratic_mean, + qubic_mean, + harmonic_mean, + ) + sum_weights = 0.0 + + maximum, minimum = first, first + max_index, min_index = 0, 0 + + if return_drawdown_stats: + + pos_dd_start_idx, neg_dd_start_idx = 0, 0 + + pos_dds = np.empty(shape=size, dtype=np.float32) + pos_dd_durs = np.empty(shape=size, dtype=np.float32) + + neg_dds = np.empty(shape=size, dtype=np.float32) + neg_dd_durs = np.empty(shape=size, dtype=np.float32) + + nmaxupdates, nminupdates = 0, 0 + + n_last_crossings, n_last_touches = 0, 0 + prev_d = None + + for i, next_value in enumerate(arr): + if weights is not None: + next_weight = weights[i] + sum_weights += weights[i] + if weights is not None: + weighted_arithmetic_mean += next_value * next_weight + + if return_unsorted_stats: + d = next_value - last + if prev_d is not None: + mul = d * prev_d + if mul < 0: + n_last_crossings += 1 + elif mul == 0.0: + n_last_touches += 1 + else: + if next_value == last: + n_last_touches += 1 + prev_d = d + + arithmetic_mean += next_value + + if return_exotic_means: + temp_value = next_value * next_value + quadratic_mean += temp_value + if weights is not None: + weighted_quadratic_mean += temp_value * next_weight + + temp_value = temp_value * next_value + qubic_mean += temp_value + if weights is not None: + weighted_qubic_mean += temp_value * next_weight + + if next_value > maximum: + maximum = next_value + max_index = i + nmaxupdates += 1 + elif next_value < minimum: + minimum = next_value + min_index = i + nminupdates += 1 + + # ---------------------------------------------------------------------------------------------------------------------------- + # Drawdowns + # ---------------------------------------------------------------------------------------------------------------------------- + + if return_drawdown_stats: + # pos + dd = maximum - next_value + pos_dds[i] = dd + if dd == 0.0: + pos_dd_start_idx = i + pos_dd_durs[i] = i - pos_dd_start_idx + + # neg + dd = next_value - minimum + neg_dds[i] = dd + if dd == 0.0: + neg_dd_start_idx = i + neg_dd_durs[i] = i - neg_dd_start_idx + + if next_value: + cnt_nonzero = cnt_nonzero + 1 + if return_exotic_means: + addend = 1 / next_value + harmonic_mean += addend + if weights is not None: + weighted_harmonic_mean += next_weight * addend + + if return_n_zer_pos_int: + frac = next_value % 1 + if not frac: + ninteger = ninteger + 1 + + if next_value > 0: + npositive = npositive + 1 + sum_positive += next_value + if return_exotic_means: + if not geomean_log_mode: + geometric_mean *= next_value + if weights is not None: + weighted_geometric_mean *= next_value**next_weight + if geometric_mean > 1e100 or 
geometric_mean < 1e-100: + # convert to log mode + geomean_log_mode = True + geometric_mean = np.log(float(geometric_mean)) + if weights is not None: + weighted_geometric_mean = np.log(float(weighted_geometric_mean)) + else: + addend = np.log(next_value) + geometric_mean += addend + if weights is not None: + weighted_geometric_mean += next_weight * addend + else: + sum_negative += next_value + + arithmetic_mean = arithmetic_mean / size + if weights is not None: + weighted_arithmetic_mean = weighted_arithmetic_mean / sum_weights + + if return_exotic_means: + quadratic_mean = np.sqrt(quadratic_mean / size) + qubic_mean = (qubic_mean / size) ** (1 / 3) + if npositive: + if not geomean_log_mode: + geometric_mean = geometric_mean ** (1 / size) + else: + geometric_mean = np.exp(geometric_mean / size) + else: + geometric_mean = np.nan + if harmonic_mean: + harmonic_mean = size / harmonic_mean + else: + harmonic_mean = np.nan + + if weights is not None: + weighted_quadratic_mean = np.sqrt(weighted_quadratic_mean / sum_weights) + weighted_qubic_mean = (weighted_qubic_mean / sum_weights) ** (1 / 3) + if npositive and sum_weights != 0.0: + if not geomean_log_mode: + weighted_geometric_mean = weighted_geometric_mean ** (1 / sum_weights) + else: + weighted_geometric_mean = np.exp(weighted_geometric_mean / sum_weights) + if weighted_geometric_mean == np.inf: + weighted_geometric_mean = 0.0 + else: + weighted_geometric_mean = np.nan + if weighted_harmonic_mean: + weighted_harmonic_mean = sum_weights / weighted_harmonic_mean + else: + weighted_harmonic_mean = np.nan + + if whiten_means: + quadratic_mean = quadratic_mean - arithmetic_mean + qubic_mean = qubic_mean - arithmetic_mean + geometric_mean = geometric_mean - arithmetic_mean + harmonic_mean = harmonic_mean - arithmetic_mean + if weights is not None: + weighted_quadratic_mean = weighted_quadratic_mean - weighted_arithmetic_mean + weighted_qubic_mean = weighted_qubic_mean - weighted_arithmetic_mean + weighted_geometric_mean = weighted_geometric_mean - weighted_arithmetic_mean + weighted_harmonic_mean = weighted_harmonic_mean - weighted_arithmetic_mean + + res = [arithmetic_mean] + if weights is not None: + res.append(weighted_arithmetic_mean) + res.extend( + ( + minimum, + maximum, + ) + ) # cant combine with the next statement as it's failing on interger inputs due to tuple dtypes mismatch + res.extend( + ( + arithmetic_mean / first if first else LARGE_CONST * np.sign(arithmetic_mean), + first / maximum if maximum else LARGE_CONST * np.sign(first), + minimum / first if first else LARGE_CONST * np.sign(minimum), + last_to_first, + ) + ) + + if return_unsorted_stats: # must be false for arrays known to be sorted + res.extend(((min_index + 1) / size if size else 0, (max_index + 1) / size if size else 0)) + res.extend((nmaxupdates, nminupdates, n_last_crossings, n_last_touches - 1)) + + if return_exotic_means: + res.extend((quadratic_mean, qubic_mean, geometric_mean, harmonic_mean)) + + if return_n_zer_pos_int: + res.extend((cnt_nonzero, npositive, ninteger)) + + if weights is not None: + if return_exotic_means: + res.extend((weighted_quadratic_mean, weighted_qubic_mean, weighted_geometric_mean, weighted_harmonic_mean)) + + if return_profit_factor: + profit_factor = sum_positive / -sum_negative if sum_negative != 0.0 else (0.0 if sum_positive == 0.0 else LARGE_CONST) + res.append(profit_factor) + + if return_drawdown_stats: + res.extend( + compute_numerical_aggregates_numba( + arr=pos_dds[1:], + weights=weights if weights is None else weights[1:], + 
geomean_log_mode=geomean_log_mode, + directional_only=directional_only, + whiten_means=whiten_means, + return_drawdown_stats=False, + return_profit_factor=False, + return_n_zer_pos_int=return_n_zer_pos_int, + return_exotic_means=return_exotic_means, + return_unsorted_stats=return_unsorted_stats, + ) + ) + res.extend( + compute_numerical_aggregates_numba( + arr=pos_dd_durs[1:] / (size - 1), + weights=weights if weights is None else weights[1:], + geomean_log_mode=geomean_log_mode, + directional_only=directional_only, + whiten_means=whiten_means, + return_drawdown_stats=False, + return_profit_factor=False, + return_n_zer_pos_int=return_n_zer_pos_int, + return_exotic_means=return_exotic_means, + return_unsorted_stats=return_unsorted_stats, + ) + ) + res.extend( + compute_numerical_aggregates_numba( + arr=neg_dds[1:], + weights=weights if weights is None else weights[1:], + geomean_log_mode=geomean_log_mode, + directional_only=directional_only, + whiten_means=whiten_means, + return_drawdown_stats=False, + return_profit_factor=False, + return_n_zer_pos_int=return_n_zer_pos_int, + return_exotic_means=return_exotic_means, + return_unsorted_stats=return_unsorted_stats, + ) + ) + res.extend( + compute_numerical_aggregates_numba( + arr=neg_dd_durs[1:] / (size - 1), + weights=weights if weights is None else weights[1:], + geomean_log_mode=geomean_log_mode, + directional_only=directional_only, + whiten_means=whiten_means, + return_drawdown_stats=False, + return_profit_factor=False, + return_n_zer_pos_int=return_n_zer_pos_int, + return_exotic_means=return_exotic_means, + return_unsorted_stats=return_unsorted_stats, + ) + ) + + return res + + +def get_basic_feature_names( + weights: np.ndarray = None, + whiten_means: bool = True, + return_drawdown_stats: bool = False, + return_profit_factor: bool = False, + return_n_zer_pos_int: bool = True, + return_exotic_means: bool = True, + return_unsorted_stats: bool = True, +): + basic_fields = ["arimean"] + if weights is not None: + basic_fields.append("warimean") + basic_fields.extend("min,max,arimean_to_first,first_to_max,min_to_first,last_to_first".split(",")) + + if return_unsorted_stats: # must be false for arrays known to be sorted + basic_fields.append("minr") + basic_fields.append("maxr") + basic_fields.append("nmaxupdates") + basic_fields.append("nminupdates") + basic_fields.append("lastcross") + basic_fields.append("lasttouch") + + if return_exotic_means: + basic_fields.extend(("quadmean,qubmean,geomean,harmmean" if not whiten_means else "quadmeanw,qubmeanw,geomeanw,harmmeanw").split(",")) + + if return_n_zer_pos_int: + basic_fields.append("nonzero") + basic_fields.append("npos") + basic_fields.append("nint") + + if weights is not None: + if return_exotic_means: + basic_fields.extend(("wquadmean,wqubmean,wgeomean,wharmmean" if not whiten_means else "wquadmeanw,wqubmeanw,wgeomeanw,wharmmeanw").split(",")) + + res = basic_fields.copy() + + if return_profit_factor: + res.append("profit_factor") + + if return_drawdown_stats: + for var in "pos_dd pos_dd_dur neg_dd neg_dd_dur".split(): + res.extend([var + "_" + field for field in basic_fields]) + + return res + + +def compute_nunique_modes_quantiles_numpy( + arr: np.ndarray, q: list = default_quantiles, quantile_method: str = "median_unbiased", max_modes: int = 10, return_unsorted_stats: bool = True +) -> list: + """For a 1d array, computes aggregates: + nunique + modes:min,max,mean + list of quantiles (0 and 1 included by default, therefore, min/max) + number of quantiles crossings + Can NOT be numba jitted 
(yet). + """ + if return_unsorted_stats: + vals, counts = np.unique(arr, return_counts=True) + + max_modes = min(max_modes, len(counts)) + + modes_indices = np.argpartition(counts, -max_modes)[-max_modes:] + modes_indices = modes_indices[np.argsort(counts[modes_indices])][::-1] + + first_mode_count = counts[modes_indices[0]] + + if first_mode_count == 1: + modes_min, modes_max, modes_mean, modes_qty = ( + np.nan, + np.nan, + np.nan, + np.nan, + ) # for higher stability. cnt=1 is not really a mode, rather a random pick. + else: + next_idx = modes_indices[0] + best_modes = [vals[next_idx]] + for i in range(1, max_modes): + next_idx = modes_indices[i] + next_mode_count = counts[next_idx] + if next_mode_count < first_mode_count: + break + else: + best_modes.append(vals[next_idx]) + best_modes = np.asarray(best_modes) + modes_min = best_modes.min() + modes_max = best_modes.max() + modes_mean = best_modes.mean() + modes_qty = len(best_modes) + + nuniques = len(vals) + + res = (nuniques, modes_min, modes_max, modes_mean, modes_qty) + else: + res = () + + quantiles = np.quantile(arr, q, method=quantile_method) + res = res + tuple(quantiles) # .tolist() + + if return_unsorted_stats: + res = res + tuple(compute_ncrossings(arr=arr, marks=quantiles)) # .tolist() + + return res + + +def compute_ncrossings(arr: np.ndarray, marks: np.ndarray, dtype=np.int32) -> np.ndarray: + n_crossings = np.zeros(len(marks), dtype=dtype) + prev_ds = np.full(len(marks), dtype=np.float32, fill_value=np.nan) + + for next_value in arr: + for i, mark in enumerate(marks): + d = next_value - mark + if prev_ds[i] is not None: + if d * prev_ds[i] < 0: + n_crossings[i] += 1 + prev_ds[i] = d + + return n_crossings + + +@numba.njit(fastmath=fastmath) +def compute_nunique_mode_quantiles_numba(arr: np.ndarray, q: list = default_quantiles) -> tuple: + """ + NOT RECOMMENDED. use compute_nunique_modes_quantiles_numpy instead, it's faster and more functional. + numUnique and mode calculation from sorted array + CAN be numba jitted. + """ + xsorted = np.sort(arr) + + next_unique_value = xsorted[0] + numUnique = 1 + mode = next_unique_value + best_count = 1 + times_occured = 0 + for next_value in xsorted: + if next_value == next_unique_value: + times_occured = times_occured + 1 + else: + numUnique = numUnique + 1 + if times_occured > best_count: + best_count = times_occured + mode = next_unique_value + next_unique_value = next_value + times_occured = 1 + if times_occured > best_count: + best_count = times_occured + mode = next_unique_value + + factor = len(arr) + quantiles = [] + for q in q: + quantiles.append(xsorted[int(np.ceil(q * factor)) - 1]) + + # if size % 2 == 0: + # sent = int(size / 2) + # median = xsorted[sent - 1] + xsorted[sent] + # else: + # median = xsorted[int(size // 2)] + + if times_occured == 1: + mode = np.nan + return numUnique, mode, quantiles + + +@numba.njit(fastmath=fastmath) +def compute_moments_slope_mi( + arr: np.ndarray, + mean_value: float, + weights: np.ndarray = None, + weighted_mean_value: float = None, + xvals: np.ndarray = None, # empty_float32_array, + directional_only: bool = False, + return_lintrend_approx_stats: bool = True, +) -> list: + """Добавить: + ? RANSAC-регрессию или что-то подобное, устойчивое к выбросам? + V Количество пересечений не просто среднего, а ещё и квантилей. Для финансовых приложений это мб полезно, тк есть гипотеза, + что если цена тестирует уровень много раз в течение дня, она его в итоге пробъёт (https://youtu.be/2DrBc35VLvE?t=129). + ? 
Можно для начала добавить отношение крайних квантилей к макс/мин. + """ + slope_over, slope_under = 0.0, 0.0 + mad, std, skew, kurt = 0.0, 0.0, 0.0, 0.0 + if weights is not None: + sum_weights = 0.0 + weighted_mad, weighted_std, weighted_skew, weighted_kurt = mad, std, skew, kurt + + size = len(arr) + + if xvals is None: # len(xvals)==0: + xvals = np.arange(size, dtype=np.float32) + # xvals_mean = 1 / 2 * (2 * 0 + (size - 1) * 1) + xvals_mean = np.mean(xvals) + + n_mean_crossings = 0.0 + prev_d = None + + r_sum = 0.0 + + for i, next_value in enumerate(arr): + + sl_x = xvals[i] - xvals_mean + + slope_over += sl_x * next_value + slope_under += sl_x**2 + + d = next_value - mean_value + r_sum += sl_x * d + + if prev_d is not None: + if d * prev_d < 0: + n_mean_crossings += 1 + prev_d = d + + mad += abs(d) + if weights is not None: + next_weight = weights[i] + w_d = next_value - weighted_mean_value + sum_weights += next_weight + weighted_mad += abs(w_d) * next_weight + + summand = d * d + std += summand + if weights is not None: + w_summand = w_d * w_d + weighted_std += w_summand * next_weight + + if not directional_only: + + summand = summand * d + skew += summand + if weights is not None: + w_summand = w_summand * w_d + weighted_skew += w_summand * next_weight + + kurt += summand * d + if weights is not None: + weighted_skew += w_summand * w_d * next_weight + + # mi = mutual_info_regression(xvals.reshape(-1, 1), arr, n_neighbors=2) # n_neighbors=2 is strictly needed for short sequences + + std = np.sqrt(std / size) + if weights is not None: + weighted_std = np.sqrt(weighted_std / sum_weights) + + if not directional_only: + mad = mad / size + + if std == 0: + skew, kurt = 0.0, 0.0 + else: + factor = size * std**3 + if factor: + skew = skew / factor + + factor = factor * std + kurt = kurt / factor - 3.0 + + if weights is not None: + weighted_mad = weighted_mad / sum_weights + if weighted_std == 0: + weighted_skew, weighted_kurt = 0.0, 0.0 + else: + factor = size * weighted_std**3 + if factor: + weighted_skew = weighted_skew / factor + + factor = factor * weighted_std + weighted_kurt = weighted_kurt / factor - 3.0 + + if np.isclose(slope_under, 0) or np.isnan(slope_under): + r = 0.0 + slope = np.nan + intercept = np.nan + n_lintrend_crossings = np.nan + else: + slope = slope_over / slope_under + + # R-value + if np.isclose(std, 0): + r = 0 + else: + r = r_sum / (np.sqrt(slope_under) * std * np.sqrt(size)) + # Test for numerical error propagation (make sure -1 < r < 1) + if r > 1.0: + r = 1.0 + elif r < -1.0: + r = -1.0 + + # slope crossings & trend approximation errors + + prev_d = None + intercept = mean_value - slope * xvals_mean + n_lintrend_crossings = 0.0 + + if return_lintrend_approx_stats: + lintrend_data_diffs = np.empty_like(arr) + else: + lintrend_data_diffs = None + + for i, next_value in enumerate(arr): + d = next_value - (slope * xvals[i] + intercept) + if prev_d is not None: + if d * prev_d < 0: + n_lintrend_crossings += 1 + prev_d = d + if return_lintrend_approx_stats: + lintrend_data_diffs[i] = d + + res = [] + if not directional_only: + res.extend((mad, std, skew, kurt)) + if weights is not None: + res.extend((weighted_mad, weighted_std, weighted_skew, weighted_kurt)) + res.extend((slope, intercept, r, n_mean_crossings, n_lintrend_crossings)) + return res, lintrend_data_diffs + + +def compute_mutual_info_regression(arr: np.ndarray, xvals: np.ndarray = np.array([], dtype=np.float32)) -> float: + if len(xvals): + mi = mutual_info_regression(xvals.reshape(-1, 1), arr, 
n_neighbors=2) + else: + mi = mutual_info_regression(np.arange(len(arr)).reshape(-1, 1), arr, n_neighbors=2) + + return mi[0] + + +def compute_entropy_features(arr: np.ndarray, sampling_frequency: int = 100, spectral_method: str = "welch") -> list: + # hjorth_mobility, hjorth_complexity = hjorth_params(arr) + # hjorth_mobility, + # hjorth_complexity, + # spectral_entropy(arr, sf=sampling_frequency, method=spectral_method),) + # num_zerocross(arr), + + nonzero = (~np.isnan(arr)).sum() + if nonzero < 10: + return (0.0,) * len(entropy_funcs) + else: + # safe_arr = arr[~np.isnan(arr)] + return tuple(np.nan_to_num([f(arr) for f in entropy_funcs], posinf=0, neginf=0)) + + +def fit_distribution(dist: object, data: np.ndarray, method: str = "mle"): + try: + params = dist.fit(data, method=method) + except Exception as e: + return default_dist_responses[dist.name] + [np.nan, np.nan] + else: + dist_fitted = dist(*params) + ks_stat, ks_pval = kstest(data, dist_fitted.cdf) + + return tuple(np.nan_to_num([*params, ks_stat, ks_pval], posinf=0, neginf=0)) + + +def compute_distributional_features(arr: np.ndarray) -> tuple: + res = tuple() + for dist in distributions: + res = res + fit_distribution(dist=dist, data=arr) + return res + + +def compute_numaggs( + arr: np.ndarray, + xvals: np.ndarray = None, + weights: np.ndarray = None, + geomean_log_mode: bool = False, + q: list = default_quantiles, + quantile_method: str = "median_unbiased", + max_modes: int = 10, + sampling_frequency: int = 100, + spectral_method: str = "welch", + hurst_kwargs: dict = dict(min_window=10, max_window=None, windows_log_step=0.25, take_diffs=False), + directional_only: bool = False, + whiten_means: bool = True, + return_distributional: bool = False, + return_entropy: bool = True, + return_hurst: bool = True, + return_float32: bool = True, + return_profit_factor: bool = False, + return_drawdown_stats: bool = False, + return_n_zer_pos_int: bool = True, + return_exotic_means: bool = True, + return_unsorted_stats: bool = True, + return_lintrend_approx_stats: bool = False, +): + """Compute a plethora of numerical aggregates for all values in an array. + Converts an arbitrarily length array into fixed number of aggregates. 
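    Illustrative usage sketch (assumes the default keyword values; the returned vector and get_numaggs_names() are intended to stay aligned):

        vals = np.asarray([1.0, 2.0, 1.5, 3.0, 2.5, 4.0], dtype=np.float32)
        feats = compute_numaggs(vals)        # float32 vector of aggregates
        names = get_numaggs_names()          # matching feature names
        assert len(feats) == len(names)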
+ """ + if len(arr) == 0: + return (np.nan,) * len( + get_numaggs_names( + q=q, + directional_only=directional_only, + whiten_means=whiten_means, + return_distributional=return_distributional, + return_entropy=return_entropy, + return_hurst=return_hurst, + return_profit_factor=return_profit_factor, + return_drawdown_stats=return_drawdown_stats, + return_n_zer_pos_int=return_n_zer_pos_int, + return_exotic_means=return_exotic_means, + return_unsorted_stats=return_unsorted_stats, + ) + ) + + res = compute_numerical_aggregates_numba( + arr, + weights=weights, + geomean_log_mode=geomean_log_mode, + directional_only=directional_only, + whiten_means=whiten_means, + return_profit_factor=return_profit_factor, + return_drawdown_stats=return_drawdown_stats, + return_n_zer_pos_int=return_n_zer_pos_int, + return_exotic_means=return_exotic_means, + return_unsorted_stats=return_unsorted_stats, + ) + + arithmetic_mean = res[0] + if weights is not None: + weighted_arithmetic_mean = res[1] + else: + weighted_arithmetic_mean = 0.0 + + res = tuple(res) + if not directional_only: + res = res + compute_nunique_modes_quantiles_numpy( + arr=arr, q=q, quantile_method=quantile_method, max_modes=max_modes, return_unsorted_stats=return_unsorted_stats + ) + + moments_slope_mi, lintrend_data_diffs = compute_moments_slope_mi( + arr=arr, + weights=weights, + mean_value=arithmetic_mean, + weighted_mean_value=weighted_arithmetic_mean, + xvals=xvals, + directional_only=directional_only, + return_lintrend_approx_stats=return_lintrend_approx_stats, + ) + res = res + tuple(moments_slope_mi) + + if return_hurst: + res = res + compute_hurst_exponent(arr=arr, **hurst_kwargs) + + if not (directional_only or not return_entropy): + res = res + compute_entropy_features(arr=arr, sampling_frequency=sampling_frequency, spectral_method=spectral_method) + if return_distributional: + res = res + tuple(compute_distributional_features(arr=arr)) + if return_lintrend_approx_stats: + params = dict( + weights=weights, + geomean_log_mode=geomean_log_mode, + q=q, + quantile_method=quantile_method, + max_modes=max_modes, + sampling_frequency=sampling_frequency, + spectral_method=spectral_method, + hurst_kwargs=hurst_kwargs, + directional_only=directional_only, + whiten_means=whiten_means, + return_distributional=return_distributional, + return_entropy=return_entropy, + return_hurst=return_hurst, + return_float32=False, + return_profit_factor=False, + return_drawdown_stats=False, + return_n_zer_pos_int=return_n_zer_pos_int, + return_exotic_means=return_exotic_means, + return_unsorted_stats=return_unsorted_stats, + return_lintrend_approx_stats=False, + ) + res = res + tuple(compute_numaggs(arr=lintrend_data_diffs, xvals=xvals, **params)) + + if return_float32: + return np.array(res, dtype=np.float32) + else: + return res + + +def get_numaggs_names( + weights: np.ndarray = None, + q: list = default_quantiles, + directional_only: bool = False, + whiten_means: bool = True, + return_distributional: bool = False, + return_entropy: bool = True, + return_hurst: bool = True, + return_profit_factor: bool = False, + return_drawdown_stats: bool = False, + return_n_zer_pos_int: bool = True, + return_exotic_means: bool = True, + return_unsorted_stats: bool = True, + return_lintrend_approx_stats: bool = False, + **kwargs +) -> tuple: + res = tuple( + ( + ["arimean", "ratio"] + if directional_only + else get_basic_feature_names( + weights=weights, + whiten_means=whiten_means, + return_profit_factor=return_profit_factor, + 
return_drawdown_stats=return_drawdown_stats, + return_n_zer_pos_int=return_n_zer_pos_int, + return_exotic_means=return_exotic_means, + return_unsorted_stats=return_unsorted_stats, + ) + ) + + ([] if (directional_only or not return_unsorted_stats) else "nuniques,modmin,modmax,modmean,modqty".split(",")) + + ([] if directional_only else (["q" + str(q) for q in q])) + + ([] if not return_unsorted_stats else ["ncrs" + str(q) for q in q]) + + get_moments_slope_mi_feature_names(weights=weights, directional_only=directional_only, return_lintrend_approx_stats=return_lintrend_approx_stats) + # + ["mutual_info_regression",] + + (["hursth", "hurstc"] if return_hurst else []) + + ([] if (directional_only or not return_entropy) else entropy_funcs_names) + + (distributions_features_names if return_distributional else []) + ) + + if return_lintrend_approx_stats: + params = dict( + weights=weights, + q=q, + directional_only=directional_only, + whiten_means=whiten_means, + return_distributional=return_distributional, + return_entropy=return_entropy, + return_hurst=return_hurst, + return_float32=False, + return_profit_factor=False, + return_drawdown_stats=False, + return_n_zer_pos_int=return_n_zer_pos_int, + return_exotic_means=return_exotic_means, + return_unsorted_stats=return_unsorted_stats, + return_lintrend_approx_stats=False, + ) + res += tuple(["ltrappr_" + el for el in get_numaggs_names(**params)]) + return res + + +def get_moments_slope_mi_feature_names(weights: np.ndarray = None, directional_only: bool = False, return_lintrend_approx_stats: bool = True): + res = [] + if not directional_only: + res.extend("mad,std,skew,kurt".split(",")) + if weights is not None: + res.extend("wmad,wstd,wskew,wkurt".split(",")) + res.extend("slope,intercept,r,meancross,trendcross".split(",")) + return res + + +def numaggs_over_matrix(vals: np.ndarray, numagg_params: dict, dtype=np.float32) -> np.ndarray: + """Computes numaggs on a 2d matrix""" + feature_names = get_numaggs_names(**numagg_params) + + res = np.empty(shape=(len(vals), len(feature_names)), dtype=dtype) + + for i in range(vals.shape[0]): + res[i, :] = compute_numaggs(vals[i, :], **numagg_params) + return res + + +def numaggs_over_df_columns_parallel( + df: pd.DataFrame, cols: Sequence, numagg_params: dict, dtype=np.float32, n_jobs=-1, prefetch_factor: int = 2, **parallel_kwargs +) -> np.ndarray: + """Computes numaggs over columns of a dataframe, in parallel fashion. 
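    A minimal call sketch (illustrative; the column names are hypothetical, numagg_params takes the same keyword arguments as compute_numaggs, and `delayed` is assumed to be joblib's):

        params = dict(return_hurst=False, return_entropy=False)
        feats = numaggs_over_df_columns_parallel(df=df, cols=["open", "high", "low", "close"], numagg_params=params, n_jobs=4)
        # feats.shape == (len(df), len(get_numaggs_names(**params)))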
+ Example of parallel_kwargs: numaggs_over_df_columns_parallel(df=X, cols=cols, temp_folder=r'R:\Temp') + """ + + res = parallel_run( + [delayed(numaggs_over_matrix)(vals=chunk, numagg_params=numagg_params) for chunk in np.array_split(df.loc[:, cols].values, n_jobs * prefetch_factor)], + max_nbytes=0, + n_jobs=n_jobs, + **parallel_kwargs + ) + res = np.vstack(res) + + return res diff --git a/mlframe/feature_engineering/timeseries.py b/mlframe/feature_engineering/timeseries.py new file mode 100644 index 0000000..4498aa5 --- /dev/null +++ b/mlframe/feature_engineering/timeseries.py @@ -0,0 +1,821 @@ +# ---------------------------------------------------------------------------------------------------------------------------- +# LOGGING +# ---------------------------------------------------------------------------------------------------------------------------- + +import logging + +logger = logging.getLogger(__name__) + +# ---------------------------------------------------------------------------------------------------------------------------- +# Packages +# ---------------------------------------------------------------------------------------------------------------------------- + +from pyutilz.pythonlib import ensure_installed + +# ensure_installed("numpy pandas") # PyWavelets + +# ---------------------------------------------------------------------------------------------------------------------------- +# Normal Imports +# ---------------------------------------------------------------------------------------------------------------------------- + +from typing import * + +import psutil +from numba import njit +from mlframe.ewma import ewma + +from mlframe.feature_engineering.numerical import compute_numaggs, get_numaggs_names +from mlframe.feature_engineering.categorical import compute_countaggs, get_countaggs_names + +from pyutilz.numpylib import smart_ratios + +import pandas as pd, numpy as np + +from pyutilz.system import tqdmu +from pyutilz.strings import slugify +from pyutilz.pythonlib import get_human_readable_set_size +from pyutilz.parallel import applyfunc_parallel +from functools import partial + +from scipy.signal import welch +from scipy.fftpack import fft +from scipy import signal +import pywt + +cCOMPACT_WAVELETS = True + +# ----------------------------------------------------------------------------------------------------------------------------------------------------- +# Inits +# ----------------------------------------------------------------------------------------------------------------------------------------------------- + + +def get_numaggs_metadata(numaggs_kwds: dict = {},numaggs_names:list=[]): + + if not numaggs_names: + numaggs_names = list(get_numaggs_names(**numaggs_kwds)) + + try: + q1_idx = numaggs_names.index("q0.25") + q3_idx = numaggs_names.index("q0.75") + except ValueError as e: + q1_idx, q3_idx = None, None + + return numaggs_names, q1_idx, q3_idx + + +default_countaggs_names = list(get_countaggs_names()) +default_numaggs_names, default_q1_idx, default_q3_idx = get_numaggs_metadata() + +# ----------------------------------------------------------------------------------------------------------------------------------------------------- +# Helpers +# ----------------------------------------------------------------------------------------------------------------------------------------------------- + + +@njit() +def find_next_cumsum_left_index(window_var_values: np.ndarray, amount: float, right_index: int = None, min_samples: int = 
1,use_abs:bool=False) -> tuple: + """Calculating windows having required turnovers.""" + total = 0.0 + if right_index <= 0: + return 0, total + + if right_index is None: + right_index = len(window_var_values) + for i in range(1, right_index): + if not np.isnan(window_var_values[right_index - i]): + total += window_var_values[right_index - i] + if use_abs: + if np.abs(total) >= amount and i >= min_samples: + return right_index - i, total + else: + if total >= amount and i >= min_samples: + return right_index - i, total + + return 0, total + +@njit() +def find_next_cumsum_right_index(window_var_values: np.ndarray, amount: float, left_index: int = None, min_samples: int = 1,use_abs:bool=False) -> tuple: + """Calculating windows having required turnovers.""" + total = 0.0 + l = len(window_var_values) + if left_index >= l - 1: + return l - 1, total + + if left_index is None: + left_index = 0 + for i in range(1, (l - left_index)): + if not np.isnan(window_var_values[left_index + i]): + total += window_var_values[left_index + i] + if use_abs: + if np.abs(total) >= amount and i >= min_samples: + return left_index + i, total + else: + if total >= amount and i >= min_samples: + return left_index + i, total + + return l - 1, total + + +def get_nwindows_expected(windows: dict) -> int: + """How many windows are expected from the entire set?""" + r = 0 + for window_var, windows_lengths in windows.items(): + r += len(windows_lengths) + return r + + +def get_ts_window_name(window_var: str, window_size: float, window_index_name: str = "") -> str: + """Give a timeseries window a human readable short name.""" + + if window_var == "": # just index + dataset_name = str(window_size) + window_index_name + else: + dataset_name = window_var + ":" + get_human_readable_set_size(window_size) + + return dataset_name + + +# ----------------------------------------------------------------------------------------------------------------------------------------------------- +# Windows and ML features over them +# ----------------------------------------------------------------------------------------------------------------------------------------------------- + + +def create_aggregated_features( + window_df: pd.DataFrame, + row_features: list, + create_features_names: bool, + features_names: list, + dataset_name: str, + # ----------------------------------------------------------------------------------------------------------------------------------------------------- + # common settings + # ----------------------------------------------------------------------------------------------------------------------------------------------------- + vars_mask_regexp: object = None, + vars_mask_exclude_regexp: object = None, + captions_vars_sep: str = "-", + # ----------------------------------------------------------------------------------------------------------------------------------------------------- + # numericals + # ----------------------------------------------------------------------------------------------------------------------------------------------------- + differences_features: bool = False, + ratios_features: bool = False, + robust_features: bool = False, + weighting_vars: Sequence = (), + na_fills: dict = {"": 1e3}, + span_corrections: dict = {"": 1e2}, + ewma_alphas: Sequence = (), + rolling: Sequence = (), # method_params can also include engine="numba", engine_kwargs={"parallel": True} + nonlinear_transforms=[np.cbrt], + nonnormal_vars: Sequence = (), + waveletnames="", #"rbio3.1", + numaggs_kwds: dict = 
{}, + splitting_vars:dict={}, + drawdown_vars:dict={}, + groupby_vars:dict={}, #{'ticker':['Volume']}=deals.groupby("ticker").Volume.agg("sum").values / deals.Volume.sum() + return_n_finite:bool=False, + # ----------------------------------------------------------------------------------------------------------------------------------------------------- + # categoricals + # ----------------------------------------------------------------------------------------------------------------------------------------------------- + process_categoricals: bool = False, # categoricals will be processed as counts data + counts_processing_mask_regexp: object = None, # separate variables can be processed as counts as well + countaggs_kwds: dict = {}, + # ----------------------------------------------------------------------------------------------------------------------------------------------------- + # subsets + # ----------------------------------------------------------------------------------------------------------------------------------------------------- + subsets: dict = {}, + checked_subsets: list = [], + subset_token: str = "_", + nested_subsets: bool = False, +): + """ + Each suitable variable of a dataframe gets numerical aggregates computed over a number of transformations over the set of its raw values + 1) as is: raw_vals + 2) ratios: div0(raw_vals[1:], raw_vals[:-1], fill=0.0) **ordered feature + 3) wavelets of raw_vals + 4) raw_vals weighted by second var, if the main var is not related to second var (has no second var in its name) + 5) exponentially weighted row_wals, for example with alphas=[0.6]: ewma(prices.Price.values, 0.6) **ordered feature + 5.1) rolling with optional scipy windows: rolling=[(dict(window=2,win_type='parzen'),'mean',dict(sym=False))]; can also include engine="numba", engine_kwargs={"parallel": True} + 6) log, or cubic root, or some other non-linear transform (yeo-johnson) of raw_vals (benefitial for non-normally distributed vars) + 7) robust subset of raw_vals, ie, within 0.1 and 0.9 quantiles + 8) for some variables, especially with many repeated values, or categorical, we can do value_counts(normalize=True or False). Further we can return + 1) Top N highest/lowest values along with their counts (missing are padded with NaNs) + 2) numaggs over counts data + 3) if variable is numeric, numaggs(timeseries_features=True) for values series sorted by counts (timeseries_features=True leaves only aggregates depending on the order of values) + 9*) possibly, ratios of numfeatures over raw values of a smaller window compared to bigger windows + 10*) lags: closest same day of month, week, year. at least for Price! pd.offsets.DateOffset(months=1) + """ + assert len(window_df)>1 + + if numaggs_kwds: + numaggs_names, q1_idx, q3_idx = get_numaggs_metadata(numaggs_kwds) + else: + numaggs_names, q1_idx, q3_idx = default_numaggs_names, default_q1_idx, default_q3_idx + + if not countaggs_kwds: + countaggs_names = default_countaggs_names + else: + countaggs_names = list(get_countaggs_names(**countaggs_kwds)) + + for var in window_df.columns: + if var in subsets or var in checked_subsets: continue + if (vars_mask_regexp is None or vars_mask_regexp.search(var)) and (vars_mask_exclude_regexp is None or not vars_mask_exclude_regexp.search(var)): + + if var in groupby_vars: + groupby=window_df.groupby(var,observed=True) #TODO! investigate: observed=True leads to missing blocks?? 
+ for sum_var in groupby_vars[var]: + if sum_var in window_df: + total=window_df[sum_var].sum() + raw_vals=groupby[sum_var].agg("sum").values + if total: raw_vals=raw_vals/ total + row_features.extend(compute_numaggs(raw_vals, **numaggs_kwds)) + if create_features_names: + features_names.extend((captions_vars_sep.join((dataset_name, sum_var, "grpby", var, feat)) for feat in numaggs_names)) + + # is this categorical? + if window_df[var].dtype.name in ( + "category", + ): # we do not list "object", "str" to exclude pure textual columns. They can be added explicilty though. + if process_categoricals or (counts_processing_mask_regexp and counts_processing_mask_regexp.search(var)): + row_features.extend(compute_countaggs(window_df[var], **countaggs_kwds)) + if create_features_names: + features_names.extend((captions_vars_sep.join((dataset_name, var, "vlscnt", feat)) for feat in countaggs_names)) + else: + if not (window_df[var].dtype.name in ("object", "str")): + if "datetime" in window_df[var].dtype.name: + raw_vals = window_df[var].diff(1).dt.total_seconds().values + else: + raw_vals = window_df[var].values + + # safe values without nans and infs + + idx = np.isfinite(raw_vals) + raw_vals = raw_vals[idx] + + if return_n_finite: + row_features.append(idx.sum()) + if create_features_names: + features_names.append(captions_vars_sep.join((dataset_name, var, "n_finite"))) + + # 1) as is: numaggs of raw_vals + + if var in drawdown_vars: + custom_numaggs_kwds=numaggs_kwds.copy() + custom_numaggs_kwds['return_drawdown_stats']=True + simple_numaggs_names=get_numaggs_names(**custom_numaggs_kwds) + else: + custom_numaggs_kwds=numaggs_kwds + simple_numaggs_names=numaggs_names + + simple_numerical_features = compute_numaggs(raw_vals, **custom_numaggs_kwds) + row_features.extend(simple_numerical_features) + if create_features_names: + features_names.extend((captions_vars_sep.join((dataset_name, var, feat)) for feat in simple_numaggs_names)) + + if splitting_vars and (custom_numaggs_kwds.get('return_unsorted_stats')!=False): + if splitting_vars and var in splitting_vars: + compute_splitting_stats(window_df=window_df,dataset_name=dataset_name,splitting_vars=splitting_vars,var=var,numaggs_names=simple_numaggs_names,numaggs_values=simple_numerical_features, + row_features=row_features,features_names=features_names,create_features_names=create_features_names,captions_vars_sep=captions_vars_sep) + + if differences_features: + differences = np.diff(raw_vals, 1) + + custom_numaggs_kwds=numaggs_kwds.copy() + custom_numaggs_kwds['return_profit_factor']=True + row_features.extend(compute_numaggs(differences, **custom_numaggs_kwds)) + if create_features_names: + custom_numaggs_names=list(get_numaggs_names(**custom_numaggs_kwds)) + features_names.extend((captions_vars_sep.join((dataset_name, var, "dif", feat)) for feat in custom_numaggs_names)) + + if ratios_features: + # 2) ratios: div0(raw_vals[1:], raw_vals[:-1], fill=0.0) + #print(var) + ratios = smart_ratios( + raw_vals[1:], + raw_vals[:-1], + span_correction=span_corrections.get(var, span_corrections.get("", 1e2)), + na_fill=na_fills.get(var, na_fills.get("")), + ) + + custom_numaggs_kwds=numaggs_kwds.copy() + custom_numaggs_kwds['return_profit_factor']=True + row_features.extend(compute_numaggs(ratios, **custom_numaggs_kwds)) + if create_features_names: + custom_numaggs_names=list(get_numaggs_names(**custom_numaggs_kwds)) + features_names.extend((captions_vars_sep.join((dataset_name, var, "rat", feat)) for feat in custom_numaggs_names)) + + # 3) wavelets of 
raw_vals + if waveletnames: + custom_numaggs_kwds=numaggs_kwds.copy() + custom_numaggs_kwds['return_hurst']=False + custom_numaggs_kwds['return_entropy']=False #they all are constant anyways () + for waveletname in waveletnames: + all_coeffs=[] + for i, coeffs in enumerate(pywt.wavedec(raw_vals, waveletname)): + all_coeffs.append(coeffs) + all_coeffs=np.hstack(all_coeffs) + row_features.extend(compute_numaggs(all_coeffs, **custom_numaggs_kwds)) + if create_features_names: + custom_numaggs_names=list(get_numaggs_names(**custom_numaggs_kwds)) + features_names.extend((captions_vars_sep.join((dataset_name, var, waveletname, feat)) for feat in custom_numaggs_names)) + + # 4) raw_vals weighted by second var, if the main var is not related to second var (has no second var in its name) + for weighting_var in weighting_vars: + if weighting_var not in var and weighting_var in window_df: + weighting_values = window_df.loc[idx, weighting_var].values + row_features.extend(compute_numaggs((raw_vals / weighting_values.sum()) * weighting_values, **numaggs_kwds)) + if create_features_names: + features_names.extend((captions_vars_sep.join((dataset_name, var, "wgt", weighting_var, feat)) for feat in numaggs_names)) + + # 5) exponentially weighted raw_vals with some alphas, like [0.6, 0.9]: + for alpha in ewma_alphas: + row_features.extend(compute_numaggs(ewma(raw_vals, alpha), **numaggs_kwds)) + if create_features_names: + features_names.extend((captions_vars_sep.join((dataset_name, var, "ewma", str(alpha), feat)) for feat in numaggs_names)) + + # 5.1) rolling + for window, method, method_params in rolling: + if "datetime" in window_df[var].dtype.name: + #logger.warning(f"Cant't use rolling features for var {var} 'cause of datetime-like dtype.") + break + vals = getattr(window_df[var].rolling(**window), method)(**method_params).values + safe_idx = np.isfinite(vals) + vals = vals[safe_idx] + row_features.extend(compute_numaggs(vals, **numaggs_kwds)) + if create_features_names: + specs = ";".join((f"{key}={value}" for key,value in dict(**window, m=method, **method_params).items())) # comma , refused by lgbm: lightgbm.basic.LightGBMError: Do not support special JSON characters in feature name. + specs=specs.replace("win_type","t").replace("window","w") + features_names.extend((captions_vars_sep.join((dataset_name, var, "rol", specs, feat)) for feat in numaggs_names)) + + # 6) log, or cubic root, or some other non-linear transform (yeo-johnson) of raw_vals + if var in nonnormal_vars: + for nonlinear_func in nonlinear_transforms: + transform_name = nonlinear_func.__name__ + row_features.extend(compute_numaggs(nonlinear_func(raw_vals), **numaggs_kwds)) + if create_features_names: + features_names.extend((captions_vars_sep.join((dataset_name, var, transform_name, feat)) for feat in numaggs_names)) + + # 7) robust subset of raw_vals, ie, within 0.1 and 0.9 quantiles. Or, better, using Tukey fences to identify outliers. 
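+                    # Sketch of the Tukey-fence filter applied below (Q1/Q3 are taken from the already computed numaggs):
+                    #   iqr = q3 - q1
+                    #   robust_subset = raw_vals[(raw_vals >= q1 - 1.5 * iqr) & (raw_vals <= q3 + 1.5 * iqr)]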
+ if robust_features: + _, q1_idx, q3_idx = get_numaggs_metadata(numaggs_names=simple_numaggs_names) + if q1_idx and q3_idx: + Q3 = simple_numerical_features[q3_idx] + Q1 = simple_numerical_features[q1_idx] + IQR = Q3 - Q1 + Lower_Bound = Q1 - 1.5 * IQR + Upper_Bound = Q3 + 1.5 * IQR + robust_subset = raw_vals[(raw_vals >= Lower_Bound) & (raw_vals <= Upper_Bound)] + #logger.info(f"Var={var}, Q1={Q1}, Q3={Q3}, len(robust_subset)={len(robust_subset)}") + + if len(robust_subset) == 0: + row_features.extend([np.nan] * len(numaggs_names)) + else: + row_features.extend(compute_numaggs(robust_subset, **numaggs_kwds)) + if create_features_names: + features_names.extend((captions_vars_sep.join((dataset_name, var, "rbst", feat)) for feat in numaggs_names)) + # 8) for some variables, especially with many repeated values, or categorical, we can do value_counts(normalize=True or False). + if counts_processing_mask_regexp and counts_processing_mask_regexp.search(var): + row_features.extend(compute_countaggs(window_df[var], **countaggs_kwds)) + if create_features_names: + features_names.extend((captions_vars_sep.join((dataset_name, var, "vlscnt", feat)) for feat in countaggs_names)) + + if subsets: + for subset_var, subset_var_values in subsets.items(): + if subset_var in checked_subsets or subset_var not in window_df: + continue + + for subset_var_value in subset_var_values: + idx=window_df[subset_var] == subset_var_value + subset_df = window_df[idx] + subset_direct=True + if len(subset_df)<=1: + #logger.warning(f"Empty subset {subset_var}={subset_var_value} (size={len(subset_df)}). let's use reverse to keep ndims. Block length before was {len(row_features)}") + subset_df = window_df[~idx] + subset_direct=False + + row_features.append(subset_direct) + subset_dataset_name=None if dataset_name is None else dataset_name + subset_token + subset_var+ "=" + str(subset_var_value) + if create_features_names: + features_names.append(subset_dataset_name+captions_vars_sep+"subset_direct") + + create_aggregated_features( + window_df=subset_df, + row_features=row_features, + create_features_names=create_features_names, + features_names=features_names, + dataset_name=subset_dataset_name, + # ----------------------------------------------------------------------------------------------------------------------------------------------------- + # common settings + # ----------------------------------------------------------------------------------------------------------------------------------------------------- + vars_mask_regexp=vars_mask_regexp, + vars_mask_exclude_regexp=vars_mask_exclude_regexp, + captions_vars_sep=captions_vars_sep, + # ----------------------------------------------------------------------------------------------------------------------------------------------------- + # numericals + # ----------------------------------------------------------------------------------------------------------------------------------------------------- + differences_features=differences_features, + ratios_features=ratios_features, + robust_features=robust_features, + weighting_vars=weighting_vars, + na_fills=na_fills, + span_corrections=span_corrections, + ewma_alphas=ewma_alphas, + rolling=rolling, + nonlinear_transforms=nonlinear_transforms, + nonnormal_vars=nonnormal_vars, + waveletnames=waveletnames, + numaggs_kwds=numaggs_kwds, + splitting_vars=splitting_vars, + drawdown_vars=drawdown_vars, + groupby_vars=groupby_vars, + return_n_finite=return_n_finite, + # 
----------------------------------------------------------------------------------------------------------------------------------------------------- + # categoricals + # ----------------------------------------------------------------------------------------------------------------------------------------------------- + process_categoricals=process_categoricals, + counts_processing_mask_regexp=counts_processing_mask_regexp, + countaggs_kwds=countaggs_kwds, + # ----------------------------------------------------------------------------------------------------------------------------------------------------- + # subsets + # ----------------------------------------------------------------------------------------------------------------------------------------------------- + subsets={} if not nested_subsets else subsets, + checked_subsets=checked_subsets + [subset_var], + subset_token=subset_token, + nested_subsets=nested_subsets, + + ) + + # if not subset_direct: + # logger.warning(f"Block length after subset processing became {len(row_features)}") + +def compute_splitting_stats(window_df:pd.DataFrame,dataset_name:str,splitting_vars:dict,var:str,numaggs_names:list,numaggs_values:list, + row_features:list,features_names:list,create_features_names:bool,captions_vars_sep:str="-")->None: + splitting_vals=[] + if create_features_names: + splitting_ratios_names=[] + subvars=splitting_vars[var] + for col in ('minr','maxr'): + try: + col_idx=numaggs_names.index(col) + except Exception as e: + logger.warning(f"Could not find col={col} in numagg fields {numaggs_names}") + else: + index=int(numaggs_values[col_idx]*len(window_df))-1 + for subvar in subvars: + if subvar in window_df: + if "datetime" in window_df[subvar].dtype.name: + pre_sum=(window_df[subvar].iloc[index]-window_df[subvar].iloc[0]).total_seconds() + post_sum=(window_df[subvar].iloc[-1]-window_df[subvar].iloc[index]).total_seconds() + else: + pre_sum=window_df[subvar].iloc[:index].sum() + post_sum=window_df[subvar].iloc[index:].sum() + tot=(pre_sum+post_sum) + splitting_vals.append(pre_sum/tot if tot else 0) + if create_features_names: + splitting_ratios_names.append(captions_vars_sep.join([dataset_name, var, col,subvar,'split'])) + + row_features.extend(splitting_vals) + if create_features_names: + features_names.extend(splitting_ratios_names) + +def create_windowed_features( + start_index: int = 0, + end_index: int = 0, + df: pd.DataFrame = None, + past_processing_fcn: object = None, + future_processing_fcn: object = None, + features_creation_fcn: object = None, + targets_creation_fcn: object = None, + step_size: int = 1, + nrecords_per_period: int = 1, # use when fixed number of records belongs to the same period, for example, features contains values for 24 consecutive hours of a day. + past_windows: dict = {}, # example: {"": [7, 31, 31 * 12], "Load_West": [200e3, 1e6, 5e6]}, + future_windows: dict = {}, # example: {"": [7, 31, 31 * 12], "Load_West": [200e3, 1e6, 5e6]}, + window_index_name: str = "", # example: D for days, or T for Ticks + overlapping: bool = False, + dtype=np.float32, + verbose: bool = False, +): + """Creates, for a given range of indexes/time moments, start_index to end_index over step_size, a number of past and future windows. + Past window(s) will be used to compute ML features, future window(s) - to compute ML target(s). + Windows can be overlapping or not. They can be of a fixed size in records or in units of any numerical variable from the dataframe. 
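+    For example, past_windows={"": [7, 31]} builds two past windows of 7 and 31 records (in index units, labelled via window_index_name),
+    while past_windows={"Load_West": [1e6]} grows a window backwards until the cumulative Load_West reaches 1e6
+    (via find_next_cumsum_left_index); future windows are built the same way in the forward direction.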
+    Resulting features & targets are merged into a pandas dataframe.
+
+    How do we support a scenario where the desired features are the RATIOS of aggregated features over 2 consecutive windows?
+    What if the desired target is the ratio of some parameter in the future window to its average over 3 consecutive past windows?
+    For that, we need the ability to combine the computed window features in an arbitrary way.
+
+    Another advanced scenario:
+    we need to take 1 big past and 1 big future window for the entire market and compute features for each of them (past and future features differ).
+    Then, from those 2 big windows, we need to partition by instrument code and create the same features for each instrument.
+    Finally, as ML features we need the ratio of the instrument's past features to the market's past features.
+    As the ML targets we need one future feature of the instrument divided by the same past feature of the same instrument.
+
+    There are 2 modes. In the legacy one, features/captions over different windows are simply merged together in the downstream aggregates calculation function.
+    In the advanced mode, ML features and targets have to be constructed manually from the dicts of individually computed window variables.
+    """
+    logger.info("got ranges from %s to %s", start_index, end_index)
+
+    targets = []
+    features = []
+
+    targets_names = []
+    features_names = []
+
+    past_vars_names = []
+    future_vars_names = []
+
+    if not end_index:
+        end_index = len(df)
+
+    past_nwindows_expected = get_nwindows_expected(past_windows)
+    future_nwindows_expected = get_nwindows_expected(future_windows)
+
+    for index in tqdmu(range(start_index, end_index, step_size), desc="dataset range", leave=False):
+
+        # one step means only one row of features will be added to the features and targets lists.
+ + row_targets = [] + row_features = [] + base_point = index * nrecords_per_period + + # ----------------------------------------------------------------------------------------------------------------------------------------------------- + # Compute future features (targets constituents) first: they are usually less expensive + # ----------------------------------------------------------------------------------------------------------------------------------------------------- + + future_windows_features = create_and_process_windows( + df=df, + base_point=base_point, + windows=future_windows, + apply_fcn=future_processing_fcn, + window_index_name=window_index_name, + overlapping=overlapping, + forward_direction=True, + window_features_names=future_vars_names, + window_features=None if targets_creation_fcn else row_targets, + create_features_names=targets, + verbose=verbose, + ) + + if (len(row_targets) < future_nwindows_expected) and len(future_windows_features) == 0: + # targets could not be fully created, skipping + continue + + # ----------------------------------------------------------------------------------------------------------------------------------------------------- + # Compute past features + # ----------------------------------------------------------------------------------------------------------------------------------------------------- + + past_windows_features = create_and_process_windows( + df=df, + base_point=base_point, + windows=past_windows, + apply_fcn=past_processing_fcn, + window_index_name=window_index_name, + overlapping=overlapping, + forward_direction=False, + window_features_names=past_vars_names, + window_features=None if features_creation_fcn else row_features, + create_features_names=targets, + verbose=verbose, + ) + + if row_features or past_windows_features: + + features.append(row_features) + + if targets_creation_fcn: + targets.append( + targets_creation_fcn(past_windows=past_windows_features if features_creation_fcn else row_features, future_windows=future_windows_features) + ) + else: + targets.append(row_targets) + else: + if not targets: + features_names = [] + targets_names = [] + + if features: + + if targets_creation_fcn: + targets_names = [targets_creation_fcn.__name__] + + X = pd.DataFrame(data=features, columns=features_names if features_creation_fcn else past_vars_names, dtype=dtype) + Y = pd.DataFrame(data=targets, columns=targets_names if targets_creation_fcn else future_vars_names, dtype=dtype) + + if Y.shape[1] == 1: + Y = Y.iloc[:, 0] + + logger.info("computed the features") + + return X, Y + else: + return None, None + + +def create_and_process_windows( + df: pd.DataFrame, + base_point: int, + apply_fcn: object, + windows: dict, + window_features_names: list, + window_features: list, + targets: list, + forward_direction: bool = True, + window_index_name: str = "", # example: D for days, or T for Ticks, R for rows + nrecords_per_period: int = 1, # use when fixed number of records belongs to the same period, for example, data contains values for 24 consecutive hours of a day. + overlapping: bool = False, + verbose: bool = False, +): + """Build all required windows (target or features), from the current base_point of the dataframe. + Apply function to compute features to each window, return dict with window scpecification as a key, list of features as a value. 
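+    For example, with windows={"": [7], "Load_West": [200e3]} and window_index_name="D", the keys of the returned dict would be
+    something like "7D" and "Load_West:200K" (see get_ts_window_name). When window_features is not None, the computed features
+    are appended to that list in place instead, and the returned dict stays empty.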
+ """ + res = {} + for window_var, windows_lengths in windows.items(): # (p_window_var := tqdmu(windows.items(), desc="window var", leave=False)): + # p_window_var.set_description("window var=" + window_var) + # windows_lengths must be sorted + + if forward_direction: + windows_l = base_point + else: + windows_r = base_point + + if window_var: + if window_var not in df: + break + if forward_direction: + window_var_values = df[window_var].values[base_point:] + else: + window_var_values = df[window_var].values[:base_point] + + for window_order, window_size in enumerate(windows_lengths): # (p_window_size := tqdmu(enumerate(windows_lengths), desc=f"window size", leave=False)): + # p_window_size.set_description("window size=" + str(window_size)) + if window_var == "": # just index + dataset_name = str(window_size) + window_index_name + if forward_direction: + windows_r = min(windows_l + window_size * nrecords_per_period, len(df)) + else: + windows_l = max(windows_r - window_size * nrecords_per_period, 0) + else: + dataset_name = window_var + ":" + get_human_readable_set_size(window_size) + # binary search along that var's cumsum until it reaches the required size + if forward_direction: + windows_r, accumulated_amount = find_next_cumsum_right_index(window_var_values=window_var_values, amount=window_size, left_index=windows_l) + else: + windows_l, accumulated_amount = find_next_cumsum_left_index(window_var_values=window_var_values, amount=window_size, right_index=windows_r) + if accumulated_amount > 0 and accumulated_amount * 2 < window_size: + logger.warning("Insufficient data for window %s of size %s: real size=%s", window_var, window_size, accumulated_amount) + continue + + # Window established. Now need to create features for it. + + window_df = df.iloc[windows_l:windows_r] + if len(window_df): + + if verbose: + logger.info( + "%s, acc.size %s, l=%s, r=%s (%s to %s)", + dataset_name + " " + ("future" if forward_direction else "past"), + accumulated_amount if window_var else "", + windows_l, + windows_r, + window_df.index[0], + window_df.index[-1], + ) + if window_features is not None: + apply_fcn(df=window_df, row_features=window_features, targets=targets, features_names=window_features_names, dataset_name=dataset_name) + else: + temp_window_features = [] + apply_fcn(df=window_df, row_features=temp_window_features, targets=targets, features_names=window_features_names, dataset_name=dataset_name) + res[dataset_name] = temp_window_features + + # print( + # window_var, window_order, window_size, time_from_ts(window_df.MOMENT.values[0]), time_from_ts(window_df.MOMENT.values[-1]) + # ) # ,"->",time_from_ts(future.MOMENT.values[0]),time_from_ts(future.MOMENT.values[-1]),) + + if not overlapping: + if forward_direction: + windows_l = windows_r + else: + windows_r = windows_l + return res + + +def create_ts_features_parallel( + start_index: int, end_index: int = None, ts_func: object = None, n_cores: int = None, logical: bool = False, n_chunks: int = None, **kwargs +): + """ + Divides a span (end_index-start_index) into n_chunks non-overlapping consecutive chunks. + Submits FE pipeline execution in parallel, joins results. 
+ """ + nrecords_per_period = kwargs.get("nrecords_per_period", 1) + + if not end_index: + end_index = len(kwargs.get("df", [])) + if not end_index: + return None, None + end_index = end_index // nrecords_per_period + + if not n_chunks or n_chunks < 0: + n_chunks = min(int(psutil.cpu_count(logical=True) * 1.5), (end_index - start_index)) + + args = [] + step = (end_index - start_index) // n_chunks + if step < 1: + return None, None + + l = start_index + for i in range(n_chunks): + r = min(l + step, end_index) + if i == n_chunks - 1: + r = end_index + + args.append((l, r)) + + l = r + + logger.info("starting applyfunc_parallel using args %s", args) + res = applyfunc_parallel(args, partial(ts_func, **kwargs), return_dataframe=False, logical=logical, n_cores=n_cores) + X_parts, Y_parts = [], [] + for X_part, Y_part in res: + X_parts.append(X_part) + Y_parts.append(Y_part) + + del res + return pd.concat(X_parts, ignore_index=True), pd.concat(Y_parts, ignore_index=True) + + +# ----------------------------------------------------------------------------------------------------------------------------------------------------- +# Detecting optimal lags for FE +# ----------------------------------------------------------------------------------------------------------------------------------------------------- + + +def compute_corr(dependent_vals: np.ndarray, independent_vals: np.ndarray, deciding_func: object, absolutize: bool = True): + if deciding_func is np.corrcoef: + corr = deciding_func(dependent_vals, independent_vals)[0][1] + else: + corr = deciding_func(dependent_vals.reshape(-1, 1), independent_vals)[0] + + if absolutize: + corr = np.abs(corr) + + return corr + + +def general_acf( + Y: np.ndarray, X: np.ndarray = None, windows: dict = {}, deciding_func: object = np.corrcoef, lag_len: int = 30, min_samples=500, absolutize: bool = True +): + """Advanced ACF(nonlinear, +over variables with non-fixed offsets). 
+ windows={var: {from,to,nsteps]}, ie {"Load":{"from":40_000,"to":1e6,"nsteps":100}} + """ + res = {} + + if lag_len: + acfs_vals = [1.0] + acfs_index = [1.0] + for i in tqdmu(range(lag_len), desc="Fixed offsets"): + if len(Y) - i >= min_samples: + dependent_vals = Y[i + 1 :] + independent_vals = Y[: -(i + 1)] + + corr = compute_corr(dependent_vals=dependent_vals, independent_vals=independent_vals, deciding_func=deciding_func, absolutize=absolutize) + + acfs_vals.append(corr) + acfs_index.append(i + 1) + + res["fixed_offsets"] = pd.Series(data=acfs_vals, name="fixed_offsets", index=acfs_index) + + if windows: + for window_var, windows_params in tqdmu(windows.items(), desc="Flexible windows"): # windows_lengths must be sorted + window_var_values = X[window_var].values + acfs_vals = [1.0] + acfs_index = [0.0] + window_sizes = np.linspace( + start=windows_params.get("from", max(1, window_var_values.min())), + stop=windows_params.get("to", window_var_values.sum()), + num=windows_params.get("nsteps", 100), + ) + + for window_size in tqdmu(window_sizes, desc=window_var): + dependent_vals = [] + independent_vals = [] + windows_r = len(window_var_values) - 1 + + while True: + # binary search along that var's cumsum until it reaches required size + windows_l, accumulated_amount = find_next_cumsum_left_index(window_var_values=window_var_values, amount=window_size, right_index=windows_r) + if accumulated_amount * 2 < window_size: # or (windows_r - windows_l) < min_samples + break + else: + dependent_vals.append(Y[windows_r]) + independent_vals.append(Y[windows_l]) + + windows_r = windows_l + if windows_l <= 0: + break + if dependent_vals: + corr = compute_corr( + dependent_vals=np.array(dependent_vals), independent_vals=np.array(independent_vals), deciding_func=deciding_func, absolutize=absolutize + ) + + acfs_vals.append(corr) + acfs_index.append(window_size) + + res[window_var] = pd.Series(data=acfs_vals, name=window_var, index=acfs_index) + return res diff --git a/feature_importance.py b/mlframe/feature_importance.py similarity index 100% rename from feature_importance.py rename to mlframe/feature_importance.py diff --git a/mlframe/feature_selection/__init__.py b/mlframe/feature_selection/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/mlframe/feature_selection/filters.py b/mlframe/feature_selection/filters.py new file mode 100644 index 0000000..b3a14aa --- /dev/null +++ b/mlframe/feature_selection/filters.py @@ -0,0 +1,3449 @@ +"""Feature selection within ML pipelines. Based on filters. 
Currently includes mRMR.""" + +# ---------------------------------------------------------------------------------------------------------------------------- +# LOGGING +# ---------------------------------------------------------------------------------------------------------------------------- + +import logging + +logger = logging.getLogger(__name__) + +while True: + try: + # ---------------------------------------------------------------------------------------------------------------------------- + # Normal Imports + # ---------------------------------------------------------------------------------------------------------------------------- + + from typing import * + + import copy + + import pandas as pd, numpy as np + import os + import gc + + from pyutilz.system import tqdmu + from mlframe.utils import set_random_seed + from pyutilz.pythonlib import store_params_in_object, get_parent_func_args + from pyutilz.parallel import mem_map_array, split_list_into_chunks, parallel_run + from pyutilz.numbalib import set_numba_random_seed, arr2str, python_dict_2_numba_dict, generate_combinations_recursive_njit + + # from mlframe.boruta_shap import BorutaShap + from timeit import default_timer as timer + + from scipy.stats import mode + from scipy import special as sp + from itertools import combinations + from numpy.polynomial.hermite import hermval + from pyutilz.pythonlib import sort_dict_by_value + + from sklearn.preprocessing import KBinsDiscretizer, OrdinalEncoder + from sklearn.model_selection import KFold + from sklearn.base import is_classifier, is_regressor, BaseEstimator, TransformerMixin + from sklearn.impute import SimpleImputer + from itertools import combinations + from numba.core import types + from mlframe.arrays import arrayMinMax + from numba import njit, jit + import numba + import math + + from joblib import Parallel, delayed + + from astropy.stats import histogram + + from numba import NumbaDeprecationWarning, NumbaPendingDeprecationWarning + import warnings + + except ModuleNotFoundError as e: + + logger.warning(e) + + if "cannot import name" in str(e): + raise (e) + + # ---------------------------------------------------------------------------------------------------------------------------- + # Packages auto-install + # ---------------------------------------------------------------------------------------------------------------------------- + + from pyutilz.pythonlib import ensure_installed + + ensure_installed("numpy pandas scikit-learn") # cupy-cuda11x + + else: + break + +# ---------------------------------------------------------------------------------------------------------------------------- +# Inits +# ---------------------------------------------------------------------------------------------------------------------------- + +warnings.filterwarnings("ignore", module=".*_discretization") +warnings.simplefilter("ignore", category=NumbaDeprecationWarning) +warnings.simplefilter("ignore", category=NumbaPendingDeprecationWarning) + +MAX_JOBLIB_NBYTES = 1e3 +NMAX_NONPARALLEL_ITERS = 2 +MAX_ITERATIONS_TO_TRACK = 5 + +LARGE_CONST: float = 1e30 +GPU_MAX_BLOCK_SIZE: int = 1024 +MAX_CONFIRMATION_CAND_NBINS: int = 50 + +caching_hits_xyz = 0 +caching_hits_z = 0 +caching_hits_xz = 0 +caching_hits_yz = 0 + + +def init_kernels(): + + global compute_joint_hist_cuda, compute_mi_from_classes_cuda + + import cupy as cp # pip install cupy-cuda11x; python -m cupyx.tools.install_library --cuda 11.x --library cutensor + + compute_joint_hist_cuda = cp.RawKernel( + r""" + extern "C" 
__global__
+    void compute_joint_hist_cuda(const int *classes_x, const int *classes_y, int *joint_counts, int n, int nbins_y) {
+        int tid = blockDim.x * blockIdx.x + threadIdx.x;
+        if (tid < n) {
+            atomicAdd(&joint_counts[classes_x[tid] * nbins_y + classes_y[tid]], 1);
+        }
+    }
+    """,
+        "compute_joint_hist_cuda",
+    )
+
+    compute_mi_from_classes_cuda = cp.RawKernel(
+        r"""
+    extern "C" __global__
+    void compute_mi_from_classes_cuda(const int *classes_x, const double *freqs_x, const int *classes_y, const double *freqs_y,
+                                      const int *joint_counts, double *totals, int n, int nbins_x, int nbins_y) {
+        int tid = blockDim.x * blockIdx.x + threadIdx.x;
+        if (tid == 0){
+            double total = 0.0;
+            for (int i = 0; i < nbins_x; i++){
+                float prob_x = freqs_x[i];
+                for (int j = 0; j < nbins_y; j++){
+                    int jc = joint_counts[i * nbins_y + j];
+                    if (jc > 0){
+                        float prob_y = freqs_y[j];
+                        double jf = (float)jc / (float)n;
+                        total += jf * log(jf / (prob_x * prob_y));
+                    }
+                }
+            }
+            totals[0] = total;
+        }
+    }
+    """,
+        "compute_mi_from_classes_cuda",
+    )
+
+
+# ----------------------------------------------------------------------------------------------------------------------------
+# Old code
+# ----------------------------------------------------------------------------------------------------------------------------
+
+
+def find_impactful_features(
+    X: pd.DataFrame,
+    Y: pd.DataFrame,
+    feature_selector: object = None,
+    model: object = None,
+    importance_measure: str = "shap",
+    classification: bool = False,
+    n_trials: int = 150,
+    normalize: bool = True,
+    train_or_test="train",
+    verbose: bool = True,
+    fit_params: dict = {},
+) -> dict:
+    """
+    Create a dict of inputs impacting each target and all targets (multi-target supported).
+    Wrapped models (like TransformedTargetRegressor) are not supported.
+    """
+
+    if verbose:
+        logger.info("Starting impact analysis for %s row(s), %s feature(s), %s target(s)", X.shape[0], X.shape[1], Y.shape[1])
+
+    if not feature_selector:
+        feature_selector = BorutaShap(
+            model=model,
+            importance_measure=importance_measure,
+            classification=classification,
+            n_trials=n_trials,
+            normalize=normalize,
+            verbose=False,
+            train_or_test=train_or_test,
+            fit_params=fit_params,
+        )
+
+    if False:  # when multioutput is not supported
+        max_targets = 0
+        res = {"accepted": {}, "tentative": {}}
+        wholeset_accepted, wholeset_tentative = [], []
+
+        for var in tqdmu(range(Y.shape[1]), desc="target #"):
+            if max_targets:
+                if var >= max_targets:
+                    break
+            feature_selector.fit(X=X, y=Y.iloc[:, var], n_trials=n_trials, normalize=normalize, verbose=False, train_or_test=train_or_test)
+
+            res["accepted"][var] = feature_selector.accepted
+            res["tentative"][var] = feature_selector.tentative
+
+            if verbose:
+                logger.info(
+                    "%s feature(s) found impactful on target %s: %s, %s tentative: %s",
+                    len(feature_selector.accepted),
+                    var,
+                    sorted(feature_selector.accepted),
+                    len(feature_selector.tentative),
+                    sorted(feature_selector.tentative),
+                )
+
+            wholeset_accepted.extend(feature_selector.accepted)
+            wholeset_tentative.extend(feature_selector.tentative)
+
+        res["wholeset"] = {"accepted": set(wholeset_accepted), "tentative": set(wholeset_tentative)}
+        res["mostvoted_accepted"] = Counter(wholeset_accepted)
+        res["mostvoted_tentative"] = Counter(wholeset_tentative)
+    else:
+        feature_selector.fit(X=X, y=Y)
+        res = {"accepted": sorted(feature_selector.accepted), "tentative": sorted(feature_selector.tentative)}
+    if verbose:
+        logger.info(res)
+    return res
+
+
+# ----------------------------------------------------------------------------------------------------------------------------
+# Multidimensional vectors
+# ----------------------------------------------------------------------------------------------------------------------------
+
+
+@njit()
+def merge_vars(
+    factors_data: np.ndarray,
+    vars_indices: Sequence,
+    var_is_nominal: Sequence,
+    factors_nbins: Sequence,
+    dtype=np.int32,
+    min_occupancy: int = None,
+    current_nclasses: int = 1,
+    final_classes: np.ndarray = None,  # , dtype=np.int32
+    verbose: bool = False,
+) -> tuple:
+    """Melts multiple vectors (partitioned into bins or categories) into a one-dimensional one.
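+    For example, merging a 3-bin variable with a 4-bin one yields at most 12 combined classes: a sample falling into bins
+    (b1, b2) gets class b1 + b2 * 3 (assuming no bin of the first variable is empty), and empty combinations are dropped
+    from the returned frequencies.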
+    factors_data columns are assumed to be ordinal-encoded already (by KBinsDiscretizer or similar).
+    Bins scarcely populated in the higher dimension may be merged with their nearest neighbours (in Euclidean distance).
+
+    Process truly numerical vars first? That would allow distributing scarcely populated bins more meaningfully, according to the distances.
+    What order is best to use with regard to the number of categories and the variance of their population density? Big-small, big-small, etc.? Starting from the most (un)evenly distributed?
+    """
+
+    if final_classes is None:
+        final_classes = np.zeros(len(factors_data), dtype=dtype)
+    for var_number, var_index in enumerate(vars_indices):
+
+        expected_nclasses = current_nclasses * factors_nbins[var_index]
+        freqs = np.zeros(expected_nclasses, dtype=dtype)
+        values = factors_data[:, var_index].astype(dtype)
+        if verbose:
+            print(f"var={var_index}, classes from {values.min()} to {values.max()}")
+        for sample_row, sample_class in enumerate(values):
+            newclass = final_classes[sample_row] + sample_class * current_nclasses
+            freqs[newclass] += 1
+            final_classes[sample_row] = newclass
+
+        # clean zeros
+        nzeros = 0
+        lookup_table = np.empty(expected_nclasses, dtype=dtype)
+        for oldclass, npoints in enumerate(freqs):
+            if npoints == 0:
+                nzeros += 1
+            else:
+                pass
+                # points from low-populated regions can be
+                # 1) distributed into closest N regions proportional to distances to them
+                # 2) ALL placed into some separate OUTLIERS region
+                # 3) left as is
+
+                # if npoints < min_occupancy: ...
+
+        freqs = freqs[freqs > 0]
+        current_nclasses = expected_nclasses - nzeros
+    return final_classes, freqs / len(factors_data), current_nclasses
+
+
+# ----------------------------------------------------------------------------------------------------------------------------
+# Information-theory measures
+# ----------------------------------------------------------------------------------------------------------------------------
+
+
+@njit()
+def entropy(freqs: np.ndarray, min_occupancy: float = 0) -> float:
+    if min_occupancy:
+        freqs = freqs[freqs >= min_occupancy]
+
+    return -(np.log(freqs) * freqs).sum()
+
+
+@njit()
+def mi(factors_data, x: np.ndarray, y: np.ndarray, factors_nbins: np.ndarray, verbose: bool = False, dtype=np.int32) -> float:
+    """Computes Mutual Information of X, Y via entropy calculations."""
+
+    classes_x, freqs_x, _ = merge_vars(
+        factors_data=factors_data, vars_indices=x, var_is_nominal=None, factors_nbins=factors_nbins, verbose=verbose, dtype=dtype
+    )
+    entropy_x = entropy(freqs=freqs_x)
+    if verbose:
+        print(f"entropy_x={entropy_x}, nclasses_x={len(freqs_x)} ({classes_x.min()} to {classes_x.max()})")
+
+    _, freqs_y, _ = merge_vars(factors_data=factors_data, vars_indices=y, var_is_nominal=None, factors_nbins=factors_nbins, verbose=verbose, dtype=dtype)
+    entropy_y = entropy(freqs=freqs_y)
+    if verbose:
+        print(f"entropy_y={entropy_y}, nclasses_y={len(freqs_y)}")
+
+    classes_xy, freqs_xy, _ = merge_vars(
+        factors_data=factors_data, vars_indices=set(x) | set(y), var_is_nominal=None, factors_nbins=factors_nbins, verbose=verbose, dtype=dtype
+    )
+    entropy_xy = entropy(freqs=freqs_xy)
+    if verbose:
+        print(f"entropy_xy={entropy_xy}, nclasses_x={len(freqs_xy)} ({classes_xy.min()} to {classes_xy.max()})")
+
+    return entropy_x + entropy_y - entropy_xy
+
+
+@njit()
+def unpack_and_sort(x, z):
+    res = []
+    for el in x:
+        res.append(el)
+    for el in z:
+        res.append(el)
+    return sorted(res)
+
+
+@njit()
+def conditional_mi(
+    factors_data: np.ndarray,
+    x: tuple,
+    y: tuple,
+    z: tuple,
+    var_is_nominal: Sequence,
+
factors_nbins: Sequence, + entropy_z: float = -1.0, + entropy_xz: float = -1.0, + entropy_yz: float = -1.0, + entropy_xyz: float = -1.0, + entropy_cache: dict = None, + can_use_x_cache: bool = False, + can_use_y_cache: bool = False, + dtype=np.int32, +) -> float: + """ + Conditional Mutual Information about Y by X given Z = I (X ;Y | Z ) = H(X, Z) + H(Y, Z) - H(Z) - H(X, Y, Z) + When y is constant, and both X(candidates) and Z (veteranes) repeat a lot, there's room to optimize. + Also when parts of X repeat a lot (2, 3 way interactions). Z and Y are always 1-dim. + """ + # global caching_hits_xyz, caching_hits_z, caching_hits_xz, caching_hits_yz + + key = "" + + if entropy_z < 0: + if entropy_cache is not None: + key = arr2str(sorted(z)) + entropy_z = entropy_cache.get(key, -1) + if entropy_z < 0: + _, freqs_z, _ = merge_vars(factors_data=factors_data, vars_indices=z, var_is_nominal=None, factors_nbins=factors_nbins, dtype=dtype) # always 1-dim + entropy_z = entropy(freqs=freqs_z) + if entropy_cache is not None: + entropy_cache[key] = entropy_z + # else: + # caching_hits_z += 1 + + if entropy_xz < 0: + + # indices = sorted([*x, *z]) + indices = unpack_and_sort(x, z) + + if can_use_x_cache and entropy_cache is not None: + key = arr2str(indices) + entropy_xz = entropy_cache.get(key, -1) + if entropy_xz < 0: + _, freqs_xz, _ = merge_vars(factors_data=factors_data, vars_indices=indices, var_is_nominal=None, factors_nbins=factors_nbins, dtype=dtype) + entropy_xz = entropy(freqs=freqs_xz) + if can_use_x_cache and entropy_cache is not None: + entropy_cache[key] = entropy_xz + # else: + # caching_hits_xz += 1 + + current_nclasses_yz = 1 + if can_use_y_cache: + if entropy_yz < 0: + + # indices = sorted([*y, *z]) + indices = unpack_and_sort(y, z) + + if entropy_cache is not None: + key = arr2str(indices) + entropy_yz = entropy_cache.get(key, -1) + if entropy_yz < 0: + classes_yz, freqs_yz, current_nclasses_yz = merge_vars( + factors_data=factors_data, vars_indices=indices, var_is_nominal=None, factors_nbins=factors_nbins, dtype=dtype + ) + entropy_yz = entropy(freqs=freqs_yz) + if entropy_cache is not None: + entropy_cache[key] = entropy_yz + # else: + # caching_hits_yz += 1 + else: + classes_yz, freqs_yz, current_nclasses_yz = merge_vars( + factors_data=factors_data, vars_indices=unpack_and_sort(y, z), var_is_nominal=None, factors_nbins=factors_nbins, dtype=dtype # [*y, *z] + ) # always 2-dim + entropy_yz = entropy(freqs=freqs_yz) + + if entropy_xyz < 0: + if can_use_y_cache and can_use_x_cache: + + # indices = sorted([*x, *y, *z]) + indices = unpack_and_sort(x, y) + indices = unpack_and_sort(indices, z) + + if entropy_cache is not None: + key = arr2str(indices) + entropy_xyz = entropy_cache.get(key, -1) + if entropy_xyz < 0: + if current_nclasses_yz == 1: + classes_yz, freqs_yz, current_nclasses_yz = merge_vars( + factors_data=factors_data, vars_indices=unpack_and_sort(y, z), var_is_nominal=None, factors_nbins=factors_nbins, dtype=dtype # [*y, *z] + ) # always 2-dim + + _, freqs_xyz, _ = merge_vars( + # factors_data=factors_data, vars_indices=[*y, *z, *x], var_is_nominal=None, factors_nbins=factors_nbins + factors_data=factors_data, + vars_indices=x, + var_is_nominal=None, + factors_nbins=factors_nbins, + current_nclasses=current_nclasses_yz, + final_classes=classes_yz, + dtype=dtype, + ) # upper classes of [*y, *z] can serve here. (2+x)-dim, ie 3 to 5 dim. 
+ entropy_xyz = entropy(freqs=freqs_xyz) + if entropy_cache is not None and can_use_y_cache and can_use_x_cache: + entropy_cache[key] = entropy_xyz + # else: + # caching_hits_xyz += 1 + res = entropy_xz + entropy_yz - entropy_z - entropy_xyz + if res < 0.0: + # print(x, y, z, res, can_use_y_cache) + res = 0.0 + return res + + +@njit() +def compute_mi_from_classes( + classes_x: np.ndarray, + freqs_x: np.ndarray, + classes_y: np.ndarray, + freqs_y: np.ndarray, + dtype=np.int32, +) -> float: + + joint_counts = np.zeros((len(freqs_x), len(freqs_y)), dtype=dtype) + for i, j in zip(classes_x, classes_y): + joint_counts[i, j] += 1 + joint_freqs = joint_counts / len(classes_x) + + total = 0.0 + for i in range(len(freqs_x)): + prob_x = freqs_x[i] + for j in range(len(freqs_y)): + jf = joint_freqs[i, j] + if jf: + prob_y = freqs_y[j] + total += jf * math.log(jf / (prob_x * prob_y)) + return total + + +@njit() +def distribute_permutations(npermutations: int, n_workers: int) -> list: + + avg_perms_per_worker = npermutations // n_workers + diff = npermutations - avg_perms_per_worker * n_workers + workload = [avg_perms_per_worker] * n_workers + if diff > 0: + workload[-1] = workload[-1] + diff + return workload + + +# @njit() +def mi_direct( + factors_data, + x: tuple, + y: tuple, + factors_nbins: np.ndarray, + min_occupancy: int = None, + dtype=np.int32, + npermutations: int = 10, + max_failed: int = None, + min_nonzero_confidence: float = 0.95, + classes_y: np.ndarray = None, + classes_y_safe: np.ndarray = None, + freqs_y: np.ndarray = None, + n_workers: int = 1, + workers_pool: object = None, + parallel_kwargs: dict = {}, +) -> tuple: + + classes_x, freqs_x, _ = merge_vars(factors_data=factors_data, vars_indices=x, var_is_nominal=None, factors_nbins=factors_nbins, dtype=dtype) + if classes_y is None: + classes_y, freqs_y, _ = merge_vars(factors_data=factors_data, vars_indices=y, var_is_nominal=None, factors_nbins=factors_nbins, dtype=dtype) + + original_mi = compute_mi_from_classes(classes_x=classes_x, freqs_x=freqs_x, classes_y=classes_y, freqs_y=freqs_y, dtype=dtype) + + # logger.info(f"original_mi={original_mi}") + + if original_mi > 0 and npermutations > 0: + + # Inits + + nfailed = 0 + if not max_failed: + max_failed = int(npermutations * (1 - min_nonzero_confidence)) + if max_failed <= 1: + max_failed = 1 + + if n_workers and n_workers > 1 and npermutations > NMAX_NONPARALLEL_ITERS: + + # logger.info("Memmapping classes_x...") + # classes_x_memmap = mem_map_array(obj=classes_x, file_name="classes_x", mmap_mode="r") + + if workers_pool is None: + workers_pool = Parallel(n_jobs=n_workers, **parallel_kwargs) + + res = workers_pool( + delayed(parallel_mi)( + classes_x=classes_x, + freqs_x=freqs_x, + classes_y=classes_y_safe, + freqs_y=freqs_y, + dtype=dtype, + npermutations=worker_npermutations, + original_mi=original_mi, + max_failed=max_failed, + ) + for worker_npermutations in distribute_permutations(npermutations=npermutations, n_workers=n_workers) + ) + + i = 0 + for worker_nfailed, worker_i in res: + nfailed += worker_nfailed + i += worker_i + + if nfailed >= max_failed: + original_mi = 0.0 + + else: + + if classes_y_safe is None: + classes_y_safe = classes_y.copy() + + # Create a Generator instance with a specific RNG state + # seed=int.from_bytes(os.urandom(4), byteorder='little') + # rng = np.random.default_rng(seed) + + for i in range(npermutations): + # logger.info(f"x={x} perm {i}") + # np.random.shuffle(classes_y_safe) + shuffle_arr(classes_y_safe) + # Shuffle the array using the local 
RNG + # classes_y_shuffled=np.random.choice(classes_y, len(classes_y), replace=False) + # rng.shuffle(classes_y_safe) + # logger.info(f"x={x} shuffled") + # mi = compute_mi_from_classes(classes_x=classes_x, freqs_x=freqs_x, classes_y=classes_y_shuffled, freqs_y=freqs_y, dtype=dtype) + mi = compute_mi_from_classes(classes_x=classes_x, freqs_x=freqs_x, classes_y=classes_y_safe, freqs_y=freqs_y, dtype=dtype) + + if mi >= original_mi: + nfailed += 1 + if nfailed >= max_failed: + original_mi = 0.0 + break + + confidence = 1 - nfailed / (i + 1) + + else: + confidence = 0 + + return original_mi, confidence + + +@njit() +def shuffle_arr(arr: np.ndarray) -> None: + np.random.shuffle(arr) + + +@njit() +def parallel_mi( + classes_x: np.ndarray, + freqs_x: np.ndarray, + classes_y: np.ndarray, + freqs_y: np.ndarray, + npermutations: int, + original_mi: float, + max_failed: int, + dtype=np.int32, +) -> int: + + # print("In parallel_mi. Copying classes_y...") + nfailed = 0 + classes_y_safe = np.asarray(classes_y).copy() + # print("In parallel_mi. Copied.") + for i in range(npermutations): + + np.random.shuffle(classes_y_safe) + mi = compute_mi_from_classes(classes_x=classes_x, freqs_x=freqs_x, classes_y=classes_y_safe, freqs_y=freqs_y, dtype=dtype) + + if mi >= original_mi: + nfailed += 1 + if nfailed >= max_failed: + break + return nfailed, i + 1 + + +def mi_direct_gpu( + factors_data, + x: tuple, + y: tuple, + factors_nbins: np.ndarray, + min_occupancy: int = None, + dtype=np.int32, + npermutations: int = 10, + max_failed: int = None, + min_nonzero_confidence: float = 0.95, + classes_y: np.ndarray = None, + classes_y_safe: np.ndarray = None, + freqs_y: np.ndarray = None, + freqs_y_safe: np.ndarray = None, + use_gpu: bool = True, +) -> tuple: + + classes_x, freqs_x, _ = merge_vars(factors_data=factors_data, vars_indices=x, var_is_nominal=None, factors_nbins=factors_nbins, dtype=dtype) + if classes_y is None: + classes_y, freqs_y, _ = merge_vars(factors_data=factors_data, vars_indices=y, var_is_nominal=None, factors_nbins=factors_nbins, dtype=dtype) + + original_mi = compute_mi_from_classes(classes_x=classes_x, freqs_x=freqs_x, classes_y=classes_y, freqs_y=freqs_y, dtype=dtype) + + if original_mi > 0 and npermutations > 0: + + # Inits + + if not max_failed: + max_failed = int(npermutations * (1 - min_nonzero_confidence)) + if max_failed <= 1: + max_failed = 1 + + if classes_y_safe is None: + classes_y_safe = cp.asarray(classes_y).astype(cp.int32) + if freqs_y_safe is None: + freqs_y_safe = cp.asarray(freqs_y) + + totals = cp.zeros(1, dtype=np.float64) + joint_counts = cp.zeros((len(freqs_x), len(freqs_y)), dtype=cp.int32) + + block_size = GPU_MAX_BLOCK_SIZE + grid_size = math.ceil(len(classes_x) / block_size) + + classes_x = cp.asarray(classes_x.astype(np.int32)) + freqs_x = cp.asarray(freqs_x) + nfailed = 0 + for i in range(npermutations): + + cp.random.shuffle(classes_y_safe) + joint_counts.fill(0) + compute_joint_hist_cuda( + (grid_size,), + (block_size,), + (classes_x, classes_y_safe, joint_counts, len(classes_x), len(freqs_y)), + ) + compute_mi_from_classes_cuda( + (1,), + (1,), + (classes_x, freqs_x, classes_y_safe, freqs_y_safe, joint_counts, totals, len(classes_x), len(freqs_x), len(freqs_y)), + ) + + mi = totals.get()[0] + + if mi >= original_mi: + nfailed += 1 + if nfailed >= max_failed: + original_mi = 0.0 + break + confidence = 1 - nfailed / (i + 1) + else: + confidence = 0 + + return original_mi, confidence + + +# 
---------------------------------------------------------------------------------------------------------------------------- +# Factor screening +# ---------------------------------------------------------------------------------------------------------------------------- + + +def get_candidate_name(candidate_indices: list, factors_names: list) -> str: + cand_name = "-".join([factors_names[el] for el in candidate_indices]) + return cand_name + + +def should_skip_candidate( + cand_idx: int, + X: tuple, + interactions_order: int, + failed_candidates: set, + added_candidates: set, + expected_gains: np.ndarray, + selected_vars: list, + selected_interactions_vars: list, + only_unknown_interactions: bool = True, +) -> tuple: + """Decides if current candidate for predictors should be skipped + ('cause of being already accepted, failed, computed). + """ + + nexisting = 0 + + if (cand_idx in failed_candidates) or (cand_idx in added_candidates) or expected_gains[cand_idx]: + return True, nexisting + + if interactions_order > 1: # disabled for single predictors 'cause Fleuret formula won't detect pairs predictors + + # --------------------------------------------------------------------------------------------------------------- + # Check if any of sub-elements is already selected at this stage + # --------------------------------------------------------------------------------------------------------------- + + skip_cand = False + for subel in X: + if subel in selected_interactions_vars: + skip_cand = True + break + if skip_cand: + return True, nexisting + + # --------------------------------------------------------------------------------------------------------------- + # Or all selected at the lower stages + # --------------------------------------------------------------------------------------------------------------- + + skip_cand = [(subel in selected_vars) for subel in X] + nexisting = sum(skip_cand) + if (only_unknown_interactions and any(skip_cand)) or all(skip_cand): + return True, nexisting + + return False, nexisting + + +def get_fleuret_criteria_confidence_parallel( + data_copy: np.ndarray, + factors_nbins: np.ndarray, + x: tuple, + y: tuple, + selected_vars: list, + bootstrapped_gain: float, + npermutations: int, + max_failed: int, + nexisting: int, + mrmr_relevance_algo: str = "fleuret", + mrmr_redundancy_algo: str = "fleuret", + max_veteranes_interactions_order: int = 1, + extra_knowledge_multipler: float = -1.0, + sink_threshold: float = -1.0, + cached_cond_MIs: dict = None, + n_workers: int = 1, + workers_pool: object = None, + parallel_kwargs: dict = {}, + entropy_cache: dict = None, + extra_x_shuffling: bool = True, + dtype=np.int32, +) -> tuple: + + nfailed = 0 + + if workers_pool is None: + workers_pool = Parallel(n_jobs=n_workers, **parallel_kwargs) + + gc.collect() + res = workers_pool( + delayed(parallel_fleuret)( + data=data_copy, + factors_nbins=factors_nbins, + x=x, + y=y, + selected_vars=selected_vars, + npermutations=worker_npermutations, + bootstrapped_gain=bootstrapped_gain, + max_failed=max_failed, + nexisting=nexisting, + mrmr_relevance_algo=mrmr_relevance_algo, + mrmr_redundancy_algo=mrmr_redundancy_algo, + max_veteranes_interactions_order=max_veteranes_interactions_order, + extra_knowledge_multipler=extra_knowledge_multipler, + sink_threshold=sink_threshold, + cached_cond_MIs=dict(cached_cond_MIs), # cached_cond_MIs needs to be unjitted before sending to joblib. + entropy_cache=dict(entropy_cache), # entropy_cache needs to be unjitted before sending to joblib. 
+ extra_x_shuffling=extra_x_shuffling, + dtype=dtype, + ) + for worker_npermutations in distribute_permutations(npermutations=npermutations, n_workers=n_workers) + ) + + nchecked = 0 + for worker_nfailed, worker_i, entropy_cache_dict in res: + nfailed += worker_nfailed + nchecked += worker_i + for key, value in entropy_cache_dict.items(): + entropy_cache[key] = value + + if nfailed >= max_failed: + bootstrapped_gain = 0.0 + + confidence = 1 - nfailed / nchecked + + return bootstrapped_gain, confidence, entropy_cache + + +def parallel_fleuret( + data: np.ndarray, + factors_nbins: np.ndarray, + x: tuple, + y: tuple, + selected_vars: list, + npermutations: int, + bootstrapped_gain: float, + max_failed: int, + nexisting: int, + mrmr_relevance_algo: str = "fleuret", + mrmr_redundancy_algo: str = "fleuret", + max_veteranes_interactions_order: int = 1, + extra_knowledge_multipler: float = -1.0, + sink_threshold: float = -1.0, + cached_cond_MIs: dict = None, + entropy_cache: dict = None, + extra_x_shuffling: bool = True, + dtype=np.int32, +): + """Stub that gets called by joblib directly. Njits entropy_cache, allocates data copy. Calls fast njitted core sub.""" + data_copy = data.copy() + + entropy_cache_dict = numba.typed.Dict.empty( + key_type=types.unicode_type, + value_type=types.float64, + ) + python_dict_2_numba_dict(python_dict=entropy_cache, numba_dict=entropy_cache_dict) + + cached_cond_MIs_dict = numba.typed.Dict.empty( + key_type=types.unicode_type, + value_type=types.float64, + ) + python_dict_2_numba_dict(python_dict=cached_cond_MIs, numba_dict=cached_cond_MIs_dict) + + nfailed, i = get_fleuret_criteria_confidence( + data_copy=data_copy, + factors_nbins=factors_nbins, + x=x, + y=y, + selected_vars=selected_vars, + npermutations=npermutations, + bootstrapped_gain=bootstrapped_gain, + max_failed=max_failed, + nexisting=nexisting, + mrmr_relevance_algo=mrmr_relevance_algo, + mrmr_redundancy_algo=mrmr_redundancy_algo, + max_veteranes_interactions_order=max_veteranes_interactions_order, + cached_cond_MIs=cached_cond_MIs_dict, + entropy_cache=entropy_cache_dict, + extra_x_shuffling=extra_x_shuffling, + dtype=dtype, + ) + + return nfailed, i, dict(entropy_cache_dict) + + +@njit() +def get_fleuret_criteria_confidence( + data_copy: np.ndarray, + factors_nbins: np.ndarray, + x: tuple, + y: tuple, + selected_vars: list, + npermutations: int, + bootstrapped_gain: float, + max_failed: int, + nexisting: int, + mrmr_relevance_algo: str = "fleuret", + mrmr_redundancy_algo: str = "fleuret", + max_veteranes_interactions_order: int = 1, + extra_knowledge_multipler: float = -1.0, + sink_threshold: float = -1.0, + cached_cond_MIs: dict = None, + entropy_cache: dict = None, + extra_x_shuffling: bool = True, + dtype=np.int32, +) -> tuple: + """Sub to njit work with random shuffling as well.""" + + nfailed = 0 + + for i in range(npermutations): + + for idx in y: + np.random.shuffle(data_copy[:, idx]) + + if extra_x_shuffling: + for idx in x: + np.random.shuffle(data_copy[:, idx]) + + stopped_early, current_gain, k, sink_reasons = evaluate_gain( + current_gain=LARGE_CONST, + last_checked_k=-1, + X=x, + y=y, + best_gain=None, + factors_data=data_copy, + factors_nbins=factors_nbins, + selected_vars=selected_vars, + nexisting=nexisting, + direct_gain=bootstrapped_gain, + mrmr_relevance_algo=mrmr_relevance_algo, + mrmr_redundancy_algo=mrmr_redundancy_algo, + max_veteranes_interactions_order=max_veteranes_interactions_order, + extra_knowledge_multipler=extra_knowledge_multipler, + sink_threshold=sink_threshold, + 
cached_cond_MIs=cached_cond_MIs, + entropy_cache=entropy_cache, + can_use_x_cache=not extra_x_shuffling, + can_use_y_cache=False, + confidence_mode=True, + ) + + if current_gain >= bootstrapped_gain: + nfailed += 1 + if nfailed >= max_failed: + break + + return nfailed, i + 1 + + +def evaluate_candidates( + workload: list, + y: Sequence[int], + best_gain: float, + factors_data: np.ndarray, + factors_nbins: np.ndarray, + factors_names: Sequence[str], + partial_gains: dict, + selected_vars: list, + baseline_npermutations: int, + classes_y: np.ndarray = None, + freqs_y: np.ndarray = None, + freqs_y_safe: np.ndarray = None, + use_gpu: bool = True, + cached_MIs: dict = None, + cached_confident_MIs: dict = None, + cached_cond_MIs: dict = None, + entropy_cache: dict = None, + mrmr_relevance_algo: str = "fleuret", + mrmr_redundancy_algo: str = "fleuret", + max_veteranes_interactions_order: int = 1, + dtype=np.int32, + max_runtime_mins: int = None, + start_time: float = None, + min_relevance_gain: float = None, + verbose: int = 1, + ndigits: int = 5, +) -> None: + + best_gain = -LARGE_CONST + best_candidate = None + expected_gains = {} + + from pyutilz.logginglib import init_logging + + global logger + logger = init_logging(default_caller_name="scalping.py", format="%(asctime)s - %(levelname)s - %(funcName)s-line:%(lineno)d - %(message)s") + + # if verbose: logger.info("In evaluate_candidates") + + entropy_cache_dict = numba.typed.Dict.empty( + key_type=types.unicode_type, + value_type=types.float64, + ) + python_dict_2_numba_dict(python_dict=entropy_cache, numba_dict=entropy_cache_dict) + + cached_cond_MIs_dict = numba.typed.Dict.empty( + key_type=types.unicode_type, + value_type=types.float64, + ) + python_dict_2_numba_dict(python_dict=cached_cond_MIs, numba_dict=cached_cond_MIs_dict) + + classes_y_safe = classes_y.copy() + # np.random.seed(int.from_bytes(os.urandom(4), byteorder='little')) + + for cand_idx, X, nexisting in (candidates_pbar := tqdmu(workload, leave=False, desc="Thread Candidates")): + + # if verbose: logger.info(f"Evaluating cand {X}") + + current_gain, sink_reasons = evaluate_candidate( + cand_idx=cand_idx, + X=X, + y=y, + nexisting=nexisting, + best_gain=best_gain, + factors_data=factors_data, + factors_nbins=factors_nbins, + factors_names=factors_names, + classes_y=classes_y, + classes_y_safe=classes_y_safe, + freqs_y=freqs_y, + use_gpu=use_gpu, + freqs_y_safe=freqs_y_safe, + partial_gains=partial_gains, + baseline_npermutations=baseline_npermutations, + mrmr_relevance_algo=mrmr_relevance_algo, + mrmr_redundancy_algo=mrmr_redundancy_algo, + max_veteranes_interactions_order=max_veteranes_interactions_order, + expected_gains=expected_gains, + selected_vars=selected_vars, + cached_MIs=cached_MIs, + cached_confident_MIs=cached_confident_MIs, + cached_cond_MIs=cached_cond_MIs_dict, + entropy_cache=entropy_cache_dict, + verbose=verbose, + ndigits=ndigits, + dtype=dtype, + ) + + # if verbose: logger.info(f"X={X}, gain={current_gain}") + + best_gain, best_candidate, run_out_of_time = handle_best_candidate( + current_gain=current_gain, + best_gain=best_gain, + X=X, + best_candidate=best_candidate, + factors_names=factors_names, + verbose=verbose, + ndigits=ndigits, + max_runtime_mins=max_runtime_mins, + start_time=start_time, + min_relevance_gain=min_relevance_gain, + ) + + if run_out_of_time: + break + + for key, value in entropy_cache_dict.items(): + entropy_cache[key] = value + for key, value in cached_cond_MIs_dict.items(): + cached_cond_MIs[key] = value + + if verbose: + 
logger.info(f"Batch finished.") + + return best_gain, best_candidate, partial_gains, expected_gains, cached_MIs, cached_cond_MIs, entropy_cache + + +def handle_best_candidate( + current_gain: float, + best_gain: float, + X: Sequence, + best_candidate: Sequence, + factors_names: list, + verbose: int = 1, + ndigits: int = 5, + max_runtime_mins: int = None, + start_time: float = None, + min_relevance_gain: float = None, +): + # --------------------------------------------------------------------------------------------------------------- + # Save best known candidate, to be able to use early stopping + # --------------------------------------------------------------------------------------------------------------- + + run_out_of_time = False + + if current_gain > best_gain: + best_candidate = X + best_gain = current_gain + if verbose > 1: + logger.info( + f"\t{get_candidate_name(best_candidate,factors_names=factors_names)} is so far the best candidate with best_gain={best_gain:.{ndigits}f}" + ) + else: + if min_relevance_gain and verbose > 1 and current_gain > min_relevance_gain: + logger.info(f"\t\t{get_candidate_name(X,factors_names=factors_names)} current_gain={current_gain:.{ndigits}f}") + + if max_runtime_mins and not run_out_of_time: + run_out_of_time = (timer() - start_time) > max_runtime_mins * 60 + + return best_gain, best_candidate, run_out_of_time + + +@njit() +def evaluate_gain( + current_gain: float, + last_checked_k: int, + direct_gain: float, + X: Sequence[int], + y: Sequence[int], + nexisting: int, + best_gain: float, + factors_data: np.ndarray, + factors_nbins: np.ndarray, + selected_vars: list, + mrmr_relevance_algo: str = "fleuret", + mrmr_redundancy_algo: str = "fleuret", + max_veteranes_interactions_order: int = 2, + extra_knowledge_multipler: float = -1.0, + sink_threshold: float = -1.0, + entropy_cache: dict = None, + cached_cond_MIs: dict = None, + can_use_x_cache=False, + can_use_y_cache=False, + dtype=np.int32, + confidence_mode: bool = False, +) -> tuple: + + positive_mode = False + stopped_early = False + sink_reasons = None + + k = 0 + for interactions_order in range(max_veteranes_interactions_order): + combs = generate_combinations_recursive_njit(np.array(selected_vars, dtype=np.int32), interactions_order + 1)[::-1] + # if X==(425,): logger.info(f"\t combs={combs}") + + for Z in combs: + + if k > last_checked_k: + if confidence_mode and count_cand_nbins(Z, factors_nbins) > MAX_CONFIRMATION_CAND_NBINS: + additional_knowledge = 0.0 # this is needed to skip checking agains hi cardinality approved factors + else: + if mrmr_relevance_algo == "fleuret": + + # --------------------------------------------------------------------------------------------------------------- + # additional_knowledge = I (X ;Y | Z ) = H(X, Z) + H(Y, Z) - H(Z) - H(X, Y, Z) + # I (X,Z) would be entropy_x + entropy_z - entropy_xz. 
+ # --------------------------------------------------------------------------------------------------------------- + + key_found = False + if not confidence_mode: + key = arr2str(X) + "_" + arr2str(Z) + if key in cached_cond_MIs: + additional_knowledge = cached_cond_MIs[key] + # if X==(425,): logger.info(f"\t additional_knowledge from {Z} found to be {additional_knowledge}, k={k}, last_checked_k={last_checked_k}") + key_found = True + + if not key_found: + + additional_knowledge = conditional_mi( + factors_data=factors_data, + x=X, + y=y, + z=Z, + var_is_nominal=None, + factors_nbins=factors_nbins, + entropy_cache=entropy_cache, + can_use_x_cache=can_use_x_cache, + can_use_y_cache=can_use_y_cache, + dtype=dtype, + ) + + if nexisting > 0: + additional_knowledge = additional_knowledge ** (nexisting + 1) + # else: + # if len(X) > 1: + # additional_knowledge = additional_knowledge ** (len(X) + 1) + + if not confidence_mode: + cached_cond_MIs[key] = additional_knowledge + + # if X==(425,): logger.info(f"\t additional_knowledge from {Z}={additional_knowledge}, k={k}, last_checked_k={last_checked_k}") + + # --------------------------------------------------------------------------------------------------------------- + # Account for possible extra knowledge from conditioning on Z? + # that must update best_gain globally. log such cases. Note that we do not guarantee finding them in order, + # but they are too precious to ignore. Adding this will also allow to skip higher order interactions + # containing all of already approved candidates. + # --------------------------------------------------------------------------------------------------------------- + + if extra_knowledge_multipler > 0 and additional_knowledge > direct_gain * extra_knowledge_multipler: + bwarn = False + if not positive_mode: + current_gain = additional_knowledge + positive_mode = True + bwarn = True + else: + # rare chance that a candidate has many excellent relationships + if additional_knowledge > current_gain: + current_gain = additional_knowledge + bwarn = True + + # if bwarn: + # if verbose: + # if current_gain > best_gain: + # logger.info( + # f"\tCandidate {get_candidate_name(X,factors_names=factors_names)} together with factor {get_candidate_name(Z,factors_names=factors_names)} has synergetic influence {additional_knowledge:{ndigits}f} (direct MI={direct_gain:{ndigits}f})" + # ) + + if not positive_mode and (additional_knowledge < current_gain): + + current_gain = additional_knowledge + + if best_gain is not None and current_gain <= best_gain: + + # --------------------------------------------------------------------------------------------------------------- + # No point checking other Zs, 'cause current_gain already won't be better than the best_gain + # (if best_gain was estimated confidently, which we'll check at the end.) 
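+ # The scan position k is returned to the caller, which stores (current_gain, k) in partial_gains, so a later pass resumes from k+1 via last_checked_k instead of re-scanning every Z from scratch.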
+ # --------------------------------------------------------------------------------------------------------------- + + # let's also fix what Z caused X (the most) to sink + + if sink_threshold > -1 and current_gain < sink_threshold: + sink_reasons = Z + + stopped_early = True + return stopped_early, current_gain, k, sink_reasons + k += 1 + + return stopped_early, current_gain, k - 1, sink_reasons + + +def evaluate_candidate( + cand_idx: int, + X: Sequence[int], + y: Sequence[int], + nexisting: int, + best_gain: float, + factors_data: np.ndarray, + factors_nbins: np.ndarray, + factors_names: Sequence[str], + expected_gains: np.ndarray, + partial_gains: dict, + selected_vars: list, + baseline_npermutations: int, + classes_y: np.ndarray = None, + classes_y_safe: np.ndarray = None, + freqs_y: np.ndarray = None, + freqs_y_safe: np.ndarray = None, + use_gpu: bool = True, + cached_MIs: dict = None, + cached_confident_MIs: dict = None, + cached_cond_MIs: dict = None, + entropy_cache: dict = None, + mrmr_relevance_algo: str = "fleuret", + mrmr_redundancy_algo: str = "fleuret", + max_veteranes_interactions_order: int = 2, + extra_knowledge_multipler: float = -1.0, + sink_threshold: float = -1.0, + dtype=np.int32, + verbose: int = 1, + ndigits: int = 5, +) -> None: + # logger.info("In evaluate_candidate") + sink_reasons = set() + + # --------------------------------------------------------------------------------------------------------------- + # Is this candidate any good for target 1-vs-1? + # --------------------------------------------------------------------------------------------------------------- + + if X in cached_confident_MIs: # use cached_confident_MIs first here as they are more reliable. (but not fill them) + direct_gain = cached_confident_MIs[X] + else: + if X in cached_MIs: + direct_gain = cached_MIs[X] + else: + if use_gpu: + direct_gain, _ = mi_direct_gpu( + factors_data, + x=X, + y=y, + factors_nbins=factors_nbins, + classes_y=classes_y, + classes_y_safe=classes_y_safe, + freqs_y=freqs_y, + freqs_y_safe=freqs_y_safe, + min_nonzero_confidence=1.0, + npermutations=baseline_npermutations, + dtype=dtype, + ) + else: + # logger.info("Computing mi_direct") + direct_gain, _ = mi_direct( + factors_data, + x=X, + y=y, + factors_nbins=factors_nbins, + classes_y=classes_y, + classes_y_safe=classes_y_safe, + freqs_y=freqs_y, + min_nonzero_confidence=1.0, + npermutations=baseline_npermutations, + dtype=dtype, + ) + # logger.info("Computed mi_direct") + cached_MIs[X] = direct_gain + + if direct_gain > 0: + if selected_vars: + + # --------------------------------------------------------------------------------------------------------------- + # Some factors already selected. + # best gain from including X is the minimum of I (X ;Y | Z ) over every Z in already selected_vars. + # but imaging some variable is correlated to every real predictor plus has random noise. It's real value is zero. + # only computing I (X ;Y | Z ) will still leave significant impact. but if we sum I(X,Z) over Zs we'll see it shares + # all its knowledge with the rest of factors and has no value by itself. But to see that, we must already have all real factors included in S. + # otherwise, such 'connected-to-all' trash variables will dominate the scene. So how to handle them? + # Solution is to compute sum(X,Z) not only at the step of adding Z, but to repeat this procedure for all Zs once new X is added. + # Maybe some Zs will render useless by adding that new X. 
+ # --------------------------------------------------------------------------------------------------------------- + + if cand_idx in partial_gains: + current_gain, last_checked_k = partial_gains[cand_idx] + # if X==(425,): logger.info(f"\t cand_idx in partial_gains: {current_gain, last_checked_k}") + if best_gain is not None and (current_gain <= best_gain): + return current_gain, sink_reasons + else: + current_gain = LARGE_CONST + last_checked_k = -1 + + stopped_early, current_gain, k, sink_reasons = evaluate_gain( + current_gain=current_gain, + last_checked_k=last_checked_k, + direct_gain=direct_gain, + X=X, + y=y, + nexisting=nexisting, + best_gain=best_gain, + factors_data=factors_data, + factors_nbins=factors_nbins, + selected_vars=selected_vars, + mrmr_relevance_algo=mrmr_relevance_algo, + mrmr_redundancy_algo=mrmr_redundancy_algo, + max_veteranes_interactions_order=max_veteranes_interactions_order, + extra_knowledge_multipler=extra_knowledge_multipler, + sink_threshold=sink_threshold, + entropy_cache=entropy_cache, + cached_cond_MIs=cached_cond_MIs, + can_use_x_cache=True, + can_use_y_cache=True, + ) + # if X==(425,): logger.info(f"\t stopped_early, current_gain, k, sink_reasons={stopped_early, current_gain, k, sink_reasons}") + + partial_gains[cand_idx] = current_gain, k + if not stopped_early: # there was no break. current_gain computed fully. this line was (and most likely should be) commented out. + expected_gains[cand_idx] = current_gain + else: + # no factors selected yet. current_gain is just direct_gain + current_gain = direct_gain + expected_gains[cand_idx] = current_gain + else: + current_gain = 0 + + return current_gain, sink_reasons + + +def test(a): + logger.info("test") + return 0 + + +# @njit() +def screen_predictors( + # factors + factors_data: np.ndarray, + factors_nbins: Sequence[int], + factors_names: Sequence[str] = None, + factors_names_to_use: Sequence[str] = None, + factors_to_use: Sequence[int] = None, + # targets + targets_data: np.ndarray = None, + targets_nbins: Sequence[int] = None, + y: Sequence[int] = None, + # algorithm + mrmr_relevance_algo: str = "fleuret", + mrmr_redundancy_algo: str = "fleuret", + reduce_gain_on_subelement_chosen: bool = True, + # performance + extra_x_shuffling: bool = True, + dtype=np.int32, + random_seed: int = None, + use_gpu: bool = False, + n_workers: int = 1, + # confidence + min_occupancy: int = None, + min_nonzero_confidence: float = 0.99, + full_npermutations: int = 1_000, + baseline_npermutations: int = 100, + # stopping conditions + min_relevance_gain: float = 0.00001, + max_consec_unconfirmed: int = 30, + max_runtime_mins: int = None, + interactions_min_order: int = 1, + interactions_max_order: int = 1, + interactions_order_reversed: bool = False, + max_veteranes_interactions_order: int = 1, + only_unknown_interactions: bool = False, + # verbosity and formatting + verbose: int = 1, + ndigits: int = 5, + parallel_kwargs=dict(max_nbytes=MAX_JOBLIB_NBYTES), +) -> float: + """Finds best predictors for the target. + x must be n-x-m array of integers (ordinal encoded) + Parameters: + full_npermutations: when computing every MI, repeat calculations with randomly shuffled indices that many times + min_nonzero_confidence: if in random permutation tests this or higher % of cases had worse current_gain than original, current_gain value is considered valid, otherwise, it's set to zero. 
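+ For example (illustration only): with full_npermutations=1_000 and min_nonzero_confidence=0.99, up to max_failed = int(1_000 * (1 - 0.99)) = 10 shuffled permutations may reach the original gain before the candidate's gain is zeroed out.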
+ Returns: + 1) best set of non-redundant single features influencing the target + 2) subsets of size 2..interactions_max_order influencing the target. Such subsets will be candidates for predictors and OtherVarsEncoding. + 3) all 1-vs-1 influencers (not necessarily in mRMR) + Parameters: + only_unknown_interactions: True for speed, False for completeness of higher order interactions discovery. + verbose: int 1=log only important info,>1=also log additional details + mrmr_relevance_algo:str + "fleuret": max(min(I(X,Y|Z)),max(I(X,Y|Z)-I(X,Y))) Possible to use n-way interactions here. + "pld": I(X,Y) + mrmr_redundancy_algo:str + "fleuret": 0 ('cause redundancy already accounted for) + "pld_max": max(I(veterane,cand)) Possible to use n-way interactions here. + "pld_mean": mean(I(veterane,cand)) Possible to use n-way interactions here. + """ + # --------------------------------------------------------------------------------------------------------------- + # Input checks + # --------------------------------------------------------------------------------------------------------------- + + assert mrmr_relevance_algo in ("fleuret", "pld") + assert mrmr_redundancy_algo in ("fleuret", "pld_max", "pld_mean") + + assert len(factors_data) >= 10 + if targets_data is None: + targets_data = factors_data + else: + assert len(factors_data) == len(targets_data) + + if targets_nbins is None: + targets_nbins = factors_nbins + + assert targets_data.shape[1] == len(targets_nbins) + assert factors_data.shape[1] == len(factors_nbins) + + if len(factors_names) == 0: + factors_names = ["F" + str(i) for i in range(len(factors_data))] + else: + assert factors_data.shape[1] == len(factors_names) + + # warn if inputs are identical to targets + if factors_data.shape == targets_data.shape: + if np.shares_memory(factors_data, targets_data): + if factors_to_use is None and factors_names_to_use is None: + if verbose > 1: + logger.info( + "factors_data and targets_data share the same memory. factors_to_use will be determined automatically to not contain any target columns." + ) + x = set(range(factors_data.shape[1])) - set(y) + else: + if factors_to_use is not None: + x = set(factors_to_use) - set(y) + if verbose > 1: + logger.info(f"Using only {len(factors_to_use):_} predefined factors: {factors_to_use}") + else: + x = [i for i, col_name in enumerate(factors_names) if col_name in factors_names_to_use and i != y] + if verbose > 1: + logger.info(f"Using only {len(factors_names_to_use):_} predefined factors: {factors_names_to_use}") + else: + + assert not set(y).issubset(set(x)) + + # --------------------------------------------------------------------------------------------------------------- + # Inits + # --------------------------------------------------------------------------------------------------------------- + + start_time = timer() + run_out_of_time = False + + if random_seed is not None: + np.random.seed(random_seed) + cp.random.seed(random_seed) + set_numba_random_seed(random_seed) + + max_failed = int(full_npermutations * (1 - min_nonzero_confidence)) + if max_failed <= 1: + max_failed = 1 + + selected_interactions_vars = [] + selected_vars = [] # stores just indices. can't use set 'cause the order is important for efficient computing + predictors = [] # stores more details. 
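+ + # Caches shared across candidate evaluations (filled lazily): + # cached_MIs: quick direct MI estimates keyed by candidate tuple (baseline_npermutations shuffles), + # cached_confident_MIs: (MI, confidence) pairs confirmed with full_npermutations shuffles, + # cached_cond_MIs: conditional MIs I(X;Y|Z) keyed by the string arr2str(X) + "_" + arr2str(Z), + # entropy_cache: joint entropies reused by conditional_mi(); the latter two are numba typed dicts so they can be passed into njitted code.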
+ + cached_MIs = dict() + # cached_cond_MIs = dict() + cached_confident_MIs = dict() + entropy_cache = numba.typed.Dict.empty( + key_type=types.unicode_type, + value_type=types.float64, + ) + cached_cond_MIs = numba.typed.Dict.empty( + key_type=types.unicode_type, + value_type=types.float64, + ) + + data_copy = factors_data.copy() + + classes_y, freqs_y, _ = merge_vars(factors_data=factors_data, vars_indices=y, var_is_nominal=None, factors_nbins=factors_nbins, dtype=dtype) + classes_y_safe = classes_y.copy() + + if use_gpu: + classes_y_safe = cp.asarray(classes_y.astype(np.int32)) + freqs_y_safe = cp.asarray(freqs_y) + else: + freqs_y_safe = None + + if n_workers and n_workers > 1: + # global classes_y_memmap + # classes_y_memmap = mem_map_array(obj=classes_y, file_name="classes_y", mmap_mode="r") + if verbose: + logger.info("Starting parallel pool...") + workers_pool = Parallel(n_jobs=n_workers, **parallel_kwargs) + workers_pool(delayed(test)(i) for i in range(n_workers)) + else: + workers_pool = None + + subsets = range(interactions_min_order, interactions_max_order + 1) + if interactions_order_reversed: + subsets = subsets[::-1] + + if verbose: + logger.info( + f"Starting work with full_npermutations={full_npermutations:_}, min_nonzero_confidence={min_nonzero_confidence:.{ndigits}f}, max_failed={max_failed:_}" + ) + + for interactions_order in (subsets_pbar := tqdmu(subsets, desc="Interactions order", leave=False)): + + if run_out_of_time: + break + subsets_pbar.set_description(f"{interactions_order}-way interactions") + + # --------------------------------------------------------------------------------------------------------------- + # Generate candidates + # --------------------------------------------------------------------------------------------------------------- + + candidates = [tuple(el) for el in combinations(x, interactions_order)] + + # --------------------------------------------------------------------------------------------------------------- + # Subset level inits + # --------------------------------------------------------------------------------------------------------------- + + total_disproved = 0 + total_checked = 0 + partial_gains = {} + added_candidates = set() + failed_candidates = set() + nconsec_unconfirmed = 0 + + for n_confirmed_predictors in (predictors_pbar := tqdmu(range(len(candidates)), leave=False, desc="Confirmed predictors")): + # if n_confirmed_predictors>4: n_jobs=1 + if run_out_of_time: + break + + # --------------------------------------------------------------------------------------------------------------- + # Find candidate X with the highest current_gain given already selected factors + # --------------------------------------------------------------------------------------------------------------- + + best_candidate = None + best_gain = min_relevance_gain - 1 + expected_gains = np.zeros(len(candidates), dtype=np.float64) + + while True: # confirmation loop (by random permutations) + + if verbose and len(selected_vars) < MAX_ITERATIONS_TO_TRACK: + eval_start = timer() + + feasible_candidates = [] + for cand_idx, X in enumerate(candidates): + skip, nexisting = should_skip_candidate( + cand_idx=cand_idx, + X=X, + interactions_order=interactions_order, + only_unknown_interactions=only_unknown_interactions, + failed_candidates=failed_candidates, + added_candidates=added_candidates, + expected_gains=expected_gains, + selected_vars=selected_vars, + selected_interactions_vars=selected_interactions_vars, + ) + if skip: + continue + + 
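+ # candidates surviving the skip checks are queued here and evaluated either in-process or, when numerous enough, split into chunks for the joblib workers pool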
feasible_candidates.append((cand_idx, X, nexisting if reduce_gain_on_subelement_chosen else 0)) + + if n_workers and n_workers > 1 and len(feasible_candidates) > NMAX_NONPARALLEL_ITERS: + + res = workers_pool( + delayed(evaluate_candidates)( + workload=workload, + y=y, + best_gain=best_gain, + factors_data=factors_data, + factors_nbins=factors_nbins, + factors_names=factors_names, + classes_y=classes_y, + freqs_y=freqs_y, + use_gpu=use_gpu, + freqs_y_safe=freqs_y_safe, + partial_gains=partial_gains, + baseline_npermutations=baseline_npermutations, + mrmr_relevance_algo=mrmr_relevance_algo, + mrmr_redundancy_algo=mrmr_redundancy_algo, + max_veteranes_interactions_order=max_veteranes_interactions_order, + selected_vars=selected_vars, + cached_MIs=cached_MIs, + cached_confident_MIs=cached_confident_MIs, + cached_cond_MIs=dict(cached_cond_MIs), + entropy_cache=dict(entropy_cache), + max_runtime_mins=max_runtime_mins, + start_time=start_time, + min_relevance_gain=min_relevance_gain, + verbose=verbose, + ndigits=ndigits, + ) + for workload in split_list_into_chunks(feasible_candidates, max(1, len(feasible_candidates) // n_workers)) + ) + + for ( + worker_best_gain, + worker_best_candidate, + worker_partial_gains, + worker_expected_gains, + worker_cached_MIs, + worker_cached_cond_MIs, + worker_entropy_cache, + ) in res: + + if worker_best_gain > best_gain: + best_candidate = worker_best_candidate + best_gain = worker_best_gain + + # sync caches + for local_storage, global_storage in [ + (worker_expected_gains, expected_gains), + (worker_cached_MIs, cached_MIs), + (worker_cached_cond_MIs, cached_cond_MIs), + (worker_entropy_cache, entropy_cache), + ]: + for key, value in local_storage.items(): + global_storage[key] = value + + for cand_idx, (worker_current_gain, worker_z_idx) in worker_partial_gains.items(): + if cand_idx in partial_gains: + current_gain, z_idx = partial_gains[cand_idx] + else: + z_idx = -2 + if worker_z_idx > z_idx: + partial_gains[cand_idx] = (worker_current_gain, worker_z_idx) + + if max_runtime_mins and not run_out_of_time: + run_out_of_time = (timer() - start_time) > max_runtime_mins * 60 + if run_out_of_time: + logging.info(f"Time limit exhausted. 
Finalizing the search...") + break + + else: + for cand_idx, X, nexisting in (candidates_pbar := tqdmu(feasible_candidates, leave=False, desc="Candidates")): + + # tmp_idx=X[0] + # print(X,factors_nbins[tmp_idx],factors_names[tmp_idx]) + # from time import sleep + # sleep(5) + + current_gain, sink_reasons = evaluate_candidate( + cand_idx=cand_idx, + X=X, + y=y, + nexisting=nexisting, + best_gain=best_gain, + factors_data=factors_data, + factors_nbins=factors_nbins, + factors_names=factors_names, + classes_y=classes_y, + classes_y_safe=classes_y_safe, + freqs_y=freqs_y, + use_gpu=use_gpu, + freqs_y_safe=freqs_y_safe, + partial_gains=partial_gains, + baseline_npermutations=baseline_npermutations, + mrmr_relevance_algo=mrmr_relevance_algo, + mrmr_redundancy_algo=mrmr_redundancy_algo, + max_veteranes_interactions_order=max_veteranes_interactions_order, + expected_gains=expected_gains, + selected_vars=selected_vars, + cached_MIs=cached_MIs, + cached_confident_MIs=cached_confident_MIs, + cached_cond_MIs=cached_cond_MIs, + entropy_cache=entropy_cache, + verbose=verbose, + ndigits=ndigits, + ) + + best_gain, best_candidate, run_out_of_time = handle_best_candidate( + current_gain=current_gain, + best_gain=best_gain, + X=X, + best_candidate=best_candidate, + factors_names=factors_names, + max_runtime_mins=max_runtime_mins, + start_time=start_time, + min_relevance_gain=min_relevance_gain, + verbose=verbose, + ndigits=ndigits, + ) + + if run_out_of_time: + if verbose: + logging.info(f"Time limit exhausted. Finalizing the search...") + break + + if verbose and len(selected_vars) < MAX_ITERATIONS_TO_TRACK: + logger.info(f"evaluate_candidates took {timer() - eval_start:.1f} sec.") + + if best_gain < min_relevance_gain: + if verbose: + logger.info("Minimum expected gain reached or no candidates to check anymore.") + break # exit confirmation while loop + + # --------------------------------------------------------------------------------------------------------------- + # Now need to confirm best expected gain with a permutation test + # --------------------------------------------------------------------------------------------------------------- + + cand_confirmed = False + any_cand_considered = False + for n, next_best_candidate_idx in enumerate(np.argsort(expected_gains)[::-1]): + next_best_gain = expected_gains[next_best_candidate_idx] + # logger.info(f"{n}, {next_best_gain}, {min_relevance_gain}") + if next_best_gain >= min_relevance_gain: # only can consider here candidates fully checked against every Z + + X = candidates[next_best_candidate_idx] + + # --------------------------------------------------------------------------------------------------------------- + # For cands other than the top one, if best partial gain <= next_best_gain, we can proceed with confirming next_best_gain. + # else we have to recompute partial gains + # --------------------------------------------------------------------------------------------------------------- + + if n > 0: + best_partial_gain, best_key = find_best_partial_gain( + partial_gains=partial_gains, + failed_candidates=failed_candidates, + added_candidates=added_candidates, + candidates=candidates, + selected_vars=selected_vars, + ) + + if best_partial_gain > next_best_gain: + best_gain = next_best_gain + if verbose > 1: + print( + "Have no best_candidate anymore. Need to recompute partial gains. 
best_partial_gain of candidate", + get_candidate_name(candidates[best_key], factors_names=factors_names), + "was", + best_partial_gain, + ) + break # out of best candidates confirmation, to retry all cands evaluation + + any_cand_considered = True + + if full_npermutations: + + if verbose > 1: + logger.info( + f"confirming candidate {get_candidate_name(X, factors_names=factors_names)}, next_best_gain={next_best_gain:.{ndigits}f}" + ) + + # --------------------------------------------------------------------------------------------------------------- + # Compute confidence by bootstrap + # --------------------------------------------------------------------------------------------------------------- + + total_checked += 1 + if X in cached_confident_MIs: + bootstrapped_gain, confidence = cached_confident_MIs[X] + else: + if use_gpu: + bootstrapped_gain, confidence = mi_direct_gpu( + factors_data, + x=X, + y=y, + factors_nbins=factors_nbins, + classes_y=classes_y, + freqs_y=freqs_y, + freqs_y_safe=freqs_y_safe, + classes_y_safe=classes_y_safe, + min_nonzero_confidence=min_nonzero_confidence, + npermutations=full_npermutations, + ) + else: + if verbose and len(selected_vars) < MAX_ITERATIONS_TO_TRACK: + eval_start = timer() + bootstrapped_gain, confidence = mi_direct( + factors_data, + x=X, + y=y, + factors_nbins=factors_nbins, + classes_y=classes_y, + freqs_y=freqs_y, + classes_y_safe=classes_y_safe, + min_nonzero_confidence=min_nonzero_confidence, + npermutations=full_npermutations, + n_workers=n_workers, + workers_pool=workers_pool, + parallel_kwargs=parallel_kwargs, + ) + if verbose and len(selected_vars) < MAX_ITERATIONS_TO_TRACK: + logger.info(f"mi_direct bootstrapped eval took {timer() - eval_start:.1f} sec.") + cached_confident_MIs[X] = bootstrapped_gain, confidence + else: + if X in cached_confident_MIs: + bootstrapped_gain, confidence = cached_confident_MIs[X] + else: + bootstrapped_gain, confidence = next_best_gain, 1.0 + + if full_npermutations and bootstrapped_gain > 0 and selected_vars: # additional check of Fleuret criteria + + if count_cand_nbins(X, factors_nbins) <= MAX_CONFIRMATION_CAND_NBINS: + + skip_cand = [(subel in selected_vars) for subel in X] + nexisting = sum(skip_cand) + + # --------------------------------------------------------------------------------------------------------------- + # external bootstrapped recheck. is minimal MI of candidate X with Y given all current Zs THAT BIG as next_best_gain? 
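+ # get_fleuret_criteria_confidence() shuffles the target columns (and the candidate's own columns too when extra_x_shuffling is set), re-evaluates the Fleuret gain, and counts how often the shuffled gain reaches the original one; + # once nfailed hits max_failed the gain is zeroed out, and confidence is reported as 1 - nfailed / nchecked.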
+ # --------------------------------------------------------------------------------------------------------------- + + if verbose and len(selected_vars) < MAX_ITERATIONS_TO_TRACK: + eval_start = timer() + + if n_workers and n_workers > 1 and full_npermutations > NMAX_NONPARALLEL_ITERS: + bootstrapped_gain, confidence, parallel_entropy_cache = get_fleuret_criteria_confidence_parallel( + data_copy=data_copy, + factors_nbins=factors_nbins, + x=X, + y=y, + selected_vars=selected_vars, + bootstrapped_gain=next_best_gain, + npermutations=full_npermutations, + max_failed=max_failed, + nexisting=nexisting, + mrmr_relevance_algo=mrmr_relevance_algo, + mrmr_redundancy_algo=mrmr_redundancy_algo, + max_veteranes_interactions_order=max_veteranes_interactions_order, + cached_cond_MIs=cached_cond_MIs, + entropy_cache=entropy_cache, + extra_x_shuffling=extra_x_shuffling, + n_workers=n_workers, + workers_pool=workers_pool, + parallel_kwargs=parallel_kwargs, + ) + for key, value in parallel_entropy_cache.items(): + entropy_cache[key] = value + else: + nfailed, nchecked = get_fleuret_criteria_confidence( + data_copy=data_copy, + factors_nbins=factors_nbins, + x=X, + y=y, + selected_vars=selected_vars, + bootstrapped_gain=next_best_gain, + npermutations=full_npermutations, + max_failed=max_failed, + nexisting=nexisting, + mrmr_relevance_algo=mrmr_relevance_algo, + mrmr_redundancy_algo=mrmr_redundancy_algo, + max_veteranes_interactions_order=max_veteranes_interactions_order, + cached_cond_MIs=cached_cond_MIs, + entropy_cache=entropy_cache, + extra_x_shuffling=extra_x_shuffling, + ) + # logger.info(f"nfailed={nfailed}, nchecked={nchecked}") + confidence = 1 - nfailed / nchecked + if nfailed >= max_failed: + bootstrapped_gain = 0.0 + + if verbose and len(selected_vars) < MAX_ITERATIONS_TO_TRACK: + logger.info(f"get_fleuret_criteria_confidence bootstrapped eval took {timer() - eval_start:.1f} sec.") + + # --------------------------------------------------------------------------------------------------------------- + # Report this particular best candidate + # --------------------------------------------------------------------------------------------------------------- + + if bootstrapped_gain > 0: + + nconsec_unconfirmed = 0 + + # --------------------------------------------------------------------------------------------------------------- + # Bad confidence can make us consider other candidates! 
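+ # next_best_gain is scaled by the measured confidence below; if some other candidate's best partial gain then exceeds it, we break out and re-evaluate all candidates.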
+ # --------------------------------------------------------------------------------------------------------------- + + next_best_gain = next_best_gain * confidence + expected_gains[next_best_candidate_idx] = next_best_gain + + best_partial_gain, best_key = find_best_partial_gain( + partial_gains=partial_gains, + failed_candidates=failed_candidates, + added_candidates=added_candidates, + candidates=candidates, + selected_vars=selected_vars, + skip_indices=(next_best_candidate_idx,), + ) + if best_partial_gain > next_best_gain: + if verbose > 1: + logger.info( + f"\t\tCandidate's lowered confidence {confidence} requires re-checking other candidates, as now its expected gain is only {next_best_gain:.{ndigits}f}, vs {best_partial_gain:.{ndigits}f}, of {get_candidate_name(candidates[best_key], factors_names=factors_names)}" + ) + break # out of best candidates confirmation, to retry all cands evaluation + else: + cand_confirmed = True + if full_npermutations: + if verbose > 1: + logger.info(f"\t\tconfirmed with confidence {confidence:.{ndigits}f}") + break # out of best candidates confirmation, to add candidate to the list, and go to more candidates + else: + expected_gains[next_best_candidate_idx] = 0.0 + failed_candidates.add(next_best_candidate_idx) + if verbose > 1: + logger.info(f"\t\tconfirmation failed with confidence {confidence:.{ndigits}f}") + + nconsec_unconfirmed += 1 + total_disproved += 1 + if max_consec_unconfirmed and (nconsec_unconfirmed > max_consec_unconfirmed): + if verbose: + logger.info(f"Maximum consecutive confirmation failures reached.") + break # out of best candidates confirmation, to finish the level + + else: # next_best_gain = 0 + break # nothing wrong, just retry all cands evaluation + + # --------------------------------------------------------------------------------------------------------------- + # Let's act upon results of the permutation test + # --------------------------------------------------------------------------------------------------------------- + + if cand_confirmed: + added_candidates.add(next_best_candidate_idx) # so it won't be selected again + best_candidate = X + best_gain = next_best_gain + break # exit confirmation while loop + else: + if not any_cand_considered: + best_gain = min_relevance_gain - 1 + if verbose: + logger.info("No more candidates to confirm.") + break # exit confirmation while loop + else: + best_gain = min_relevance_gain - 1 + if max_consec_unconfirmed and (nconsec_unconfirmed > max_consec_unconfirmed): + break # exit confirmation while loop + else: + pass # retry all cands evaluation + + # --------------------------------------------------------------------------------------------------------------- + # Add best candidate to the list, if criteria are met, or proceed to the next interactions_order + # --------------------------------------------------------------------------------------------------------------- + + if best_gain >= (min_relevance_gain if interactions_order == 1 else min_relevance_gain ** (1 / (interactions_order + 1))): + for var in best_candidate: + if var not in selected_vars: + selected_vars.append(var) + if interactions_order > 1: + selected_interactions_vars.append(var) + cand_name = get_candidate_name(best_candidate, factors_names=factors_names) + + res = {"name": cand_name, "indices": best_candidate, "gain": best_gain} + if full_npermutations: + res["confidence"] = confidence + predictors.append(res) + + if verbose: + mes = f"Added new predictor {cand_name} to the list with expected 
gain={best_gain:.{ndigits}f}" + if full_npermutations: + mes += f" and confidence={confidence:.3f}" + logger.info(mes) + + else: + if verbose: + if total_checked > 0: + details = f" Total candidates disproved: {total_disproved:_}/{total_checked:_} ({total_disproved*100/total_checked:.2f}%)" + else: + details = "" + logger.info(f"Can't add anything valuable anymore for interactions_order={interactions_order}.{details}") + predictors_pbar.total = len(candidates) + predictors_pbar.close() + break + + # postprocess_candidates(selected_vars) + # print(caching_hits_xyz, caching_hits_z, caching_hits_xz, caching_hits_yz) + if verbose: + logger.info("Finished.") + + any_influencing = set() + for vars_combination, (bootstrapped_gain, confidence) in cached_confident_MIs.items(): + if bootstrapped_gain > 0: + any_influencing.update(set(vars_combination)) + + """Select groups/clusters of correlated factors. Instead of using just the single best one, consider averages of all the + discarded factors that have a high direct_MI with the target but an additional_knowledge close to 0 with each + "winning" factor, and check whether they can strengthen their winning factor via ensembling. Strengthening in the sense of + the mean and variability of MI with the target over bootstrapped subsamples? + + key = arr2str(X) + "_" + arr2str(Z) + if key in cached_cond_MIs: + additional_knowledge = cached_cond_MIs[key] + """ + return selected_vars, predictors, any_influencing, entropy_cache, cached_MIs, cached_confident_MIs, cached_cond_MIs, classes_y, classes_y_safe, freqs_y + + +@njit() +def count_cand_nbins(X, factors_nbins) -> int: + sum_cand_nbins = 0 + for factor in X: + sum_cand_nbins += factors_nbins[factor] + return sum_cand_nbins + + +def find_best_partial_gain( + partial_gains: dict, failed_candidates: set, added_candidates: set, candidates: list, selected_vars: list, skip_indices: tuple = () +) -> float: + best_partial_gain = -LARGE_CONST + best_key = None + for key, value in partial_gains.items(): + if (key not in failed_candidates) and (key not in added_candidates) and (key not in skip_indices): + skip_cand = False + for subel in candidates[key]: + if subel in selected_vars: + skip_cand = True # the subelement or var itself is already selected + break + if skip_cand: + continue + partial_gain, _ = value + if partial_gain > best_partial_gain: + best_partial_gain = partial_gain + best_key = key + return best_partial_gain, best_key + + +def postprocess_candidates( + selected_vars: list, + factors_data: np.ndarray, + y: np.ndarray, + factors_nbins: np.ndarray, + classes_y: np.ndarray = None, + freqs_y: np.ndarray = None, + classes_y_safe: np.ndarray = None, + min_nonzero_confidence: float = 0.99999, + npermutations: int = 10_000, + interactions_max_order: int = 1, + ensure_target_influence: bool = True, + dtype=np.int32, + verbose: bool = True, + ndigits: int = 4, +): + """Post-analysis of prescreened candidates. + + 1) repeat the standard Fleuret screening process; some vars may be removed once all other candidates are taken into account. + 2) + 3) in the final set, compute for every factor + a) MI with every remaining predictor (and 2,3 way subsets) + + """ + # --------------------------------------------------------------------------------------------------------------- + # Repeat the standard Fleuret screening process; some vars may be removed once all other candidates are taken into account.
+ # --------------------------------------------------------------------------------------------------------------- + for cand_idx, X, nexisting in (candidates_pbar := tqdmu(selected_vars, leave=False, desc="Finalizing Candidates")): + current_gain, sink_reasons = evaluate_candidate( + cand_idx=cand_idx, + X=X, + y=y, + nexisting=nexisting, + best_gain=best_gain, + factors_data=factors_data, + factors_nbins=factors_nbins, + factors_names=factors_names, + classes_y=classes_y, + classes_y_safe=classes_y_safe, + freqs_y=freqs_y, + use_gpu=use_gpu, + freqs_y_safe=freqs_y_safe, + partial_gains=partial_gains, + baseline_npermutations=baseline_npermutations, + mrmr_relevance_algo=mrmr_relevance_algo, + mrmr_redundancy_algo=mrmr_redundancy_algo, + max_veteranes_interactions_order=max_veteranes_interactions_order, + expected_gains=expected_gains, + selected_vars=selected_vars, + cached_MIs=cached_MIs, + cached_confident_MIs=cached_confident_MIs, + cached_cond_MIs=cached_cond_MIs, + entropy_cache=entropy_cache, + verbose=verbose, + ndigits=ndigits, + ) + # --------------------------------------------------------------------------------------------------------------- + # Make sure with confidence that every candidate is related to the target + # --------------------------------------------------------------------------------------------------------------- + + if ensure_target_influence: + removed = [] + for X in tqdmu(selected_vars, desc="Ensuring target influence", leave=False): + bootstrapped_mi, confidence = mi_direct( + factors_data, + x=[X], + y=y, + factors_nbins=factors_nbins, + classes_y=classes_y, + freqs_y=freqs_y, + classes_y_safe=classes_y_safe, + min_nonzero_confidence=min_nonzero_confidence, + npermutations=npermutations, + ) + if bootstrapped_mi == 0: + if verbose: + print("Factor", X, "not related to target with confidence", confidence) + removed.append(X) + selected_vars = [el for el in selected_vars if el not in removed] + + # --------------------------------------------------------------------------------------------------------------- + # Repeat Fleuret process as many times as there is candidates left. + # This time account for possible interactions (fix bug in the professor's formula). + # --------------------------------------------------------------------------------------------------------------- + + """ + Compute redundancy stats for every X + + кто связан с каким количеством других факторов, какое количество информации с ними разделяет, в % к своей собственной энтропии. + Можно даже спуститься вниз по уровню и посчитать взвешенные суммы тех же метрик для его партнёров. + Тем самым можно косвенно определить, какие фичи скорее всего просто сливные бачки, и попробовать их выбросить. В итоге мы получим: + + ценные фичи, которые ни с кем другим не связаны, кроме мусорных и таргета. они содержат уникальное знание; + потенциально мусорные X, которые связаны с множеством других, и шарят очень много общей инфы с другими факторами Z, при том, + что эти другие факторы имеют много уникального знания о таргете помимо X: sum(I(Y;Z|X))>e; + все остальные "середнячки". 
+ """ + + entropies = {} + mutualinfos = {} + + for X in tqdmu(selected_vars, desc="Marginal stats", leave=False): + _, freqs, _ = merge_vars(factors_data=factors_data, vars_indices=[X], factors_nbins=factors_nbins, var_is_nominal=None, dtype=dtype) + factor_entropy = entropy(freqs=freqs) + entropies[X] = factor_entropy + + for a, b in tqdmu(combinations(selected_vars, 2), desc="1-way interactions", leave=False): + bootstrapped_mi, confidence = mi_direct( + factors_data, + x=[a], + y=[b], + factors_nbins=factors_nbins, + classes_y=classes_y, + freqs_y=freqs_y, + classes_y_safe=classes_y_safe, + min_nonzero_confidence=min_nonzero_confidence, + npermutations=npermutations, + ) + if bootstrapped_mi > 0: + mutualinfos[(a, b)] = bootstrapped_mi + + for y in tqdmu(selected_vars, desc="2-way interactions", leave=False): + for pair in combinations(set(selected_vars) - set([y]), 2): + bootstrapped_mi, confidence = mi_direct( + factors_data, + x=pair, + y=[y], + factors_nbins=factors_nbins, + classes_y=classes_y, + freqs_y=freqs_y, + classes_y_safe=classes_y_safe, + min_nonzero_confidence=min_nonzero_confidence, + npermutations=npermutations, + ) + if bootstrapped_mi > 0: + mutualinfos[(y, pair)] = bootstrapped_mi + + return entropies, mutualinfos + + +# ---------------------------------------------------------------------------------------------------------------------------- +# Helpers +# ---------------------------------------------------------------------------------------------------------------------------- + + +def create_redundant_continuous_factor( + df: pd.DataFrame, + factors: Sequence[str], + agg_func: object = np.sum, + noise_percent: float = 5.0, + dist: object = None, + dist_args: tuple = (), + name: str = None, + sep: str = "_", +) -> None: + """In a pandas dataframe, out of a few continuous factors, craft a new factor with known relationship and amount of redundancy.""" + if dist: + rvs = getattr(dist, "rvs") + assert rvs + noise = rvs(*dist_args, size=len(df)) + else: + noise = np.random.random(len(df)) + + # now the entire range of generated noise is scaled to the noise_percent of factors interaction's range + val_min, val_max = noise.min(), noise.max() + if np.isclose(val_max, val_min): + noise = np.zeros(len(noise), dtype=np.float32) + else: + noise = (noise - val_min) / (val_max - val_min) + + if not name: + name = sep.join(factors) + sep + f"{noise_percent:.0f}%{dist.name if dist else ''}noise" + + df[name] = agg_func(df[factors].values, axis=1) * (1 + (noise - 0.5) * noise_percent / 100) + + +def categorize_1d_array(vals: np.ndarray, min_ncats: int, method: str, astropy_sample_size: int, method_kwargs: dict, dtype=np.int16, nan_filler: float = 0.0): + + ordinal_encoder = OrdinalEncoder() + + # ---------------------------------------------------------------------------------------------------------------------------- + # Booleans bust become int8 + # ---------------------------------------------------------------------------------------------------------------------------- + + if vals.dtype.name != "category" and np.issubdtype(vals.dtype, np.bool_): + vals = vals.astype(np.int8) + + # ---------------------------------------------------------------------------------------------------------------------------- + # Missings are imputed using rolling median (for ts safety) + # ---------------------------------------------------------------------------------------------------------------------------- + + if pd.isna(vals).any(): + # imputer = SimpleImputer(strategy="most_frequent", 
add_indicator=False) + # vals = imputer.fit_transform(vals.reshape(-1, 1)) + vals = pd.Series(vals) + # vals=vals.fillna(vals.rolling(window=nan_rolling_window,min_periods=nan_rolling_min_periods).apply(lambda x: mode(x)[0])).fillna(nan_filler).values + vals = vals.fillna(nan_filler).values + + vals = vals.reshape(-1, 1) + + if vals.dtype.name != "category": + nuniques = len(np.unique(vals[: min_ncats * 10])) + if nuniques <= min_ncats: + nuniques = len(np.unique(vals)) + else: + nuniques = min_ncats + + if method == "discretizer": + bins = method_kwargs.get("n_bins") + else: + bins = method_kwargs.get("bins") + + if vals.dtype.name != "category" and nuniques > min_ncats: + if method == "discretizer": + if nuniques > bins: + discretizer = KBinsDiscretizer(**method_kwargs, encode="ordinal") + new_vals = discretizer.fit_transform(vals) + else: + new_vals = ordinal_encoder.fit_transform(vals) + else: + if method == "numpy": + + bin_edges = np.histogram_bin_edges( + vals, + bins=bins, + ) + elif method == "astropy": + if bins == "blocks" and len(vals) >= astropy_sample_size: + _, bin_edges = histogram(np.random.choice(vals.ravel(), size=astropy_sample_size, replace=False), bins=bins) + elif bins == "knuth" and len(vals) >= astropy_sample_size: + _, bin_edges = histogram(np.random.choice(vals.ravel(), size=astropy_sample_size, replace=False), bins=bins) + else: + _, bin_edges = histogram(vals, bins=bins) + + if bin_edges[0] <= vals.min(): + bin_edges = bin_edges[1:] + + new_vals = ordinal_encoder.fit_transform(np.digitize(vals, bins=bin_edges, right=True)) + + else: + new_vals = ordinal_encoder.fit_transform(vals) + + return new_vals.ravel().astype(dtype) + + +def categorize_dataset_old( + df: pd.DataFrame, + method: str = "discretizer", + method_kwargs: dict = dict(strategy="quantile", n_bins=4), + min_ncats: int = 50, + astropy_sample_size: int = 10_000, + dtype=np.int16, + n_jobs: int = -1, + parallel_kwargs: dict = {}, +): + """ + Convert dataframe into ordinal-encoded one. 
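+ Numerical columns are binned one by one via categorize_1d_array (optionally in parallel through joblib when n_jobs allows it); category/object/bool columns are ordinal-encoded. + Returns (data, column_names, nbins), where nbins holds the per-column code range (max - min + 1).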
+ """ + + data = None + numerical_cols = [] + categorical_factors = [] + + numerical_cols = df.head(5).select_dtypes(exclude=("category", "object", "bool")).columns.values.tolist() + + data = [] + if n_jobs == -1 or n_jobs > 1: + fnc = delayed(categorize_1d_array) + else: + fnc = categorize_1d_array + + for col in tqdmu(numerical_cols, leave=False, desc="Binning of numericals"): + data.append( + fnc(vals=df[col].values, min_ncats=min_ncats, method=method, astropy_sample_size=astropy_sample_size, method_kwargs=method_kwargs, dtype=dtype) + ) + + if n_jobs == -1 or n_jobs > 1: + data = parallel_run(data, n_jobs=n_jobs, **parallel_kwargs) + data = np.vstack(data).T + + categorical_factors = df.select_dtypes(include=("category", "object", "bool")) + categorical_cols = [] + if categorical_factors.shape[1] > 0: + categorical_cols = categorical_factors.columns.values.tolist() + ordinal_encoder = OrdinalEncoder() + new_vals = ordinal_encoder.fit_transform(categorical_factors) + + max_cats = new_vals.max(axis=0) + exc_idx = max_cats > np.iinfo(dtype).max + n_max_cats = exc_idx.sum() + if n_max_cats: + logger.warning(f"{n_max_cats:_} factors exceeded dtype {dtype} and were truncated: {np.asarray(categorical_cols)[exc_idx]}") + new_vals = new_vals.astype(dtype) + + if data is None: + data = new_vals + else: + data = np.append(data, new_vals, axis=1) + + nbins = data.max(axis=0).astype(np.int32) + 1 - data.min(axis=0).astype(np.int32) + + return data, numerical_cols + categorical_cols, nbins + + +@njit +def digitize(arr: np.ndarray, bins: np.ndarray, dtype=np.int32) -> np.ndarray: + res = np.empty(len(arr), dtype=dtype) + for i, val in enumerate(arr): + for j, bin_edge in enumerate(bins): + if val <= bin_edge: + res[i] = j + break + return res + + +from numba import prange + + +# @njit() +def edges(arr, quantiles): + bin_edges = np.asarray(np.percentile(arr, quantiles)) + return bin_edges + + +@njit() +def quantize_dig(arr, bins): + return np.digitize(arr, bins[1:-1], right=True) + + +@njit() +def quantize_search(arr, bins): + return np.searchsorted(bins[1:-1], arr, side="right") + + +@njit() +def discretize_uniform(arr: np.ndarray, n_bins: int, min_value: float = None, max_value: float = None, dtype: object = np.int8) -> np.ndarray: + if min_value is None or max_value is None: + min_value, max_value = arrayMinMax(arr) + rev_bin_width = n_bins / (max_value - min_value + min_value / 2) + return ((arr - min_value) * rev_bin_width).astype(dtype) + + +@njit() +def discretize_array( + arr: np.ndarray, n_bins: int = 10, method: str = "quantile", min_value: float = None, max_value: float = None, dtype: object = np.int8 +) -> np.ndarray: + """Discretize cont variable into bins. + + Optimized version with mix of pure numpy and njitting. + + + %timeit quantize_search(df['a'].values,bins) #njitted + 24.6 ms ± 191 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) + time: 2 s (started: 2024-02-09 19:58:31 +03:00) + + %timeit quantize_search(df['a'].values,bins) #just numpy + 27.2 ms ± 219 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) + time: 2.2 s (started: 2024-02-09 19:52:59 +03:00) + + %timeit quantize_dig(df['a'].values, bins) #njitted + 23.7 ms ± 222 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) + time: 1.92 s (started: 2024-02-09 19:58:24 +03:00) + + %timeit quantize_dig(df['a'].values, bins) #just numpy + 31.1 ms ± 292 µs per loop (mean ± std. dev. 
of 7 runs, 10 loops each) + time: 2.52 s (started: 2024-02-09 19:53:01 +03:00) + + + """ + if method == "uniform": + return discretize_uniform(arr=arr, n_bins=n_bins, min_value=min_value, max_value=max_value, dtype=dtype) + elif method == "quantile": + bins_edges = get_binning_edges(arr=arr, n_bins=n_bins, method=method, min_value=min_value, max_value=max_value) # pure numpy + # return quantize_dig(arr,bins_edges).astype(dtype) #njitted + return quantize_search(arr, bins_edges).astype(dtype) # njitted + + +@njit(parallel=True) +def discretize_2d_array( + arr: np.ndarray, + n_bins: int = 10, + method: str = "quantile", + min_ncats: int = 50, + min_values: float = None, + max_values: float = None, + dtype: object = np.int8, +) -> np.ndarray: + """ """ + + res = np.empty_like(arr, dtype=dtype) + + for col in prange(arr.shape[1]): + res[:, col] = discretize_array( + arr=arr[:, col], + n_bins=n_bins, + method=method, + min_value=min_values[col] if min_values is not None else None, + max_value=max_values[col] if max_values is not None else None, + dtype=dtype, + ) + return res + + +@jit(nopython=False) +def get_binning_edges(arr: np.ndarray, n_bins: int = 10, method: str = "uniform", min_value: float = None, max_value: float = None): + """ + np.quantiles works faster when unjitted + + %timeit edges(df['a'].values,quantiles) #njitted + 83.9 ms ± 274 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) + time: 6.81 s (started: 2024-02-09 17:36:50 +03:00) + + %timeit edges(df['a'].values,quantiles) #just numpy + 30.9 ms ± 541 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) + time: 2.52 s (started: 2024-02-09 17:35:58 +03:00) + """ + if method == "uniform": + if min_value is None or max_value is None: + min_value, max_value = arrayMinMax(arr) + bin_edges = np.linspace(min_value, max_value, n_bins + 1) + + elif method == "quantile": + quantiles = np.linspace(0, 100, n_bins + 1) + bin_edges = np.asarray(np.percentile(arr, quantiles)) + + return bin_edges + + +def discretize_sklearn( + arr: np.ndarray, n_bins: int = 10, method: str = "uniform", min_value: float = None, max_value: float = None, dtype: object = np.int8 +) -> np.ndarray: + """Simplified vesrion taken from Sklearn's KBinsdiscretizer. + np.searchsorted runs twice faster when unjitted (as of Feb 2024 at least), so the func is not njitted. + """ + + bins_edges = get_binning_edges(arr=arr, n_bins=n_bins, method=method, min_value=min_value, max_value=max_value) + return np.searchsorted(bins_edges[1:-1], arr, side="right").astype(dtype) + + +def categorize_dataset( + df: pd.DataFrame, + method: str = "quantile", + n_bins: int = 4, + min_ncats: int = 50, + dtype=np.int16, +): + """ + Convert dataframe into ordinal-encoded one. + For cat columns uses OrdinalEncoder. + For the rest uses new discretize_2d_array. + Does not care for min_cats yet. 
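+ + Rough usage sketch (the column names here are made up): + data, cols, nbins = categorize_dataset(df[["price", "volume", "side"]], method="quantile", n_bins=4) + # data: int-encoded 2D array; cols: numerical then categorical column names; nbins: per-column bin counts as a list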
+ """ + + data = None + numerical_cols = [] + categorical_factors = [] + + numerical_cols = df.head(5).select_dtypes(exclude=("category", "object", "bool")).columns.values.tolist() + + data = discretize_2d_array(arr=df[numerical_cols].values, n_bins=n_bins, method=method, min_ncats=min_ncats, min_values=None, max_values=None, dtype=dtype) + + categorical_factors = df.select_dtypes(include=("category", "object", "bool")) + categorical_cols = [] + if categorical_factors.shape[1] > 0: + categorical_cols = categorical_factors.columns.values.tolist() + ordinal_encoder = OrdinalEncoder() + new_vals = ordinal_encoder.fit_transform(categorical_factors) + + max_cats = new_vals.max(axis=0) + exc_idx = max_cats > np.iinfo(dtype).max + n_max_cats = exc_idx.sum() + if n_max_cats: + logger.warning(f"{n_max_cats:_} factors exceeded dtype {dtype} and were truncated: {np.asarray(categorical_cols)[exc_idx]}") + new_vals = new_vals.astype(dtype) + + if data is None: + data = new_vals + else: + data = np.append(data, new_vals, axis=1) + + nbins = data.max(axis=0).astype(np.int32) + 1 # -data.min(axis=0).astype(np.int32) + + return data, numerical_cols + categorical_cols, nbins.tolist() + + +class MRMR(BaseEstimator, TransformerMixin): + """Finds subset of features having highest impact on target and least redundancy. + + Parameters + ---------- + cv : int, cross-validation generator or an iterable, default=None + + Attributes + ---------- + + + n_features_ : int + The number of selected features with cross-validation. + + n_features_in_ : int + Number of features seen during :term:`fit`. Only defined if the + underlying estimator exposes such an attribute when fit. + + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Defined only when `X` + has feature names that are all strings. + + ranking_ ?: narray of shape (n_features,) + The feature ranking, such that `ranking_[i]` + corresponds to the ranking + position of the i-th feature. + Selected (i.e., estimated best) + features are assigned rank 1. + + support_ : ndarray of shape (n_features,) + The mask of selected features. + + """ + + def __init__( + self, + # quantization + quantization_method: str = "quantile", + quantization_nbins: int = 10, + quantization_dtype: object = np.int16, + # factors + factors_names_to_use: Sequence[str] = None, + factors_to_use: Sequence[int] = None, + # algorithm + mrmr_relevance_algo: str = "fleuret", + mrmr_redundancy_algo: str = "fleuret", + reduce_gain_on_subelement_chosen: bool = True, + # performance + extra_x_shuffling: bool = True, + dtype=np.int32, + random_seed: int = None, + use_gpu: bool = False, + n_workers: int = 1, + # confidence + min_occupancy: int = None, + min_nonzero_confidence: float = 0.99, + full_npermutations: int = 1_000, + baseline_npermutations: int = 100, + # stopping conditions + min_relevance_gain: float = 0.00001, + max_consec_unconfirmed: int = 10, + max_runtime_mins: int = None, + interactions_min_order: int = 1, + interactions_max_order: int = 1, + interactions_order_reversed: bool = False, + max_veteranes_interactions_order: int = 1, + only_unknown_interactions: bool = False, + # feature engineering settings + fe_max_steps=1, + fe_npermutations=0, + fe_unary_preset="minimal", + fe_binary_preset="minimal", + fe_max_pair_features=1, + fe_min_nonzero_confidence=1.0, + fe_min_pair_mi_prevalence=1.05, # transformations of what exactly pairs of factors we consider, at all. 
mi of entire pair must be at least that higher than the mi of its individual factors. + fe_min_engineered_mi_prevalence=0.98, # mi of transformed pair must be at least that higher than the mi of the entire pair + fe_good_to_best_feature_mi_threshold=0.98, # when multiple good transformations exist for the same factors pair. + fe_max_polynoms: int = 0, + fe_print_best_mis_only: bool = True, + fe_smart_polynom_iters: int = 0, + fe_smart_polynom_optimization_steps: int = 1000, + fe_min_polynom_degree: int = 3, + fe_max_polynom_degree: int = 8, + fe_min_polynom_coeff: float = -10.0, + fe_max_polynom_coeff: float = 10.0, + # verbosity and formatting + verbose: Union[bool, int] = 0, + ndigits: int = 5, + parallel_kwargs=dict(max_nbytes=MAX_JOBLIB_NBYTES), + # CV + cv: Union[object, int, None] = 3, + cv_shuffle: bool = False, + # service + random_state: int = None, + n_jobs: int = -1, + skip_retraining_on_same_shape: bool = False, + # hidden + n_features_in_: int = 0, + feature_names_in_: Sequence = None, + support_: np.ndarray = None, + ): + + # checks + + # assert isinstance(estimator, (BaseEstimator,)) + + # save params + + params = get_parent_func_args() + store_params_in_object(obj=self, params=params) + self.signature = None + + def fit(self, X: Union[pd.DataFrame, np.ndarray], y: Union[pd.DataFrame, pd.Series, np.ndarray], groups: Union[pd.Series, np.ndarray] = None, **fit_params): + """We run N selections on data subsets, and pick only features that appear in all selections""" + + # ---------------------------------------------------------------------------------------------------------------------------- + # Compute inputs/outputs signature + # ---------------------------------------------------------------------------------------------------------------------------- + + signature = (X.shape, y.shape) + if self.skip_retraining_on_same_shape: + if signature == self.signature: + if self.verbose: + logger.info(f"Skipping retraining on the same inputs signature {signature}") + return self + + # --------------------------------------------------------------------------------------------------------------- + # Inits + # --------------------------------------------------------------------------------------------------------------- + + start_time = timer() + ran_out_of_time = False + + quantization_method = self.quantization_method + quantization_nbins = self.quantization_nbins + dtype = self.dtype + + max_runtime_mins = self.max_runtime_mins + random_state = self.random_state + parallel_kwargs = self.parallel_kwargs + n_jobs = self.n_jobs + verbose = self.verbose + cv_shuffle = self.cv_shuffle + cv = self.cv + + fe_max_steps = self.fe_max_steps + fe_npermutations = self.fe_npermutations + fe_unary_preset = self.fe_unary_preset + fe_binary_preset = self.fe_binary_preset + fe_max_pair_features = self.fe_max_pair_features + fe_min_nonzero_confidence = self.fe_min_nonzero_confidence + fe_min_pair_mi_prevalence = self.fe_min_pair_mi_prevalence + fe_min_engineered_mi_prevalence = self.fe_min_engineered_mi_prevalence + fe_good_to_best_feature_mi_threshold = self.fe_good_to_best_feature_mi_threshold + fe_max_polynoms = self.fe_max_polynoms + fe_print_best_mis_only = self.fe_print_best_mis_only + fe_smart_polynom_iters = self.fe_smart_polynom_iters + fe_smart_polynom_optimization_steps = self.fe_smart_polynom_optimization_steps + fe_min_polynom_degree = self.fe_min_polynom_degree + fe_max_polynom_degree = self.fe_max_polynom_degree + fe_min_polynom_coeff = self.fe_min_polynom_coeff + 
fe_max_polynom_coeff = self.fe_max_polynom_coeff + + # ---------------------------------------------------------------------------------------------------------------------------- + # Init cv + # ---------------------------------------------------------------------------------------------------------------------------- + + """ + if cv is None or str(cv).isnumeric(): + if cv is None: + cv = 3 + + if groups is not None: + cv = GroupKFold(n_splits=cv, shuffle=cv_shuffle, random_state=random_state) + else: + cv = KFold(n_splits=cv, shuffle=cv_shuffle, random_state=random_state) + + if verbose: + logger.info(f"Using cv={cv}") + """ + + self.feature_names_in_ = X.columns.tolist() + self.n_features_in_ = len(self.feature_names_in_) + + # --------------------------------------------------------------------------------------------------------------- + # Temporarily inject targets + # --------------------------------------------------------------------------------------------------------------- + + target_prefix = "targ_" + str(np.random.random())[3:9] + y_shape = y.shape + if len(y_shape) == 2: + y_shape = y_shape[1] + else: + y_shape = 1 + target_names = [target_prefix + str(i) for i in range(y_shape)] + + if isinstance(y, np.ndarray): + vals = y + else: + vals = y.values + + X[target_names] = vals.reshape(-1, 1) + + # --------------------------------------------------------------------------------------------------------------- + # Discretize continuous data + # --------------------------------------------------------------------------------------------------------------- + + data, cols, nbins = categorize_dataset( + df=X.ffill().bfill(), + method=self.quantization_method, + n_bins=self.quantization_nbins, + dtype=self.quantization_dtype, + ) + target_indices = [cols.index(col) for col in target_names] + + # --------------------------------------------------------------------------------------------------------------- + # Core + # --------------------------------------------------------------------------------------------------------------- + + """ + if random_state is not None: + set_random_seed(random_state) + + splitter = cv.split(X=X, y=y, groups=groups) + + subsets_selections=[] + if n_jobs==-1 or n_jobs>1: + fnc=delayed(screen_predictors) + else: + fnc=screen_predictors + + if verbose: + splitter = tqdmu(splitter, desc="CV folds", leave=False, total=cv.n_splits) + + for nfold, (train_index, test_index) in enumerate(splitter): + subsets_selections.append(fnc( + factors_data=data, + y=[target_idx], + factors_nbins=n_bins, + factors_names=cols, + interactions_max_order=1, + full_npermutations=0, + baseline_npermutations=10, + min_nonzero_confidence=1.0, + max_consec_unconfirmed=20, + min_relevance_gain=0.025, + max_runtime_mins=None, + max_veteranes_interactions_order=1, + reduce_gain_on_subelement_chosen=True, + random_seed=None, + use_gpu=False, + n_workers=n_jobs, + verbose=2, + ndigits=5, + parallel_kwargs=parallel_kwargs, + + )) + + if n_jobs==-1 or n_jobs>1: + subsets_selections = parallel_run(jobs,n_jobs=n_jobs,**parallel_kwargs) + + for selected_vars, predictors, any_influencing in subsets_selections: + pass + + """ + + """ + #service + random_state: int = None, + n_jobs:int=-1, + """ + + if fe_max_steps > 0: + unary_transformations = create_unary_transformations(preset=fe_unary_preset) + binary_transformations = create_binary_transformations(preset=fe_binary_preset) + if fe_max_polynoms: + polynomial_transformations = {} # 'identity':lambda x: x, + for _ in range(fe_max_polynoms): + 
length = np.random.randint(3, 9) + # coef=(np.random.random(length)-0.5)*1 + coef = np.empty(shape=length, dtype=np.float32) + for i in range(length): + coef[i] = np.random.normal((1.0 if i == 1 else 0.0), scale=0.05) + + unary_transformations["poly_" + str(coef)] = coef + + if verbose > 1: + print(f"nunary_transformations: {len(unary_transformations):_}") + print(f"nbinary_transformations: {len(binary_transformations):_}") + + categorical_vars = X.head().select_dtypes(include=("category", "object", "bool")).columns.values.tolist() + categorical_vars = [cols.index(col) for col in categorical_vars] + + engineered_features = set() + checked_pairs = set() + + num_fs_steps = 0 + while True: + n_recommended_features = 0 + times_spent = DefaultDict(float) + selected_vars, predictors, any_influencing, entropy_cache, cached_MIs, cached_confident_MIs, cached_cond_MIs, classes_y, classes_y_safe, freqs_y = ( + screen_predictors( + factors_data=data, + y=target_indices, + factors_nbins=nbins, + factors_names=cols, + factors_names_to_use=self.factors_names_to_use, + factors_to_use=self.factors_to_use, + # algorithm + mrmr_relevance_algo=self.mrmr_relevance_algo, + mrmr_redundancy_algo=self.mrmr_redundancy_algo, + reduce_gain_on_subelement_chosen=self.reduce_gain_on_subelement_chosen, + # performance + extra_x_shuffling=self.extra_x_shuffling, + dtype=self.dtype, + random_seed=self.random_seed, + use_gpu=self.use_gpu, + n_workers=self.n_workers, + # confidence + min_occupancy=self.min_occupancy, + min_nonzero_confidence=self.min_nonzero_confidence, + full_npermutations=self.full_npermutations, + baseline_npermutations=self.baseline_npermutations, + # stopping conditions + min_relevance_gain=self.min_relevance_gain, + max_consec_unconfirmed=self.max_consec_unconfirmed, + max_runtime_mins=self.max_runtime_mins, + interactions_min_order=self.interactions_min_order, + interactions_max_order=self.interactions_max_order, + interactions_order_reversed=self.interactions_order_reversed, + max_veteranes_interactions_order=self.max_veteranes_interactions_order, + only_unknown_interactions=self.only_unknown_interactions, + # verbosity and formatting + verbose=self.verbose, + ndigits=self.ndigits, + parallel_kwargs=self.parallel_kwargs, + ) + ) + + if fe_max_steps == 0 or num_fs_steps >= fe_max_steps: + break + + # Feature engineering part here + + numeric_vars_to_consider = set(selected_vars) - set(categorical_vars) + for raw_vars_pair in combinations(numeric_vars_to_consider, 2): + # check that every element of a pair has computed its MI with target + for var in raw_vars_pair: + if (var,) not in cached_confident_MIs and (var,) not in cached_MIs: + mi, conf = mi_direct( + data, + x=(var,), + y=target_indices, + factors_nbins=nbins, + classes_y=classes_y, + classes_y_safe=classes_y_safe, + freqs_y=freqs_y, + min_nonzero_confidence=fe_min_nonzero_confidence, + npermutations=fe_npermutations, + ) + cached_MIs[(var,)] = mi + + # ensure that pair as a whole has computed its MI with target + if raw_vars_pair not in cached_confident_MIs and raw_vars_pair not in cached_MIs: + mi, conf = mi_direct( + data, + x=raw_vars_pair, + y=target_indices, + factors_nbins=nbins, + classes_y=classes_y, + classes_y_safe=classes_y_safe, + freqs_y=freqs_y, + min_nonzero_confidence=fe_min_nonzero_confidence, + npermutations=fe_npermutations, + ) + cached_MIs[raw_vars_pair] = mi + + # --------------------------------------------------------------------------------------------------------------- + # For every pair of factors (A,B), select 
ones having MI((A,B),Y) > (MI(A,Y)+MI(B,Y)) * fe_min_pair_mi_prevalence. Such pairs must possess a more special (synergistic) connection! + # --------------------------------------------------------------------------------------------------------------- + + vars_usage_counter = DefaultDict(int) + prospective_pairs = [] + for raw_vars_pair, pair_mi in sort_dict_by_value(cached_MIs).items(): + if len(raw_vars_pair) == 2: + if raw_vars_pair in checked_pairs: + continue + if raw_vars_pair[0] in numeric_vars_to_consider and raw_vars_pair[1] in numeric_vars_to_consider: + ind_elems_mi_sum = cached_MIs[(raw_vars_pair[0],)] + cached_MIs[(raw_vars_pair[1],)] + if pair_mi > ind_elems_mi_sum * fe_min_pair_mi_prevalence: + if verbose: + logger.info( + f"Factors pair {raw_vars_pair} will be considered for Feature Engineering, {pair_mi:.4f}>{ind_elems_mi_sum:.4f}, rat={pair_mi/ind_elems_mi_sum:.2f}" + ) + prospective_pairs.append((raw_vars_pair, pair_mi)) + for var in raw_vars_pair: + vars_usage_counter[var] += 1 + + if fe_smart_polynom_iters: + + # --------------------------------------------------------------------------------------------------------------- + # We search for the best unary & binary transforms using Hermite polynomials & Optuna! + # Degrees are kept reasonably small as a form of regularization. + # In theory (especially if inputs are normalized), Hermite polynomials can approximate any functional form, thereby replacing our + # random experimenting with arbitrary functions (which was pretty limited anyway). + # --------------------------------------------------------------------------------------------------------------- + + import optuna + from optuna.samplers import TPESampler + + optuna.logging.set_verbosity(optuna.logging.WARNING) + + def get_best_polynom_mi(coef_a, coef_b, vals_a, vals_b) -> float: + + transformed_var_a = hermval(vals_a, c=coef_a) + transformed_var_b = hermval(vals_b, c=coef_b) + + best_mi = -1 + best_config = None + + for bin_func_name, bin_func in binary_transformations.items(): + + final_transformed_vals = bin_func(transformed_var_a, transformed_var_b) + + discretized_transformed_values = discretize_array( + arr=final_transformed_vals, n_bins=self.quantization_nbins, method=self.quantization_method, dtype=self.quantization_dtype + ) + fe_mi, fe_conf = mi_direct( + discretized_transformed_values.reshape(-1, 1), + x=[0], + y=None, + factors_nbins=[self.quantization_nbins], + classes_y=classes_y, + classes_y_safe=classes_y_safe, + freqs_y=freqs_y, + min_nonzero_confidence=fe_min_nonzero_confidence, + npermutations=fe_npermutations, + ) + + if fe_mi > best_mi: + best_mi = fe_mi + best_config = bin_func_name + + return best_mi + + for raw_vars_pair, pair_mi in prospective_pairs: + vals_a = X.iloc[:, raw_vars_pair[0]].values + vals_b = X.iloc[:, raw_vars_pair[1]].values + + for _ in range(fe_smart_polynom_iters): + + length_a = np.random.randint(fe_min_polynom_degree, fe_max_polynom_degree) + length_b = np.random.randint(fe_min_polynom_degree, fe_max_polynom_degree) + + # Define an objective function to be maximized. 
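+ # Note: hermval(x, c) evaluates the Hermite series c[0]*H_0(x) + c[1]*H_1(x) + ... (assuming the hermval imported
+ # elsewhere in this module is numpy.polynomial.hermite.hermval), so the study below searches over Hermite
+ # coefficients for each factor of the pair, and scores each candidate by the best binary transform of the two
+ # expansions, i.e. the one with the highest MI against the target.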
+ def objective(trial): + + coef_a = np.empty(shape=length_a, dtype=np.float32) + for i in range(length_a): + coef_a[i] = trial.suggest_float(f"a_{i}", fe_min_polynom_coeff, fe_max_polynom_coeff) + + coef_b = np.empty(shape=length_b, dtype=np.float32) + for i in range(length_b): + coef_b[i] = trial.suggest_float(f"b_{i}", fe_min_polynom_coeff, fe_max_polynom_coeff) + + res = get_best_polynom_mi(coef_a=coef_a, coef_b=coef_b, vals_a=vals_a, vals_b=vals_b) + + return res + + study = optuna.create_study(direction="maximize", sampler=TPESampler(multivariate=True)) # Create a new study. + study.optimize(objective, n_trials=fe_smart_polynom_optimization_steps) # Invoke optimization of the objective function. + + print(f"Best MI: {study.best_trial.value:.4f}, pair_mi={pair_mi:.4f}") + print(f"Best hyperparameters: {study.best_params}") + else: + + # --------------------------------------------------------------------------------------------------------------- + # Starting from the most heavily connected pairs, create a big pool of original features+their unary transforms. + # --------------------------------------------------------------------------------------------------------------- + + # individual vars referenced more than once go to the global pool, rest to the local (not stored)? + + transformed_vars = np.empty(shape=(len(X), len(prospective_pairs) * len(unary_transformations) * 2), dtype=np.float32) + vars_transformations = {} + i = 0 + for raw_vars_pair, pair_mi in prospective_pairs: + for var in raw_vars_pair: + vals = X.iloc[:, var].values + for tr_name, tr_func in unary_transformations.items(): + key = (var, tr_name) + if key not in vars_transformations: + if "poly_" in tr_name: + transformed_vars[:, i] = hermval(vals, c=tr_func) + else: + transformed_vars[:, i] = tr_func(vals) + vars_transformations[key] = i + i += 1 + # --------------------------------------------------------------------------------------------------------------- + # Then, for every pair from the pool, try all known functions of 2 variables (not storing results in persistent RAM). + # Record best pairs. + # --------------------------------------------------------------------------------------------------------------- + + for raw_vars_pair, pair_mi in prospective_pairs: # !TODO! better to start considering form the most prospective pairs with highest mis ratio! + combs = list( + combinations( + [(raw_vars_pair[0], key) for key in unary_transformations.keys()] + + [(raw_vars_pair[1], key) for key in unary_transformations.keys()], + 2, + ) + ) + combs = [ + transformations_pair for transformations_pair in combs if transformations_pair[0][0] != transformations_pair[1][0] + ] # let's skip trying to transform the same factor for now + # print(f"trying {len(combs):_} combs") + + best_config, best_mi = None, -1 + var_pairs_perf = {} + + final_transformed_vals = np.empty( + shape=(len(X), len(combs) * len(binary_transformations)), dtype=np.float32 + ) # !TODO! optimize allocation of this array before the main loop! 
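+ # Note: this buffer holds one float32 column per (pair of unary transforms, binary function) candidate for the
+ # current raw_vars_pair, i.e. len(X) rows by len(combs) * len(binary_transformations) columns; it stays small with
+ # the "minimal" presets but grows quickly with the "maximal" ones.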
+ + i = 0 + for transformations_pair in tqdmu(combs, desc=f"transforming {raw_vars_pair}", leave=False): + + param_a = transformed_vars[:, vars_transformations[transformations_pair[0]]] + param_b = transformed_vars[:, vars_transformations[transformations_pair[1]]] + + for bin_func_name, bin_func in binary_transformations.items(): + + start = timer() + final_transformed_vals[:, i] = bin_func(param_a, param_b) + times_spent[bin_func_name] += timer() - start + + discretized_transformed_values = discretize_array( + arr=final_transformed_vals[:, i], n_bins=self.quantization_nbins, method=self.quantization_method, dtype=self.quantization_dtype + ) + fe_mi, fe_conf = mi_direct( + discretized_transformed_values.reshape(-1, 1), + x=[0], + y=None, + factors_nbins=[self.quantization_nbins], + classes_y=classes_y, + classes_y_safe=classes_y_safe, + freqs_y=freqs_y, + min_nonzero_confidence=fe_min_nonzero_confidence, + npermutations=fe_npermutations, + ) + + config = (transformations_pair, bin_func_name, i) + var_pairs_perf[config] = fe_mi + + if fe_mi > best_mi: + best_mi = fe_mi + best_config = config + if fe_mi > best_mi * 0.85: + if not fe_print_best_mis_only or (fe_mi == best_mi): + if verbose > 1: + print(f"MI of transformed pair {bin_func_name}({transformations_pair})={fe_mi:.4f}, MI of the plain pair {pair_mi:.4f}") + i += 1 + if verbose > 1: + print(f"For pair {raw_vars_pair}, best config is {best_config} with best mi= {best_mi}") + + if best_mi / pair_mi > fe_min_engineered_mi_prevalence * (1.0 if num_fs_steps < 1 else 1.025): + + # --------------------------------------------------------------------------------------------------------------- + # Now, if there is a group of leaders with almost the same performance, we need to approve them through some of the other variables. + # (if such groups of roughly equally strong leaders arise, they have to be resolved with the help of one of the other influencing factors) + # --------------------------------------------------------------------------------------------------------------- + + leading_features = [] + for next_config, next_mi in sort_dict_by_value(var_pairs_perf).items(): + if next_mi > best_mi * fe_good_to_best_feature_mi_threshold: + leading_features.append(next_config) + + if len(leading_features) > 1: + if len(numeric_vars_to_consider) > 2: + + new_vals = np.empty(shape=(len(X), fe_max_pair_features), dtype=self.quantization_dtype) + new_nbins = [] + new_cols = [] + + if verbose > 1: + print(f"Taking {len(leading_features)} new features for a separate validation step!") + + # --------------------------------------------------------------------------------------------------------------- + # Now let's test all of the candidates as is against the rest of the approved factors (also as is). + # Candidates that stand out significantly (in terms of MI with the target) together with any of the other approved factors are kept. 
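+ # (For example, with fe_good_to_best_feature_mi_threshold=0.98, any candidate whose MI is within ~2% of the best
+ # candidate's MI enters this tie-breaking validation round.)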
+ # --------------------------------------------------------------------------------------------------------------- + + valid_pairs_perf = {} + + for transformations_pair, bin_func_name, i in leading_features: + param_a = final_transformed_vals[:, i] + + best_valid_mi = -1 + config = (transformations_pair, bin_func_name, i) + + for external_factor in tqdmu( + set(numeric_vars_to_consider) - set(raw_vars_pair), desc="external validation factor", leave=False + ): + param_b = X.iloc[:, external_factor].values + + for valid_bin_func_name, valid_bin_func in binary_transformations.items(): + + valid_vals = valid_bin_func(param_a, param_b) + + discretized_transformed_values = discretize_array( + arr=valid_vals, n_bins=self.quantization_nbins, method=self.quantization_method, dtype=self.quantization_dtype + ) + fe_mi, fe_conf = mi_direct( + discretized_transformed_values.reshape(-1, 1), + x=[0], + y=None, + factors_nbins=[self.quantization_nbins], + classes_y=classes_y, + classes_y_safe=classes_y_safe, + freqs_y=freqs_y, + min_nonzero_confidence=fe_min_nonzero_confidence, + npermutations=fe_npermutations, + ) + + if fe_mi > best_valid_mi: + best_valid_mi = fe_mi + if verbose > 1: + print( + f"MI of transformed pair {valid_bin_func_name}({(transformations_pair,bin_func_name)} with ext factor {external_factor})={fe_mi:.4f}" + ) + + valid_pairs_perf[config] = best_valid_mi + + # --------------------------------------------------------------------------------------------------------------- + # Now we recommend proceeding with top N best transformations! + # --------------------------------------------------------------------------------------------------------------- + + for j, (config, valid_mi) in enumerate(sort_dict_by_value(valid_pairs_perf, reverse=True).items()): + if j < fe_max_pair_features: + new_feature_name = get_new_feature_name(fe_tuple=config, cols_names=cols) + if verbose: + logger.info( + f"{new_feature_name} is recommended to use as a new feature! (won in validation with other factors) best_mi={best_mi:.4f}, pair_mi={pair_mi:.4f}, rat={best_mi/pair_mi:.4f}" + ) + transformations_pair, bin_func_name, i = config + + new_vals[:, j] = discretize_array( + arr=final_transformed_vals[:, i], + n_bins=self.quantization_nbins, + method=self.quantization_method, + dtype=self.quantization_dtype, + ) + new_nbins += [self.quantization_nbins] + new_cols += [new_feature_name] + X[new_feature_name] = final_transformed_vals[:, i] + engineered_features.add((transformations_pair, bin_func_name)) + + else: + break + new_vals = new_vals[:, : min(fe_max_pair_features, j)] + else: + if verbose: + logger.warning( + f"{len(leading_features)} are recommended to use as new features! (can't narrow down the list by validation with other factors) best_mi={best_mi:.4f}, pair_mi={pair_mi:.4f}, rat={best_mi/pair_mi:.4f}" + ) + + for j, (transformations_pair, bin_func_name, i) in enumerate(leading_features): + new_vals[:, j] = discretize_array( + arr=final_transformed_vals[:, i], + n_bins=self.quantization_nbins, + method=self.quantization_method, + dtype=self.quantization_dtype, + ) + new_nbins += [self.quantization_nbins] + new_cols += [new_feature_name] + X[new_feature_name] = final_transformed_vals[:, i] + engineered_features.add((transformations_pair, bin_func_name)) + else: + new_feature_name = get_new_feature_name(fe_tuple=best_config, cols_names=cols) + if verbose: + logger.info( + f"{new_feature_name} is recommended to use as a new feature! 
(clear winner) best_mi={best_mi:.4f}, pair_mi={pair_mi:.4f}, rat={best_mi/pair_mi:.4f}" + ) + + transformations_pair, bin_func_name, i = best_config + + new_vals = discretize_array( + arr=final_transformed_vals[:, i], n_bins=self.quantization_nbins, method=self.quantization_method, dtype=self.quantization_dtype + ).reshape(-1, 1) + new_nbins = [self.quantization_nbins] + new_cols = [new_feature_name] + X[new_feature_name] = final_transformed_vals[:, i] + engineered_features.add((transformations_pair, bin_func_name)) + + data = np.append(data, new_vals, axis=1) + nbins = nbins + new_nbins + cols = cols + new_cols + + n_recommended_features += len(new_cols) + + # !TODO! handle factors_to_use etc + """ + factors_names_to_use=self.factors_names_to_use, + factors_to_use=self.factors_to_use, + """ + checked_pairs.add(raw_vars_pair) + + if n_recommended_features == 0: + break + num_fs_steps += 1 + if verbose > 1: + print("time spent by binary func:", sort_dict_by_value(times_spent)) + # Possibly decide on eliminating original features? (if constructed ones cover 90%+ of MI) + + # --------------------------------------------------------------------------------------------------------------- + # Drop Temporarily targets + # --------------------------------------------------------------------------------------------------------------- + + X.drop(columns=target_names, inplace=True) + + # --------------------------------------------------------------------------------------------------------------- + # assign support + # --------------------------------------------------------------------------------------------------------------- + + self.support_ = selected_vars + if selected_vars: + self.n_features_ = len(selected_vars) + else: + self.n_features_ = 0 + + # --------------------------------------------------------------------------------------------------------------- + # assign extra vars for upcoming vars improving + # --------------------------------------------------------------------------------------------------------------- + + # self.cached_MIs_ = cached_MIs + # self.cached_cond_MIs_ = cached_cond_MIs + # self.cached_confident_MIs_ = cached_confident_MIs + + # --------------------------------------------------------------------------------------------------------------- + # Report FS results + # --------------------------------------------------------------------------------------------------------------- + + if verbose: + logger.info(f"MRMR+ selected {self.n_features_:_} out of {self.n_features_in_:_} features: {predictors}") + + self.signature = signature + return self + + def transform(self, X, y=None): + if isinstance(X, pd.DataFrame): + return X.iloc[:, self.support_] + else: + return X[:, self.support_] + + +def get_existing_feature_name(fe_tuple: tuple, cols_names: Sequence) -> str: + fname = cols_names[fe_tuple[0]] + if fe_tuple[1] == "identity": + return fname + else: + return f"{fe_tuple[1]}({fname})" + + +def get_new_feature_name(fe_tuple: tuple, cols_names: Sequence) -> str: + return f"{fe_tuple[1]}({get_existing_feature_name(fe_tuple=fe_tuple[0][0],cols_names=cols_names)},{get_existing_feature_name(fe_tuple=fe_tuple[0][1],cols_names=cols_names)})" # (((2, 'log'), (3, 'sin')), 'mul', 1016) + + +def njit_functions_dict(dict, exceptions: Sequence = ("grad1", "grad2", "sinc", "logn", "greater", "less", "equal")): + """Tries replacing funcs in the dict with their njitted equivqlents, caring for exceptions.""" + for key, func in dict.items(): + if key not in exceptions: + try: + 
dict[key] = njit(func) + except Exception as e: + pass + + +def create_unary_transformations(preset: str = "minimal"): + unary_constraints = { + # reverse trigonometric + "arccos": "-1to1", + "arcsin": "-1to1", + "arctan": "-pi/2topi/2", + # reverse hyperbolic + "arccosh": "1toinf", + "arctanh": "-0.(9)to0.(9)", + # powers + "sqrt": "pos", + "log": "pos", + "reciproc": "nonzero", + "invsquared": "nonzero", + "invqubed": "nonzero", + "invcbrt": "nonzero", + "invsqrt": "nonzero", + } + + unary_transformations = { + # simplest + "identity": lambda x: x, + } + if preset != "minimal": + unary_transformations.update( + { + "sign": np.sign, + "neg": np.negative, + "abs": np.abs, + # outliers removal + # Rounding + "rint": np.rint, + # np.modf Return the fractional and integral parts of an array, element-wise. + # clip + # powers + "squared": lambda x: np.power(x, 2), + "qubed": lambda x: np.power(x, 3), + "reciproc": lambda x: np.power(x, -1), + "invsquared": lambda x: np.power(x, -2), + "invqubed": lambda x: np.power(x, -3), + "cbrt": np.cbrt, + "sqrt": np.sqrt, + "invcbrt": lambda x: np.power(x, -1 / 3), + "invsqrt": lambda x: np.power(x, -1 / 2), + # logarithms + "log": np.log, + "exp": np.exp, + # trigonometric + "sin": np.sin, + } + ) + + if preset == "maximal": + unary_transformations.update( + { + # math an + "grad1": np.gradient, + "grad2": lambda x: np.gradient(x, edge_order=2), + # trigonometric + "sinc": np.sinc, + "cos": np.cos, + "tan": np.tan, + # reverse trigonometric + "arcsin": np.arcsin, + "arccos": np.arccos, + "arctan": np.arctan, + # hyperbolic + "sinh": np.sinh, + "cosh": np.cosh, + "tanh": np.tanh, + # reverse hyperbolic + "arcsinh": np.arcsinh, + "arccosh": np.arccosh, + "arctanh": np.arctanh, + # special + #'psi':sp.psi, polygamma(0,x) is same as psi + "erf": sp.erf, + "dawsn": sp.dawsn, + "gammaln": sp.gammaln, + #'spherical_jn':sp.spherical_jn + } + ) + + njit_functions_dict(unary_transformations) + + if preset == "maximal": + for order in range(3): + unary_transformations["polygamma_" + str(order)] = lambda x: sp.polygamma(order, x) + unary_transformations["struve" + str(order)] = lambda x: sp.struve(order, x) + unary_transformations["jv" + str(order)] = lambda x: sp.jv(order, x) + + """j0 + faster version of this function for order 0. + + j1 + faster version of this function for order 1. + """ + + return unary_transformations + + +def create_binary_transformations(preset: str = "minimal"): + + binary_transformations = { + # Basic + "mul": np.multiply, + "add": np.add, + # Extrema + "max": np.maximum, + "min": np.minimum, + } + + if preset == "maximal": + binary_transformations.update( + { + # All kinds of averages + "hypot": np.hypot, + "logaddexp": np.logaddexp, + "agm": sp.agm, # Compute the arithmetic-geometric mean of a and b. + # Rational routines + #'lcm':np.lcm, # requires int arguments # ufunc 'lcm' did not contain a loop with signature matching types (, ) -> None + #'gcd':np.gcd, # requires int arguments + #'mod':np.remainder, # requires int arguments + # Powers + "pow": np.power, # non-symmetrical! may required dtype=complex for arbitrary numbers + # Logarithms + "logn": lambda x, y: np.emath.logn(x - np.min(x) + 0.1, y - np.min(y) + 0.1), # non-symmetrical! + # DSP + # 'convolve':np.convolve, # symmetrical wrt args. scipy.signal.fftconvolve should be faster? SLOW? + # Linalg + "heaviside": np.heaviside, # non-symmetrical! 
+ #'cross':np.cross, # symmetrical # incompatible dimensions for cross product (dimension must be 2 or 3) + # Comparison + "greater": lambda x, y: np.greater(x, y).astype(int), + "less": lambda x, y: np.less(x, y).astype(int), + "equal": lambda x, y: np.equal(x, y).astype(int), + # special + "beta": sp.beta, # symmetrical + "binom": sp.binom, # non-symmetrical! binomial coefficient considered as a function of two real variables. + } + ) + + njit_functions_dict(binary_transformations) + + return binary_transformations diff --git a/mlframe/feature_selection/wrappers.py b/mlframe/feature_selection/wrappers.py new file mode 100644 index 0000000..f9a1b8b --- /dev/null +++ b/mlframe/feature_selection/wrappers.py @@ -0,0 +1,872 @@ +"""Feature selection within ML pipelines. Wrappers methods. Currently includes recursive feature elimination.""" + +# ---------------------------------------------------------------------------------------------------------------------------- +# LOGGING +# ---------------------------------------------------------------------------------------------------------------------------- + +import logging + +logger = logging.getLogger(__name__) + + +while True: + try: + + # ---------------------------------------------------------------------------------------------------------------------------- + # Normal Imports + # ---------------------------------------------------------------------------------------------------------------------------- + + from typing import * + + import pandas as pd, numpy as np + + from pyutilz.system import tqdmu + from pyutilz.numbalib import set_numba_random_seed + from pyutilz.pythonlib import store_params_in_object, get_parent_func_args + + from sklearn.pipeline import Pipeline + + from mlframe.config import * + from mlframe.utils import set_random_seed + from mlframe.baselines import get_best_dummy_score + from mlframe.helpers import has_early_stopping_support + from mlframe.preprocessing import pack_val_set_into_fit_params + from mlframe.metrics import sklearn_integral_calibration_error + from mlframe.optimization import * + + from sklearn.dummy import DummyClassifier, DummyRegressor + from sklearn.metrics import make_scorer, mean_squared_error + from sklearn.base import is_classifier, is_regressor, BaseEstimator, TransformerMixin + from sklearn.model_selection import StratifiedGroupKFold, StratifiedKFold, StratifiedShuffleSplit, GroupKFold, GroupShuffleSplit, KFold + + from enum import Enum, auto + from timeit import default_timer as timer + + import matplotlib.pyplot as plt + + import random + import copy + + except ModuleNotFoundError as e: + + logger.warning(e) + + if "cannot import name" in str(e): + raise (e) + + # ---------------------------------------------------------------------------------------------------------------------------- + # Packages auto-install + # ---------------------------------------------------------------------------------------------------------------------------- + + from pyutilz.pythonlib import ensure_installed + + ensure_installed("numpy pandas scikit-learn") # cupy + + else: + break + +# ---------------------------------------------------------------------------------------------------------------------------- +# Inits +# ---------------------------------------------------------------------------------------------------------------------------- + + +class OptimumSearch(str, Enum): + ScipyLocal = "ScipyLocal" # Brent + ScipyGlobal = "ScipyGlobal" # direct, diff evol, shgo + ModelBasedHeuristic = 
"ModelBasedHeuristic" # GaussianProcess or Catboost with uncertainty, or quantile regression + ExhaustiveRandom = "ExhaustiveRandom" + ExhaustiveDichotomic = "ExhaustiveDichotomic" + + +class VotesAggregation(str, Enum): + Minimax = "Minimax" + OG = "OG" + Borda = "Borda" + Plurality = "Plurality" + Dowdall = "Dowdall" + Copeland = "Copeland" + AM = "AM" + GM = "GM" + + +class RFECV(BaseEstimator, TransformerMixin): + """Finds subset of features having best CV score, by iterative narrowing down set of top_n candidates having highest importance, as per estimator's FI scores. + + Optimizes mean CV scores (possibly accounting for variation, possibly translated into ranks) divided by the features number. + + Uses several optimization methods: + exhaustive search + random search + model-based heuristic search. + + Problems: + Impactful, but correlated factors all get low importance and will be thrown away (probably only for forests, not boostings?). + confirmed for boostings also! adding more predictors to original features worsens scores, whereas in theory it at least should not be worse! + + Due to noise some random features can become "important". + + Solution: + use CV to calculate fold FI, then combine across folds (by voting). + When estimating featureset quality at another TopN, use different splits & combine new FIs with all known before, to mitigate noise even more. + + Optionally plots (and saves) the optimization path - checked nfeatures and corresponding scores. + If surrogate models are used, also shows predicted scores along with confidence bounds. + + Challenges: + CV performance itself can be a multi-component value! Say, both ROC AUC and CALIB metrics can be considered. Voting can be a solution. + Estimator might itself be a HPT search instance. Or a pipeline. + It could be good to have several estimators. Their importance evaluations must be accounted for simultaneously (voting). + Estimator might need eval_set or similar (eval_frac). + Different folds invocations could benefit from generating all possible hyper parameters. Even if FS does not care, collected info could be used further at the HPT step. + + Parameters + ---------- + cv : int, cross-validation generator or an iterable, default=None + + Attributes + ---------- + + estimator_ : ``Estimator`` instance + The fitted estimator used to select features. + + cv_results_ : dict of ndarrays + A dict with keys: + + split(k)_test_score : ndarray of shape (n_subsets_of_features,) + The cross-validation scores across (k)th fold. + + mean_test_score : ndarray of shape (n_subsets_of_features,) + Mean of scores over the folds. + + std_test_score : ndarray of shape (n_subsets_of_features,) + Standard deviation of scores over the folds. + + + n_features_ : int + The number of selected features with cross-validation. + + n_features_in_ : int + Number of features seen during :term:`fit`. Only defined if the + underlying estimator exposes such an attribute when fit. + + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Defined only when `X` + has feature names that are all strings. + + ranking_ ?: narray of shape (n_features,) + The feature ranking, such that `ranking_[i]` + corresponds to the ranking + position of the i-th feature. + Selected (i.e., estimated best) + features are assigned rank 1. + + support_ : ndarray of shape (n_features,) + The mask of selected features. 
+ + """ + + def __init__( + self, + estimator: BaseEstimator, + fit_params: dict = {}, + max_nfeatures: int = None, + mean_perf_weight: float = 1.0, + std_perf_weight: float = 0.1, + feature_cost: float = 0.00 / 100, + smooth_perf: int = 3, + # stopping conditions + max_runtime_mins: float = None, + max_refits: int = None, + best_desired_score: float = None, + max_noimproving_iters: int = 30, + # CV + cv: Union[object, int, None] = 3, + cv_shuffle: bool = False, + # Other + early_stopping_val_nsplits: Union[int, None] = 4, + early_stopping_rounds: Union[int, None] = None, + scoring: Union[object, None] = None, + top_predictors_search_method: OptimumSearch = OptimumSearch.ModelBasedHeuristic, + votes_aggregation_method: VotesAggregation = VotesAggregation.Borda, + use_all_fi_runs: bool = True, + use_last_fi_run_only: bool = False, + use_one_freshest_fi_run: bool = False, + use_fi_ranking: bool = False, + importance_getter: Union[str, Callable, None] = None, + random_state: int = None, + leave_progressbars: bool = True, + verbose: Union[bool, int] = 0, + show_plot: bool = False, + cat_features: Union[Sequence, None] = None, + keep_estimators: bool = False, + estimators_save_path: str = None, # fitted estimators get saved into join(estimators_save_path,estimator_type_name,nestimator_nfeatures_nfold.dump) + # Required features and achieved ml metrics get saved in a dict join(estimators_save_path,required_features.dump). + frac: float = None, + skip_retraining_on_same_shape: bool = False, + ): + + # checks + if frac is not None: + assert frac > 0.0 and frac < 1.0 + + # assert isinstance(estimator, (BaseEstimator,)) + + # save params + + params = get_parent_func_args() + store_params_in_object(obj=self, params=params) + self.signature = None + + def fit(self, X: Union[pd.DataFrame, np.ndarray], y: Union[pd.DataFrame, pd.Series, np.ndarray], groups: Union[pd.Series, np.ndarray] = None, **fit_params): + + # ---------------------------------------------------------------------------------------------------------------------------- + # Compute inputs/outputs signature + # ---------------------------------------------------------------------------------------------------------------------------- + + signature = (X.shape, y.shape) + if self.skip_retraining_on_same_shape: + if signature == self.signature: + if self.verbose: + logger.info(f"Skipping retraining on the same inputs signature {signature}") + return self + + # --------------------------------------------------------------------------------------------------------------- + # Inits + # --------------------------------------------------------------------------------------------------------------- + + estimator = self.estimator + fit_params = copy.copy(self.fit_params) + max_runtime_mins = self.max_runtime_mins + max_refits = self.max_refits + cv = self.cv + cv_shuffle = self.cv_shuffle + early_stopping_val_nsplits = self.early_stopping_val_nsplits + early_stopping_rounds = self.early_stopping_rounds + scoring = self.scoring + top_predictors_search_method = self.top_predictors_search_method + votes_aggregation_method = self.votes_aggregation_method + use_all_fi_runs = self.use_all_fi_runs + use_last_fi_run_only = self.use_last_fi_run_only + use_one_freshest_fi_run = self.use_one_freshest_fi_run + use_fi_ranking = self.use_fi_ranking + importance_getter = self.importance_getter + random_state = self.random_state + leave_progressbars = self.leave_progressbars + verbose = self.verbose + show_plot = self.show_plot + cat_features = self.cat_features 
+ keep_estimators = self.keep_estimators + feature_cost = self.feature_cost + smooth_perf = self.smooth_perf + frac = self.frac + best_desired_score = self.best_desired_score + max_noimproving_iters = self.max_noimproving_iters + + start_time = timer() + ran_out_of_time = False + + if random_state is not None: + set_random_seed(random_state) + + feature_importances = {} + evaluated_scores_std = {} + evaluated_scores_mean = {} + + if isinstance(X, pd.DataFrame): + original_features = X.columns.tolist() + else: + original_features = np.arange(X.shape[1]) + + # ---------------------------------------------------------------------------------------------------------------------------- + # Init cv + # ---------------------------------------------------------------------------------------------------------------------------- + + if cv is None or str(cv).isnumeric(): + if cv is None: + cv = 3 + if is_classifier(estimator): + if groups is not None: + cv = StratifiedGroupKFold(n_splits=cv, shuffle=cv_shuffle, random_state=random_state) + else: + cv = StratifiedKFold(n_splits=cv, shuffle=cv_shuffle, random_state=random_state) + else: + if groups is not None: + cv = GroupKFold(n_splits=cv, shuffle=cv_shuffle, random_state=random_state) + else: + cv = KFold(n_splits=cv, shuffle=cv_shuffle, random_state=random_state) + if verbose: + logger.info(f"Using cv={cv}") + + if early_stopping_val_nsplits: + val_cv = copy.copy(cv) + val_cv.n_splits = early_stopping_val_nsplits + if not early_stopping_rounds: + early_stopping_rounds = 20 # TODO: derive as 1/5 of nestimators' + else: + val_cv = None + + if verbose: + iters_pbar = tqdmu( + desc="RFECV iterations", leave=leave_progressbars, total=min(len(original_features), max_refits) if max_refits else len(original_features) + ) + + # ---------------------------------------------------------------------------------------------------------------------------- + # Init scoring + # ---------------------------------------------------------------------------------------------------------------------------- + + if scoring is None: + if is_classifier(estimator): + scoring = make_scorer(score_func=sklearn_integral_calibration_error, needs_proba=True, needs_threshold=False, greater_is_better=False) + elif is_regressor(estimator): + scoring = make_scorer(score_func=mean_squared_error, needs_proba=False, needs_threshold=False, greater_is_better=False) + else: + raise ValueError(f"Scoring not known for estimator type: {estimator}") + + # ---------------------------------------------------------------------------------------------------------------------------- + # Init importance_getter + # ---------------------------------------------------------------------------------------------------------------------------- + + if isinstance(estimator, Pipeline): + estimator_type = type(estimator.steps[-1][1]).__name__ + else: + estimator_type = type(estimator).__name__ + + if importance_getter is None or importance_getter == "auto": + if estimator_type in ("LogisticRegression",): + importance_getter = "coef_" + else: + importance_getter = "feature_importances_" + + # ---------------------------------------------------------------------------------------------------------------------------- + # Start evaluating different nfeatures, being guided by the selected search method + # ---------------------------------------------------------------------------------------------------------------------------- + + nsteps = 0 + dummy_scores = [] + fitted_estimators = {} + 
selected_features_per_nfeatures = {} + + if top_predictors_search_method == OptimumSearch.ModelBasedHeuristic: + Optimizer = MBHOptimizer( + search_space=( + np.array(np.arange(min(self.max_nfeatures, len(original_features)) + 1).tolist() + [len(original_features)]) + if self.max_nfeatures + else np.arange(len(original_features) + 1) + ), + direction=OptimizationDirection.Maximize, + init_sampling_method=CandidateSamplingMethod.Equidistant, + init_evaluate_ascending=False, + init_evaluate_descending=True, + plotting=OptimizationProgressPlotting.OnScoreImprovement, + seeded_inputs=[min(2, len(original_features))], + ) + else: + Optimizer = None + + n_noimproving_iters = 0 + best_score = -1e6 + + while nsteps < len(original_features): + + if verbose: + iters_pbar.update(1) + + # ---------------------------------------------------------------------------------------------------------------------------- + # Select current set of features to work on, based on ranking received so far, and the optimum search method + # ---------------------------------------------------------------------------------------------------------------------------- + + current_features = get_next_features_subset( + nsteps=nsteps, + original_features=original_features, + feature_importances=feature_importances, + evaluated_scores_mean=evaluated_scores_mean, + evaluated_scores_std=evaluated_scores_std, + use_all_fi_runs=use_all_fi_runs, + use_last_fi_run_only=use_last_fi_run_only, + use_one_freshest_fi_run=use_one_freshest_fi_run, + use_fi_ranking=use_fi_ranking, + top_predictors_search_method=top_predictors_search_method, + votes_aggregation_method=votes_aggregation_method, + Optimizer=Optimizer, + ) + + if current_features is None or len(current_features) == 0: + break # nothing more to try + + selected_features_per_nfeatures[len(current_features)] = current_features + + # ---------------------------------------------------------------------------------------------------------------------------- + # Each split better be different. so, even if random_state is provided, random_state to the cv is generated separately + # (and deterministically) each time based on the original random_state. 
+ # ---------------------------------------------------------------------------------------------------------------------------- + + scores = [] + + splitter = cv.split(X=X, y=y, groups=groups) + if verbose: + splitter = tqdmu(splitter, desc="CV folds", leave=False, total=cv.n_splits) + + # ---------------------------------------------------------------------------------------------------------------------------- + # Evaluate currently selected set of features on CV + # ---------------------------------------------------------------------------------------------------------------------------- + + for nfold, (train_index, test_index) in enumerate(splitter): + + if frac: + size = int(len(train_index) * frac) + if size > 10: + train_index = np.random.choice(train_index, size=size) + + X_train, y_train, X_test, y_test = split_into_train_test( + X=X, y=y, train_index=train_index, test_index=test_index, features_indices=current_features + ) # this splits both dataframes & ndarrays in the same fashion + + if val_cv and has_early_stopping_support(estimator_type): + + # ---------------------------------------------------------------------------------------------------------------------------- + # Make additional early stopping split from X_train + # ---------------------------------------------------------------------------------------------------------------------------- + + if groups is not None: + if isinstance(groups, pd.Series): + train_groups = groups.iloc[train_index] + else: + train_groups = groups[train_index] + else: + train_groups = None + + for true_train_index, val_index in val_cv.split(X=X_train, y=y_train, groups=train_groups): + break # need only 1 iteration of 2nd split + + X_train, y_train, X_val, y_val = split_into_train_test(X=X_train, y=y_train, train_index=true_train_index, test_index=val_index) + + # ---------------------------------------------------------------------------------------------------------------------------- + # If estimator is known, apply early stopping to its fit params + # ---------------------------------------------------------------------------------------------------------------------------- + + temp_cat_features = [current_features.index(var) for var in cat_features if var in current_features] if cat_features else None + + temp_fit_params = pack_val_set_into_fit_params( + model=estimator, + X_val=X_val, + y_val=y_val, + early_stopping_rounds=early_stopping_rounds, + cat_features=temp_cat_features, + ) # crafts fit params with early stopping tailored to particular model type. + temp_fit_params.update(fit_params) + + else: + temp_fit_params = {} + X_val = None + + # ---------------------------------------------------------------------------------------------------------------------------- + # Fit our estimator on current train fold. Score on test & and get its feature importances. + # ---------------------------------------------------------------------------------------------------------------------------- + + # TODO! 
invoke different hyperparameters generation here + + if keep_estimators: + fitted_estimator = copy.copy(estimator) + else: + fitted_estimator = estimator + + fitted_estimator.fit(X=X_train, y=y_train, **temp_fit_params) + + score = scoring(fitted_estimator, X_test, y_test) + scores.append(score) + fi = get_feature_importances( + model=fitted_estimator, current_features=current_features, data=X_test, reference_data=X_val, importance_getter=importance_getter + ) + # feature_indices,imp_values=list(fi.keys()),list(fi.values()) + # print(np.array(feature_indices)[np.argsort(imp_values)[-10:]]) + + key = f"{len(current_features)}_{nfold}" + feature_importances[key] = fi + + if keep_estimators: + fitted_estimators[key] = fitted_estimator + + # print(f"feature_importances[step{len(current_features)}_fold{nfold}]=" + str({key: value for key, value in fi.items() if value > 0})) + + if 0 not in evaluated_scores_mean: + + # ---------------------------------------------------------------------------------------------------------------------------- + # Dummy baselines must serve as fitness @ 0 features. + # ---------------------------------------------------------------------------------------------------------------------------- + dummy_scores.append( + get_best_dummy_score(estimator=estimator, X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test, scoring=scoring) + ) + # print(f"Best dummy score (at 0 features, fold {nfold}): {best_dummy_score}") + + if 0 not in evaluated_scores_mean: + store_averaged_cv_scores(pos=0, scores=dummy_scores, evaluated_scores_mean=evaluated_scores_mean, evaluated_scores_std=evaluated_scores_std) + if top_predictors_search_method == OptimumSearch.ModelBasedHeuristic: + Optimizer.submit_evaluations(candidates=[0], evaluations=[np.array(dummy_scores).mean()], durations=[None]) + + scores_mean, scores_std = store_averaged_cv_scores( + pos=len(current_features), scores=scores, evaluated_scores_mean=evaluated_scores_mean, evaluated_scores_std=evaluated_scores_std + ) + if top_predictors_search_method == OptimumSearch.ModelBasedHeuristic: + Optimizer.submit_evaluations(candidates=[len(current_features)], evaluations=[scores_mean], durations=[None]) + + if verbose: + logger.info( + f"trying nfeatures={len(current_features)}, score={scores_mean:.6f}, len(train_index)={len(train_index)}, len(test_index)={len(test_index)}" + ) + + # ---------------------------------------------------------------------------------------------------------------------------- + # Checking exit conditions + # ---------------------------------------------------------------------------------------------------------------------------- + + nsteps += 1 + + if max_runtime_mins and not ran_out_of_time: + delta = timer() - start_time + ran_out_of_time = delta > max_runtime_mins * 60 + if ran_out_of_time: + if verbose: + logger.info(f"max_runtime_mins={max_runtime_mins:_.1f} reached.") + break + + if max_refits and nsteps >= max_refits: + if verbose: + logger.info(f"max_refits={max_refits:_} reached.") + break + + if scores_mean >= best_score: + best_score = scores_mean + n_noimproving_iters = 0 + else: + n_noimproving_iters += 1 + + # sklearn-style scorers are greater-is-better, so a single >= check against best_desired_score is sufficient + if best_desired_score is not None and scores_mean >= best_desired_score: + if verbose: + logger.info(f"best_desired_score {best_desired_score:_.6f} reached.") + break + + if max_noimproving_iters and 
n_noimproving_iters >= max_noimproving_iters: + if verbose: + logger.info(f"Max # of noimproved iters reached: {n_noimproving_iters}") + break + + # ---------------------------------------------------------------------------------------------------------------------------- + # Saving best result found so far as final + # ---------------------------------------------------------------------------------------------------------------------------- + + self.n_features_in_ = X.shape[1] + self.feature_names_in_ = X.columns if isinstance(X, pd.DataFrame) else map(str, np.arange(self.n_features_in_)) + + self.estimators_ = fitted_estimators # a dict with key=nfeatures_nfold + self.feature_importances_ = feature_importances # a dict with key=nfeatures_nfold + self.selected_features_ = selected_features_per_nfeatures # a dict with key=nfeatures + + checked_nfeatures = sorted(evaluated_scores_mean.keys()) + cv_std_perf = [evaluated_scores_std[n] for n in checked_nfeatures] + cv_mean_perf = [evaluated_scores_mean[n] for n in checked_nfeatures] + self.cv_results_ = {"nfeatures": checked_nfeatures, "cv_mean_perf": cv_mean_perf, "cv_std_perf": cv_std_perf} + + self.select_optimal_nfeatures_( + checked_nfeatures=checked_nfeatures, + cv_mean_perf=cv_mean_perf, + cv_std_perf=cv_std_perf, + mean_perf_weight=self.mean_perf_weight, + std_perf_weight=self.std_perf_weight, + feature_cost=feature_cost, + smooth_perf=smooth_perf, + use_all_fi_runs=use_all_fi_runs, + use_last_fi_run_only=use_last_fi_run_only, + use_one_freshest_fi_run=use_one_freshest_fi_run, + use_fi_ranking=use_fi_ranking, + votes_aggregation_method=votes_aggregation_method, + verbose=verbose, + show_plot=show_plot, + ) + + self.signature = signature + return self + + def select_optimal_nfeatures_( + self, + checked_nfeatures: np.ndarray, + cv_mean_perf: np.ndarray, + cv_std_perf: np.ndarray, + mean_perf_weight: float = 1.0, + std_perf_weight: float = 0.1, + feature_cost: float = 0.01 / 100, + smooth_perf: int = 3, + use_all_fi_runs: bool = True, + use_last_fi_run_only: bool = False, + use_one_freshest_fi_run: bool = False, + use_fi_ranking: bool = False, + votes_aggregation_method: VotesAggregation = VotesAggregation.Borda, + verbose: bool = False, + show_plot: bool = False, + plot_file=None, + font_size: int = 12, + figsize: tuple = (10, 7), + ): + + base_perf = np.array(cv_mean_perf) * mean_perf_weight - np.array(cv_std_perf) * std_perf_weight + if smooth_perf: + smoothed_perf = pd.Series(base_perf).rolling(smooth_perf, center=True).mean().values + idx = np.isnan(smoothed_perf) + smoothed_perf[idx] = base_perf[idx] + base_perf = smoothed_perf + + # ultimate_perf = base_perf / (np.log1p(np.arange(len(base_perf))) + comparison_base) + ultimate_perf = base_perf - np.arange(len(base_perf)) * feature_cost + + best_idx = np.argmax(ultimate_perf) + best_top_n = checked_nfeatures[best_idx] + + if show_plot or plot_file: + plt.rcParams.update({"font.size": font_size}) + fig, ax1 = plt.subplots(figsize=figsize) + ax2 = ax1.twinx() + + ax1.set_xlabel("Number of features selected") + ax1.set_ylabel("Mean CV score", c="b") + + ax1.errorbar(checked_nfeatures, cv_mean_perf, yerr=cv_std_perf, c="b", alpha=0.4) + + ax2.plot(checked_nfeatures, ultimate_perf, c="g") + ax1.plot(checked_nfeatures[best_idx], base_perf[best_idx], "ro") + ax2.set_ylabel("Adj CV score", c="g") + + plt.title("Performance by nfeatures") + plt.tight_layout() + + if plot_file: + plt.savefig(plot_file) + if show_plot: + plt.show() + + # after making a cutoff decision: + + self.n_features_ = 
best_top_n + + # last time vote for feature_importances using all info up to date + + fi_to_consider = select_appropriate_feature_importances( + feature_importances=self.feature_importances_, + nfeatures=best_top_n, + n_original_features=self.n_features_in_, + use_all_fi_runs=use_all_fi_runs, + use_last_fi_run_only=use_last_fi_run_only, + use_one_freshest_fi_run=use_one_freshest_fi_run, + use_fi_ranking=use_fi_ranking, + ) + + self.ranking_ = get_actual_features_ranking( + feature_importances=fi_to_consider, + votes_aggregation_method=votes_aggregation_method, + ) + + self.support_ = np.array([(i in self.ranking_[:best_top_n]) for i in self.feature_names_in_]) + + if verbose: + dummy_gain = base_perf[0] / base_perf[best_idx] - 1 + allfeat_gain = base_perf[-1] / base_perf[best_idx] - 1 + logger.info( + f"{self.n_features_:_} predictive factors selected out of {self.n_features_in_:_} during {len(self.selected_features_):_} rounds. Gain vs dummy={dummy_gain*100:.1f}%, gain vs all features={allfeat_gain*100:.1f}%" + ) + + def transform(self, X, y=None): + if isinstance(X, pd.DataFrame): + return X.iloc[:, self.support_] + else: + return X[:, self.support_] + + +def split_into_train_test( + X: Union[pd.DataFrame, np.ndarray], y: Union[pd.DataFrame, np.ndarray], train_index: np.ndarray, test_index: np.ndarray, features_indices: np.ndarray = None +) -> tuple: + """Split X & y according to indices & dtypes. Basically this accounts for diffeent dtypes (pd.DataFrame, np.ndarray) to perform the same.""" + + if isinstance(X, pd.DataFrame): + X_train, y_train = (X.iloc[train_index, :] if features_indices is None else X.iloc[train_index, :][features_indices]), ( + y.iloc[train_index, :] if isinstance(y, pd.DataFrame) else (y.iloc[train_index] if isinstance(y, pd.Series) else y[train_index]) + ) + X_test, y_test = (X.iloc[test_index, :] if features_indices is None else X.iloc[test_index, :][features_indices]), ( + y.iloc[test_index, :] if isinstance(y, pd.DataFrame) else (y.iloc[test_index] if isinstance(y, pd.Series) else y[test_index]) + ) + else: + X_train, y_train = (X[train_index, :] if features_indices is None else X[train_index, :][:, features_indices]), ( + y[train_index, :] if len(y.shape) > 1 else y[train_index] + ) + X_test, y_test = (X[test_index, :] if features_indices is None else X[test_index, :][:, features_indices]), ( + y[test_index, :] if len(y.shape) > 1 else y[test_index] + ) + + return X_train, y_train, X_test, y_test + + +def store_averaged_cv_scores(pos: int, scores: list, evaluated_scores_mean: dict, evaluated_scores_std: dict) -> None: + scores = np.array(scores) + scores_mean, scores_std = np.mean(scores), np.std(scores) + evaluated_scores_mean[pos] = scores_mean + evaluated_scores_std[pos] = scores_std + + return scores_mean, scores_std + + +def get_feature_importances( + model: object, + current_features: list, + importance_getter: Union[str, Callable], + data: Union[pd.DataFrame, np.ndarray, None] = None, + reference_data: Union[pd.DataFrame, np.ndarray, None] = None, +) -> dict: + + if isinstance(importance_getter, str): + res = getattr(model, importance_getter) + if importance_getter == "coef_": + res = np.abs(res) + if res.ndim > 1: + res = res.sum(axis=0) + else: + res = importance_getter(model=model, data=data, reference_data=reference_data) + + assert len(res) == len(current_features) + return {feature_index: feature_importance for feature_index, feature_importance in zip(current_features, res)} + + +def get_next_features_subset( + nsteps: int, + original_features: 
list, + feature_importances: pd.DataFrame, + evaluated_scores_mean: dict, + evaluated_scores_std: dict, + use_all_fi_runs: bool, + use_last_fi_run_only: bool, + use_one_freshest_fi_run: bool, + use_fi_ranking: bool, + top_predictors_search_method: OptimumSearch = OptimumSearch.ScipyLocal, + votes_aggregation_method: VotesAggregation = VotesAggregation.Borda, + Optimizer: object = None, +) -> list: + """Generates a "next_nfeatures_to_check" candidate to evaluate. + Decides on a subset of FIs to use (all, freshest preceeding, all preceeding). + Combines FIs from different runs into ranks using voting. + Selects next_nfeatures_to_check best ranked features as candidates for the upcoming FI evaluation. + The whole idea of this approach is that we don't need to go all the way from len(original_features) up to 0 and evaluate + EVERY nfeatures. for 10k features and 1TB datast it's a waste. + """ + + # ---------------------------------------------------------------------------------------------------------------------------- + # First step is to try all features. + # ---------------------------------------------------------------------------------------------------------------------------- + + if nsteps == 0: + return original_features + else: + remaining = list(set(np.arange(1, len(original_features))) - set(evaluated_scores_mean.keys())) + if len(remaining) == 0: + return [] + else: + + if top_predictors_search_method == OptimumSearch.ExhaustiveRandom: + next_nfeatures_to_check = random.choice(remaining) + elif top_predictors_search_method == OptimumSearch.ModelBasedHeuristic: + next_nfeatures_to_check = Optimizer.suggest_candidate() + + if next_nfeatures_to_check is not None: + + # ----------------------------- ----------------------------------------------------------------------------------------------- + # At each step, feature importances must be recalculated in light of recent training on a smaller subset. + # The features already thrown away all receive constant importance update of the same size, to keep up with number of trains (?) + # ---------------------------------------------------------------------------------------------------------------------------- + + fi_to_consider = select_appropriate_feature_importances( + feature_importances=feature_importances, + nfeatures=next_nfeatures_to_check, + n_original_features=len(original_features), + use_all_fi_runs=use_all_fi_runs, + use_last_fi_run_only=use_last_fi_run_only, + use_one_freshest_fi_run=use_one_freshest_fi_run, + use_fi_ranking=use_fi_ranking, + ) + ranks = get_actual_features_ranking(feature_importances=fi_to_consider, votes_aggregation_method=votes_aggregation_method) + # print(f"fi_to_consider={fi_to_consider}") + # print(f"ranks={ranks}") + # print(f"next_nfeatures_to_check={next_nfeatures_to_check}, features chosen={ranks[:next_nfeatures_to_check]}") + + return ranks[:next_nfeatures_to_check] + else: + return [] + + +def select_appropriate_feature_importances( + feature_importances: dict, + nfeatures: int, + n_original_features: int, + use_all_fi_runs: bool = True, + use_last_fi_run_only: bool = False, + use_one_freshest_fi_run: bool = False, + use_fi_ranking: bool = False, +) -> dict: + + if use_last_fi_run_only: + # use train folds with specific length. 
+
+
+def select_appropriate_feature_importances(
+    feature_importances: dict,
+    nfeatures: int,
+    n_original_features: int,
+    use_all_fi_runs: bool = True,
+    use_last_fi_run_only: bool = False,
+    use_one_freshest_fi_run: bool = False,
+    use_fi_ranking: bool = False,
+) -> dict:
+    """Pick which of the stored FI runs to feed into the voting, depending on the flags."""
+
+    if use_last_fi_run_only:
+        # use train folds with a specific length; the FI dict key is nfeatures_nfold
+        fi_to_consider = {key: value for key, value in feature_importances.items() if len(value) == n_original_features}
+    else:
+        if use_all_fi_runs:
+            # use all fi data collected so far
+            fi_to_consider = {key: value for key, value in feature_importances.items() if len(value) > 1} if n_original_features > 1 else feature_importances
+        else:
+            # can only use runs preceding nfeatures here.
+            if use_one_freshest_fi_run:
+                # freshest preceding
+                fi_to_consider = {}
+                for possible_nfeatures in range(nfeatures + 1, n_original_features):
+                    for key, value in feature_importances.items():
+                        if len(value) == possible_nfeatures:
+                            fi_to_consider[key] = value
+                    if fi_to_consider:
+                        print(f"using freshest FI of {possible_nfeatures} features for nfeatures={nfeatures}")
+                        break
+            else:
+                # all preceding
+                fi_to_consider = {key: value for key, value in feature_importances.items() if (len(value) > nfeatures and len(value) != 1)}
+
+    if use_fi_ranking:
+        fi_to_consider = {key: pd.Series(value).rank(ascending=True, pct=True).to_dict() for key, value in fi_to_consider.items()}
+
+    return fi_to_consider
+
+
+def get_actual_features_ranking(feature_importances: dict, votes_aggregation_method: VotesAggregation) -> list:
+    """Absolute FIs from estimators trained on CV for each nfeatures are stored separately.
+    They can be used to recompute the final voted importances using any desired voting algo.
+    But of course the exploration path was already led by the specific voting algo active at fitting time.
+
+    GM, and especially Minimax & Plurality, are suboptimal for FS.
+    """
+
+    from votenrank import Leaderboard
+
+    lb = Leaderboard(table=pd.DataFrame(feature_importances))
+    if votes_aggregation_method == VotesAggregation.Borda:
+        ranks = lb.borda_ranking()
+    elif votes_aggregation_method == VotesAggregation.AM:
+        ranks = lb.mean_ranking(mean_type="arithmetic")
+    elif votes_aggregation_method == VotesAggregation.GM:
+        ranks = lb.mean_ranking(mean_type="geometric")
+    elif votes_aggregation_method == VotesAggregation.Copeland:
+        ranks = lb.copeland_ranking()
+    elif votes_aggregation_method == VotesAggregation.Dowdall:
+        ranks = lb.dowdall_ranking()
+    elif votes_aggregation_method == VotesAggregation.Minimax:
+        ranks = lb.minimax_ranking()
+    elif votes_aggregation_method == VotesAggregation.OG:
+        ranks = lb.optimality_gap_ranking(gamma=1)
+    elif votes_aggregation_method == VotesAggregation.Plurality:
+        ranks = lb.plurality_ranking()
+    else:
+        raise ValueError(f"Unsupported votes_aggregation_method: {votes_aggregation_method}")
+
+    # print("Current features ranks:")
+    # print(ranks)
+    return ranks.index.values.tolist()
diff --git a/helpers.py b/mlframe/helpers.py similarity index 100% rename from helpers.py rename to mlframe/helpers.py diff --git a/inference.py b/mlframe/inference.py similarity index 100% rename from inference.py rename to mlframe/inference.py diff --git a/keras.py b/mlframe/keras.py similarity index 100% rename from keras.py rename to mlframe/keras.py diff --git a/lightninglib.py b/mlframe/lightninglib.py similarity index 100% rename from lightninglib.py rename to mlframe/lightninglib.py diff --git a/metrics.py b/mlframe/metrics.py similarity index 100% rename from metrics.py rename to mlframe/metrics.py diff --git a/mlflowlib.py b/mlframe/mlflowlib.py similarity index 100% rename from mlflowlib.py rename to mlframe/mlflowlib.py diff --git a/model_selection.py b/mlframe/model_selection.py similarity index 100% rename from model_selection.py rename to mlframe/model_selection.py diff --git a/optimization.py b/mlframe/optimization.py similarity index 100% rename from optimization.py rename to
mlframe/optimization.py diff --git a/outliers.py b/mlframe/outliers.py similarity index 100% rename from outliers.py rename to mlframe/outliers.py diff --git a/pipelines.py b/mlframe/pipelines.py similarity index 100% rename from pipelines.py rename to mlframe/pipelines.py diff --git a/preprocessing.py b/mlframe/preprocessing.py similarity index 100% rename from preprocessing.py rename to mlframe/preprocessing.py diff --git a/public_suffix_list.dat b/mlframe/public_suffix_list.dat similarity index 100% rename from public_suffix_list.dat rename to mlframe/public_suffix_list.dat diff --git a/stats.py b/mlframe/stats.py similarity index 100% rename from stats.py rename to mlframe/stats.py diff --git a/synthetic.py b/mlframe/synthetic.py similarity index 100% rename from synthetic.py rename to mlframe/synthetic.py diff --git a/tests.py b/mlframe/tests.py similarity index 100% rename from tests.py rename to mlframe/tests.py diff --git a/text.py b/mlframe/text.py similarity index 100% rename from text.py rename to mlframe/text.py diff --git a/tuning.py b/mlframe/tuning.py similarity index 100% rename from tuning.py rename to mlframe/tuning.py diff --git a/unittest_arrays.py b/mlframe/unittest_arrays.py similarity index 100% rename from unittest_arrays.py rename to mlframe/unittest_arrays.py diff --git a/utils.py b/mlframe/utils.py similarity index 100% rename from utils.py rename to mlframe/utils.py diff --git a/version.py b/mlframe/version.py similarity index 100% rename from version.py rename to mlframe/version.py
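Finally, a minimal sketch (not part of the patch) of recomputing a voted ranking offline from stored FI runs, using select_appropriate_feature_importances and get_actual_features_ranking defined above; the votenrank dependency is required, and the run keys and importance values below are invented:

    toy_feature_importances = {
        "4_0": {"f1": 0.40, "f2": 0.30, "f3": 0.20, "f4": 0.10},  # fold 0, trained on all 4 features
        "4_1": {"f1": 0.35, "f2": 0.25, "f3": 0.30, "f4": 0.10},  # fold 1, trained on all 4 features
    }

    fi_to_consider = select_appropriate_feature_importances(
        feature_importances=toy_feature_importances,
        nfeatures=2,
        n_original_features=4,
        use_all_fi_runs=True,
    )
    ranking = get_actual_features_ranking(
        feature_importances=fi_to_consider,
        votes_aggregation_method=VotesAggregation.Borda,
    )
    print(ranking)  # feature names ordered from strongest to weakest according to Borda voting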