Draft: Updates to ML Toolkit to operate with PyKX #105

Open · wants to merge 7 commits into base: master
4 changes: 2 additions & 2 deletions fresh/feat.q
@@ -205,7 +205,7 @@ fresh.feat.firstMin:{[data]
// @param data {number[]} Numerical data points
// @return {dictionary} Spectral centroid, variance, skew and kurtosis
fresh.feat.fftAggreg:{[data]
a:fresh.i.abso[fresh.i.rfft data]`;
a:fresh.i.abso[.p.toraw fresh.i.rfft data]`;
l:"f"$til count a;
mean:1.,(sum each a*/:3(l*)\l)%sum a;
m1:mean 1;m2:mean 2;m3:mean 3;m4:mean 4;
@@ -227,7 +227,7 @@ fresh.feat.fftCoeff:{[data;coeff]
r:(fresh.i.angle[fx;`deg pykw 1b]`;
fresh.i.real[fx]`;
fresh.i.imag[fx]`;
fresh.i.abso[fx:fresh.i.rfft data]`
fresh.i.abso[fx:.p.toraw fresh.i.rfft data]`
);
fftKeys:`$"_"sv'string raze(`coeff,/:til coeff),\:/:`angle`real`imag`abs;
fftVals:raze coeff#'r,\:coeff#0n;
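Both hunks in feat.q make the same change: the output of fresh.i.rfft is passed through .p.toraw before it reaches the remaining Python calls, which, as far as this diff shows, keeps the complex FFT result on the Python side when PyKX is the active interface instead of converting it to q (which has no complex datatype) between calls. A minimal sketch of the pattern, assuming PyKX is loaded and exposing the embedPy-compatible .p namespace; rfft and abso below are local stand-ins for fresh.i.rfft and fresh.i.abso:

rfft:.p.import[`numpy.fft][`:rfft]
abso:.p.import[`numpy][`:abs]
data:100?1f
// .p.toraw keeps the complex FFT output as a raw Python object, so np.abs is
// applied on the Python side; the trailing backtick then converts the
// real-valued result to a q float vector.
a:abso[.p.toraw rfft data]`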
759 changes: 379 additions & 380 deletions fresh/tests/features.t

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions fresh/tests/significancetests.p
@@ -1,7 +1,7 @@
p)import numpy as np
p)from scipy import stats

p)def< binary_feature_binary_test(x, y):
p)def binary_feature_binary_test(x, y):
x0, x1 = np.unique(x)
y0, y1 = np.unique(y)

@@ -17,7 +17,7 @@ p)def< binary_feature_binary_test(x, y):

return p_value

p)def< target_binary_feature_real_test(y, x):
p)def target_binary_feature_real_test(y, x):
y0, y1 = np.unique(y)

x_y1 = x[y == y1]
@@ -26,11 +26,11 @@ p)def< target_binary_feature_real_test(y, x):
KS, p_ks = stats.ks_2samp(x_y1, x_y0,mode='asymp')
return p_ks

p)def< target_real_feature_real_test(x, y):
p)def target_real_feature_real_test(x, y):
tau, p_value = stats.kendalltau(x, y)
return p_value

p)def< benjamini_hochberg_test(df_pvalues, hypotheses_independent, fdr_level):
p)def benjamini_hochberg_test(df_pvalues, hypotheses_independent, fdr_level):
df_pvalues = df_pvalues.sort_values(by="p_value")
m = len(df_pvalues)
K = np.arange(1, m + 1)
17 changes: 9 additions & 8 deletions fresh/tests/sigtests.t
@@ -11,7 +11,6 @@ In each case significance tests implemented within freshq are compared to
equivalent significance tests implemented previously in python.
\

\l p.q
\l ml.q
\l fresh/init.q
\l fresh/tests/significancetests.p
@@ -22,13 +21,13 @@ xb:5000#0101101011b
yb:5000#0101101011b

/ 1a.
.ml.fresh.i.fisher[xb;yb] ~ binary_feature_binary_test[xb;yb]
.ml.fresh.i.fisher[xb;yb] ~ .p.get[`binary_feature_binary_test;<][xb;yb]

/ 1b.
.ml.fresh.i.ks[yb;xf] ~ target_binary_feature_real_test[yb;xf]
.ml.fresh.i.ks[yb;xf] ~ .p.get[`target_binary_feature_real_test;<][yb;xf]

/ 1c.
.ml.fresh.i.kTau[xf;yf] ~ target_real_feature_real_test[xf;yf]
.ml.fresh.i.kTau[xf;yf] ~ .p.get[`target_real_feature_real_test;<][xf;yf]
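All three comparisons follow the same updated pattern: the Python functions defined in significancetests.p are retrieved with .p.get rather than referenced as global q names, and the < argument requests that the return value be converted to q, so the ~ match behaves the same under embedPy and PyKX. A short illustrative sketch (the Python function here is made up for the example, not part of the test suite):

p)def double_sum(x): return 2 * sum(x)
f:.p.get[`double_sum;<]   // < converts the Python return value to q
f[1 2 3]                  // 12
g:.p.get`double_sum       // without <, a wrapped Python object comes back
g[1 2 3]`                 // the trailing backtick converts it to q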

/
2.
@@ -45,14 +44,16 @@ table3:([]desc 1000000?1f;1000000?10f;asc 1000000?1f)
table4:([]1000000?0b;1000000?1f;1000000?1f)
target1:asc 1000000?100f;target2:desc 1000000?1f;target3:target4:1000000?0b
bintest:{2=count distinct x}
pdmatrix:{pddf[benjamini_hochberg_test[y;"FALSE";x]][`:values]}
k:{pdmatrix[x;y]`}
pdmatrix:{pddf[.p.get[`benjamini_hochberg_test][.p.topd y;$[.pykx.loaded;0b;"FALSE"];x]][`:values]`}
k:{t:pdmatrix[x;y];@[{x`};t;{[x;y]x}[t]]}
vec:{k[x;y][;2]}
bhfn:{[table;target]
pdict:.ml.fresh.sigFeat[table;target];
ptable:([]label:key pdict;p_value:value pdict);
dfptable:tab2df[ptable];
("i"$count .ml.fresh.benjhoch[0.05;pdict]) ~ sum vec[0.05;dfptable]=1b
dfptable:$[.pykx.loaded;;tab2df]ptable;
vecret:vec[0.05;dfptable];
vecret:$[11h=type vecret;`True=;0<]vecret;
("i"$count .ml.fresh.benjhoch[0.05;pdict]) ~ sum vecret=1b
}
bhfn[table1;target1]
bhfn[table2;target2]
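The reworked helpers above branch on .pykx.loaded because the two interfaces convert data differently: under PyKX the q table is handed to Python via .p.topd and the embedPy-only tab2df step is skipped, the boolean argument is passed as 0b rather than the string "FALSE", and the returned column may come back as the symbols `True`False instead of a boolean or numeric vector. The final step in bhfn reduces all of those cases to a q boolean vector before counting. A small sketch of that normalisation, assuming the same three possible return shapes:

normBool:{$[11h=type x;`True=x;0<x]}
normBool `True`False`True   // 101b  (symbol column, as it may arrive from pandas via PyKX)
normBool 1 0 1f             // 101b  (numeric column)
normBool 101b               // 101b  (already boolean)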
111 changes: 56 additions & 55 deletions fresh/tests/test.p
@@ -1,61 +1,62 @@
p)import numpy as np
p)import pandas as pd
p)import itertools
p)from scipy.signal import welch, cwt, ricker, find_peaks_cwt
p)from scipy.stats import linregress
p)from statsmodels.tsa.stattools import acf, adfuller, pacf
p)from numpy.linalg import LinAlgError

p)def< _get_length_sequences_where(x):
p)def _get_length_sequences_where(x):
if len(x) == 0:
return [0]
else:
res = [len(list(group)) for value, group in itertools.groupby(x) if value == 1]
return res if len(res) > 0 else [0]
p)def< aggregate_on_chunks(x, f_agg, chunk_len):return [getattr(x[i * chunk_len: (i + 1) * chunk_len], f_agg)() for i in range(int(np.ceil(len(x) / chunk_len)))]

p)def< hasduplicate(x):return len(x) != len(set(x))
p)def< hasduplicatemin(x):return sum(np.asarray(x) == min(x)) >= 2
p)def< hasduplicatemax(x):return sum(np.asarray(x) == max(x)) >= 2
p)def< abs_energy(x):x = np.asarray(x); return sum(x * x)
p)def< mean_change(x):return np.mean(np.diff(x))
p)def< mean_abs_change(x):return np.mean(np.abs(np.diff(x)))
p)def< count_above_mean(x): x = np.asarray(x); m = np.mean(x); return np.where(x > m)[0].shape[0]
p)def< count_below_mean(x): x = np.asarray(x); m = np.mean(x); return np.where(x < m)[0].shape[0]
p)def< first_location_of_maximum(x): x = np.asarray(x); return np.argmax(x) / len(x) if len(x) > 0 else np.NaN
p)def< first_location_of_minimum(x): x = np.asarray(x); return np.argmin(x) / len(x) if len(x) > 0 else np.NaN
p)def< last_location_of_minimum(x): x = np.asarray(x); return 1.0 - (1+np.argmin(x[::-1]))/ len(x) if len(x) > 0 else np.NaN
p)def< last_location_of_maximum(x): x = np.asarray(x); return 1.0 - (1+np.argmax(x[::-1]))/ len(x) if len(x) > 0 else np.NaN
p)def< ratio_val_num_to_t_series(x):return len(set(x))/len(x)
p)def< ratio_beyond_r_sigma(x,r):return sum(abs(x - np.mean(x)) > r * np.std(x))/len(x)
p)def< large_standard_deviation(x,r):x = np.asarray(x);return np.std(x) > (r * (max(x) - min(x)))
p)def< absolute_sum_of_changes(x):return np.sum(abs(np.diff(x)))
p)def< longest_strike_below_mean(x):return max(_get_length_sequences_where(x <= np.mean(x))) if len(x) > 0 else 0
p)def< longest_strike_above_mean(x):return max(_get_length_sequences_where(x >= np.mean(x))) if len(x) > 0 else 0
p)def< skewness_py(x):x = pd.Series(x);return pd.Series.skew(x)
p)def< kurtosis_py(x):x = pd.Series(x);return pd.Series.kurtosis(x)
p)def< range_count(x,min,max):return np.sum((x >= min) & (x < max))
p)def< variance_larger_than_standard_deviation(x):return np.var(x) > np.std(x)
p)def< number_cwt_peaks(x,n):return len(find_peaks_cwt(vector=x, widths=np.array(list(range(1, n + 1))), wavelet=ricker))
p)def< quantile_py(x, q):x = pd.Series(x);return pd.Series.quantile(x, q)
p)def< value_count(x, value):
p)def aggregate_on_chunks(x, f_agg, chunk_len):return [getattr(x[i * chunk_len: (i + 1) * chunk_len], f_agg)() for i in range(int(np.ceil(len(x) / chunk_len)))]

p)def hasduplicate(x):return len(x) != len(set(x))
p)def hasduplicatemin(x):return sum(np.asarray(x) == min(x)) >= 2
p)def hasduplicatemax(x):return sum(np.asarray(x) == max(x)) >= 2
p)def abs_energy(x):x = np.asarray(x); return sum(x * x)
p)def mean_change(x):return np.mean(np.diff(x))
p)def mean_abs_change(x):return np.mean(np.abs(np.diff(x)))
p)def count_above_mean(x): x = np.asarray(x); m = np.mean(x); return np.where(x > m)[0].shape[0]
p)def count_below_mean(x): x = np.asarray(x); m = np.mean(x); return np.where(x < m)[0].shape[0]
p)def first_location_of_maximum(x): x = np.asarray(x); return np.argmax(x) / len(x) if len(x) > 0 else np.NaN
p)def first_location_of_minimum(x): x = np.asarray(x); return np.argmin(x) / len(x) if len(x) > 0 else np.NaN
p)def last_location_of_minimum(x): x = np.asarray(x); return 1.0 - (1+np.argmin(x[::-1]))/ len(x) if len(x) > 0 else np.NaN
p)def last_location_of_maximum(x): x = np.asarray(x); return 1.0 - (1+np.argmax(x[::-1]))/ len(x) if len(x) > 0 else np.NaN
p)def ratio_val_num_to_t_series(x):return len(set(x))/len(x)
p)def ratio_beyond_r_sigma(x,r):return sum(abs(x - np.mean(x)) > r * np.std(x))/len(x)
p)def large_standard_deviation(x,r):x = np.asarray(x);return np.std(x) > (r * (max(x) - min(x)))
p)def absolute_sum_of_changes(x):return np.sum(abs(np.diff(x)))
p)def longest_strike_below_mean(x):return max(_get_length_sequences_where(x <= np.mean(x))) if len(x) > 0 else 0
p)def longest_strike_above_mean(x):return max(_get_length_sequences_where(x >= np.mean(x))) if len(x) > 0 else 0
p)def skewness_py(x):x = pd.Series(x);return pd.Series.skew(x)
p)def kurtosis_py(x):x = pd.Series(x);return pd.Series.kurtosis(x)
p)def range_count(x,min,max):return np.sum((x >= min) & (x < max))
p)def variance_larger_than_standard_deviation(x):return np.var(x) > np.std(x)
p)def number_cwt_peaks(x,n):return len(find_peaks_cwt(vector=x, widths=np.array(list(range(1, n + 1))), wavelet=ricker))
p)def quantile_py(x, q):x = pd.Series(x);return pd.Series.quantile(x, q)
p)def value_count(x, value):
if np.isnan(value):
return np.isnan(x)
else:
return x[x == value].shape[0]

p)def< percentage_recurring_all_data(x):
p)def percentage_recurring_all_data(x):
unique, counts = np.unique(x, return_counts=True)
return np.sum(counts > 1) / float(counts.shape[0])

p)def< percentage_recurring_all_val(x):
p)def percentage_recurring_all_val(x):
x = pd.Series(x)
if len(x) == 0:
return np.nan
x = x.copy()
value_counts = x.value_counts()
return value_counts[value_counts > 1].sum() / len(x)

p)def< number_peaks(x, n):
p)def number_peaks(x, n):
x = np.asarray(x)
x_reduced = x[n:-n]
res = None
@@ -68,7 +69,7 @@ p)def< number_peaks(x, n):
res &= (x_reduced > np.roll(x, -i)[n:-n])
return sum(res)

p)def< cid_ce(x, normalize):
p)def cid_ce(x, normalize):
x = np.asarray(x)
if normalize:
s = np.std(x)
@@ -79,43 +80,43 @@ p)def< cid_ce(x, normalize):
x = np.diff(x)
return np.sqrt(np.sum((x * x)))

p)def< mean_second_derivative_central(x):
p)def mean_second_derivative_central(x):
diff = (np.roll(x, 1) - 2 * np.array(x) + np.roll(x, -1)) / 2.0
return np.mean(diff[1:-1])

p)def< sum_recurring_values(x):
p)def sum_recurring_values(x):
unique, counts = np.unique(x, return_counts=True)
counts[counts < 2] = 0
counts[counts > 1] = 1
return np.sum(counts * unique)

p)def< sum_recurring_data_points(x):
p)def sum_recurring_data_points(x):
unique, counts = np.unique(x, return_counts=True)
counts[counts < 2] = 0
return np.sum(counts * unique)

p)def< c3_py(x, lag):
p)def c3_py(x, lag):
n = len(x)
x = np.asarray(x)
if 2 * lag >= n:
return 0
else:
return np.mean((np.roll(x, 2 * -lag) * np.roll(x, -lag) * x)[0:(n - 2 * lag)])

p)def< number_crossing_m(x, m):
p)def number_crossing_m(x, m):
if not isinstance(x, (np.ndarray, pd.Series)):
x = np.asarray(x)
positive = x > m
return np.where(np.bitwise_xor(positive[1:], positive[:-1]))[0].size

p)def< binned_entropy(x, max_bins):
p)def binned_entropy(x, max_bins):
if not isinstance(x, (np.ndarray, pd.Series)):
x = np.asarray(x)
hist, bin_edges = np.histogram(x, bins=max_bins)
probs = hist / x.size
return - np.sum(p * np.math.log(p) for p in probs if p != 0)

p)def< autocorrelation(x, lag):
p)def autocorrelation(x, lag):
if type(x) is pd.Series:
x = x.values
if len(x) < lag:
@@ -126,7 +127,7 @@ p)def< autocorrelation(x, lag):
sum_product = np.sum((y1-x_mean)*(y2-x_mean))
return sum_product / ((len(x) - lag) * np.var(x))

p)def< energy_ratio_by_chunks(x,y,z):
p)def energy_ratio_by_chunks(x,y,z):
full_series_energy = np.sum(x ** 2)
num_segments = y
segment_focus = z
@@ -137,7 +138,7 @@ p)def< energy_ratio_by_chunks(x,y,z):
res_data=(np.sum(x[start:end]**2.0)/full_series_energy)
return res_data

p)def< change_quantiles(x, ql, qh, isabs, f_agg):
p)def change_quantiles(x, ql, qh, isabs, f_agg):
if ql >= qh:
ValueError("ql={} should be lower than qh={}".format(ql, qh))
div = np.diff(x)
@@ -156,7 +157,7 @@ p)def< change_quantiles(x, ql, qh, isabs, f_agg):
aggregator = getattr(np, f_agg)
return aggregator(div[ind_inside_corridor])

p)def< time_reversal_asymmetry_statistic(x, lag):
p)def time_reversal_asymmetry_statistic(x, lag):
n = len(x)
x = np.asarray(x)
if 2 * lag >= n:
@@ -165,7 +166,7 @@ p)def< time_reversal_asymmetry_statistic(x, lag):
return np.mean((np.roll(x, 2 * -lag) * np.roll(x, 2 * -lag) * np.roll(x, -lag) -
np.roll(x, -lag) * x * x)[0:(n - 2 * lag)])

p)def< index_mass_quantile(x, q):
p)def index_mass_quantile(x, q):

x = np.asarray(x)
abs_x = np.abs(x)
@@ -177,13 +178,13 @@ p)def< index_mass_quantile(x, q):
mass_centralized = np.cumsum(abs_x) / s
return (np.argmax(mass_centralized >= q)+1)/len(x)

p)def< linear_trend(x):
p)def linear_trend(x):
linReg = linregress(range(len(x)), x)
return linReg

p)def< get_moment(y, moment):return y.dot(np.arange(len(y))**moment) / y.sum()
p)def< get_centroid(y):return get_moment(y, 1)
p)def< get_variance(y):return get_moment(y, 2) - get_centroid(y) ** 2
p)def get_moment(y, moment):return y.dot(np.arange(len(y))**moment) / y.sum()
p)def get_centroid(y):return get_moment(y, 1)
p)def get_variance(y):return get_moment(y, 2) - get_centroid(y) ** 2

p)def get_skew(y):
variance = get_variance(y)
@@ -193,7 +194,7 @@ p)def get_skew(y):
return (
get_moment(y, 3) - 3*get_centroid(y)*variance - get_centroid(y)**3
) / get_variance(y)**(1.5)
p)def< get_kurtosis(y):
p)def get_kurtosis(y):
variance = get_variance(y)
if variance < 0.5:
return np.nan
@@ -203,11 +204,11 @@ p)def< get_kurtosis(y):
+ 6*get_moment(y, 2)*get_centroid(y)**2 - 3*get_centroid(y)
) / get_variance(y)**2

p)def< fft_aggregated(x):
p)def fft_aggregated(x):
fft_abs = abs(np.fft.rfft(x))
return get_centroid(fft_abs),get_variance(fft_abs),get_skew(fft_abs),get_kurtosis(fft_abs)

p)def< index_mass_quantile(x, q):
p)def index_mass_quantile(x, q):

x = np.asarray(x)
abs_x = np.abs(x)
@@ -219,7 +220,7 @@ p)def< index_mass_quantile(x, q):
mass_centralized = np.cumsum(abs_x) / s
return (np.argmax(mass_centralized >= q)+1)/len(x)

p)def< agg_autocorrelation(x,y):
p)def agg_autocorrelation(x,y):
var = np.var(x)
n = len(x)
if np.abs(var) < 10**-10 or n == 1:
@@ -228,7 +229,7 @@ p)def< agg_autocorrelation(x,y):
a = acf(x, adjusted=True, fft=n > 1250)[1:]
return getattr(np, y)(a)

p)def< augmented_dickey_fuller(x):
p)def augmented_dickey_fuller(x):
res = None
try:
res = adfuller(x)
@@ -241,11 +242,11 @@ p)def< augmented_dickey_fuller(x):

return res

p)def< spkt_welch_density(x, y):
p)def spkt_welch_density(x, y):
freq, pxx = welch(x)
return pxx[y]

p)def< fft_coefficient(x,y,z):
p)def fft_coefficient(x,y,z):

fft = np.fft.rfft(x)

@@ -263,7 +264,7 @@ p)def< fft_coefficient(x,y,z):

return res

p)def< partial_autocorrelation(x, param):
p)def partial_autocorrelation(x, param):
max_demanded_lag = max(param)
n = len(x)
if n <= 1:
6 changes: 3 additions & 3 deletions fresh/utils.q
@@ -6,12 +6,12 @@
\d .ml

// Python imports
sci_ver :1.5<="F"$3#.p.import[`scipy][`:__version__]`
sci_ver :1.5<="F"$3#$[-11h=type x;string;]x:.p.import[`scipy][`:__version__]`
numpy :.p.import`numpy
pyStats :.p.import`scipy.stats
signal :.p.import`scipy.signal
stattools:.p.import`statsmodels.tsa.stattools
stats_ver:"F"$"." vs (.p.import`statsmodels)[`:__version__]`
stats_ver:"F"$"." vs $[-11h=type x;string;]x:.p.import[`statsmodels][`:__version__]`
stats_break:$[((stats_ver[0]=0)&stats_ver[1]>=12)|stats_ver[0]>0;1b;0b]
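Both version reads are normalised the same way: embedPy returns __version__ as a character vector, while the PyKX conversion can hand back a symbol (type -11h), so string is applied conditionally before the numeric cast. A hedged sketch of the idiom with a hypothetical helper name, mirroring the stats_ver line above:

verNum:{"F"$"." vs $[-11h=type x;string x;x]}
verNum "0.13.5"      // 0 13 5f  (character vector, as embedPy returns it)
verNum `$"0.13.5"    // 0 13 5f  (symbol form, as PyKX may return it)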

// @private
@@ -175,7 +175,7 @@ fresh.i.expandResults:{[results;column]
// @return {float} Kendall’s tau - Close to 1 shows strong agreement, close to
// -1 shows strong disagreement
fresh.i.kTau:{[target;feature]
fresh.i.kendallTau[<;target;feature]1
fresh.i.kendallTau[target;feature][`:pvalue]`
}
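fresh.i.kTau no longer converts kendalltau's whole return value to q and indexes element 1; the call result is kept as a Python object and its pvalue attribute is read, which works the same whether embedPy or PyKX supplies the wrapper. A hedged sketch against scipy.stats directly, with kendallTau standing in for fresh.i.kendallTau:

kendallTau:.p.import[`scipy.stats][`:kendalltau]
x:100?1f; y:100?1f
res:kendallTau[x;y]    // wrapped result object holding the statistic and p-value
res[`:pvalue]`         // read the pvalue attribute and convert to q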

// @private
1 change: 0 additions & 1 deletion graph/tests/graph.t
@@ -2,7 +2,6 @@
// which will fail to produce a valid/operational graph/pipeline in order to ensure that the
// catching mechanism for the creation of such workflows is reliable and fully understood

\l p.q
\l ml.q
\l graph/utils.q
\l graph/graph.q