Fix

eapframework · Dec 6, 2019 · d2ae712
1 parent 69fc5e9
commit d2ae712
Show file tree

Hide file tree

Showing 2 changed files with 10 additions and 16 deletions.
diff --git a/optimus/outliers/abstract_outliers_bounds.py b/optimus/outliers/abstract_outliers_bounds.py
@@ -2,12 +2,11 @@
 
 from pyspark.sql import functions as F
 
-from optimus.infer import is_dataframe
 from optimus.helpers.columns import parse_columns
 from optimus.helpers.converter import one_list_to_val, val_to_list
 from optimus.helpers.filters import dict_filter
-from optimus.helpers.functions import create_buckets
 from optimus.helpers.json import dump_json
+from optimus.infer import is_dataframe
 
 
 # LOWER_BOUND =
@@ -54,14 +53,14 @@ def select(self):
 
     # TODO: Pass a defined division param instead, or run 3 separate jobs
     def hist(self, col_name):
-        buckets = 20
-        min_value, max_value = self.df.cols.range(col_name)
-
-        create_buckets(min_value, self.lower_bound, buckets)
-
-        create_buckets(self.lower_bound, self.upper_bound, buckets)
-
-        create_buckets(self.upper_bound, max_value, buckets)
+        # buckets = 20
+        # min_value, max_value = self.df.cols.range(col_name)
+        #
+        # create_buckets(min_value, self.lower_bound, buckets)
+        #
+        # create_buckets(self.lower_bound, self.upper_bound, buckets)
+        #
+        # create_buckets(self.upper_bound, max_value, buckets)
 
         # lower bound
         lower_bound_hist = self.df.rows.select(self.df[col_name] < self.lower_bound).cols.hist(col_name, 20)
@@ -73,7 +72,6 @@ def hist(self, col_name):
         non_outlier_hist = self.df.rows.select(
             (F.col(col_name) >= self.lower_bound) & (F.col(col_name) <= self.upper_bound)).cols.hist(col_name, 20)
 
-
         result = {}
         if lower_bound_hist is not None:
             result.update(lower_bound_hist)

diff --git a/tests/test_df_rows.py b/tests/test_df_rows.py
@@ -43,11 +43,7 @@ def test_rows_is_in():
 		actual_df =source_df.rows.is_in('num',2)
 		expected_df = op.create.df([('words', StringType(), True),('num', IntegerType(), True),('animals', StringType(), True),('thing', StringType(), True),('second', IntegerType(), True),('filter', StringType(), True)], [('    zombies', 2, 'cat', 'tv', 6, 'b'), ('simpsons   cat lady', 2, 'frog', 'table', 7, '1')])
 		assert (expected_df.collect() == actual_df.collect())
-	@staticmethod
-	def test_rows_select():
-		actual_df =source_df.rows.select(Column<b'(num = 1)'>)
-		expected_df = op.create.df([('words', StringType(), True),('num', IntegerType(), True),('animals', StringType(), True),('thing', StringType(), True),('second', IntegerType(), True),('filter', StringType(), True)], [('  I like     fish  ', 1, 'dog dog', 'housé', 5, 'a')])
-		assert (expected_df.collect() == actual_df.collect())
+
 	@staticmethod
 	def test_rows_select_by_dtypes():
 		actual_df =source_df.rows.select_by_dtypes('filter','integer')