Skip to content

Commit

Permalink
Fix hist for outliers
Browse files (browse the repository at this point in the history)
  • Loading branch information
argenisleon committed Nov 29, 2019
1 parent ed64e48 commit 845d5e5
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 7 deletions.
16 changes: 10 additions & 6 deletions optimus/outliers/abstract_outliers_bounds.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,22 +51,26 @@ def select(self):

return self.df.rows.select((F.col(col_name) > upper_bound) | (F.col(col_name) < lower_bound))

#TODO: Pass a defined division param instead or run 3 separated jobs
# TODO: Pass a defined division param instead or run 3 separated jobs
def hist(self, col_name):
# lower bounf
# lower bound
lower_bound_hist = self.df.rows.select(self.df[col_name] < self.lower_bound).cols.hist(col_name, 20)

# upper bound
upper_bound_hist = self.df.rows.select(self.df[col_name] > self.upper_bound).cols.hist(col_name, 20)

# Non outliers
non_outlier_hist = self.df.rows.select(
(F.col(col_name) <= self.upper_bound) | (F.col(col_name) >= self.lower_bound)).cols.hist(col_name, 20)
(F.col(col_name) >= self.lower_bound) & (F.col(col_name) <= self.upper_bound)).cols.hist(col_name, 20)


result = {}
result["lower_bound"] = lower_bound_hist
result["non_outlier_hist"] = non_outlier_hist
result["upper_bound"] = upper_bound_hist
if lower_bound_hist is not None:
result.update(lower_bound_hist)
if upper_bound_hist is not None:
result.update(upper_bound_hist)
if non_outlier_hist is not None:
result.update(non_outlier_hist)

return dump_json(result)

Expand Down
2 changes: 1 addition & 1 deletion optimus/outliers/tukey.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def __init__(self, df, col_name):
self.df = df
self.col_name = col_name

self.upper_bound, self.lower_bound, self.q1, self.median, self.q3, self.iqr = dict_filter(
self.lower_bound, self.upper_bound, self.q1, self.median, self.q3, self.iqr = dict_filter(
self.whiskers(), ["lower_bound", "upper_bound", "q1", "median", "q3", "iqr"]
)
# print(self.upper_bound, self.lower_bound, self.q1, self.median, self.q3, self.iqr)
Expand Down

0 comments on commit 845d5e5

Please sign in to comment.