Skip to content

Commit

Permalink
Added min_max and max_abs scaler
Browse files Browse the repository at this point in the history
  • Loading branch information
argenisleon committed Nov 16, 2019
1 parent fa58207 commit 2549e77
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 1 deletion.
40 changes: 39 additions & 1 deletion optimus/dataframe/columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -1398,7 +1398,7 @@ def div(columns, col_name="div"):
@add_attr(cols)
def z_score(input_cols, output_cols=None):
"""
Return the column data type
Return the column z score
:param input_cols: '*', list of columns names or a single column name
:param output_cols:
:return:
Expand All @@ -1412,6 +1412,44 @@ def _z_score(col_name, attr):
return apply(input_cols, func=_z_score, filter_col_by_dtypes=PYSPARK_NUMERIC_TYPES, output_cols=output_cols,
meta=Actions.Z_SCORE.value)

@add_attr(cols)
def min_max_scaler(input_cols, output_cols=None):
    """
    Rescale each numeric column with min-max scaling: (x - min) / (max - min)
    :param input_cols: '*', list of column names or a single column name
    :param output_cols: forwarded unchanged to apply() as the output column(s)
    :return: whatever apply() returns (presumably the transformed dataframe)
    """

    def _min_max(col_name, attr):
        # cols.range() is indexed as result[col_name]["range"]["min" | "max"]
        col_range = self.cols.range(col_name)[col_name]["range"]
        min_value = col_range["min"]
        max_value = col_range["max"]

        # The span has to be parenthesised. Without the parentheses operator
        # precedence computes ((x - min) / max) - min, which is not a min-max
        # scaling. With the correct formula the result is already non-negative
        # for values inside the column range, so no abs() is needed.
        # NOTE(review): a constant column gives a zero span; the result of the
        # division then depends on Spark's divide-by-zero semantics - confirm.
        return (F.col(col_name) - min_value) / (max_value - min_value)

    return apply(input_cols, func=_min_max, filter_col_by_dtypes=PYSPARK_NUMERIC_TYPES, output_cols=output_cols,
                 meta=Actions.MIN_MAX_SCALER.value)

@add_attr(cols)
def max_abs_scaler(input_cols, output_cols=None):
    """
    Scale each numeric column by dividing it by its own maximum absolute value
    :param input_cols: '*', list of column names or a single column name
    :param output_cols: forwarded unchanged to apply() as the output column(s)
    :return: whatever apply() returns (presumably the transformed dataframe)
    """

    def max_abs(col_name):
        # Aggregate expression: largest absolute value found in the column
        return F.max(F.abs(F.col(col_name)))

    def _result(col_name, attr):
        # Aggregate only the column currently being scaled. Using input_cols
        # here would aggregate every requested column (or the raw '*'
        # selector) on each call, so the divisor would not be this column's
        # own maximum.
        # NOTE(review): assumes format_dict() collapses a single-column,
        # single-aggregation result to a bare value - confirm against helper.
        max_abs_result = format_dict(agg_exprs([col_name], max_abs))

        return F.col(col_name) / max_abs_result

    return apply(input_cols, func=_result, filter_col_by_dtypes=PYSPARK_NUMERIC_TYPES, output_cols=output_cols,
                 meta=Actions.MAX_ABS_SCALER.value)

@add_attr(cols)
def iqr(columns, more=None, relative_error=RELATIVE_ERROR):
"""
Expand Down
2 changes: 2 additions & 0 deletions optimus/helpers/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@ class Actions(Enum):
VALUES_TO_COLS = "values_to_cols"
SET = "set"
STRING_TO_INDEX = "string_to_index"
MIN_MAX_SCALER = "min_max_scaler"
MAX_ABS_SCALER = "max_abs_scaler"

@staticmethod
def list():
Expand Down

0 comments on commit 2549e77

Please sign in to comment.