Skip to content

Commit

Permalink
Fix replace when handling different column dtypes
Browse files Browse the repository at this point in the history
  • Loading branch information
argenisleon committed Nov 25, 2019
1 parent f2ec510 commit bf970ef
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 9 deletions.
17 changes: 10 additions & 7 deletions optimus/dataframe/columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -1103,20 +1103,23 @@ def func_numeric(_df, _input_col, _output_col, _search, _replace_by):
func = func_full
elif search_by == "chars" or search_by == "words":
func = func_chars_words
elif search_by == "numeric":
func == func_numeric
else:
RaiseIt.value_error(search_by, ["chars", "words", "full"])
RaiseIt.value_error(search_by, ["chars", "words", "full", "numeric"])

if search_by in ["chars", "words", "full"]:
filter_dtype = [PYSPARK_STRING_TYPES]
elif search_by == "numeric":
filter_dtype = [PYSPARK_NUMERIC_TYPES]

input_cols = parse_columns(self, input_cols, filter_by_column_dtypes=filter_dtype)

input_cols = parse_columns(self, input_cols,
filter_by_column_dtypes=[PYSPARK_STRING_TYPES, PYSPARK_NUMERIC_TYPES])
check_column_numbers(input_cols, "*")
output_cols = get_output_cols(input_cols, output_cols)

df = self
for input_col, output_col in zip(input_cols, output_cols):
if is_column_a(df, input_col, "int"):
func = func_numeric
# df = df.cols.cast(input_col, "str", output_col)

df = func(df, input_col, output_col, search, replace_by)

df = df.preserve_meta(self, Actions.REPLACE.value, output_col)
Expand Down
4 changes: 3 additions & 1 deletion optimus/dataframe/rows.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ def append(rows):

return df_result


@add_attr(rows)
def select_by_dtypes(input_cols, data_type=None):
"""
Expand Down Expand Up @@ -84,7 +85,8 @@ def select(*args, **kwargs):
:param kwargs:
:return: Spark DataFrame
"""
return self.filter(*args, **kwargs)
df = self
return df.filter(*args, **kwargs)

@add_attr(rows)
def to_list(input_cols):
Expand Down
2 changes: 1 addition & 1 deletion optimus/helpers/columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ def parse_columns(df, cols_args, get_args=False, is_regex=None, filter_by_column
elif cols_args == "*" or cols_args is None:
cols = df.columns


# In case we have a list of tuples, the first element of each tuple is taken as the column name
# and the rest as params. We can use the params in a custom function as follows:
# def func(attrs): attrs return (1,2) and (3,4)
Expand Down Expand Up @@ -163,7 +164,6 @@ def parse_columns(df, cols_args, get_args=False, is_regex=None, filter_by_column
final_columns = cols

cols_params = []

if invert:
final_columns = list(OrderedSet(df.cols.names()) - OrderedSet(final_columns))

Expand Down

0 comments on commit bf970ef

Please sign in to comment.