From 8d22b2a287adcbcea04734cdac0d954c3f1de898 Mon Sep 17 00:00:00 2001 From: Argenis Leon Date: Thu, 14 Nov 2019 17:56:04 -0600 Subject: [PATCH] added columns param to sort() --- optimus/dataframe/columns.py | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/optimus/dataframe/columns.py b/optimus/dataframe/columns.py index bd4bcd21..2eba2b93 100644 --- a/optimus/dataframe/columns.py +++ b/optimus/dataframe/columns.py @@ -2,7 +2,6 @@ import re import string import unicodedata -import uuid from ast import literal_eval from functools import reduce from heapq import nlargest @@ -502,24 +501,27 @@ def keep(columns=None, regex=None): @add_attr(cols) # TODO: Create a function to sort by datatype? - def sort(order="asc"): + def sort(order="asc", columns=None): """ Sort dataframes columns asc or desc :param order: 'asc' or 'desc' accepted + :param columns: :return: Spark DataFrame """ - _reverse = None - if order == "asc": - _reverse = False - elif order == "desc": - _reverse = True - else: - RaiseIt.value_error(order, ["asc", "desc"]) + df = self + if columns is None: + _reverse = None + if order == "asc": + _reverse = False + elif order == "desc": + _reverse = True + else: + RaiseIt.value_error(order, ["asc", "desc"]) - columns = self.cols.names() - columns.sort(key=lambda v: v.upper(), reverse=_reverse) + columns = df.cols.names() + columns.sort(key=lambda v: v.upper(), reverse=_reverse) - return self.select(columns) + return df.select(columns) @add_attr(cols) def drop(columns=None, regex=None, data_type=None): @@ -2163,8 +2165,11 @@ def string_to_index(input_cols=None, output_cols=None, columns=None): :param columns: :return: """ + df = self - return ml_string_to_index(df, input_cols, output_cols, columns) + df = ml_string_to_index(df, input_cols, output_cols, columns) + + return df @add_attr(cols) def bucketizer(input_cols, splits, output_cols=None):