diff --git a/optimus/dataframe/columns.py b/optimus/dataframe/columns.py index 63b470a1..649d938f 100644 --- a/optimus/dataframe/columns.py +++ b/optimus/dataframe/columns.py @@ -1462,7 +1462,7 @@ def nest(input_cols, shape="string", separator="", output_col=None): if shape is "vector": input_cols = parse_columns(self, input_cols, filter_by_column_dtypes=PYSPARK_NUMERIC_TYPES) - + output_col = one_list_to_val(output_col) vector_assembler = VectorAssembler( inputCols=input_cols, outputCol=output_col) diff --git a/optimus/ml/feature.py b/optimus/ml/feature.py index 481da052..c8586867 100644 --- a/optimus/ml/feature.py +++ b/optimus/ml/feature.py @@ -38,18 +38,16 @@ def string_to_index(df, input_cols, output_cols=None, **kargs): :return: Dataframe with indexed columns. """ - # input_cols = parse_columns(df, input_cols) + input_cols = parse_columns(df, input_cols) if output_cols is None: - output_cols = [name_col(input_col, "index_to_string") for input_col in input_cols] - print(output_cols) indexers = [StringIndexer(inputCol=input_col, outputCol=output_col, **kargs).fit(df) for input_col, output_col in zip(list(set(input_cols)), list(set(output_cols)))] pipeline = Pipeline(stages=indexers) df = pipeline.fit(df).transform(df) - # df.show() + return df