Skip to content

Commit

Permalink
Preprocessing: Added support for control features (with no transformation), support for TSV as input, and options to fill nulls with a median or a value
Browse files Browse the repository at this point in the history
  • Loading branch information
gabrielspmoreira committed Apr 10, 2023
1 parent 44eb8ad commit 32ec264
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 2 deletions.
4 changes: 3 additions & 1 deletion examples/quick_start/scripts/preproc/args_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def build_arg_parser():
help="",
)

parser.add_argument("--control_features", default="", help="")
parser.add_argument("--categorical_features", default="", help="")
parser.add_argument("--continuous_features", default="", help="")

Expand Down Expand Up @@ -94,6 +95,7 @@ def parse_arguments():
args = parser.parse_args()

# Parsing list args
args.control_features = parse_list_arg(args.control_features)
args.categorical_features = parse_list_arg(args.categorical_features)
args.continuous_features = parse_list_arg(args.continuous_features)

Expand All @@ -110,7 +112,7 @@ def parse_arguments():
if args.filter_query:
args.filter_query = args.filter_query.replace('"', "")

if args.csv_sep.lower() == "<tab>":
if args.csv_sep.lower() == "[tab]":
args.csv_sep = "\t"

return args
4 changes: 3 additions & 1 deletion examples/quick_start/scripts/preproc/preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,12 +164,14 @@ def generate_nvt_workflow_features(self):
args = self.args
feats = dict()

for col in args.control_features:
feats[col] = [col]
for col in args.categorical_features:
feats[col] = [col] >> nvt_ops.Categorify()
for col in args.continuous_features:
feats[col] = [col]
if args.continuous_features_fillna is not None:
if args.continuous_features_fillna.lower() == "<median>":
if args.continuous_features_fillna.lower() == "[median]":
feats[col] = feats[col] >> nvt_ops.FillMedian()
else:
feats[col] = feats[col] >> nvt_ops.FillMissing(args.continuous_features_fillna)
Expand Down

0 comments on commit 32ec264

Please sign in to comment.