Skip to content

Commit

Permalink
Add nullvalue param
Browse files Browse the repository at this point in the history
  • Loading branch information
argenisleon committed Nov 10, 2019
1 parent 00b6ba2 commit d2653da
Showing 1 changed file with 3 additions and 1 deletion.
4 changes: 3 additions & 1 deletion optimus/io/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def tsv(path, header='true', infer_schema='true', charset="UTF-8", *args, **kwar
return df

@staticmethod
def csv(path, sep=',', header='true', infer_schema='true', charset="UTF-8", *args, **kwargs):
def csv(path, sep=',', header='true', infer_schema='true', charset="UTF-8", null_value="None", *args, **kwargs):
"""
Return a dataframe from a csv file. It is the same read.csv Spark function with some predefined
params
Expand All @@ -68,6 +68,7 @@ def csv(path, sep=',', header='true', infer_schema='true', charset="UTF-8", *arg
:param header: tell the function whether dataset has a header row. 'true' default.
:param infer_schema: infers the input schema automatically from data.
:param charset: Charset file encoding
:param null_value: string representation of a null value; fields matching it are read as None. "None" default.
Schema inference (infer_schema) requires one extra pass over the data. 'true' default.
:return dataFrame
Expand All @@ -80,6 +81,7 @@ def csv(path, sep=',', header='true', infer_schema='true', charset="UTF-8", *arg
.options(mode="DROPMALFORMED")
.options(delimiter=sep)
.options(inferSchema=infer_schema)
.options(nullValue=null_value)
.option("charset", charset)
.csv(file, *args, **kwargs))

Expand Down

0 comments on commit d2653da

Please sign in to comment.