Skip to content

Commit

Permalink
Now can opt to calculate advanced stats
Browse files Browse the repository at this point in the history
  • Loading branch information
argenisleon committed Nov 11, 2019
1 parent e8e4d95 commit 23e7f35
Show file tree
Hide file tree
Showing 3 changed files with 245 additions and 258 deletions.
16 changes: 9 additions & 7 deletions optimus/dataframe/extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,7 @@ def reset(self):


@add_method(DataFrame)
def send(self, name=None, infer=True, mismatch=None, stats=True):
def send(self, name=None, infer=True, mismatch=None, stats=True, advanced_stats=True):
"""
Profile and send the data to the queue
:param self:
Expand All @@ -476,12 +476,14 @@ def send(self, name=None, infer=True, mismatch=None, stats=True):
if name is not None:
df.set_name(name)

columns, output = Profiler.instance.dataset(df, columns="*", buckets=35, infer=infer, relative_error=RELATIVE_ERROR,
approx_count=True,
sample=10000,
stats=stats,
format="json",
mismatch=mismatch)
output = Profiler.instance.dataset(df, columns="*", buckets=35, infer=infer, relative_error=RELATIVE_ERROR,
approx_count=True,
sample=10000,
stats=stats,
format="json",
mismatch=mismatch,
advanced_stats=advanced_stats
)

if Comm.instance:
Comm.instance.send(output)
Expand Down
2 changes: 2 additions & 0 deletions optimus/helpers/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@

# Profiler
# Set of column categories used by the profiler.
PROFILER_COLUMN_TYPES = {"categorical", "numeric", "date", "null", "array", "binary"}

# Maps a type-name key to one of the PROFILER_COLUMN_TYPES categories.
# NOTE(review): the keys appear to mirror the keys of SPARK_DTYPES_TO_PROFILER
# below -- confirm against the callers that index this dict.
# The "binary" key was previously misspelled "binaty", so a lookup with
# "binary" raised KeyError even though "binary" is a valid profiler category.
PYTHON_TO_PROFILER = {"string": "categorical", "boolean": "categorical", "int": "numeric", "decimal": "numeric",
                      "date": "date", "array": "array", "binary": "binary", "null": "null"}

SPARK_DTYPES_TO_PROFILER = {"int": ["smallint", "tinyint", "bigint", "int"], "decimal": ["float", "double"],
"string": "string", "date": {"date", "timestamp"}, "boolean": "boolean", "binary": "binary",
Expand Down
Loading

0 comments on commit 23e7f35

Please sign in to comment.