Skip to content

Commit

Permalink
Fix on sample error
Browse files (browse the repository at this point in the history)
  • Loading branch information
argenisleon committed Nov 11, 2019
1 parent 23e7f35 commit 5949525
Showing 1 changed file with 7 additions and 3 deletions.
10 changes: 7 additions & 3 deletions optimus/profiler/profiler.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -311,6 +311,7 @@ def dataset(self, df, columns="*", buckets=10, infer=False, relative_error=RELAT
:param stats: calculate stats, if not only data table returned
:param format: dict or json
:param mismatch:
:param advanced_stats:
:return: dict or json
"""
output_columns = self.output_columns
Expand Down Expand Up @@ -350,10 +351,11 @@ def dataset(self, df, columns="*", buckets=10, infer=False, relative_error=RELAT
assign(output_columns, "summary.missing_count", total_count_na, dict)
assign(output_columns, "summary.p_missing", round(total_count_na / self.rows_count * 100, 2))

sample = {"columns": [{"title": cols} for cols in df.cols.names()],
"value": df.sample_n(sample).rows.to_list(columns)}
# TODO: drop, rename and move operations must also be reflected in the sample
sample = {"columns": [{"title": cols} for cols in df.cols.names()],
"value": df.sample_n(sample).rows.to_list(columns)}

assign(output_columns, "sample", sample, dict)
assign(output_columns, "sample", sample, dict)

actual_columns = output_columns["columns"]
# Order columns
Expand Down Expand Up @@ -464,6 +466,8 @@ def columns_stats(self, df, columns, buckets=10, infer=False, relative_error=REL

return columns_info



def columns_agg(self, df, columns, buckets=10, relative_error=RELATIVE_ERROR, approx_count=True, advanced_stats=True):
columns = parse_columns(df, columns)
n = BATCH_SIZE
Expand Down

0 comments on commit 5949525

Please sign in to comment.