Skip to content

Commit

Permalink
Merge pull request #14 from ThaminduR/fixNumericalAgg
Browse files: browse the repository at this point in the history
Fix `agg_numerical_column` function
  • Loading branch information
ThaminduR authored Jan 24, 2021
2 parents b8df7c8 + 5c2760a commit a41b63f
Showing 1 changed file with 28 additions and 24 deletions.
52 changes: 28 additions & 24 deletions spark_privacy_preserver/mondrian_utils/utility.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -178,34 +178,37 @@ def agg_numerical_column(series):
minm = str(minimum)

if(len(minm) == 1):
min_start = minm[-1]
if(minimum >= 5):
string = '5-'
else:
string = '0-'
else:
min_start = minm[-2]
if(minimum >= int(min_start+'5')):
string = min_start+'5-'
if (minm[-1]=='0'):
string = minm +"-"
else:
string = min_start+'0-'
min_start = minm[:-1]
if(minimum >= int(min_start+'5')):
string = min_start+'5-'
else:
string = min_start+'0-'

if(len(maxm) == 1):
max_start = maxm[-1]
if(maximum >= 5):
string += "10"
else:
string += '5'
else:
max_start = maxm[-2]
if(maximum >= int(max_start+'5')):
string += str(int(max_start+'0') + 10)
if(maxm[-1]=='0'):
string += maxm
else:
string += max_start+'5'
max_start = maxm[:-1]
if(maximum > int(max_start+'5')):
string += str(int(max_start+'0') + 10)
else:
string += max_start+'5'

return string


def anonymizer(df, partitions, feature_columns, sensitive_column, categorical, max_partitions=None):
aggregations = {}

Expand Down Expand Up @@ -409,33 +412,34 @@ def agg_columns(df, partdf, indexes, columns, categorical):
maxm = str(maximum)
minm = str(minimum)
if(len(minm) == 1):
min_start = minm[-1]
if(minimum >= 5):
string = '5-'
else:
string = '0-'
else:
min_start = minm[-2]
if(minimum >= int(min_start+'5')):
string = min_start+'5-'
if (minm[-1]=='0'):
string = minm +"-"
else:
string = min_start+'0-'
min_start = minm[:-1]
if(minimum >= int(min_start+'5')):
string = min_start+'5-'
else:
string = min_start+'0-'

if(len(maxm) == 1):
max_start = maxm[-1]
if(maximum >= 5):
string += "10"
else:
string += '5'
else:
max_start = maxm[-2]
if(maximum >= int(max_start+'5')):
string += str(int(max_start+'0') + 10)
if(maxm[-1]=='0'):
string += maxm
else:
string += max_start+'5'

min_start = minm[-2]
max_start = maxm[-2]
max_start = maxm[:-1]
if(maximum > int(max_start+'5')):
string += str(int(max_start+'0') + 10)
else:
string += max_start+'5'

df[column] = df[column].astype(str)
df.loc[indexes, column] = string
Expand Down

0 comments on commit a41b63f

Please sign in to comment.