Skip to content

Commit

Permalink
Add attack class and add receipt test
Browse files Browse the repository at this point in the history
  • Loading branch information
glassonion1 committed Nov 10, 2021
1 parent 532f121 commit b0c764d
Show file tree
Hide file tree
Showing 7 changed files with 14,295 additions and 6 deletions.
4 changes: 3 additions & 1 deletion anonypy/anonypy.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@ def count_t_closeness(self, k, p):
def agg_categorical_column(series):
    """Collapse a categorical column into one comma-separated string.

    Args:
        series: The column to aggregate (a pandas Series in practice).
            Values do not have to be strings.

    Returns:
        A one-element list holding the distinct values of ``series``, each
        converted with ``str`` and joined by ``","`` in sorted order.
    """
    # NOTE(review): the result of astype() is discarded, so this call does
    # not modify ``series``. It is kept because the original marks it as a
    # workaround for a Series dtype bug -- confirm whether it can be removed.
    series.astype("category")

    # Convert to str before joining: str.join raises TypeError on
    # non-string items. Sorting makes the result independent of the
    # arbitrary iteration order of a set.
    labels = sorted(str(value) for value in set(series))
    return [",".join(labels)]


def agg_numerical_column(series):
Expand Down
4 changes: 1 addition & 3 deletions anonypy/attack.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,6 @@ def attack(df, knowledge):

di = pd.DataFrame(np.hstack((index, dist)))

print(di[2].median())

di.loc[di[2] > di[2].median(), :] = -1
di.loc[di[3] > di[3].median(), :] = -1
# Display the top three
return di.iloc[:, 0:k].astype(int)
13 changes: 13 additions & 0 deletions data/NHANES_attack.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
Female,30.0,Hispanic,College,Parther,26.6,0,0,0,0,Q4,0
Male,67.0,Black,Graduate,Widowed,28.8,0,0,0,0,Q3,1
Female,57.0,Hispanic,9th,Separated,35.4,1,1,0,0,Q3,1
Female,24.0,Other,Graduate,Never,25.3,0,1,0,0,Q1,0
Male,33.0,Japanese,11th,Never,25.3,0,1,0,0,Q1,0
Female,27.0,Black,College,Never,38.0,0,0,0,0,Q1,0
Male,49.0,Other,11th,Married,25.0,0,0,0,0,Q1,0
Female,69.0,Hispanic,9th,Separated,30.3,0,1,0,0,Q4,0
Male,56.0,Other,11th,Married,25.0,1,0,0,0,Q2,1
Male,22.0,Hispanic,College,Never,25.3,0,0,0,0,Q4,0
Female,60.0,Hispanic,Graduate,Divorced,35.9,1,0,0,0,Q2,1
Male,10.0,Japanese,AAA,Nver,35.9,1,0,0,0,Q2,1
Female,51.0,Other,hoge,Divorced,35.9,1,0,0,0,Q2,1
14,232 changes: 14,232 additions & 0 deletions data/receipt.csv

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

setup(
name="anonypy",
version="0.1.4",
version="0.1.7",
packages=find_packages(),
author="glassonion1",
author_email="[email protected]",
Expand Down
5 changes: 4 additions & 1 deletion tests/attack_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ def test_attack():
df = pd.read_csv("data/NHANES.csv", header=None, names=columns)
print(f"\n{df.head()}")

df_attack = pd.read_csv("data/NHANES_attack.csv", header=None, names=columns)
print(f"\n{df_attack.head()}")

for name in categorical:
df[name] = df[name].astype("category")

Expand All @@ -25,7 +28,7 @@ def test_attack():
dfn = pd.DataFrame(rows).loc[:, ["col1", "col2", "col3"]]

# this is the attacker's knowledge
knowledge = df[40:50].loc[:, ["col1", "col2", "col3"]]
knowledge = df_attack.loc[:, ["col1", "col2", "col3"]]

rl = attack.attack(dfn, knowledge)
print(rl)
41 changes: 41 additions & 0 deletions tests/receipt_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import anonypy
import pandas as pd
from datetime import datetime, date


def calculate_age(born, today=None):
    """Return the age in whole years for a ``"YYYY/MM"`` birth month.

    Args:
        born: Birth year and month formatted as ``"%Y/%m"``,
            e.g. ``"1985/07"``.
        today: Reference date the age is measured against. Defaults to
            ``date.today()``; pass a fixed date for reproducible results.

    Returns:
        The number of full years between the birth month and ``today``.

    Raises:
        ValueError: If ``born`` does not match the ``"%Y/%m"`` format.
    """
    # strptime fills the missing day with 1, so the birthday is taken to be
    # the first day of the birth month.
    birth = datetime.strptime(born, "%Y/%m")
    if today is None:
        today = date.today()
    # Subtract one year while this year's birthday is still ahead
    # (the bool counts as 0 or 1 in the subtraction).
    birthday_pending = (today.month, today.day) < (birth.month, birth.day)
    return today.year - birth.year - birthday_pending


def test_receipt():
    """Run k-anonymity (k=2) over the receipt data set as a smoke test.

    Loads ``data/receipt.csv``, casts the categorical columns, replaces
    ``birth_ym`` with the age computed by ``calculate_age``, anonymizes the
    frame and prints the row counts and heads before and after. There are
    no assertions; the test passes as long as nothing raises.
    """
    df = pd.read_csv("data/receipt.csv")

    # Configure the categorical attributes.
    for column in ("r_type", "sex", "family", "icd10"):
        df[column] = df[column].astype("category")

    print(len(df))
    print(df.head())

    # From here on ``birth_ym`` holds an age instead of a "YYYY/MM" string.
    df["birth_ym"] = df["birth_ym"].map(calculate_age)

    preserver = anonypy.Preserver(df, ["sex", "family", "birth_ym"], "iid")
    anonymized = pd.DataFrame(preserver.anonymize_k_anonymity(k=2))

    print(len(anonymized))
    print(anonymized.head())

0 comments on commit b0c764d

Please sign in to comment.