-
Notifications
You must be signed in to change notification settings - Fork 196
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add lidp_auditing code to federated research
PiperOrigin-RevId: 578963150 Change-Id: Ida8f7b085f217bfe471842dfa3bbee9dee2c9d70
- Loading branch information
1 parent
83e4fb5
commit 2d68af9
Showing
6 changed files
with
657 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
# Build all for the code.

load("@rules_python//python:defs.bzl", "py_library", "py_test")

licenses(["notice"])

# Package defaults: everything here is licensed via the repo-level package
# license and visible only to targets inside //lidp_auditing/... .
package(
    default_applicable_licenses = ["//:package_license"],
    default_visibility = [":lidp_auditing_packages"],
)

# Group of all packages under //lidp_auditing, used as the default visibility.
package_group(
    name = "lidp_auditing_packages",
    packages = ["//lidp_auditing/..."],
)

# Shared constants and tuned hyperparameters (constants.py).
py_library(
    name = "constants_lib",
    srcs = ["constants.py"],
)

# Dataset loading/processing; depends on the shared constants.
py_library(
    name = "data_lib",
    srcs = ["data.py"],
    deps = [":constants_lib"],
)

# Tests for data_lib; "long" timeout presumably because dataset
# loading is slow — confirm against data_test.py.
py_test(
    name = "data_test",
    timeout = "long",
    srcs = ["data_test.py"],
    deps = [
        ":constants_lib",
        ":data_lib",
    ],
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# LiDP Auditing: Unleashing the Power of Randomization in Auditing DP | ||
|
||
This is the code to reproduce the experimental results of the NeurIPS 2023 paper | ||
[Unleashing the Power of Randomization in Auditing Differentially Private ML](https://arxiv.org/abs/2305.18447). | ||
|
||
Auditing differential privacy for ML involves running membership inference many | ||
times and giving high-confidence estimates on the success of the attack (i.e., | ||
we try to detect the presence of a crafted datapoint, called a "canary" in the | ||
training data). | ||
|
||
[This paper](https://arxiv.org/abs/2305.18447) introduces a variant of DP
called "Lifted DP" (or "LiDP" in short) that is equivalent to the usual notions | ||
of DP. It also gives a recipe to audit LiDP with multiple randomized hypothesis | ||
tests and adaptive confidence intervals to improve the sample complexity of | ||
auditing DP by 4 to 16 times. | ||
|
||
## Cite | ||
|
||
If you found this code useful, please cite the following work. | ||
|
||
``` | ||
@incollection{pillutla-etal:lidp_auditing:neurips2023, | ||
title = {{Unleashing the Power of Randomization in Auditing | ||
Differentially Private ML}}, | ||
author = {Krishna Pillutla and Galen Andrew and Peter Kairouz and | ||
H. Brendan McMahan and Alina Oprea and Sewoong Oh}, | ||
booktitle = {NeurIPS}, | ||
year = {2023}, | ||
} | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
# Copyright 2023, Google LLC. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
"""Constants, type annotations, and tuned parameters for the experiments.""" | ||
|
||
import tensorflow as tf | ||
|
||
# CANARY TYPES: which kind of crafted datapoint ("canary") is inserted for
# the membership-inference audit.
NO_CANARY = 'no_canary'  # Baseline run with no canary inserted.
STATIC_DATA_CANARY = 'static_data'  # Canary is a fixed crafted datapoint.
RANDOM_GRADIENT_CANARY = 'random_gradient'  # Canary is a random gradient.
CANARY_TYPES = [
    STATIC_DATA_CANARY,
    RANDOM_GRADIENT_CANARY,
    NO_CANARY,
]

# MODEL TYPES: supported model architectures.
LINEAR_MODEL = 'linear'
MLP_MODEL = 'mlp'
MODEL_TYPES = [
    LINEAR_MODEL,
    MLP_MODEL,
]

# DATASETS: supported dataset names.
FASHION_MNIST_DATASET = 'fashion_mnist'
PURCHASE_DATASET = 'purchase'
DATASET_NAMES = [FASHION_MNIST_DATASET, PURCHASE_DATASET]

# Types
# A 4-tuple of tf.data.Datasets; presumably (train, validation, test, canary)
# splits — confirm against the data pipeline that constructs it.
DatasetTupleType = tuple[
    tf.data.Dataset, tf.data.Dataset, tf.data.Dataset, tf.data.Dataset
]
|
||
|
||
# Tuned constants | ||
# Tuned constants
def get_clip_norm(
    dataset: str, model_type: str, dp_eps: float, run_nonprivate: bool
) -> float:
  """Get clip norm based on tuned hyperparameters.

  Args:
    dataset: Dataset name, expected to be one of `DATASET_NAMES`.
    model_type: Model architecture, expected to be one of `MODEL_TYPES`.
    dp_eps: Target DP epsilon for which the clip norm was tuned.
    run_nonprivate: If True, DP is disabled and an effectively unbounded
      clip norm is returned.

  Returns:
    The gradient clip norm tuned for the given configuration.

  Raises:
    ValueError: If there is no tuned clip norm for the given
      (dataset, model_type) pair or for `dp_eps`.
  """
  if run_nonprivate or dp_eps >= 1e5:  # Treat it as no DP.
    return 1e10  # Extremely large clip norm, should never be reached.
  # Tuned clip norms, keyed first by (dataset, model_type), then by epsilon.
  # Consolidates three previously duplicated lookup/raise branches.
  tuned_clip_norms = {
      (FASHION_MNIST_DATASET, MLP_MODEL): {
          1.0: 2.0,
          2.0: 4.0,
          4.0: 4.0,
          8.0: 8.0,
          16.0: 8.0,
          32.0: 8.0,
      },
      (FASHION_MNIST_DATASET, LINEAR_MODEL): {
          1.0: 4.0,
          2.0: 4.0,
          4.0: 4.0,
          8.0: 8.0,
          16.0: 8.0,
          32.0: 8.0,
      },
      (PURCHASE_DATASET, MLP_MODEL): {
          1.0: 0.25,
          2.0: 0.5,
          4.0: 1.0,
          8.0: 1.0,
          16.0: 1.0,
          32.0: 2.0,
      },
  }
  if (dataset, model_type) not in tuned_clip_norms:
    raise ValueError('Unknown dataset-model: %s, %s' % (dataset, model_type))
  clip_norm_dict = tuned_clip_norms[(dataset, model_type)]
  if dp_eps not in clip_norm_dict:
    raise ValueError('DP_EPSILON = %s not known.' % dp_eps)
  return clip_norm_dict[dp_eps]
Oops, something went wrong.