Add lidp_auditing code to federated research
PiperOrigin-RevId: 578963150
Change-Id: Ida8f7b085f217bfe471842dfa3bbee9dee2c9d70
Krishna Pillutla authored and copybara-github committed Nov 6, 2023
1 parent 83e4fb5 commit 2d68af9
Showing 6 changed files with 657 additions and 0 deletions.
36 changes: 36 additions & 0 deletions lidp_auditing/BUILD
@@ -0,0 +1,36 @@
# Build rules for the lidp_auditing code.

load("@rules_python//python:defs.bzl", "py_library", "py_test")

licenses(["notice"])

package(
default_applicable_licenses = ["//:package_license"],
default_visibility = [":lidp_auditing_packages"],
)

package_group(
name = "lidp_auditing_packages",
packages = ["//lidp_auditing/..."],
)

py_library(
name = "constants_lib",
srcs = ["constants.py"],
)

py_library(
name = "data_lib",
srcs = ["data.py"],
deps = [":constants_lib"],
)

py_test(
name = "data_test",
timeout = "long",
srcs = ["data_test.py"],
deps = [
":constants_lib",
":data_lib",
],
)
30 changes: 30 additions & 0 deletions lidp_auditing/README.md
@@ -0,0 +1,30 @@
# LiDP Auditing: Unleashing the Power of Randomization in Auditing DP

This is the code to reproduce the experimental results of the NeurIPS 2023 paper
[Unleashing the Power of Randomization in Auditing Differentially Private ML](https://arxiv.org/abs/2305.18447).

Auditing differential privacy for ML involves running a membership inference
attack many times and deriving high-confidence estimates of its success rate
(i.e., how reliably we can detect the presence of a crafted datapoint, called a
"canary", in the training data).

[This paper](https://arxiv.org/abs/2305.18447) introduces a variant of DP
called "Lifted DP" (or "LiDP" for short) that is equivalent to the usual
notions of DP. It also gives a recipe to audit LiDP with multiple randomized
hypothesis tests and adaptive confidence intervals, improving the sample
complexity of auditing DP by a factor of 4 to 16.
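The recipe above turns attack outcomes into a statistical lower bound on the
privacy parameter. As a minimal illustration of this general idea, here is a
sketch using plain Hoeffding intervals and the classic DP hypothesis-testing
inequality (TPR ≤ e^ε · FPR + δ); it is *not* the paper's randomized tests or
adaptive confidence intervals, and all names are our own:

```python
import math


def eps_lower_bound(tp, fp, n, delta=0.0, confidence=0.95):
  """Sketch: high-confidence lower bound on epsilon from attack outcomes.

  Args:
    tp: true positives (canary present and detected) over n trials.
    fp: false positives (canary absent but "detected") over n trials.
    n: number of trials for each rate.
    delta: the DP delta parameter.
    confidence: overall confidence level for the bound.
  """
  # Split the error budget over the two rate estimates (union bound).
  alpha = (1.0 - confidence) / 2.0
  # Hoeffding half-width: P(|p_hat - p| >= t) <= 2 exp(-2 n t^2).
  half_width = math.sqrt(math.log(2.0 / alpha) / (2.0 * n))
  tpr_lower = max(tp / n - half_width, 0.0)
  fpr_upper = min(fp / n + half_width, 1.0)
  # (eps, delta)-DP implies TPR <= exp(eps) * FPR + delta, so
  # eps >= log((TPR - delta) / FPR) whenever the attack is strong enough.
  if fpr_upper <= 0.0 or tpr_lower <= delta:
    return 0.0  # Attack too weak to certify any positive epsilon.
  return max(0.0, math.log((tpr_lower - delta) / fpr_upper))
```

The paper's contribution is precisely to shrink the `n` needed for a useful
bound by running multiple randomized canary tests per trial and adapting the
intervals; this sketch shows only the baseline single-test bound.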

## Cite

If you find this code useful, please cite the following work.

```bibtex
@incollection{pillutla-etal:lidp_auditing:neurips2023,
title = {{Unleashing the Power of Randomization in Auditing
Differentially Private ML}},
author = {Krishna Pillutla and Galen Andrew and Peter Kairouz and
H. Brendan McMahan and Alina Oprea and Sewoong Oh},
booktitle = {NeurIPS},
year = {2023},
}
```
94 changes: 94 additions & 0 deletions lidp_auditing/constants.py
@@ -0,0 +1,94 @@
# Copyright 2023, Google LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Constants, type annotations, and tuned parameters for the experiments."""

import tensorflow as tf

# CANARY TYPES
NO_CANARY = 'no_canary'
STATIC_DATA_CANARY = 'static_data'
RANDOM_GRADIENT_CANARY = 'random_gradient'
CANARY_TYPES = [
STATIC_DATA_CANARY,
RANDOM_GRADIENT_CANARY,
NO_CANARY,
]
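For intuition, the canary types above correspond roughly to the following
(a hypothetical sketch in plain Python, not code from this commit; the function
name, shapes, and return conventions are our assumptions):

```python
import math
import random


def make_canary(canary_type, dim, num_classes, rng):
  """Hypothetical sketch of what each canary type could look like."""
  if canary_type == 'no_canary':
    return None  # Control runs: train without any crafted datapoint.
  if canary_type == 'static_data':
    # A fixed out-of-distribution example with a random label; auditing
    # then asks whether its presence in training is detectable.
    x = [rng.random() for _ in range(dim)]
    y = rng.randrange(num_classes)
    return x, y
  if canary_type == 'random_gradient':
    # A random unit vector injected directly as a gradient during training.
    g = [rng.gauss(0.0, 1.0) for _ in range(dim)]
    norm = math.sqrt(sum(v * v for v in g))
    return [v / norm for v in g]
  raise ValueError(f'Unknown canary type: {canary_type}')
```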

# MODEL TYPES
LINEAR_MODEL = 'linear'
MLP_MODEL = 'mlp'
MODEL_TYPES = [
LINEAR_MODEL,
MLP_MODEL,
]

# DATASETS
FASHION_MNIST_DATASET = 'fashion_mnist'
PURCHASE_DATASET = 'purchase'
DATASET_NAMES = [FASHION_MNIST_DATASET, PURCHASE_DATASET]

# Types
DatasetTupleType = tuple[
tf.data.Dataset, tf.data.Dataset, tf.data.Dataset, tf.data.Dataset
]


# Tuned constants
def get_clip_norm(
dataset: str, model_type: str, dp_eps: float, run_nonprivate: bool
) -> float:
"""Get clip norm based on tuned hyperparameters."""
if run_nonprivate or dp_eps >= 1e5: # Treat it as no DP.
return 1e10 # Extremely large clip norm, should never be reached.
if dataset == FASHION_MNIST_DATASET and model_type == MLP_MODEL:
# FashionMNIST + MLP
clip_norm_dict = {
1.0: 2.0,
2.0: 4.0,
4.0: 4.0,
8.0: 8.0,
16.0: 8.0,
32.0: 8.0,
}
if dp_eps not in clip_norm_dict:
raise ValueError('DP_EPSILON = %s not known.' % dp_eps)
return clip_norm_dict[dp_eps]
elif dataset == FASHION_MNIST_DATASET and model_type == LINEAR_MODEL:
# FashionMNIST + Linear
clip_norm_dict = {
1.0: 4.0,
2.0: 4.0,
4.0: 4.0,
8.0: 8.0,
16.0: 8.0,
32.0: 8.0,
}
if dp_eps not in clip_norm_dict:
raise ValueError('DP_EPSILON = %s not known.' % dp_eps)
return clip_norm_dict[dp_eps]
elif dataset == PURCHASE_DATASET and model_type == MLP_MODEL:
# Purchase + MLP
clip_norm_dict = {
1.0: 0.25,
2.0: 0.5,
4.0: 1.0,
8.0: 1.0,
16.0: 1.0,
32.0: 2.0,
}
if dp_eps not in clip_norm_dict:
raise ValueError('DP_EPSILON = %s not known.' % dp_eps)
return clip_norm_dict[dp_eps]
else:
raise ValueError('Unknown dataset-model: %s, %s' % (dataset, model_type))