Merge pull request #56 from Abhin02:freq_estimation

PiperOrigin-RevId: 431460904 Change-Id: Ie973ade6e99844c2be494b8d5f84c08191694dcf
google-research · Mar 2, 2022 · 9bdfa3f · 9bdfa3f
2 parents 737e29e + 8a8263d
commit 9bdfa3f
Show file tree

Hide file tree

Showing 26 changed files with 1,779 additions and 150 deletions.
diff --git a/rcc_dp/BUILD b/rcc_dp/BUILD
@@ -1,122 +1,24 @@
 load("@rules_python//python:defs.bzl", "py_library", "py_test")
 
-licenses(["notice"])
-
-py_library(
-    name = "config",
-    srcs = ["config.py"],
-    srcs_version = "PY3",
-)
-
-py_library(
-    name = "get_parameters",
-    srcs = ["get_parameters.py"],
-    srcs_version = "PY3",
-    deps = [
-        ":optimize_unbias",
-        ":privunit",
-    ],
-)
-
-py_library(
-    name = "experiment",
-    srcs = ["experiment.py"],
-    srcs_version = "PY3",
-    deps = [
-        ":get_parameters",
-        ":miracle",
-        ":modify_pi",
-        ":privunit",
-        ":sqkr",
+package(
+    default_visibility = [
+        "//rcc_dp:__subpackages__",
     ],
 )
 
-py_library(
-    name = "miracle",
-    srcs = ["miracle.py"],
-    srcs_version = "PY3",
-)
+licenses(["notice"])
 
 py_library(
     name = "modify_pi",
     srcs = ["modify_pi.py"],
-    srcs_version = "PY3",
-)
-
-py_library(
-    name = "optimize_unbias",
-    srcs = ["optimize_unbias.py"],
-    srcs_version = "PY3",
-    deps = [":privunit"],
-)
-
-py_library(
-    name = "privunit",
-    srcs = ["privunit.py"],
-    srcs_version = "PY3",
-)
-
-py_library(
-    name = "sqkr",
-    srcs = ["sqkr.py"],
-)
-
-py_test(
-    name = "experiment_test",
-    size = "large",
-    srcs = ["experiment_test.py"],
-    python_version = "PY3",
-    deps = [
-        ":config",
-        ":experiment",
-    ],
-)
-
-py_test(
-    name = "get_parameters_test",
-    srcs = ["get_parameters_test.py"],
-    python_version = "PY3",
-    srcs_version = "PY3",
-    tags = ["nokokoro"],  # Runtime too high for continuous integration.
-    deps = [
-        ":get_parameters",
-        ":miracle",
-        ":modify_pi",
-    ],
-)
-
-py_test(
-    name = "miracle_test",
-    srcs = ["miracle_test.py"],
-    python_version = "PY3",
-    deps = [
-        ":get_parameters",
-        ":miracle",
-    ],
 )
 
 py_test(
     name = "modify_pi_test",
     srcs = ["modify_pi_test.py"],
-    python_version = "PY3",
-    srcs_version = "PY3",
     deps = [
-        ":get_parameters",
-        ":miracle",
         ":modify_pi",
+        "//rcc_dp/mean_estimation:get_parameters",
+        "//rcc_dp/mean_estimation:miracle",
     ],
 )
-
-py_test(
-    name = "privunit_test",
-    srcs = ["privunit_test.py"],
-    python_version = "PY3",
-    deps = [":privunit"],
-)
-
-py_test(
-    name = "sqkr_test",
-    srcs = ["sqkr_test.py"],
-    python_version = "PY3",
-    deps = [":sqkr"],
-)
diff --git a/rcc_dp/README.md b/rcc_dp/README.md
@@ -1,3 +1,110 @@
-# RCC/DP project
+# Source code for "Optimal Compression of Locally Differentially Private Mechanisms"
 
-This is work in progress. Links to relevant publications will be added later.
+Reference: Abhin Shah, Wei-Ning Chen, Johannes Balle, Peter Kairouz, Lucas
+Theis, "Optimal Compression of Locally Differentially Private Mechanisms," The
+25th International Conference on Artificial Intelligence and Statistics
+(AISTATS), 2022
+
+Contact: [email protected], [email protected]
+
+Arxiv:
+[https://arxiv.org/pdf/2111.00092.pdf](https://arxiv.org/pdf/2111.00092.pdf)
+
+### Dependencies:
+
+In order to successfully execute the code, the following libraries must be
+installed:
+
+Python --- json, math, time, matplotlib, numpy, scipy,
+[absl](https://github.com/abseil/abseil-py),
+[ml_collections](https://github.com/google/ml_collections)
+
+### Wrapper functions:
+
+This repository contains the code for (a) mean estimation and (b) frequency
+estimation. To run the code, a wrapper function needs to be written. For
+example, to run the mean estimation code, the following could be used:
+
+```
+from mean_estimation import config as defaults
+from mean_estimation import experiment
+from mean_estimation import experiment_coding_cost
+
+def main():
+    config = defaults.get_config()
+    experiment.evaluate('path-to-the-mean-estimation-code', config)
+
+if __name__ == "__main__":
+    main()
+```
+
+Similarly, to run the frequency estimation code, the following could be used:
+
+```
+from frequency_estimation import config as defaults
+from frequency_estimation import experiment
+from frequency_estimation import experiment_coding_cost
+
+def main():
+    config = defaults.get_config()
+    experiment.evaluate('path-to-the-frequency-estimation-code', config)
+
+if __name__ == "__main__":
+    main()
+```
+
+### Reproducing the figures
+
+1.  To reproduce Figure 1(Top), make the following changes in
+    mean_estimation/config.py: `num_itr = 10, vary = "cc"` and add the following
+    commands in the main function of the wrapper: `config =
+    defaults.get_config()
+    experiment_coding_cost.evaluate('path-to-the-mean-estimation-code', config)`
+2.  To reproduce Figure 1(Bottom), make the following changes in
+    mean_estimation/config.py: `num_itr = 10, vary = "eps"` and add the following
+    commands in the main function of the wrapper: `config =
+    defaults.get_config()
+    experiment.evaluate('path-to-the-mean-estimation-code', config)`
+3.  To reproduce Figure 2(Top), make the following changes in
+    frequency_estimation/config.py: `num_itr = 10, vary = "cc"` and add the
+    following commands in the main function of the wrapper: `config =
+    defaults.get_config()
+    experiment_coding_cost.evaluate('path-to-the-frequency-estimation-code',
+    config)`
+4.  To reproduce Figure 2(Bottom), make the following changes in
+    frequency_estimation/config.py: `num_itr = 10, vary = "eps"` and add the
+    following commands in the main function of the wrapper: `config =
+    defaults.get_config()
+    experiment.evaluate('path-to-the-frequency-estimation-code', config)`
+5.  To reproduce Figure 3, make the following changes in
+    mean_estimation/config.py: `run_approx_miracle=True,
+    run_modified_miracle=False, num_itr = 10, vary = "eps"` and add the following
+    commands in the main function of the wrapper: `config =
+    defaults.get_config()
+    experiment.evaluate('path-to-the-mean-estimation-code', config)`
+6.  To reproduce Figure 4(Left), make the following changes in
+    mean_estimation/config.py: `num_itr = 10, vary = "d"` and add the following
+    commands in the main function of the wrapper: `config =
+    defaults.get_config()
+    experiment.evaluate('path-to-the-mean-estimation-code', config)`
+7.  To reproduce Figure 4(Right), make the following changes in
+    mean_estimation/config.py: `num_itr = 10, vary = "n"` and add the following
+    commands in the main function of the wrapper: `config =
+    defaults.get_config()
+    experiment.evaluate('path-to-the-mean-estimation-code', config)`
+8.  To reproduce Figure 5, make the following changes in
+    frequency_estimation/config.py: `run_approx_miracle=True,
+    run_modified_miracle=False, num_itr = 10, vary = "eps"` and add the following
+    commands in the main function of the wrapper: `config =
+    defaults.get_config()
+    experiment.evaluate('path-to-the-frequency-estimation-code', config)`
+9.  To reproduce Figure 6(Left), make the following changes in
+    frequency_estimation/config.py: `num_itr = 10, vary = "k"` and add the
+    following commands in the main function of the wrapper: `config =
+    defaults.get_config()
+    experiment.evaluate('path-to-the-frequency-estimation-code', config)`
+10. To reproduce Figure 6(Right), make the following changes in
+    frequency_estimation/config.py: `num_itr = 10, vary = "n"` and add the
+    following commands in the main function of the wrapper: `config =
+    defaults.get_config()
+    experiment.evaluate('path-to-the-frequency-estimation-code', config)`
diff --git a/rcc_dp/frequency_estimation/BUILD b/rcc_dp/frequency_estimation/BUILD
@@ -0,0 +1,64 @@
+load("@rules_python//python:defs.bzl", "py_library", "py_test")
+
+licenses(["notice"])
+
+py_library(
+    name = "config",
+    srcs = ["config.py"],
+)
+
+py_library(
+    name = "experiment",
+    srcs = ["experiment.py"],
+    deps = [
+        ":miracle",
+        ":rhr",
+        ":ss",
+        ":unbias",
+        "//rcc_dp:modify_pi",
+    ],
+)
+
+py_library(
+    name = "experiment_coding_cost",
+    srcs = ["experiment_coding_cost.py"],
+    deps = [
+        ":miracle",
+        ":rhr",
+        ":ss",
+        ":unbias",
+        "//rcc_dp:modify_pi",
+    ],
+)
+
+py_test(
+    name = "experiment_test",
+    srcs = ["experiment_test.py"],
+    deps = [
+        ":config",
+        ":experiment",
+        ":experiment_coding_cost",
+    ],
+)
+
+py_library(
+    name = "miracle",
+    srcs = ["miracle.py"],
+)
+
+py_library(
+    name = "unbias",
+    srcs = ["unbias.py"],
+    deps = [":ss"],
+)
+
+py_library(
+    name = "ss",
+    srcs = ["ss.py"],
+)
+
+py_library(
+    name = "rhr",
+    srcs = ["rhr.py"],
+    deps = [":ss"],
+)
diff --git a/rcc_dp/frequency_estimation/config.py b/rcc_dp/frequency_estimation/config.py
@@ -0,0 +1,58 @@
+# Copyright 2021, Google LLC.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Base configuration."""
+
+from ml_collections.config_dict import config_dict
+
+
+def get_config():
+  """Returns config dictionary for model."""
+  config = dict(
+      name="defaults",
+      # Data distribution: one of "geometric", "zipf", or "uniform".
+      # lbd_geometric / degree_zipf are presumably its parameters (confirm).
+      distribution="zipf",
+      lbd_geometric=0.8,
+      degree_zipf=1.0,
+      # Flags to indicate which methods to compare.
+      run_approx_miracle=False,
+      run_miracle=False,
+      run_modified_miracle=True,
+      run_ss=True,
+      run_rhr=True,
+      encoding_type="fast",  # Either "fast" or "normal".
+      # Common parameters.
+      num_itr=1,
+      coding_cost=14,
+      coding_cost_multiplier=1,
+      approx_coding_cost_multiplier=3,
+      approx_t=6,
+      # Specific parameters (leave them as they are for now).
+      delta=10**(-6),
+      alpha=1.0,
+      # Variation.
+      vary="eps",  # One of "cc", "k", "n", "eps"; see the *_space lists below.
+      cc_space=[6, 8, 10, 12, 14],
+      k_space=[200, 400, 600, 800, 1000],
+      n_space=[2000, 4000, 6000, 8000, 10000],
+      eps_space=list(range(1, 9)),
+      # Defaults (presumably used for the parameters not being varied).
+      n=5000,
+      k=500,
+      t=3,
+      epsilon_target=6,
+  )
+  config = config_dict.ConfigDict(config)
+  config.lock()  # Prevent addition of new fields.
+  return config