
Commit d631d2b

Merge pull request OpenMined#1967 from kamathhrishi/cifar10_exp
PATE
2 parents 72d8316 + cd84b1f

File tree: 2 files changed (+289, -3)

syft/frameworks/torch/differential_privacy/pate.py (+244, -3)
@@ -24,6 +24,7 @@
 import os
 import math
 import numpy as np
+import torch
 
 # import tensorflow as tf
 #
@@ -50,7 +51,7 @@
5051

5152

5253
def compute_q_noisy_max(counts, noise_eps):
53-
"""returns ~ Pr[outcome != winner].
54+
"""Returns ~ Pr[outcome != winner].
5455
5556
Args:
5657
counts: a list of scores
@@ -65,16 +66,19 @@ def compute_q_noisy_max(counts, noise_eps):
 
     winner = np.argmax(counts)
     counts_normalized = noise_eps * (counts - counts[winner])
+
     counts_rest = np.array([counts_normalized[i] for i in range(len(counts)) if i != winner])
     q = 0.0
     for c in counts_rest:
         gap = -c
+
         q += (gap + 2.0) / (4.0 * math.exp(gap))
+
     return min(q, 1.0 - (1.0 / len(counts)))
 
 
 def compute_q_noisy_max_approx(counts, noise_eps):
-    """returns ~ Pr[outcome != winner].
+    """Returns ~ Pr[outcome != winner].
 
     Args:
         counts: a list of scores
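A quick standalone sanity check of the bound accumulated in that loop, which approximates Pr[noisy argmax != true winner] under report-noisy-max (the toy counts below are chosen for illustration, not taken from this commit): with strong teacher consensus the flip probability should be tiny.

    import math
    import numpy as np

    def q_noisy_max(counts, noise_eps):
        # Same computation as compute_q_noisy_max in the diff above.
        winner = np.argmax(counts)
        counts_normalized = noise_eps * (counts - counts[winner])
        counts_rest = np.array([counts_normalized[i] for i in range(len(counts)) if i != winner])
        q = 0.0
        for c in counts_rest:
            gap = -c
            q += (gap + 2.0) / (4.0 * math.exp(gap))
        return min(q, 1.0 - (1.0 / len(counts)))

    # 100 teachers, 10 labels: 91 votes for label 0, one vote for each other label.
    print(q_noisy_max(np.array([91.0] + [1.0] * 9), noise_eps=0.1))  # ~0.003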
@@ -213,24 +217,28 @@ def perform_analysis(teacher_preds, indices, noise_eps, delta=1e-5, moments=8, beta=0.09):
 
     assert num_examples == _num_examples
 
-    counts_mat = np.zeros((num_examples, num_labels)).astype(np.int32)
+    counts_mat = np.zeros((num_examples, num_labels))
 
     for i in range(num_examples):
         for j in range(num_teachers):
             counts_mat[i, int(teacher_preds[j, i])] += 1
 
     l_list = 1.0 + np.array(range(moments))
+
     total_log_mgf_nm = np.array([0.0 for _ in l_list])
     total_ss_nm = np.array([0.0 for _ in l_list])
 
     for i in indices:
+
         total_log_mgf_nm += np.array(
             [logmgf_from_counts(counts_mat[i], noise_eps, l) for l in l_list]
         )
+
         total_ss_nm += np.array([smoothed_sens(counts_mat[i], noise_eps, l, beta) for l in l_list])
 
     # We want delta = exp(alpha - eps * l).
     # Solving gives eps = (alpha - ln(delta)) / l
+
     eps_list_nm = (total_log_mgf_nm - math.log(delta)) / l_list
 
     # print("Epsilons (Noisy Max): " + str(eps_list_nm))
@@ -266,3 +274,236 @@ def perform_analysis(teacher_preds, indices, noise_eps, delta=1e-5, moments=8, beta=0.09):
     # print("Data independent bound = " + str(min(data_ind_eps_list)) + ".")
 
     return min(eps_list_nm), min(data_ind_eps_list)
+
+
+def tensors_to_literals(tensor_list):
+    """Converts a list of torch tensors to a list of integers/floats.
+
+    Workaround for the missing functionality that converts a list of
+    tensors into a single tensor.
+
+    Args:
+        tensor_list: list of torch tensors
+
+    Returns:
+        literal_list: list of floats/integers
+    """
+    literal_list = []
+    for tensor in tensor_list:
+        literal_list.append(tensor.item())
+    return literal_list
+
+
+def logmgf_exact_torch(q, priv_eps, l):
+    """Computes the logmgf value given q and privacy eps.
+
+    The bound used is the min of three terms. The first term is from
+    https://arxiv.org/pdf/1605.02065.pdf. The second term is based on
+    the fact that when an event has probability (1 - q) for q close to
+    zero, q can only change by exp(eps), which corresponds to a much
+    smaller multiplicative change in (1 - q). The third term comes
+    directly from the privacy guarantee.
+
+    Args:
+        q: probability of the non-optimal outcome
+        priv_eps: eps parameter for DP
+        l: moment to compute
+
+    Returns:
+        Upper bound on logmgf
+    """
+    if q < 0.5:
+        t_one = (1 - q) * math.pow((1 - q) / (1 - math.exp(priv_eps) * q), l)
+        t_two = q * math.exp(priv_eps * l)
+        t = t_one + t_two
+        try:
+            log_t = math.log(t)
+        except ValueError:
+            print("Got ValueError in math.log for values: " + str((q, priv_eps, l, t)))
+            log_t = priv_eps * l
+    else:
+        log_t = priv_eps * l
+
+    return min(0.5 * priv_eps * priv_eps * l * (l + 1), log_t, priv_eps * l)
+
+
+def compute_q_noisy_max_torch(counts, noise_eps):
+    """Returns ~ Pr[outcome != winner].
+
+    Args:
+        counts: a list of scores
+        noise_eps: privacy parameter for noisy_max
+
+    Returns:
+        q: the probability that the outcome is different from the true winner
+    """
+    if not isinstance(counts, torch.Tensor):
+        counts = torch.tensor(tensors_to_literals(counts), dtype=torch.float)
+
+    _, winner = counts.max(0)
+    counts_normalized = noise_eps * (counts - counts[winner])
+    counts_normalized = tensors_to_literals(counts_normalized)
+    counts_rest = torch.tensor(
+        [counts_normalized[i] for i in range(len(counts)) if i != winner], dtype=torch.float
+    )
+    q = 0.0
+    for c in counts_rest:
+        gap = -c
+        q += (gap + 2.0) / (4.0 * math.exp(gap))
+
+    return min(q, 1.0 - (1.0 / len(counts)))
+
+
+def logmgf_from_counts_torch(counts, noise_eps, l):
+    """ReportNoisyMax mechanism with noise_eps is 2*noise_eps-DP in our
+    setting, where one count can go up by 1 and another can go down by 1.
+    """
+    q = compute_q_noisy_max_torch(counts, noise_eps)
+    return logmgf_exact_torch(q, 2.0 * noise_eps, l)
+
+
+def sens_at_k_torch(counts, noise_eps, l, k):
+    """Returns sensitivity at distance k.
+
+    Args:
+        counts: an array of scores
+        noise_eps: noise parameter used
+        l: moment whose sensitivity is being computed
+        k: distance
+
+    Returns:
+        sensitivity at distance k
+    """
+    counts_sorted = sorted(counts, reverse=True)
+
+    if 0.5 * noise_eps * l > 1:
+        print("l too large to compute sensitivity")
+        return 0
+
+    if counts[0] < counts[1] + k:
+        return 0
+
+    counts_sorted[0] -= k
+    counts_sorted[1] += k
+    val = logmgf_from_counts_torch(counts_sorted, noise_eps, l)
+    counts_sorted[0] -= 1
+    counts_sorted[1] += 1
+    val_changed = logmgf_from_counts_torch(counts_sorted, noise_eps, l)
+    return val_changed - val
+
+
+def smooth_sens_torch(counts, noise_eps, l, beta):
+    """Computes the beta-smooth sensitivity.
+
+    Args:
+        counts: array of scores
+        noise_eps: noise parameter
+        l: moment of interest
+        beta: smoothness parameter
+
+    Returns:
+        smooth_sensitivity: a beta-smooth upper bound
+    """
+    k = 0
+    smoothed_sensitivity = sens_at_k_torch(counts, noise_eps, l, k)
+
+    while k < max(counts):
+        k += 1
+        sensitivity_at_k = sens_at_k_torch(counts, noise_eps, l, k)
+        smoothed_sensitivity = max(smoothed_sensitivity, math.exp(-beta * k) * sensitivity_at_k)
+        if sensitivity_at_k == 0.0:
+            break
+
+    return smoothed_sensitivity
+
+
+def perform_analysis_torch(preds, indices, noise_eps=0.1, delta=1e-5, moments=8, beta=0.09):
+    """Performs PATE analysis on predictions from teachers and combined predictions for student.
+
+    Args:
+        preds: a torch tensor of dim (num_teachers x num_examples). Each value corresponds
+            to the index of the label which a teacher gave for a specific example.
+        indices: a torch tensor of dim (num_examples) of aggregated examples which were
+            aggregated using the noisy max mechanism.
+        noise_eps: the epsilon level used to create the indices
+        delta: the desired level of delta
+        moments: the number of moments to track (see the paper)
+        beta: a smoothing parameter (see the paper)
+
+    Returns:
+        tuple: first value is the data-dependent epsilon, then the data-independent epsilon
+    """
+    num_teachers, num_examples = preds.shape
+    _num_examples = indices.shape[0]
+
+    assert num_examples == _num_examples
+
+    labels = list(preds.flatten())
+    labels = set([tensor.item() for tensor in labels])
+    num_labels = len(labels)
+
+    counts_mat = torch.zeros(num_examples, num_labels, dtype=torch.float32)
+
+    for i in range(num_examples):
+        for j in range(num_teachers):
+            counts_mat[i, int(preds[j, i])] += 1
+
+    l_list = 1 + torch.tensor(range(moments), dtype=torch.float)
+
+    total_log_mgf_nm = torch.tensor([0.0 for _ in l_list], dtype=torch.float)
+    total_ss_nm = torch.tensor([0.0 for _ in l_list], dtype=torch.float)
+
+    for i in indices:
+        total_log_mgf_nm += torch.tensor(
+            [logmgf_from_counts_torch(counts_mat[i].clone(), noise_eps, l) for l in l_list]
+        )
+        total_ss_nm += torch.tensor(
+            [smooth_sens_torch(counts_mat[i].clone(), noise_eps, l, beta) for l in l_list],
+            dtype=torch.float,
+        )
+
+    eps_list_nm = (total_log_mgf_nm - math.log(delta)) / l_list
+    ss_eps = 2.0 * beta * math.log(1 / delta)
+    ss_scale = 2.0 / ss_eps
+    if min(eps_list_nm) == eps_list_nm[-1]:
+        print(
+            "Warning: May not have used enough values of l. Increase 'moments' variable and run again."
+        )
+
+    data_ind_log_mgf = torch.tensor([0.0 for _ in l_list])
+    data_ind_log_mgf += num_examples * torch.tensor(
+        tensors_to_literals([logmgf_exact_torch(1.0, 2.0 * noise_eps, l) for l in l_list])
+    )
+
+    data_ind_eps_list = (data_ind_log_mgf - math.log(delta)) / l_list
+
+    return min(eps_list_nm), min(data_ind_eps_list)
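With those additions, pate gains a torch twin of perform_analysis. A minimal end-to-end sketch of the intended call, mirroring the new test below (the fake-prediction setup comes from that test; the local variable names are illustrative):

    import numpy as np

    from syft.frameworks.torch.differential_privacy import pate

    num_teachers, num_examples, num_labels = (100, 50, 10)
    # Fake teacher predictions: one label index per (teacher, example) pair.
    preds = (np.random.rand(num_teachers, num_examples) * num_labels).astype(int)
    indices = (np.random.rand(num_examples) * num_labels).astype(int)  # aggregated answers

    preds[:, 0:10] *= 0  # give the first ten examples perfect consensus, as in the tests

    data_dep_eps, data_ind_eps = pate.perform_analysis_torch(
        preds, indices, noise_eps=0.1, delta=1e-5
    )
    print(data_dep_eps, data_ind_eps)  # the data-dependent epsilon should be the tighter bound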

test/torch/differential_privacy/test_pate.py (+45)
@@ -1,11 +1,17 @@
 import numpy as np
+
+import torch
+
 from syft.frameworks.torch.differential_privacy import pate
 
+np.random.seed(0)
+
 
 def test_base_dataset():
 
     num_teachers, num_examples, num_labels = (100, 50, 10)
     preds = (np.random.rand(num_teachers, num_examples) * num_labels).astype(int)  # fake preds
+
     indices = (np.random.rand(num_examples) * num_labels).astype(int)  # true answers
 
     preds[:, 0:10] *= 0
@@ -15,3 +21,42 @@ def test_base_dataset():
     )
 
     assert data_dep_eps < data_ind_eps
+
+
+def test_base_dataset_torch():
+
+    num_teachers, num_examples, num_labels = (100, 50, 10)
+    preds = (np.random.rand(num_teachers, num_examples) * num_labels).astype(int)  # fake preds
+
+    indices = (np.random.rand(num_examples) * num_labels).astype(int)  # true answers
+
+    preds[:, 0:10] *= 0
+
+    data_dep_eps, data_ind_eps = pate.perform_analysis_torch(
+        preds, indices, noise_eps=0.1, delta=1e-5
+    )
+
+    assert data_dep_eps < data_ind_eps
+
+
+def test_torch_ref_match():
+
+    # Verify that the torch implementation's values match the original numpy implementation.
+
+    num_teachers, num_examples, num_labels = (100, 50, 10)
+    preds = (np.random.rand(num_teachers, num_examples) * num_labels).astype(int)  # fake preds
+
+    indices = (np.random.rand(num_examples) * num_labels).astype(int)  # true answers
+
+    preds[:, 0:10] *= 0
+
+    data_dep_eps, data_ind_eps = pate.perform_analysis_torch(
+        preds, indices, noise_eps=0.1, delta=1e-5
+    )
+
+    data_dep_eps_ref, data_ind_eps_ref = pate.perform_analysis(
+        preds, indices, noise_eps=0.1, delta=1e-5
+    )
+
+    assert torch.isclose(data_dep_eps, torch.tensor(data_dep_eps_ref.item()))
+    assert torch.isclose(data_ind_eps, torch.tensor(data_ind_eps_ref.item()))
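Both new tests follow the repository's pytest layout; assuming pytest is installed, this module alone can be run from Python with:

    import pytest

    # Exit code 0 means every PATE test in this file passed.
    raise SystemExit(pytest.main(["-q", "test/torch/differential_privacy/test_pate.py"]))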
