From cdd16639a21adba5656798a953062367f5879d96 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 15 Jan 2025 12:17:48 -0800 Subject: [PATCH] Ability to specify extra features available to trained attacks. PiperOrigin-RevId: 715896851 --- .../data_structures.py | 8 ++++ .../membership_inference_attack/models.py | 42 +++++++++++++------ .../models_test.py | 10 +++++ 3 files changed, 47 insertions(+), 13 deletions(-) diff --git a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/data_structures.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/data_structures.py index 3eba2f87..e07d343f 100644 --- a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/data_structures.py +++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/data_structures.py @@ -281,6 +281,14 @@ class AttackInputData: entropy_train: Optional[np.ndarray] = None entropy_test: Optional[np.ndarray] = None + # Extra features for the training and test sets. This may include metadata, + # additional losses, model outputs that are available to the adversary. + # + # These features will be used (in addition to logits/probabilities, losses) + # for trained attacks. + extra_features_train: Optional[np.ndarray] = None + extra_features_test: Optional[np.ndarray] = None + # If loss is not explicitly specified, this function will be used to derive # loss from logits and labels. It can be a pre-defined `LossFunction` or its # string representation, or a callable. 
def _column_stack(logits, loss, extra_features=None):
  """Stacks logits, losses, and extra features column-wise.

  Any argument that is None is skipped. A one-dimensional `loss` or
  `extra_features` array is treated as a single column (one value per row), so
  the result has the same layout whether the array is supplied alone or
  together with other inputs.

  Args:
    logits: logits array, or None.
    loss: loss array, or None.
    extra_features: extra features array, or None. Defaults to None so that
      two-argument calls `_column_stack(logits, loss)` keep working.

  Returns:
    The column-wise stack of every input that is not None. When exactly one
    input is present it is returned on its own (after the 1-D to column
    expansion described above) instead of going through `np.column_stack`.

  Raises:
    ValueError: if logits, loss, and extra_features are all None.
  """
  columns = []
  if logits is not None:
    columns.append(logits)
  for per_example in (loss, extra_features):
    if per_example is None:
      continue
    if per_example.ndim == 1:
      # Same shape np.column_stack would give a 1-D input; doing it here keeps
      # the single-input return consistent with the stacked one.
      per_example = np.expand_dims(per_example, axis=-1)
    columns.append(per_example)

  if not columns:
    raise ValueError('logits, loss, and extra_features cannot all be None.')
  if len(columns) == 1:
    return columns[0]
  return np.column_stack(columns)