ruff compliance complete
johanos1 committed Mar 26, 2024
1 parent 17075c8 commit e8c63bb
Showing 5 changed files with 21 additions and 10 deletions.
8 changes: 4 additions & 4 deletions config/cifar10.yaml
@@ -10,14 +10,14 @@ audit: # Configurations for auditing
device: cuda:0 # String for indicating on which device we conduct the membership inference attack and train reference models.
audit_batch_size: 1000 # Integer number for indicating the batch size for computing signals in the Privacy Meter.
num_shadow_models: 1
-f_attack_data_size: 0.05
+f_attack_data_size: 0.3

train: # Configuration for training
type: pytorch # Training framework (we only support pytorch now).
model_name: nn # String for indicating the model type. We support CNN, wrn28-1, wrn28-2, wrn28-10, vgg16, nn and speedyresnet (requires cuda). More model types can be added in model.py.
num_target_model: 1 #Integer number for indicating how many target models we want to audit for the privacy game
device: cuda:0 # String for indicating the device we want to use for training models, e.g., cpu or cuda.
-epochs: 5 # Integer number for indicating the epochs for training target model. For speedyresnet, it uses its own number of epochs.
+epochs: 50 # Integer number for indicating the epochs for training target model. For speedyresnet, it uses its own number of epochs.
batch_size: 32 # Integer number for indicating batch size for training the target model. For speedyresnet, it uses its own batch size.
optimizer: SGD # String which indicates the optimizer. We support Adam and SGD. For speedyresnet, it uses its own optimizer.
learning_rate: 0.01 # Float number for indicating learning rate for training the target model. For speedyresnet, it uses its own learning_rate.
@@ -30,8 +30,8 @@ train: # Configuration for training

data: # Configuration for data
dataset: cifar10 # String indicates the name of the dataset (i.e., cifar10, cifar100, purchase100, texas1000)
-f_train: 0.5 # Float number from 0 to 1 indicating the fraction of the train dataset
-f_test: 0.5 # Float number from 0 to 1 indicating the fraction of the test dataset
+f_train: 0.3 # Float number from 0 to 1 indicating the fraction of the train dataset
+f_test: 0.3 # Float number from 0 to 1 indicating the fraction of the test dataset
split_method: no_overlapping # String for indicating the methods of splitting the dataset between train, test, and auditing.
f_audit: 0.3 # Float from 0 to 1, indicating the fraction of the auditing dataset
data_dir: ./data # String about where to save the data.
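With the new values, f_train, f_test, and f_audit are each 0.3, so roughly 10% of the data goes unused, and f_attack_data_size grows from 5% to 30% of the population. A minimal sketch of how such fractions could drive a no_overlapping split; the helper name, seeding, and return convention are illustrative, not LeakPro's actual API:

```python
# Illustrative only: how f_train / f_test / f_audit might be turned into
# disjoint index sets under a no_overlapping split. Not LeakPro's real code.
import numpy as np

def make_split_indices(n: int, f_train: float, f_test: float, f_audit: float,
                       seed: int = 0):
    rng = np.random.default_rng(seed)
    perm = rng.permutation(n)
    n_train, n_test, n_audit = (int(f * n) for f in (f_train, f_test, f_audit))
    if n_train + n_test + n_audit > n:
        raise ValueError("fractions must sum to at most 1 for no_overlapping")
    train_idx = perm[:n_train]
    test_idx = perm[n_train:n_train + n_test]
    audit_idx = perm[n_train + n_test:n_train + n_test + n_audit]
    return train_idx, test_idx, audit_idx

# With the updated config: 0.3 + 0.3 + 0.3 = 0.9, so 10% of points stay unused.
train_idx, test_idx, audit_idx = make_split_indices(50_000, 0.3, 0.3, 0.3)
```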
4 changes: 2 additions & 2 deletions leakpro.py
@@ -52,8 +52,8 @@ def setup_log(name: str, save_file: bool) -> logging.Logger:
if __name__ == "__main__":

RETRAIN = True
args = "./config/adult.yaml" # noqa: ERA001
#args = "./config/cifar10.yaml" # noqa: ERA001
#args = "./config/adult.yaml" # noqa: ERA001
args = "./config/cifar10.yaml" # noqa: ERA001
with open(args, "rb") as f:
configs = yaml.safe_load(f)

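This diff switches the active config by commenting and uncommenting hard-coded paths. A hedged sketch of a command-line alternative using only the standard library; the --config flag is hypothetical, not part of LeakPro:

```python
# Hypothetical alternative to toggling commented-out paths: select the config
# on the command line. The --config flag is illustrative, not LeakPro's CLI.
import argparse

import yaml

parser = argparse.ArgumentParser()
parser.add_argument("--config", default="./config/cifar10.yaml",
                    help="path to a YAML config, e.g. ./config/adult.yaml")
cli = parser.parse_args()

with open(cli.config, "rb") as f:
    configs = yaml.safe_load(f)
```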
2 changes: 1 addition & 1 deletion leakpro/dataset.py
@@ -212,7 +212,7 @@ def get_dataset_subset(dataset: Dataset, indices: List[int]) -> Dataset:
if max(indices) >= len(dataset) or min(indices) < 0:
raise ValueError("Index out of range")

-data = dataset.X
+data = dataset.x
targets = dataset.y
transforms = dataset.transforms
subset_data = [data[idx] for idx in indices]
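The switch from dataset.X to dataset.x presumably tracks an attribute renamed elsewhere to satisfy ruff's pep8-naming checks (lowercase attribute names). A self-contained sketch of the surrounding subsetting logic, assuming a dataset object that exposes x, y, and transforms as the diff implies:

```python
# Sketch of the subsetting logic around the renamed attribute; assumes a
# dataset with lowercase x (features), y (targets), and transforms, per the diff.
from typing import Any, List

def get_dataset_subset_sketch(dataset: Any, indices: List[int]):
    if max(indices) >= len(dataset) or min(indices) < 0:
        raise ValueError("Index out of range")
    data = dataset.x          # previously dataset.X
    targets = dataset.y
    subset_data = [data[idx] for idx in indices]
    subset_targets = [targets[idx] for idx in indices]
    # The real function presumably rebuilds a Dataset object; returning the
    # raw pieces keeps this sketch self-contained.
    return subset_data, subset_targets, dataset.transforms
```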
17 changes: 14 additions & 3 deletions leakpro/mia_attacks/attacks/rmia.py
@@ -71,19 +71,30 @@ def prepare_attack(self:Self) -> None:
Signals are computed on the auxiliary model(s) and dataset.
"""
# sample dataset to compute histogram
+all_index = np.arange(self.population_size)
+attack_data_size = np.round(
+    self.f_attack_data_size * self.population_size
+).astype(int)
+
+self.attack_data_index = np.random.choice(
+    all_index, attack_data_size, replace=False
+)
+attack_data = get_dataset_subset(self.population, self.attack_data_index)

+# compute the ratio of p(z|theta) (target model) to p(z)=sum_{theta'} p(z|theta') (shadow models)
+# for all points in the attack dataset output from signal: # models x # data points x # classes

# get the true label indices
-z_label_indices = np.array(self.attack_data.y)
+z_label_indices = np.array(attack_data.y)

# run points through real model to collect the logits
-logits_theta = np.array(self.signal([self.target_model], self.attack_data))
+logits_theta = np.array(self.signal([self.target_model], attack_data))
# collect the softmax output of the correct class
p_z_given_theta = self.softmax(logits_theta, z_label_indices)

# run points through shadow models and collect the logits
-logits_shadow_models = self.signal(self.shadow_models, self.attack_data)
+logits_shadow_models = self.signal(self.shadow_models, attack_data)
# collect the softmax output of the correct class for each shadow model
p_z_given_shadow_models = [self.softmax(np.array(x).reshape(1,*x.shape), z_label_indices) for x in logits_shadow_models]
# stack the softmax output of the correct class for each shadow model to dimension # models x # data points
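The comments describe the RMIA-style signal: for each attack point z, the softmax probability of its true class under the target model, p(z|theta), is compared against p(z) aggregated over the shadow models. A runnable sketch with random logits; the shapes follow the in-code comment (# models x # data points x # classes), and summing over shadow models follows the comment's p(z) = sum_{theta'} p(z|theta'):

```python
# Hedged sketch of the p(z|theta) / p(z) computation described in the comments.
# Random logits stand in for real model outputs; this is not LeakPro's softmax().
import numpy as np

def softmax_true_class(logits: np.ndarray, labels: np.ndarray) -> np.ndarray:
    """logits: (n_models, n_points, n_classes) -> (n_models, n_points),
    the softmax probability of each point's true class."""
    shifted = logits - logits.max(axis=-1, keepdims=True)  # numerical stability
    probs = np.exp(shifted)
    probs /= probs.sum(axis=-1, keepdims=True)
    return probs[:, np.arange(logits.shape[1]), labels]

rng = np.random.default_rng(0)
n_shadow, n_points, n_classes = 4, 10, 3
labels = rng.integers(0, n_classes, size=n_points)

logits_target = rng.normal(size=(1, n_points, n_classes))
logits_shadow = rng.normal(size=(n_shadow, n_points, n_classes))

p_z_given_theta = softmax_true_class(logits_target, labels)   # (1, n_points)
p_z_given_shadow = softmax_true_class(logits_shadow, labels)  # (n_shadow, n_points)
p_z = p_z_given_shadow.sum(axis=0)                            # sum, per the comment
ratio = p_z_given_theta / p_z                                 # membership signal per point
```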
Binary file modified test_adult/models_metadata.pkl
