ruff compliance complete
johanos1 committed Mar 26, 2024
1 parent 17075c8 commit e8c63bb
Showing 5 changed files with 21 additions and 10 deletions.
8 changes: 4 additions & 4 deletions config/cifar10.yaml
@@ -10,14 +10,14 @@ audit: # Configurations for auditing
device: cuda:0 # String for indicating on which device we conduct the membership inference attack and train reference models.
audit_batch_size: 1000 # Integer number for indicating the batch size for computing signals in the Privacy Meter.
num_shadow_models: 1
-f_attack_data_size: 0.05
+f_attack_data_size: 0.3

train: # Configuration for training
type: pytorch # Training framework (we only support pytorch now).
model_name: nn # String for indicating the model type. We support CNN, wrn28-1, wrn28-2, wrn28-10, vgg16, nn and speedyresnet (requires cuda). More model types can be added in model.py.
num_target_model: 1 #Integer number for indicating how many target models we want to audit for the privacy game
device: cuda:0 # String for indicating the device we want to use for training models, e.g., cpu or cuda.
-epochs: 5 # Integer number for indicating the epochs for training target model. For speedyresnet, it uses its own number of epochs.
+epochs: 50 # Integer number for indicating the epochs for training target model. For speedyresnet, it uses its own number of epochs.
batch_size: 32 # Integer number for indicating batch size for training the target model. For speedyresnet, it uses its own batch size.
optimizer: SGD # String which indicates the optimizer. We support Adam and SGD. For speedyresnet, it uses its own optimizer.
learning_rate: 0.01 # Float number for indicating learning rate for training the target model. For speedyresnet, it uses its own learning_rate.
@@ -30,8 +30,8 @@ train: # Configuration for training

data: # Configuration for data
dataset: cifar10 # String indicates the name of the dataset (i.e., cifar10, cifar100, purchase100, texas1000)
-f_train: 0.5 # Float number from 0 to 1 indicating the fraction of the train dataset
-f_test: 0.5 # Float number from 0 to 1 indicating the fraction of the test dataset
+f_train: 0.3 # Float number from 0 to 1 indicating the fraction of the train dataset
+f_test: 0.3 # Float number from 0 to 1 indicating the fraction of the test dataset
split_method: no_overlapping # String for indicating the methods of splitting the dataset between train, test, and auditing.
f_audit: 0.3 # Float from 0 to 1, indicating the fraction of the auditing dataset
data_dir: ./data # String about where to save the data.
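With the new values, f_train, f_test, and f_audit are each 0.3, so roughly 10% of the data goes unused, and f_attack_data_size grows from 5% to 30% of the population. A minimal sketch of how such fractions could drive a no_overlapping split; the helper name, seeding, and return convention are illustrative, not LeakPro's actual API:

```python
# Illustrative only: how f_train / f_test / f_audit might be turned into
# disjoint index sets under a no_overlapping split. Not LeakPro's real code.
import numpy as np

def make_split_indices(n: int, f_train: float, f_test: float, f_audit: float,
                       seed: int = 0):
    rng = np.random.default_rng(seed)
    perm = rng.permutation(n)
    n_train, n_test, n_audit = (int(f * n) for f in (f_train, f_test, f_audit))
    if n_train + n_test + n_audit > n:
        raise ValueError("fractions must sum to at most 1 for no_overlapping")
    train_idx = perm[:n_train]
    test_idx = perm[n_train:n_train + n_test]
    audit_idx = perm[n_train + n_test:n_train + n_test + n_audit]
    return train_idx, test_idx, audit_idx

# With the updated config: 0.3 + 0.3 + 0.3 = 0.9, so 10% of points stay unused.
train_idx, test_idx, audit_idx = make_split_indices(50_000, 0.3, 0.3, 0.3)
```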
4 changes: 2 additions & 2 deletions leakpro.py
@@ -52,8 +52,8 @@ def setup_log(name: str, save_file: bool) -> logging.Logger:
if __name__ == "__main__":

RETRAIN = True
args = "./config/adult.yaml" # noqa: ERA001
#args = "./config/cifar10.yaml" # noqa: ERA001
#args = "./config/adult.yaml" # noqa: ERA001
args = "./config/cifar10.yaml" # noqa: ERA001
with open(args, "rb") as f:
configs = yaml.safe_load(f)

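This diff switches the active config by commenting and uncommenting hard-coded paths. A hedged sketch of a command-line alternative using only the standard library; the --config flag is hypothetical, not part of LeakPro:

```python
# Hypothetical alternative to toggling commented-out paths: select the config
# on the command line. The --config flag is illustrative, not LeakPro's CLI.
import argparse

import yaml

parser = argparse.ArgumentParser()
parser.add_argument("--config", default="./config/cifar10.yaml",
                    help="path to a YAML config, e.g. ./config/adult.yaml")
cli = parser.parse_args()

with open(cli.config, "rb") as f:
    configs = yaml.safe_load(f)
```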
2 changes: 1 addition & 1 deletion leakpro/dataset.py
@@ -212,7 +212,7 @@ def get_dataset_subset(dataset: Dataset, indices: List[int]) -> Dataset:
if max(indices) >= len(dataset) or min(indices) < 0:
raise ValueError("Index out of range")

-data = dataset.X
+data = dataset.x
targets = dataset.y
transforms = dataset.transforms
subset_data = [data[idx] for idx in indices]
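The switch from dataset.X to dataset.x presumably tracks an attribute renamed elsewhere to satisfy ruff's pep8-naming checks (lowercase attribute names). A self-contained sketch of the surrounding subsetting logic, assuming a dataset object that exposes x, y, and transforms as the diff implies:

```python
# Sketch of the subsetting logic around the renamed attribute; assumes a
# dataset with lowercase x (features), y (targets), and transforms, per the diff.
from typing import Any, List

def get_dataset_subset_sketch(dataset: Any, indices: List[int]):
    if max(indices) >= len(dataset) or min(indices) < 0:
        raise ValueError("Index out of range")
    data = dataset.x          # previously dataset.X
    targets = dataset.y
    subset_data = [data[idx] for idx in indices]
    subset_targets = [targets[idx] for idx in indices]
    # The real function presumably rebuilds a Dataset object; returning the
    # raw pieces keeps this sketch self-contained.
    return subset_data, subset_targets, dataset.transforms
```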
17 changes: 14 additions & 3 deletions leakpro/mia_attacks/attacks/rmia.py
@@ -71,19 +71,30 @@ def prepare_attack(self:Self) -> None:
Signals are computed on the auxiliary model(s) and dataset.
"""
# sample dataset to compute histogram
+all_index = np.arange(self.population_size)
+attack_data_size = np.round(
+    self.f_attack_data_size * self.population_size
+).astype(int)
+
+self.attack_data_index = np.random.choice(
+    all_index, attack_data_size, replace=False
+)
+attack_data = get_dataset_subset(self.population, self.attack_data_index)

+# compute the ratio of p(z|theta) (target model) to p(z)=sum_{theta'} p(z|theta') (shadow models)
+# for all points in the attack dataset output from signal: # models x # data points x # classes

# get the true label indices
-z_label_indices = np.array(self.attack_data.y)
+z_label_indices = np.array(attack_data.y)

# run points through real model to collect the logits
-logits_theta = np.array(self.signal([self.target_model], self.attack_data))
+logits_theta = np.array(self.signal([self.target_model], attack_data))
# collect the softmax output of the correct class
p_z_given_theta = self.softmax(logits_theta, z_label_indices)

# run points through shadow models and collect the logits
-logits_shadow_models = self.signal(self.shadow_models, self.attack_data)
+logits_shadow_models = self.signal(self.shadow_models, attack_data)
# collect the softmax output of the correct class for each shadow model
p_z_given_shadow_models = [self.softmax(np.array(x).reshape(1,*x.shape), z_label_indices) for x in logits_shadow_models]
# stack the softmax output of the correct class for each shadow model to dimension # models x # data points
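The comments describe the RMIA-style signal: for each attack point z, the softmax probability of its true class under the target model, p(z|theta), is compared against p(z) aggregated over the shadow models. A runnable sketch with random logits; the shapes follow the in-code comment (# models x # data points x # classes), and summing over shadow models follows the comment's p(z) = sum_{theta'} p(z|theta'):

```python
# Hedged sketch of the p(z|theta) / p(z) computation described in the comments.
# Random logits stand in for real model outputs; this is not LeakPro's softmax().
import numpy as np

def softmax_true_class(logits: np.ndarray, labels: np.ndarray) -> np.ndarray:
    """logits: (n_models, n_points, n_classes) -> (n_models, n_points),
    the softmax probability of each point's true class."""
    shifted = logits - logits.max(axis=-1, keepdims=True)  # numerical stability
    probs = np.exp(shifted)
    probs /= probs.sum(axis=-1, keepdims=True)
    return probs[:, np.arange(logits.shape[1]), labels]

rng = np.random.default_rng(0)
n_shadow, n_points, n_classes = 4, 10, 3
labels = rng.integers(0, n_classes, size=n_points)

logits_target = rng.normal(size=(1, n_points, n_classes))
logits_shadow = rng.normal(size=(n_shadow, n_points, n_classes))

p_z_given_theta = softmax_true_class(logits_target, labels)   # (1, n_points)
p_z_given_shadow = softmax_true_class(logits_shadow, labels)  # (n_shadow, n_points)
p_z = p_z_given_shadow.sum(axis=0)                            # sum, per the comment
ratio = p_z_given_theta / p_z                                 # membership signal per point
```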
Binary file modified test_adult/models_metadata.pkl
