6
6
from skimage .util import random_noise
7
7
8
8
9
def apply_noise(t, noise_type, poisoned_ratio):
    """
    Apply noise to a tensor based on the specified noise type and poisoning ratio.

    Args:
        t (torch.Tensor): The input tensor to which noise will be applied.
        noise_type (str): The type of noise to apply. Supported types are:
            - "salt": Replaces random elements with 1 (amount=poisoned_ratio).
            - "gaussian": Additive Gaussian noise with mean 0 and variance
              poisoned_ratio, clipped to the valid range.
            - "s&p": Salt-and-pepper noise (amount=poisoned_ratio).
            - "nlp_rawdata": Delegates to the custom `poison_to_nlp_rawdata`
              helper for NLP word-vector data.
        poisoned_ratio (float): The amount (for "salt"/"s&p") or variance
            (for "gaussian") of the noise; for "nlp_rawdata", the fraction
            of word vectors to zero out.

    Returns:
        torch.Tensor: The noisy tensor for the skimage-based noise types, or
        the result of `poison_to_nlp_rawdata` for "nlp_rawdata". If
        `noise_type` is unsupported, an error message is printed and the
        input is returned unchanged — no exception is raised.

    Notes:
        - Noise for "salt", "gaussian", and "s&p" is generated with
          `random_noise` from `skimage.util` and wrapped in `torch.tensor`.
    """
    if noise_type == "salt":
        # Replaces random pixels with 1.
        return torch.tensor(random_noise(t, mode=noise_type, amount=poisoned_ratio))
    if noise_type == "gaussian":
        # Gaussian-distributed additive noise, clipped to the valid range.
        return torch.tensor(random_noise(t, mode=noise_type, mean=0, var=poisoned_ratio, clip=True))
    if noise_type == "s&p":
        # Replaces random pixels with either 1 or the image's low value.
        return torch.tensor(random_noise(t, mode=noise_type, amount=poisoned_ratio))
    if noise_type == "nlp_rawdata":
        # For NLP data, zero each word vector with p=poisoned_ratio.
        return poison_to_nlp_rawdata(t, poisoned_ratio)
    # Unsupported type: keep the legacy best-effort behavior — warn and
    # return the input unchanged rather than raising an exception.
    print("ERROR: poison attack type not supported.")
    return t
45
+
46
+
9
47
def datapoison (
10
48
dataset ,
11
49
indices ,
12
- poisoned_persent ,
50
+ poisoned_percent ,
13
51
poisoned_ratio ,
14
52
targeted = False ,
15
53
target_label = 3 ,
16
54
noise_type = "salt" ,
17
55
):
18
56
"""
19
- Function to add random noise of various types to the dataset.
57
+ Adds noise to a specified portion of a dataset for data poisoning purposes.
58
+
59
+ This function applies noise to randomly selected samples within a dataset.
60
+ Noise can be targeted or non-targeted. In non-targeted poisoning, random samples
61
+ are chosen and altered using the specified noise type and ratio. In targeted poisoning,
62
+ only samples with a specified label are altered by adding an 'X' pattern.
63
+
64
+ Args:
65
+ dataset (Dataset): The dataset to poison, expected to have `.data` and `.targets` attributes.
66
+ indices (list of int): The list of indices in the dataset to consider for poisoning.
67
+ poisoned_percent (float): The percentage of `indices` to poison, as a fraction (0 <= poisoned_percent <= 1).
68
+ poisoned_ratio (float): The intensity or probability parameter for the noise, depending on the noise type.
69
+ targeted (bool, optional): If True, applies targeted poisoning by adding an 'X' only to samples with `target_label`.
70
+ Default is False.
71
+ target_label (int, optional): The label to target when `targeted` is True. Default is 3.
72
+ noise_type (str, optional): The type of noise to apply in non-targeted poisoning. Supported types are:
73
+ - "salt": Applies salt noise.
74
+ - "gaussian": Applies Gaussian noise.
75
+ - "s&p": Applies salt-and-pepper noise.
76
+ Default is "salt".
77
+
78
+ Returns:
79
+ Dataset: A deep copy of the original dataset with poisoned data in `.data`.
80
+
81
+ Raises:
82
+ ValueError: If `poisoned_percent` is not between 0 and 1, or if `noise_type` is unsupported.
83
+
84
+ Notes:
85
+ - Non-targeted poisoning randomly selects samples from `indices` based on `poisoned_percent`.
86
+ - Targeted poisoning modifies only samples with `target_label` by adding an 'X' pattern, regardless of `poisoned_ratio`.
20
87
"""
21
88
new_dataset = copy .deepcopy (dataset )
22
89
train_data = new_dataset .data
23
90
targets = new_dataset .targets
24
91
num_indices = len (indices )
25
- if type (noise_type ) != str :
92
+ if not isinstance (noise_type , str ) :
26
93
noise_type = noise_type [0 ]
27
94
28
- if targeted == False :
29
- num_poisoned = int (poisoned_persent * num_indices )
95
+ if not targeted :
96
+ num_poisoned = int (poisoned_percent * num_indices )
30
97
if num_indices == 0 :
31
98
return new_dataset
32
99
if num_poisoned > num_indices :
@@ -35,21 +102,7 @@ def datapoison(
35
102
36
103
for i in poisoned_indice :
37
104
t = train_data [i ]
38
- if noise_type == "salt" :
39
- # Replaces random pixels with 1.
40
- poisoned = torch .tensor (random_noise (t , mode = noise_type , amount = poisoned_ratio ))
41
- elif noise_type == "gaussian" :
42
- # Gaussian-distributed additive noise.
43
- poisoned = torch .tensor (random_noise (t , mode = noise_type , mean = 0 , var = poisoned_ratio , clip = True ))
44
- elif noise_type == "s&p" :
45
- # Replaces random pixels with either 1 or low_val, where low_val is 0 for unsigned images or -1 for signed images.
46
- poisoned = torch .tensor (random_noise (t , mode = noise_type , amount = poisoned_ratio ))
47
- elif noise_type == "nlp_rawdata" :
48
- # for NLP data, change the word vector to 0 with p=poisoned_ratio
49
- poisoned = poison_to_nlp_rawdata (t , poisoned_ratio )
50
- else :
51
- print ("ERROR: poison attack type not supported." )
52
- poisoned = t
105
+ poisoned = apply_noise (t , noise_type , poisoned_ratio )
53
106
train_data [i ] = poisoned
54
107
else :
55
108
for i in indices :
@@ -63,7 +116,20 @@ def datapoison(
63
116
64
117
def add_x_to_image (img ):
65
118
"""
66
- Add a 10*10 pixels X at the top-left of an image
119
+ Adds a 10x10 pixel 'X' mark to the top-left corner of an image.
120
+
121
+ This function modifies the input image by setting specific pixels in the
122
+ top-left 10x10 region to a high intensity value, forming an 'X' shape.
123
+ Pixels on or below the main diagonal and above the secondary diagonal
124
+ are set to 255 (white).
125
+
126
+ Args:
127
+ img (array-like): A 2D array or image tensor representing pixel values.
128
+ It is expected to be in grayscale, where each pixel
129
+ has a single intensity value.
130
+
131
+ Returns:
132
+ torch.Tensor: A tensor representation of the modified image with the 'X' mark.
67
133
"""
68
134
for i in range (0 , 10 ):
69
135
for j in range (0 , 10 ):
@@ -74,7 +140,29 @@ def add_x_to_image(img):
74
140
75
141
def poison_to_nlp_rawdata (text_data , poisoned_ratio ):
76
142
"""
77
- for NLP data, change the word vector to 0 with p=poisoned_ratio
143
+ Poisons NLP data by setting word vectors to zero with a given probability.
144
+
145
+ This function randomly selects a portion of non-zero word vectors in the
146
+ input text data and sets them to zero vectors based on the specified
147
+ poisoning ratio. This simulates a form of data corruption by partially
148
+ nullifying the information in the input data.
149
+
150
+ Args:
151
+ text_data (list of torch.Tensor): A list where each entry is a tensor
152
+ representing a word vector. Non-zero vectors are assumed to represent valid words.
153
+ poisoned_ratio (float): The fraction of non-zero word vectors to set to zero,
154
+ where 0 <= poisoned_ratio <= 1.
155
+
156
+ Returns:
157
+ list of torch.Tensor: The modified text data with some word vectors set to zero.
158
+
159
+ Raises:
160
+ ValueError: If `poisoned_ratio` is greater than 1 or less than 0.
161
+
162
+ Notes:
163
+ - `poisoned_ratio` controls the percentage of non-zero vectors to poison.
164
+ - If `num_poisoned_token` is zero or exceeds the number of non-zero vectors,
165
+ the function returns the original `text_data` without modification.
78
166
"""
79
167
non_zero_vector_indice = [i for i in range (0 , len (text_data )) if text_data [i ][0 ] != 0 ]
80
168
non_zero_vector_len = len (non_zero_vector_indice )
0 commit comments