"""
Affine transforms implemented on torch tensors, requiring
only one interpolation per composed transform
Included:
- Affine()
- AffineCompose()
- Rotation()
- Translation()
- Shear()
- Zoom()
- Flip()
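Example (a minimal illustrative sketch; the random tensor below is only a
stand-in for a real CHW image tensor):
>>> import torch
>>> x = torch.rand(3, 64, 64)  # stand-in image: 3 channels, 64x64
>>> x_aug = Affine(rotation_range=30, translation_range=(0.1, 0.1))(x)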
"""
import math
import random
import torch
# necessary now, but should eventually not be
import scipy.ndimage as ndi
import numpy as np
def transform_matrix_offset_center(matrix, x, y):
"""Apply offset to a transform matrix so that the image is
transformed about the center of the image.
NOTE: This is a fairly simple operation, so it can easily be
moved to full torch.
Arguments
---------
matrix : 3x3 matrix/array
x : integer
height dimension of image to be transformed
y : integer
width dimension of image to be transformed
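Example (illustrative only; the identity matrix stands in for a real
affine transform matrix):
>>> import numpy as np
>>> centered = transform_matrix_offset_center(np.eye(3), 64, 64)
>>> centered.shape
(3, 3)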
"""
o_x = float(x) / 2 + 0.5
o_y = float(y) / 2 + 0.5
offset_matrix = np.array([[1, 0, o_x], [0, 1, o_y], [0, 0, 1]])
reset_matrix = np.array([[1, 0, -o_x], [0, 1, -o_y], [0, 0, 1]])
transform_matrix = np.dot(np.dot(offset_matrix, matrix), reset_matrix)
return transform_matrix
def apply_transform(x, transform, fill_mode='nearest', fill_value=0.):
"""Applies an affine transform to a 2D array, or to each channel of a 3D array.
NOTE: this can and certainly should be moved to full torch operations.
Arguments
---------
x : np.ndarray
array to transform. NOTE: array should be ordered CHW
transform : 3x3 affine transform matrix
matrix to apply
"""
x = x.astype('float32')
transform = transform_matrix_offset_center(transform, x.shape[1], x.shape[2])
final_affine_matrix = transform[:2, :2]
final_offset = transform[:2, 2]
channel_images = [ndi.affine_transform(x_channel, final_affine_matrix,
final_offset, order=0, mode=fill_mode, cval=fill_value) for x_channel in x]
x = np.stack(channel_images, axis=0)
return x
class Affine(object):
def __init__(self,
rotation_range=None,
translation_range=None,
shear_range=None,
zoom_range=None,
fill_mode='constant',
fill_value=0.,
target_fill_mode='nearest',
target_fill_value=0.):
"""Perform an affine transforms with various sub-transforms, using
only one interpolation and without having to instantiate each
sub-transform individually.
Arguments
---------
rotation_range : one integer or float
image will be rotated between (-degrees, degrees) degrees
translation_range : a float or a tuple/list with 2 floats between [0, 1)
first value:
image will be shifted vertically (along its height) between
(-height_range * height_dimension, height_range * height_dimension)
second value:
image will be shifted horizontally (along its width) between
(-width_range * width_dimension, width_range * width_dimension)
shear_range : float
radian bounds on the shear transform
zoom_range : list/tuple with two floats between [0, infinity).
first float should be less than the second
lower and upper bounds on percent zoom.
Anything less than 1.0 will zoom in on the image,
anything greater than 1.0 will zoom out on the image.
e.g. (0.7, 1.0) will only zoom in,
(1.0, 1.4) will only zoom out,
(0.7, 1.4) will randomly zoom in or out
fill_mode : string in {'constant', 'nearest'}
how to fill the empty space caused by the transform
ProTip : use 'nearest' for discrete images (e.g. segmentations)
and use 'constant' for continuous images
fill_value : float
the value to fill the empty space with if fill_mode='constant'
target_fill_mode : same as fill_mode, but for target image
target_fill_value : same as fill_value, but for target image
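Example (a minimal illustrative sketch; the random tensor stands in for
a real CHW image):
>>> import torch
>>> x = torch.rand(3, 64, 64)
>>> tform = Affine(rotation_range=30, zoom_range=(0.8, 1.2))
>>> x_aug = tform(x)  # same shape as x, randomly rotated and zoomed
>>> x_aug.size()
torch.Size([3, 64, 64])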
"""
self.transforms = []
if rotation_range:
rotation_tform = Rotation(rotation_range, lazy=True)
self.transforms.append(rotation_tform)
if translation_range:
translation_tform = Translation(translation_range, lazy=True)
self.transforms.append(translation_tform)
if shear_range:
shear_tform = Shear(shear_range, lazy=True)
self.transforms.append(shear_tform)
if zoom_range:
zoom_tform = Zoom(zoom_range, lazy=True)
self.transforms.append(zoom_tform)
self.fill_mode = fill_mode
self.fill_value = fill_value
self.target_fill_mode = target_fill_mode
self.target_fill_value = target_fill_value
def __call__(self, x, y=None):
# collect all of the lazily returned tform matrices
tform_matrix = self.transforms[0](x)
for tform in self.transforms[1:]:
tform_matrix = np.dot(tform_matrix, tform(x))
x = torch.from_numpy(apply_transform(x.numpy(), tform_matrix,
fill_mode=self.fill_mode, fill_value=self.fill_value))
if y is not None:
y = torch.from_numpy(apply_transform(y.numpy(), tform_matrix,
fill_mode=self.target_fill_mode, fill_value=self.target_fill_value))
return x, y
else:
return x
class AffineCompose(object):
def __init__(self,
transforms,
fill_mode='constant',
fill_value=0.,
target_fill_mode='nearest',
target_fill_value=0.):
"""Apply a collection of explicit affine transforms to an input image,
and to a target image if necessary
Arguments
---------
transforms : list or tuple
each element in the list/tuple should be an affine transform.
currently supported transforms:
- Rotation()
- Translation()
- Shear()
- Zoom()
fill_mode : string in {'constant', 'nearest'}
how to fill the empty space caused by the transform
fill_value : float
the value to fill the empty space with if fill_mode='constant'
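Example (a minimal illustrative sketch; the random tensor stands in for
a real CHW image):
>>> import torch
>>> x = torch.rand(3, 64, 64)
>>> compose = AffineCompose([Rotation(30), Zoom((0.8, 1.2))])
>>> x_aug = compose(x)  # one interpolation for both transforms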
"""
self.transforms = transforms
# set transforms to lazy so they only return the tform matrix
for t in self.transforms:
t.lazy = True
self.fill_mode = fill_mode
self.fill_value = fill_value
self.target_fill_mode = target_fill_mode
self.target_fill_value = target_fill_value
def __call__(self, x, y=None):
# collect all of the lazily returned tform matrices
tform_matrix = self.transforms[0](x)
for tform in self.transforms[1:]:
tform_matrix = np.dot(tform_matrix, tform(x))
x = torch.from_numpy(apply_transform(x.numpy(), tform_matrix,
fill_mode=self.fill_mode, fill_value=self.fill_value))
if y is not None:
y = torch.from_numpy(apply_transform(y.numpy(), tform_matrix,
fill_mode=self.target_fill_mode, fill_value=self.target_fill_value))
return x, y
else:
return x
class Rotation(object):
def __init__(self,
rotation_range,
fill_mode='constant',
fill_value=0.,
target_fill_mode='nearest',
target_fill_value=0.,
lazy=False):
"""Randomly rotate an image between (-degrees, degrees). If the image
has multiple channels, the same rotation will be applied to each channel.
Arguments
---------
rotation_range : integer or float
image will be rotated between (-degrees, degrees) degrees
fill_mode : string in {'constant', 'nearest'}
how to fill the empty space caused by the transform
fill_value : float
the value to fill the empty space with if fill_mode='constant'
lazy : boolean
if false, perform the transform on the tensor and return the tensor
if true, only create the affine transform matrix and return that
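Example (a minimal illustrative sketch; the random tensor stands in for
a real CHW image):
>>> import torch
>>> x = torch.rand(3, 64, 64)
>>> x_rot = Rotation(30)(x)  # rotated by up to +/- 30 degrees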
"""
self.rotation_range = rotation_range
self.fill_mode = fill_mode
self.fill_value = fill_value
self.target_fill_mode = target_fill_mode
self.target_fill_value = target_fill_value
self.lazy = lazy
def __call__(self, x, y=None):
degree = random.uniform(-self.rotation_range, self.rotation_range)
theta = math.pi / 180 * degree
rotation_matrix = np.array([[math.cos(theta), -math.sin(theta), 0],
[math.sin(theta), math.cos(theta), 0],
[0, 0, 1]])
if self.lazy:
return rotation_matrix
else:
x_transformed = torch.from_numpy(apply_transform(x.numpy(), rotation_matrix,
fill_mode=self.fill_mode, fill_value=self.fill_value))
if y is not None:
y_transformed = torch.from_numpy(apply_transform(y.numpy(), rotation_matrix,
fill_mode=self.target_fill_mode, fill_value=self.target_fill_value))
return x_transformed, y_transformed
else:
return x_transformed
class Translation(object):
def __init__(self,
translation_range,
fill_mode='constant',
fill_value=0.,
target_fill_mode='nearest',
target_fill_value=0.,
lazy=False):
"""Randomly translate an image some fraction of total height and/or
some fraction of total width. If the image has multiple channels,
the same translation will be applied to each channel.
Arguments
---------
translation_range : two floats between [0, 1)
first value:
fractional bounds of total height to shift image
image will be shifted vertically (along its height) between
(-height_range * height_dimension, height_range * height_dimension)
second value:
fractional bounds of total width to shift image
image will be shifted horizontally (along its width) between
(-width_range * width_dimension, width_range * width_dimension)
fill_mode : string in {'constant', 'nearest'}
how to fill the empty space caused by the transform
fill_value : float
the value to fill the empty space with if fill_mode='constant'
lazy : boolean
if false, perform the transform on the tensor and return the tensor
if true, only create the affine transform matrix and return that
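Example (a minimal illustrative sketch; the random tensor stands in for
a real CHW image):
>>> import torch
>>> x = torch.rand(3, 64, 64)
>>> x_shift = Translation((0.1, 0.1))(x)  # shifted by up to 10% of height/width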
"""
if isinstance(translation_range, float):
translation_range = (translation_range, translation_range)
self.height_range = translation_range[0]
self.width_range = translation_range[1]
self.fill_mode = fill_mode
self.fill_value = fill_value
self.target_fill_mode = target_fill_mode
self.target_fill_value = target_fill_value
self.lazy = lazy
def __call__(self, x, y=None):
# height shift
if self.height_range > 0:
tx = random.uniform(-self.height_range, self.height_range) * x.size(1)
else:
tx = 0
# width shift
if self.width_range > 0:
ty = random.uniform(-self.width_range, self.width_range) * x.size(2)
else:
ty = 0
translation_matrix = np.array([[1, 0, tx],
[0, 1, ty],
[0, 0, 1]])
if self.lazy:
return translation_matrix
else:
x_transformed = torch.from_numpy(apply_transform(x.numpy(),
translation_matrix, fill_mode=self.fill_mode, fill_value=self.fill_value))
if y is not None:
y_transformed = torch.from_numpy(apply_transform(y.numpy(), translation_matrix,
fill_mode=self.target_fill_mode, fill_value=self.target_fill_value))
return x_transformed, y_transformed
else:
return x_transformed
class Shear(object):
def __init__(self,
shear_range,
fill_mode='constant',
fill_value=0.,
target_fill_mode='nearest',
target_fill_value=0.,
lazy=False):
"""Randomly shear an image with radians (-shear_range, shear_range)
Arguments
---------
shear_range : float
radian bounds on the shear transform
fill_mode : string in {'constant', 'nearest'}
how to fill the empty space caused by the transform
fill_value : float
the value to fill the empty space with if fill_mode='constant'
lazy : boolean
if false, perform the transform on the tensor and return the tensor
if true, only create the affine transform matrix and return that
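Example (a minimal illustrative sketch; the random tensor stands in for
a real CHW image):
>>> import math
>>> import torch
>>> x = torch.rand(3, 64, 64)
>>> x_shear = Shear(math.pi / 8)(x)  # sheared by up to +/- pi/8 radians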
"""
self.shear_range = shear_range
self.fill_mode = fill_mode
self.fill_value = fill_value
self.target_fill_mode = target_fill_mode
self.target_fill_value = target_fill_value
self.lazy = lazy
def __call__(self, x, y=None):
shear = random.uniform(-self.shear_range, self.shear_range)
shear_matrix = np.array([[1, -math.sin(shear), 0],
[0, math.cos(shear), 0],
[0, 0, 1]])
if self.lazy:
return shear_matrix
else:
x_transformed = torch.from_numpy(apply_transform(x.numpy(),
shear_matrix, fill_mode=self.fill_mode, fill_value=self.fill_value))
if y is not None:
y_transformed = torch.from_numpy(apply_transform(y.numpy(), shear_matrix,
fill_mode=self.target_fill_mode, fill_value=self.target_fill_value))
return x_transformed, y_transformed
else:
return x_transformed
class Zoom(object):
def __init__(self,
zoom_range,
fill_mode='constant',
fill_value=0,
target_fill_mode='nearest',
target_fill_value=0.,
lazy=False):
"""Randomly zoom in and/or out on an image
Arguments
---------
zoom_range : tuple or list with 2 values, both between (0, infinity)
lower and upper bounds on percent zoom.
Anything less than 1.0 will zoom in on the image,
anything greater than 1.0 will zoom out on the image.
e.g. (0.7, 1.0) will only zoom in,
(1.0, 1.4) will only zoom out,
(0.7, 1.4) will randomly zoom in or out
fill_mode : string in {'constant', 'nearest'}
how to fill the empty space caused by the transform
fill_value : float
the value to fill the empty space with if fill_mode='constant'
lazy : boolean
if false, perform the transform on the tensor and return the tensor
if true, only create the affine transform matrix and return that
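Example (a minimal illustrative sketch; the random tensor stands in for
a real CHW image):
>>> import torch
>>> x = torch.rand(3, 64, 64)
>>> x_zoom = Zoom((0.8, 1.2))(x)  # randomly zoomed in or out by up to 20%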
"""
if not isinstance(zoom_range, (list, tuple)):
raise ValueError('zoom_range must be tuple or list with 2 values')
self.zoom_range = zoom_range
self.fill_mode = fill_mode
self.fill_value = fill_value
self.target_fill_mode = target_fill_mode
self.target_fill_value = target_fill_value
self.lazy = lazy
def __call__(self, x, y=None):
zx = random.uniform(self.zoom_range[0], self.zoom_range[1])
zy = random.uniform(self.zoom_range[0], self.zoom_range[1])
zoom_matrix = np.array([[zx, 0, 0],
[0, zy, 0],
[0, 0, 1]])
if self.lazy:
return zoom_matrix
else:
x_transformed = torch.from_numpy(apply_transform(x.numpy(),
zoom_matrix, fill_mode=self.fill_mode, fill_value=self.fill_value))
if y is not None:
y_transformed = torch.from_numpy(apply_transform(y.numpy(), zoom_matrix,
fill_mode=self.target_fill_mode, fill_value=self.target_fill_value))
return x_transformed, y_transformed
else:
return x_transformed