Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce mosaic augmentation #1007

Draft
wants to merge 8 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
repos:
- repo: https://github.com/psf/black
rev: 20.8b1
rev: 22.6.0
hooks:
- id: black
language_version: python3
- repo: https://gitlab.com/pycqa/flake8
rev: "3.8.3"
rev: "3.9.2"
hooks:
- id: flake8
args: ["--config=setup.cfg"]
Expand Down
3 changes: 3 additions & 0 deletions icevision/data/record_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ def new(self, records: Sequence[BaseRecord]):
new._records = IndexableDict([(record.record_id, record) for record in records])
return new

def __add__(self, other):
    """Return a new collection holding this collection's records followed by `other`'s.

    Neither operand is modified; the result is built through `self.new`.
    Returns `NotImplemented` when `other` does not expose a `_records`
    mapping, so Python raises the usual `TypeError` for unsupported operands
    instead of an incidental `AttributeError`.
    """
    other_records = getattr(other, "_records", None)
    if other_records is None:
        return NotImplemented
    return self.new([*self._records.values(), *other_records.values()])

def make_splits(self, data_splitter: DataSplitter):
record_id_splits = data_splitter.split(self)
return [
Expand Down
4 changes: 2 additions & 2 deletions icevision/tfms/albumentations/albumentations_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,9 +277,9 @@ def apply(self, record):
record.setup_transform(tfm=self)

# TODO: composing every time
tfms = self.create_tfms()
albu_tfms = self.create_tfms()
# apply transform
self._albu_out = tfms(**self._albu_in)
self._albu_out = albu_tfms(**self._albu_in)

# store additional info (might be used by components on `collect`)
height, width, _ = self._albu_out["image"].shape
Expand Down
1 change: 1 addition & 0 deletions icevision/tfms/batch/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
from icevision.tfms.batch.batch_transform import *
from icevision.tfms.batch.img_pad_stack import *
from icevision.tfms.batch.mosaic import *
133 changes: 133 additions & 0 deletions icevision/tfms/batch/custom_albu_tfms.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
import cv2
import numpy as np
import random

from albumentations.core.transforms_interface import DualTransform
from albumentations.augmentations.crops import functional as F
from albumentations.augmentations.bbox_utils import union_of_bboxes
from albumentations.augmentations.geometric import functional as FGeometric


class CustomRandomSizedBBoxSafeCrop(DualTransform):
    """Crop a random part of the input and rescale it to some size without loss of bboxes.

    The crop region is grown randomly around the union of all bboxes. Up to
    `num_tries` candidate regions are drawn; the first one whose aspect ratio
    is within `max_ar_distortion` of the target (`width / height`) is used,
    otherwise the last candidate drawn is used.

    Args:
        height (int): height after crop and resize.
        width (int): width after crop and resize.
        erosion_rate (float): erosion rate applied on input image height before crop.
        interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm. Should be one of:
            cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
            Default: cv2.INTER_LINEAR.
        max_ar_distortion: maximum difference between crop area aspect ratio and mosaic tile aspect ratio, value 0 means
            that the boxes have exactly the same aspect ratio, suggested range (0.1, 0.5)
        num_tries: maximum number of tries in crop area selection to meet the aspect ratio distortion criteria.
            At least one candidate is always drawn, even if this is 0 or negative.
        p (float): probability of applying the transform. Default: 1.

    Targets:
        image, mask, bboxes

    Image types:
        uint8, float32
    """

    def __init__(
        self,
        height,
        width,
        erosion_rate=0.0,
        interpolation=cv2.INTER_LINEAR,
        max_ar_distortion=0.2,
        num_tries=10,
        always_apply=False,
        p=1.0,
    ):
        super().__init__(always_apply, p)
        self.height = height
        self.width = width
        self.max_ar_distortion = max_ar_distortion
        self.num_tries = num_tries
        self.interpolation = interpolation
        self.erosion_rate = erosion_rate

    def apply(
        self,
        img,
        crop_height=0,
        crop_width=0,
        h_start=0,
        w_start=0,
        interpolation=cv2.INTER_LINEAR,
        **params
    ):
        """Crop `img` with the sampled parameters, then resize to (height, width)."""
        crop = F.random_crop(img, crop_height, crop_width, h_start, w_start)
        return FGeometric.resize(crop, self.height, self.width, interpolation)

    def apply_to_bbox(
        self,
        bbox,
        crop_height=0,
        crop_width=0,
        h_start=0,
        w_start=0,
        rows=0,
        cols=0,
        **params
    ):
        """Map `bbox` into the cropped region using the same crop parameters as the image."""
        return F.bbox_random_crop(
            bbox, crop_height, crop_width, h_start, w_start, rows, cols
        )

    def get_params_dependent_on_targets(self, params):
        """Sample the crop geometry from the input image and its bboxes.

        Returns a dict with `h_start` / `w_start` (relative offsets in [0, 1])
        and `crop_height` / `crop_width` (pixels, always at least 1).
        """
        img_h, img_w = params["image"].shape[:2]
        bboxes = params["bboxes"]

        if len(bboxes) == 0:  # less likely, this class is for use with bboxes.
            erosive_h = int(img_h * (1.0 - self.erosion_rate))
            if erosive_h >= img_h:
                crop_height = img_h
            else:
                # Lower bound of 1 px: an erosion rate of 1.0 would otherwise
                # allow a zero-height crop.
                crop_height = random.randint(max(1, erosive_h), img_h)
            return {
                "h_start": random.random(),
                "w_start": random.random(),
                "crop_height": crop_height,
                "crop_width": max(1, int(crop_height * img_w / img_h)),
            }

        # Union of all bboxes; the values are used as fractions of the image size.
        x, y, x2, y2 = union_of_bboxes(
            width=img_w,
            height=img_h,
            bboxes=bboxes,
            erosion_rate=self.erosion_rate,
        )

        # find bigger region until aspect ratio distortion criteria is met or max tries reached
        target_aspect_ratio = self.width / self.height
        # Always draw at least one candidate so the crop parameters are defined
        # even when num_tries is 0 or negative.
        for _ in range(max(1, self.num_tries)):
            bx, by = x * random.random(), y * random.random()
            bx2, by2 = (
                x2 + (1 - x2) * random.random(),
                y2 + (1 - y2) * random.random(),
            )
            bw, bh = bx2 - bx, by2 - by
            # Clamp to 1 px: int() truncation of a tiny (or eroded-away) region
            # would otherwise give a zero-sized crop and a division by zero.
            crop_height = img_h if bh >= 1.0 else max(1, int(img_h * bh))
            crop_width = img_w if bw >= 1.0 else max(1, int(img_w * bw))
            crop_aspect_ratio = crop_width / crop_height
            if abs(crop_aspect_ratio - target_aspect_ratio) < self.max_ar_distortion:
                break

        # Convert the region's top-left corner into the relative offset that
        # the crop functions expect (fraction of the free space).
        h_start = np.clip(0.0 if bh >= 1.0 else by / (1.0 - bh), 0.0, 1.0)
        w_start = np.clip(0.0 if bw >= 1.0 else bx / (1.0 - bw), 0.0, 1.0)
        return {
            "h_start": h_start,
            "w_start": w_start,
            "crop_height": crop_height,
            "crop_width": crop_width,
        }

    @property
    def targets_as_params(self):
        return ["image", "bboxes"]

    def get_transform_init_args_names(self):
        # Every constructor argument specific to this transform must be listed,
        # otherwise max_ar_distortion / num_tries are dropped from the
        # transform's repr and serialized form.
        return (
            "height",
            "width",
            "erosion_rate",
            "interpolation",
            "max_ar_distortion",
            "num_tries",
        )
109 changes: 109 additions & 0 deletions icevision/tfms/batch/mosaic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
from icevision import tfms
from icevision.imports import *
from icevision.core import *
from icevision.tfms.batch.batch_transform import BatchTransform
from icevision.tfms.batch.custom_albu_tfms import (
CustomRandomSizedBBoxSafeCrop,
)


class Mosaic(BatchTransform):
    """Batch transform that combines several records into one mosaic record.

    For each record in the batch, with probability `p`, `n_imgs - 1` other
    records are drawn from the same batch; each image is cropped to one tile of
    a randomly split canvas and the tiles are summed onto a blank canvas the
    size of the main record's image. Labels and bboxes of all tiles are merged.

    Args:
        n_imgs: number of images per mosaic (main record included).
            `create_tfms` builds exactly four tile transforms, so at most four
            images are ever placed.
        bbox_safe: use `CustomRandomSizedBBoxSafeCrop` (keeps all bboxes)
            instead of a plain fixed `Crop` for each tile.
        p: probability of turning a record into a mosaic.
        erosion_rate: forwarded to `CustomRandomSizedBBoxSafeCrop`.
        max_ar_distortion: forwarded to `CustomRandomSizedBBoxSafeCrop`.
        num_tries: forwarded to `CustomRandomSizedBBoxSafeCrop`.
    """

    def __init__(
        self,
        n_imgs=4,
        bbox_safe=True,
        p=0.3,
        erosion_rate=0.0,
        max_ar_distortion: float = 0.3,
        num_tries: int = 10,
    ):
        self.n_imgs = n_imgs
        self.bbox_safe = bbox_safe
        self.p = p
        self.max_ar_distortion = max_ar_distortion
        self.num_tries = num_tries
        self.erosion_rate = erosion_rate

    def create_tfms(self, main_record: BaseRecord):
        """Build the four per-tile transforms (crop + pad) for `main_record`'s canvas."""
        position_type = tfms.A.transforms.PadIfNeeded.PositionType
        positions = [
            position_type.TOP_LEFT,
            position_type.TOP_RIGHT,
            position_type.BOTTOM_LEFT,
            position_type.BOTTOM_RIGHT,
        ]
        h = main_record.img_size.height
        w = main_record.img_size.width
        # we split the canvas to 4 pieces
        # random canvas center point (1/3 to 2/3 of image shape to minimize scaling)
        rw, rh = (1 + np.random.random_sample(2)) / 3
        pw, ph = int(rw * w), int(rh * h)

        # (x_min, y_min, x_max, y_max) of each tile, in the same order as `positions`
        crop_boundaries = [
            [0, 0, pw, ph],
            [pw, 0, w, ph],
            [0, ph, pw, h],
            [pw, ph, w, h],
        ]

        mosaic_tfms = []
        for (x_min, y_min, x_max, y_max), position in zip(crop_boundaries, positions):
            if self.bbox_safe:
                crop_tfm = CustomRandomSizedBBoxSafeCrop(
                    y_max - y_min,
                    x_max - x_min,
                    erosion_rate=self.erosion_rate,
                    num_tries=self.num_tries,
                    max_ar_distortion=self.max_ar_distortion,
                )
            else:
                crop_tfm = tfms.A.Crop(x_min, y_min, x_max, y_max)
            # Pad the tile back to full canvas size, anchored in its own corner,
            # so the tiles can simply be summed together.
            pad_tfm = tfms.A.PadIfNeeded(h, w, position=position, border_mode=0)
            mosaic_tfms.append(tfms.A.Adapter([crop_tfm, pad_tfm]))

        return mosaic_tfms

    def apply(self, records: List[BaseRecord]) -> List[BaseRecord]:
        """Return one (possibly mosaic) record per input record.

        Records are passed through unchanged when the batch does not contain
        enough other records to build a mosaic (e.g. a short final batch).
        """
        n = self.n_imgs - 1  # n of images to draw
        transformed_records = []
        for i, current_record in enumerate(records):
            other_records = records[:i] + records[i + 1 :]

            if len(other_records) < n:
                # Sampling without replacement would raise; keep the record as is.
                transformed_records.append(current_record)
                continue

            # Sample indices rather than the records themselves, so numpy never
            # has to coerce arbitrary record objects into an array.
            picked = np.random.choice(len(other_records), size=n, replace=False)
            mosaic_records = [other_records[j] for j in picked]

            # cannot edit record.img directly cause then we end up with nested mosaics
            transformed_records.append(self.make_mosaic(current_record, mosaic_records))

        return transformed_records

    def make_mosaic(
        self, main_record: BaseRecord, mosaic_records: List[BaseRecord]
    ) -> BaseRecord:
        """Build a mosaic from `main_record` and `mosaic_records` with probability `p`.

        Returns `main_record` itself when the transform is not applied,
        otherwise a deep copy carrying the mosaic image and the merged
        labels / bboxes. The input records are never modified.
        """
        do_apply = np.random.random_sample() <= self.p
        if not do_apply:
            return main_record

        mosaic_tfms = self.create_tfms(main_record)
        main_record_copy = deepcopy(main_record)
        canvas = np.zeros_like(main_record.img)
        labels = []
        bboxes = []

        # apply crops and padding; zip stops at the shorter of records / tile transforms
        for record, tfm in zip([main_record, *mosaic_records], mosaic_tfms):
            # work on a copy so the source record keeps its original image
            record_copy = tfm(deepcopy(record))
            canvas += record_copy.img

            labels.extend(record_copy.detection.labels)
            bboxes.extend(record_copy.detection.bboxes)

        # compile transformed mosaic record
        main_record_copy.set_img(canvas)
        main_record_copy.detection.set_bboxes(bboxes)
        main_record_copy.detection.set_labels(labels)
        return main_record_copy
Loading