airctic · potipot · Dec 21, 2021 · Dec 21, 2021 · Dec 21, 2021 · Dec 21, 2021
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,11 +1,11 @@
 repos:
   - repo: https://github.com/psf/black
-    rev: 20.8b1
+    rev: 22.6.0
     hooks:
       - id: black
         language_version: python3
   - repo: https://gitlab.com/pycqa/flake8
-    rev: "3.8.3"
+    rev: "3.9.2"
     hooks:
       - id: flake8
         args: ["--config=setup.cfg"]

diff --git a/icevision/data/record_collection.py b/icevision/data/record_collection.py
@@ -27,6 +27,9 @@ def new(self, records: Sequence[BaseRecord]):
         new._records = IndexableDict([(record.record_id, record) for record in records])
         return new
 
+    def __add__(self, other):
+        """Return a new collection holding this collection's records followed by `other`'s.
+
+        Neither operand is modified; the result is built through `self.new`.
+        NOTE(review): `new` keys records by `record_id`, so records sharing an id
+        presumably collapse into a single entry -- confirm against `IndexableDict`.
+        """
+        combined = list(self._records.values())
+        combined.extend(other._records.values())
+        return self.new(combined)
+
     def make_splits(self, data_splitter: DataSplitter):
         record_id_splits = data_splitter.split(self)
         return [

diff --git a/icevision/tfms/albumentations/albumentations_adapter.py b/icevision/tfms/albumentations/albumentations_adapter.py
@@ -277,9 +277,9 @@ def apply(self, record):
         record.setup_transform(tfm=self)
 
         # TODO: composing every time
-        tfms = self.create_tfms()
+        albu_tfms = self.create_tfms()
         # apply transform
-        self._albu_out = tfms(**self._albu_in)
+        self._albu_out = albu_tfms(**self._albu_in)
 
         # store additional info (might be used by components on `collect`)
         height, width, _ = self._albu_out["image"].shape

diff --git a/icevision/tfms/batch/__init__.py b/icevision/tfms/batch/__init__.py
@@ -1,2 +1,3 @@
 from icevision.tfms.batch.batch_transform import *
 from icevision.tfms.batch.img_pad_stack import *
+from icevision.tfms.batch.mosaic import *
diff --git a/icevision/tfms/batch/custom_albu_tfms.py b/icevision/tfms/batch/custom_albu_tfms.py
@@ -0,0 +1,133 @@
+import cv2
+import numpy as np
+import random
+
+from albumentations.core.transforms_interface import DualTransform
+from albumentations.augmentations.crops import functional as F
+from albumentations.augmentations.bbox_utils import union_of_bboxes
+from albumentations.augmentations.geometric import functional as FGeometric
+
+
+class CustomRandomSizedBBoxSafeCrop(DualTransform):
+    """Crop a random part of the input and rescale it to some size without loss of bboxes.
+
+    The crop area always contains the union of all bounding boxes. It is grown by a random
+    amount towards the image borders, and the draw is repeated until the aspect ratio of the
+    crop area is close enough to the aspect ratio of the requested output size (or until the
+    number of tries is exhausted, in which case the last draw is used).
+
+    Args:
+        height (int): height after crop and resize.
+        width (int): width after crop and resize.
+        erosion_rate (float): erosion rate applied on input image height before crop.
+        interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm. Should be one of:
+            cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
+            Default: cv2.INTER_LINEAR.
+        max_ar_distortion: maximum difference between crop area aspect ratio and mosaic tile aspect ratio, value 0 means
+         that the boxes have exactly the same aspect ratio, suggested range (0.1, 0.5)
+        num_tries: maximum number of tries in crop area selection to meet the aspect ratio distortion criteria.
+            At least one try is always made, even for values below 1.
+        p (float): probability of applying the transform. Default: 1.
+
+    Targets:
+        image, mask, bboxes
+
+    Image types:
+        uint8, float32
+    """
+
+    def __init__(
+        self,
+        height,
+        width,
+        erosion_rate=0.0,
+        interpolation=cv2.INTER_LINEAR,
+        max_ar_distortion=0.2,
+        num_tries=10,
+        always_apply=False,
+        p=1.0,
+    ):
+        super().__init__(always_apply, p)
+        self.height = height
+        self.width = width
+        self.max_ar_distortion = max_ar_distortion
+        self.num_tries = num_tries
+        self.interpolation = interpolation
+        self.erosion_rate = erosion_rate
+
+    def apply(
+        self,
+        img,
+        crop_height=0,
+        crop_width=0,
+        h_start=0,
+        w_start=0,
+        interpolation=cv2.INTER_LINEAR,
+        **params
+    ):
+        """Crop `img` with the sampled parameters, then resize to (self.height, self.width)."""
+        crop = F.random_crop(img, crop_height, crop_width, h_start, w_start)
+        return FGeometric.resize(crop, self.height, self.width, interpolation)
+
+    def apply_to_bbox(
+        self,
+        bbox,
+        crop_height=0,
+        crop_width=0,
+        h_start=0,
+        w_start=0,
+        rows=0,
+        cols=0,
+        **params
+    ):
+        """Map `bbox` into the cropped area using the same crop parameters as the image."""
+        return F.bbox_random_crop(
+            bbox, crop_height, crop_width, h_start, w_start, rows, cols
+        )
+
+    def get_params_dependent_on_targets(self, params):
+        """Sample the crop window for the current image and bboxes.
+
+        Args:
+            params: dict holding the targets listed in `targets_as_params`
+                ("image" and "bboxes").
+
+        Returns:
+            dict with "h_start", "w_start", "crop_height" and "crop_width", i.e. the
+            keyword arguments consumed by `apply` and `apply_to_bbox`.
+        """
+        img_h, img_w = params["image"].shape[:2]
+        bboxes = params["bboxes"]
+        if len(bboxes) == 0:
+            # less likely, this class is for use with bboxes.
+            erosive_h = int(img_h * (1.0 - self.erosion_rate))
+            crop_height = (
+                img_h if erosive_h >= img_h else random.randint(erosive_h, img_h)
+            )
+            return {
+                "h_start": random.random(),
+                "w_start": random.random(),
+                "crop_height": crop_height,
+                "crop_width": int(crop_height * img_w / img_h),
+            }
+        # get union of all bboxes; the arithmetic below treats the returned
+        # coordinates as fractions of the image size
+        x, y, x2, y2 = union_of_bboxes(
+            width=img_w,
+            height=img_h,
+            bboxes=bboxes,
+            erosion_rate=self.erosion_rate,
+        )
+        # find bigger region until aspect ratio distortion criteria is met or max tries reached
+        target_aspect_ratio = self.width / self.height
+        # always draw at least once, otherwise the crop variables below would be
+        # undefined for num_tries <= 0
+        for _ in range(max(1, self.num_tries)):
+            bx, by = x * random.random(), y * random.random()
+            bx2, by2 = (
+                x2 + (1 - x2) * random.random(),
+                y2 + (1 - y2) * random.random(),
+            )
+            bw, bh = bx2 - bx, by2 - by
+            # keep the crop at least one pixel in each direction: truncation to 0
+            # would divide by zero below and produce an empty crop
+            crop_height = img_h if bh >= 1.0 else max(1, int(img_h * bh))
+            crop_width = img_w if bw >= 1.0 else max(1, int(img_w * bw))
+            crop_aspect_ratio = crop_width / crop_height
+            if abs(crop_aspect_ratio - target_aspect_ratio) < self.max_ar_distortion:
+                break
+        h_start = np.clip(0.0 if bh >= 1.0 else by / (1.0 - bh), 0.0, 1.0)
+        w_start = np.clip(0.0 if bw >= 1.0 else bx / (1.0 - bw), 0.0, 1.0)
+        return {
+            "h_start": h_start,
+            "w_start": w_start,
+            "crop_height": crop_height,
+            "crop_width": crop_width,
+        }
+
+    @property
+    def targets_as_params(self):
+        """Targets that must be passed to `get_params_dependent_on_targets`."""
+        return ["image", "bboxes"]
+
+    def get_transform_init_args_names(self):
+        """Names of the `__init__` arguments that describe this transform.
+
+        Includes `max_ar_distortion` and `num_tries` so that they are not silently
+        reset to their defaults when the transform is serialized and restored.
+        """
+        return (
+            "height",
+            "width",
+            "erosion_rate",
+            "interpolation",
+            "max_ar_distortion",
+            "num_tries",
+        )
diff --git a/icevision/tfms/batch/mosaic.py b/icevision/tfms/batch/mosaic.py
@@ -0,0 +1,109 @@
+from icevision import tfms
+from icevision.imports import *
+from icevision.core import *
+from icevision.tfms.batch.batch_transform import BatchTransform
+from icevision.tfms.batch.custom_albu_tfms import (
+    CustomRandomSizedBBoxSafeCrop,
+)
+
+
+class Mosaic(BatchTransform):
+    """Batch transform that combines several records of a batch into one mosaic image.
+
+    The canvas (same size as the main record's image) is split into four tiles around
+    a random center point; each tile is filled with a crop of a different record and
+    the bboxes and labels of all used crops are merged into the resulting record.
+
+    Args:
+        n_imgs: number of records combined into one mosaic, including the main record.
+            Only four tiles are created, so values above 4 add nothing.
+        bbox_safe: if True, each tile is produced with `CustomRandomSizedBBoxSafeCrop`;
+            otherwise the tile region is cut out of the image with a plain `Crop`.
+        p: probability of turning a given record into a mosaic.
+        erosion_rate: forwarded to `CustomRandomSizedBBoxSafeCrop`.
+        max_ar_distortion: forwarded to `CustomRandomSizedBBoxSafeCrop`.
+        num_tries: forwarded to `CustomRandomSizedBBoxSafeCrop`.
+    """
+
+    def __init__(
+        self,
+        n_imgs=4,
+        bbox_safe=True,
+        p=0.3,
+        erosion_rate=0.0,
+        max_ar_distortion: float = 0.3,
+        num_tries: int = 10,
+    ):
+        self.n_imgs = n_imgs
+        self.bbox_safe = bbox_safe
+        self.p = p
+        self.max_ar_distortion = max_ar_distortion
+        self.num_tries = num_tries
+        self.erosion_rate = erosion_rate
+
+    def create_tfms(self, main_record: BaseRecord):
+        """Build one crop-and-pad transform per mosaic tile.
+
+        Args:
+            main_record: record whose `img_size` defines the canvas size.
+
+        Returns:
+            List of four adapters, ordered top-left, top-right, bottom-left,
+            bottom-right. Each one crops its input to the tile size and pads it
+            back to the full canvas size with the crop anchored in the tile's corner.
+        """
+        positions = [
+            tfms.A.transforms.PadIfNeeded.PositionType.TOP_LEFT,
+            tfms.A.transforms.PadIfNeeded.PositionType.TOP_RIGHT,
+            tfms.A.transforms.PadIfNeeded.PositionType.BOTTOM_LEFT,
+            tfms.A.transforms.PadIfNeeded.PositionType.BOTTOM_RIGHT,
+        ]
+        h = main_record.img_size.height
+        w = main_record.img_size.width
+        # we split the canvas to 4 pieces
+        # random canvas center point (1/3 to 2/3 of image shape to minimize scaling)
+        rw, rh = (1 + np.random.random_sample(2)) / 3
+        pw, ph = int(rw * w), int(rh * h)
+
+        # tile boundaries as (x_min, y_min, x_max, y_max), same order as `positions`
+        crop_boundaries = [
+            [0, 0, pw, ph],
+            [pw, 0, w, ph],
+            [0, ph, pw, h],
+            [pw, ph, w, h],
+        ]
+
+        mosaic_tfms = [
+            tfms.A.Adapter(
+                [
+                    CustomRandomSizedBBoxSafeCrop(
+                        y_max - y_min,
+                        x_max - x_min,
+                        erosion_rate=self.erosion_rate,
+                        num_tries=self.num_tries,
+                        max_ar_distortion=self.max_ar_distortion,
+                    )
+                    if self.bbox_safe
+                    else tfms.A.Crop(x_min, y_min, x_max, y_max),
+                    # border_mode=0 pads the rest of the canvas with a constant border
+                    tfms.A.PadIfNeeded(h, w, position=position, border_mode=0),
+                ]
+            )
+            for (x_min, y_min, x_max, y_max), position in zip(
+                crop_boundaries, positions
+            )
+        ]
+
+        return mosaic_tfms
+
+    def apply(self, records: List[BaseRecord]) -> List[BaseRecord]:
+        """Apply the mosaic transform to every record of the batch.
+
+        For each record, up to `n_imgs - 1` distinct other records of the same batch
+        are drawn at random as mosaic partners. When the batch is too small to
+        provide that many partners, all available ones are used and the remaining
+        tiles stay empty.
+
+        Args:
+            records: the batch of records.
+
+        Returns:
+            List with one entry per input record, in the same order.
+        """
+        n = max(self.n_imgs - 1, 0)  # n of images to draw
+        transformed_records = []
+        for i, current_record in enumerate(records):
+            other_records = records[:i] + records[i + 1 :]
+
+            # Draw positions rather than the records themselves:
+            # `np.random.choice(other_records, size=n, replace=False)` raises
+            # ValueError when fewer than `n` records are available and converts
+            # the record list into an array first.
+            picked = np.random.permutation(len(other_records))[:n]
+            mosaic_records = [other_records[j] for j in picked]
+
+            # cannot edit record.img directly cause then we end up with nested mosaics
+            transformed_records.append(self.make_mosaic(current_record, mosaic_records))
+
+        return transformed_records
+
+    def make_mosaic(
+        self, main_record: BaseRecord, mosaic_records: List[BaseRecord]
+    ) -> BaseRecord:
+        """Combine `main_record` with `mosaic_records` into a single mosaic record.
+
+        Args:
+            main_record: record that defines the canvas and fills the first tile.
+            mosaic_records: records filling the following tiles, in order.
+
+        Returns:
+            `main_record` itself (unchanged) when the transform is skipped according
+            to `p`; otherwise a deep copy of it carrying the mosaic image and the
+            merged bboxes and labels.
+        """
+        do_apply = np.random.random_sample() <= self.p
+        if not do_apply:
+            return main_record
+
+        mosaic_tfms = self.create_tfms(main_record)
+        main_record_copy = deepcopy(main_record)
+        canvas = np.zeros_like(main_record.img)
+        labels = []
+        bboxes = []
+
+        # apply crops and padding; `zip` stops at the shorter sequence, so only as
+        # many tiles are filled as there are records (and at most four)
+        for record, tfm in zip([main_record, *mosaic_records], mosaic_tfms):
+            # work on copies so that the records of the batch stay untouched
+            record_copy = deepcopy(record)
+            record_copy = tfm(record_copy)
+            # every transformed image is padded to the canvas size, so summing the
+            # images places each crop in its own tile
+            canvas += record_copy.img
+
+            labels.extend(record_copy.detection.labels)
+            bboxes.extend(record_copy.detection.bboxes)
+
+        # compile transformed mosaic record
+        main_record_copy.set_img(canvas)
+        main_record_copy.detection.set_bboxes(bboxes)
+        main_record_copy.detection.set_labels(labels)
+        return main_record_copy