From 662feb54bc20a8a06769976782ebc2a74c0ad048 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Tue, 1 Jun 2021 00:25:59 +0530 Subject: [PATCH 001/122] add classifier heads --- .../classification_heads/__init__.py | 2 + .../multitask/classification_heads/builder.py | 22 +++ .../multitask/classification_heads/head.py | 185 ++++++++++++++++++ 3 files changed, 209 insertions(+) create mode 100644 icevision/models/multitask/classification_heads/__init__.py create mode 100644 icevision/models/multitask/classification_heads/builder.py create mode 100644 icevision/models/multitask/classification_heads/head.py diff --git a/icevision/models/multitask/classification_heads/__init__.py b/icevision/models/multitask/classification_heads/__init__.py new file mode 100644 index 000000000..c82f63d71 --- /dev/null +++ b/icevision/models/multitask/classification_heads/__init__.py @@ -0,0 +1,2 @@ +from .builder import * +from .head import * diff --git a/icevision/models/multitask/classification_heads/builder.py b/icevision/models/multitask/classification_heads/builder.py new file mode 100644 index 000000000..ea3767313 --- /dev/null +++ b/icevision/models/multitask/classification_heads/builder.py @@ -0,0 +1,22 @@ +from typing import Dict +from .head import CLASSIFICATION_HEADS +import torch.nn as nn + +__all__ = ["build_classifier_heads"] + +# Enter dict of dicts as `cfg` +def build_classifier_heads(cfg: Dict[str, Dict[str, dict]]): + """ + Build classification head from a config which is + a dict of dicts. A head is created for each key in the + input dictionary + + Returns a `nn.ModuleDict()` mapping keys from `cfg` to + classifier heads + """ + heads = nn.ModuleDict() + # if cfg is not None: + for name, config in cfg.items(): + head = CLASSIFICATION_HEADS.build(config) + heads.update({name: head}) + return heads diff --git a/icevision/models/multitask/classification_heads/head.py b/icevision/models/multitask/classification_heads/head.py new file mode 100644 index 000000000..0fcba1c83 --- /dev/null +++ b/icevision/models/multitask/classification_heads/head.py @@ -0,0 +1,185 @@ +# Hacked together by Rahul & Farid + +from mmcv.cnn import MODELS as MMCV_MODELS +from mmcv.utils import Registry + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from typing import List, Union, Optional, Dict +from torch import Tensor +from functools import partial +from collections import namedtuple +from dataclasses import dataclass + +TensorList = List[Tensor] +TensorDict = Dict[str, Tensor] + +MODELS = Registry("models", parent=MMCV_MODELS) +CLASSIFICATION_HEADS = MODELS + +__all__ = ["ImageClassificationHead"] + + +class Passthrough(nn.Module): + def forward(self, x): + return x + + +# NOTE: We aren't using `ClassifierConfig` anymore, and are sticking to `mmdet`'s +# regular python `dict` convention +@dataclass +class ClassifierConfig: + # classifier_name: str + out_classes: int + fpn_keys: Union[List[str], List[int], None] = None + num_fpn_features: int = 512 + dropout: Optional[float] = 0.2 + # Loss function args + loss_func: Optional[nn.Module] = None + activation: Optional[nn.Module] = None + multilabel: bool = False + loss_func_wts: Optional[Tensor] = None + # Post activation processing + thresh: Optional[float] = None + topk: Optional[int] = None + + def __post_init__(self): + if self.multilabel: + if self.topk is None and self.thresh is None: + self.thresh = 0.5 + else: + if self.topk is None and self.thresh is None: + self.topk = 1 + + 
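+# A minimal usage sketch (illustrative task name & values) of the registry-based
+# builder defined in `builder.py` above. `CLASSIFICATION_HEADS.build` dispatches
+# on the `type` key, which is how the head registered below gets constructed:
+#
+#   heads = build_classifier_heads(
+#       dict(
+#           shot_framing=dict(
+#               type="ImageClassificationHead", out_classes=3, num_fpn_features=512
+#           )
+#       )
+#   )
+
+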
+@CLASSIFICATION_HEADS.register_module(name="ImageClassificationHead")
+class ImageClassificationHead(nn.Module):
+    """
+    Image classification head that optionally takes `fpn_keys` features from
+    an FPN, average pools and concatenates them into a single tensor
+    of shape `num_features`, and then runs a linear layer to `out_classes`:
+
+        `fpn_features: List[Tensor] => AvgPool => Flatten => Linear`
+
+    Also includes `compute_loss` to match the design of other
+    components of object detection systems.
+    To use your own loss function, pass it into `loss_func`.
+    If `loss_func` is None (the default), we create one based on the other args:
+        If `multilabel` is True, one-hot encoded targets are expected and
+        nn.BCEWithLogitsLoss is used; else nn.CrossEntropyLoss is used
+        and targets are expected to be integers.
+    NOTE: Not all loss function args are exposed
+    """
+
+    def __init__(
+        self,
+        out_classes: int,
+        num_fpn_features: int,
+        fpn_keys: Union[List[str], List[int], None] = None,
+        dropout: Optional[float] = 0.2,
+        # Loss function args
+        loss_func: Optional[nn.Module] = None,
+        activation: Optional[nn.Module] = None,
+        multilabel: bool = False,
+        loss_func_wts: Optional[Tensor] = None,
+        # Final postprocessing args
+        thresh: Optional[float] = None,
+        topk: Optional[int] = None,
+    ):
+        super().__init__()
+
+        # Setup loss function & activation
+        self.multilabel, self.loss_func, self.loss_func_wts, self.activation = (
+            multilabel,
+            loss_func,
+            loss_func_wts,
+            activation,
+        )
+        self.thresh, self.topk = thresh, topk
+
+        # Setup head
+        self.fpn_keys = fpn_keys
+        self.classifier = nn.Sequential(
+            nn.Flatten(1),
+            nn.Dropout(dropout) if dropout else Passthrough(),
+            nn.Linear(num_fpn_features, out_classes),
+        )
+        self.setup_loss_function()
+        self.setup_postprocessing()
+
+    def setup_postprocessing(self):
+        if self.multilabel:
+            if self.topk is None and self.thresh is None:
+                self.thresh = 0.5
+        else:
+            if self.topk is None and self.thresh is None:
+                self.topk = 1
+
+    def setup_loss_function(self):
+        if self.loss_func is None:
+            if self.multilabel:
+                # self.loss_func = nn.BCEWithLogitsLoss(self.loss_func_wts)
+                self.loss_func = partial(
+                    F.binary_cross_entropy_with_logits, pos_weight=self.loss_func_wts
+                )
+                self.activation = torch.sigmoid  # nn.Sigmoid()
+            else:
+                # self.loss_func = nn.CrossEntropyLoss(self.loss_func_wts)
+                self.loss_func = partial(F.cross_entropy, weight=self.loss_func_wts)
+                self.activation = partial(F.softmax, dim=-1)  # nn.Softmax(-1)
+
+    @classmethod
+    def from_config(cls, config: ClassifierConfig):
+        return cls(**config.__dict__)
+
+    # TODO: Make it run with regular features as well
+    def forward(self, features: Union[Tensor, TensorDict, TensorList]):
+        """
+        Sequence of outputs from an FPN or regular feature extractor
+          => Avg. 
Pool each into 1 dimension + => Concatenate into single tensor + => Linear layer -> output classes + + If `self.fpn_keys` is specified, it grabs the specific (int|str) indices from + `features` for the pooling layer, else it takes _all_ of them + """ + if isinstance(features, (list, dict, tuple)): + # Grab specific features if specified + if self.fpn_keys is not None: + pooled_features = [ + F.adaptive_avg_pool2d(features[k], 1) for k in self.fpn_keys + ] + # If no `fpn_keys` exist, concat all the feature maps (could be expensive) + else: + pooled_features = [F.adaptive_avg_pool2d(feat, 1) for feat in features] + pooled_features = torch.cat(pooled_features, dim=1) + + # If doing regular (non-FPN) feature extraction, we don't need `fpn_keys` and + # just avg. pool the last layer's features + elif isinstance(features, Tensor): + pooled_features = F.adaptive_avg_pool2d(features, 1) + else: + raise TypeError( + f"Expected TensorList|TensorDict|Tensor|tuple, got {type(features)}" + ) + + return self.classifier(pooled_features) + + # TorchVision style API + def compute_loss(self, predictions, targets): + return self.loss_func(predictions, targets) + + def postprocess(self, predictions): + return self.activation(predictions) + + # MMDet style API + def forward_train(self, x, gt_label) -> Tensor: + preds = self(x) + return self.loss_func(preds, gt_label) + + def forward_activate(self, x): + "Run forward pass with activation function" + x = self(x) + return self.activation(x) From a6048cb0abbe24891a2a70f9d99d3ef27b7b4641 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Tue, 1 Jun 2021 00:26:41 +0530 Subject: [PATCH 002/122] add multi augmentation dataset --- icevision/models/multitask/data/__init__.py | 0 icevision/models/multitask/data/dataset.py | 133 ++++++++++++++++++++ icevision/models/multitask/data/dtypes.py | 18 +++ 3 files changed, 151 insertions(+) create mode 100644 icevision/models/multitask/data/__init__.py create mode 100644 icevision/models/multitask/data/dataset.py create mode 100644 icevision/models/multitask/data/dtypes.py diff --git a/icevision/models/multitask/data/__init__.py b/icevision/models/multitask/data/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/icevision/models/multitask/data/dataset.py b/icevision/models/multitask/data/dataset.py new file mode 100644 index 000000000..a17078556 --- /dev/null +++ b/icevision/models/multitask/data/dataset.py @@ -0,0 +1,133 @@ +from icevision.imports import * +from icevision.core import * +from icevision.core.tasks import Task +from torch.utils.data import Dataset +from icevision.data.dataset import Dataset as RecordDataset +from icevision.utils.utils import normalize, flatten + +import icevision.tfms as tfms +import torchvision.transforms as Tfms + +__all__ = ["HybridAugmentationsRecordDataset", "RecordDataset"] + + +class HybridAugmentationsRecordDataset(Dataset): + """ + Dataset that stores records internally and dynamically attaches an `img` component + to each task when being fetched + + Arguments: + * records: A list of records. 
+        * classification_transforms_groups : Dict mapping group names to dicts with
+          "tasks" (a list of task names) and "transforms" (the transform(s) applied
+          to that group's image)
+        * detection_transforms : Icevision albumentations adapter for detection transforms
+        * norm_mean : norm mean stats
+        * norm_std : norm stdev stats
+        * debug : If True, prints info & unnormalised `PIL.Image`s are returned when fetching items
+    """
+
+    def __init__(
+        self,
+        records: List[dict],
+        classification_transforms_groups: dict,
+        detection_transforms: Optional[tfms.Transform] = None,
+        norm_mean: Collection[float] = [0.485, 0.456, 0.406],
+        norm_std: Collection[float] = [0.229, 0.224, 0.225],
+        debug: bool = False,
+    ):
+        "Return `PIL.Image` when `debug=True`"
+        self.records = records
+        self.classification_transforms_groups = classification_transforms_groups
+        self.detection_transforms = detection_transforms
+        self.norm_mean = norm_mean
+        self.norm_std = norm_std
+        self.debug = debug
+        self.validate()
+
+    def validate(self):
+        """
+        Input args validation
+        * Ensure that each value in the `classification_transforms_groups` dict
+          has a "tasks" and a "transforms" key
+        * Ensure that every task mentioned in `classification_transforms_groups`
+          is present in the record
+        """
+        for group in self.classification_transforms_groups.values():
+            assert set(group.keys()).issuperset(
+                ["tasks", "transforms"]
+            ), f"Invalid keys in `classification_transforms_groups`"
+
+        missing_tasks = []
+        for attr in flatten(
+            [g["tasks"] for g in self.classification_transforms_groups.values()]
+        ):
+            if not hasattr(self.records[0], attr):
+                missing_tasks += [attr]
+        if missing_tasks:
+            raise ValueError(
+                f"`classification_transforms_groups` mentions tasks that are not present in the `record`. \n"
+                f"Missing the following tasks: {missing_tasks}"
+            )
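+
+    # A minimal sketch of the expected `classification_transforms_groups`
+    # structure (illustrative task name; the transforms must map PIL -> PIL,
+    # e.g. from `torchvision.transforms`, imported above as `Tfms`):
+    #
+    #   classification_transforms_groups = {
+    #       "group1": dict(
+    #           tasks=["shot_framing"],
+    #           transforms=Tfms.Compose(
+    #               [Tfms.Resize((384, 384)), Tfms.RandomHorizontalFlip()]
+    #           ),
+    #       ),
+    #   }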
+
+    def __len__(self):
+        return len(self.records)
+
+    def __getitem__(self, i):
+        record = self.records[i].load()
+
+        # Keep a copy of the orig img as it gets modified by albu
+        original_img = deepcopy(record.img)
+        if isinstance(original_img, np.ndarray):
+            original_img = PIL.Image.fromarray(original_img)
+
+        # Do detection transform and assign it to the detection task
+        if self.detection_transforms is not None:
+            record = self.detection_transforms(record)
+
+        record.add_component(ImageRecordComponent(Task("detection")))
+        record.detection.set_img(record.img)
+
+        if self.debug:
+            print(f"Fetching Item #{i}")
+
+        # Do classification transforms
+        for group in self.classification_transforms_groups.values():
+            img_tfms = group["transforms"]
+            tfmd_img = img_tfms(original_img)
+            if self.debug:
+                print(f"  Group: {group['tasks']}, ID: {id(tfmd_img)}")
+
+            # NOTE:
+            # * We need to add the img component dynamically here to
+            #   play nice with the albumentations adapter 🤬
+            # * Setting the same img twice (to diff parts in memory),
+            #   but it's ok cuz we will unload the record in DataLoader
+            for task in group["tasks"]:
+                record.add_component(ImageRecordComponent(Task(task)))
+                getattr(record, task).set_img(tfmd_img)
+                if self.debug:
+                    print(f"    - Task: {task}, ID: {id(tfmd_img)}")
+
+        # This is a bit verbose, but allows us to return PIL images for easy debugging.
+        # Else, it returns normalized numpy arrays, like usual icevision datasets
+        for comp in record.components:
+            if isinstance(comp, ImageRecordComponent):
+                # Convert to `np.ndarray` if it isn't already
+                if isinstance(comp.img, PIL.Image.Image):
+                    comp.set_img(np.array(comp.img))
+                if self.debug:  # for debugging only
+                    comp.set_img(PIL.Image.fromarray(comp.img))
+                else:
+                    comp.set_img(
+                        normalize(comp.img, mean=self.norm_mean, std=self.norm_std)
+                    )
+
+        return record
+
+    def __repr__(self):
+        # NOTE: +1 accounts for the dedicated `detection` group
+        return f"<{self.__class__.__name__} with {len(self.records)} items and {len(self.classification_transforms_groups)+1} groups>"
diff --git a/icevision/models/multitask/data/dtypes.py b/icevision/models/multitask/data/dtypes.py
new file mode 100644
index 000000000..ecb25432d
--- /dev/null
+++ b/icevision/models/multitask/data/dtypes.py
@@ -0,0 +1,18 @@
+from typing import Dict, List, Tuple, Union
+from torch import Tensor
+import numpy as np
+import torch
+
+__all__ = ["ImgMetadataDict", "TensorList", "TensorDict", "ArrayList", "ArrayDict"]
+
+ImgMetadataDict = Dict[str, Union[Tuple[int], np.ndarray]]
+TensorList = List[Tensor]
+TensorDict = Dict[str, Tensor]
+ArrayList = List[np.ndarray]
+ArrayDict = Dict[str, np.ndarray]
+
+ClassificationGroupDataDict = Dict[str, Union[List[str], Tensor, TensorDict]]
+DataDictClassification = Dict[str, ClassificationGroupDataDict]
+DataDictDetection = Union[
+    TensorDict, ArrayDict, Dict[str, Union[Tuple[int], ImgMetadataDict]]
+]

From 22f78466457e33a429137bfd25be0660bd0012d0 Mon Sep 17 00:00:00 2001
From: Rahul Somani
Date: Tue, 1 Jun 2021 00:27:35 +0530
Subject: [PATCH 003/122] add `flatten` utility

---
 icevision/utils/utils.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/icevision/utils/utils.py b/icevision/utils/utils.py
index b37e80ac0..bffb4f64a 100644
--- a/icevision/utils/utils.py
+++ b/icevision/utils/utils.py
@@ -16,10 +16,14 @@
     "denormalize_imagenet",
     "denormalize_mask",
     "patch_class_to_main",
+    "flatten",
+    # "Dictionary",
 ]
 
 from icevision.imports import *
 
+# from addict import Dict as _Dict
+
 
 def notnone(x):
     return x is not None
@@ -109,3 +113,18 @@ def patch_class_to_main(cls):
         setattr(__main__, cls.__name__, cls)
         cls.__module__ = "__main__"
     return cls
+
+
+def flatten(x: Any) -> List[Any]:
+    flattened_list = []
+    for item in x:
+        if isinstance(item, (tuple, list, np.ndarray, pd.Series)):
+            flattened_list.extend(item)
+        else:
+            flattened_list.append(item)
+    return flattened_list
+
+
+# class Dictionary(_Dict):
+#     def __missing__(self, key):
+#         raise KeyError(key)
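
Note that `flatten` is single-level rather than recursive; a quick illustration
(assumed inputs):

    >>> flatten([1, [2, 3], (4, 5), "abc"])
    [1, 2, 3, 4, 5, 'abc']
    >>> flatten([[1, [2]]])
    [1, [2]]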

From 62754728522d5ba494f96e2a1955dfe00eeb6bdc Mon Sep 17 00:00:00 2001
From: Rahul Somani
Date: Tue, 1 Jun 2021 00:28:43 +0530
Subject: [PATCH 004/122] hybrid single stage detector, dataloader, prediction

---
 icevision/models/multitask/__init__.py       |   0
 icevision/models/multitask/mmdet/__init__.py |   0
 .../models/multitask/mmdet/dataloader.py     | 194 +++++++++++
 .../models/multitask/mmdet/prediction.py     | 258 ++++++++++++++
 .../models/multitask/mmdet/single_stage.py   | 329 ++++++++++++++++++
 5 files changed, 781 insertions(+)
 create mode 100644 icevision/models/multitask/__init__.py
 create mode 100644 icevision/models/multitask/mmdet/__init__.py
 create mode 100644 icevision/models/multitask/mmdet/dataloader.py
 create mode 100644 icevision/models/multitask/mmdet/prediction.py
 create mode 100644 icevision/models/multitask/mmdet/single_stage.py

diff --git a/icevision/models/multitask/__init__.py b/icevision/models/multitask/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/icevision/models/multitask/mmdet/__init__.py b/icevision/models/multitask/mmdet/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/icevision/models/multitask/mmdet/dataloader.py b/icevision/models/multitask/mmdet/dataloader.py
new file mode 100644
index 000000000..486d07e03
--- /dev/null
+++ b/icevision/models/multitask/mmdet/dataloader.py
@@ -0,0 +1,194 @@
+# from icevision.all import *
+from icevision.core import *
+from icevision.models.multitask.data.dtypes import *
+from icevision.models.mmdet.common.utils import convert_background_from_zero_to_last
+# NOTE: shadowed by the local redefinition below, which forwards kwargs
+from icevision.models.utils import unload_records
+from icevision.models.mmdet.common.bbox.dataloaders import (
+    _img_tensor,
+    _img_meta,
+    _labels,
+    _bboxes,
+)
+from collections import defaultdict
+
+
+def unload_records(build_batch, **build_batch_kwargs):
+    """
+    This decorator function unloads records so they are not carried around after
+    batch creation, and also accepts any additional args required by the
+    `build_batch` function
+    """
+
+    def inner(records):
+        tupled_output, records = build_batch(records, **build_batch_kwargs)
+        for record in records:
+            record.unload()
+        return tupled_output, records
+
+    return inner
+
+
+def build_multi_aug_batch(
+    records: Sequence[RecordType], classification_transform_groups: dict
+) -> Tuple[
+    Dict[str, Union[DataDictClassification, DataDictDetection]], Sequence[RecordType]
+]:
+    """
+    Docs:
+    Take as inputs `records` and `classification_transform_groups` and return
+    a tuple of dictionaries, one for detection data and the other for classification.
+
+    Each `record` is expected to have a specific structure. For example:
+
+    BaseRecord
+
+    common:
+        - Image ID: 4
+        - Filepath: sample_image.png
+        - Image: 640x640x3 Image
+        - Image size ImgSize(width=640, height=640)
+    color_saturation:
+        - Image: 640x640x3 Image
+        - Class Map:
+        - Labels: [1]
+    shot_composition:
+        - Class Map:
+        - Labels: [1]
+        - Image: 640x640x3 Image
+    detection:
+        - BBoxes: []
+        - Image: 640x640x3 Image
+        - Class Map:
+        - Labels: [1]
+    shot_framing:
+        - Class Map:
+        - Labels: [3]
+        - Image: 640x640x3 Image
+
+    `classification_transform_groups` describes how to group classification data. 
For example: + { + "group1": dict(tasks=["shot_composition"]), + "group2": dict(tasks=["color_saturation", "shot_framing"]) + } + + + And return a nested data dictionary - (`detection_data`, `classification_data`) and + the loaded records + { + `detection_data`: + { + "detection": dict( + images: Tensor = ..., + img_metas: Dict[ + 'img_shape': HWC tuple, + 'pad_shape': HWC tuple, + 'scale_factor': np.ndarray + ] = ..., + gt_bboxes: Tensor = ..., + gt_bbox_labels: Tensor = ..., + ) + } + + `classification_data`: + { + "group1": dict( + tasks = ["shot_composition"], + images: Tensor = ..., + gt_classification_labels=dict( + "shot_composition": Tensor = ..., + ) + ), + "group2": dict( + tasks = ["color_saturation", "shot_framing"], + images: Tensor = ..., + gt_classification_labels=dict( + "color_saturation": Tensor = ..., + "shot_framing": Tensor = ..., + ) + ) + } + } + """ + # NOTE: `detection` is ALWAYS treated as a distinct group + det_images, bbox_labels, bboxes, img_metas = [], [], [], [] + classification_data = defaultdict(lambda: defaultdict(list)) + classification_labels = defaultdict(list) + + for record in records: + # Create detection data + det_images.append(_img_tensor(record.detection)) + img_metas.append(_img_meta(record)) + bbox_labels.append(_labels(record)) + bboxes.append(_bboxes(record)) + + # Get classification images for each group + for key, group in classification_transform_groups.items(): + task = getattr(record, group["tasks"][0]) + # assert (record.color_saturation.img == record.shot_framing.img).all() + + classification_data[key]["tasks"] = group["tasks"] + classification_data[key]["images"].append(_img_tensor(task)) + + # Get classification labels for each group + for comp in record.components: + name = comp.task.name + if isinstance(comp, ClassificationLabelsRecordComponent): + if comp.is_multilabel: + classification_labels[name].append(comp.one_hot_encoded()) + else: + classification_labels[name].extend(comp.label_ids) + + # Massage data + for group in classification_data.values(): + group["classification_labels"] = { + task: tensor(classification_labels[task]) for task in group["tasks"] + } + group["images"] = torch.stack(group["images"]) + classification_data = {k: dict(v) for k, v in classification_data.items()} + + detection_data = { + "img": torch.stack(det_images), + "img_metas": img_metas, + "gt_bboxes": bboxes, + "gt_bbox_labels": bbox_labels, + } + + data = dict(detection=detection_data, classification=classification_data) + return data, records + + +@unload_records +def build_single_aug_batch(records: Sequence[RecordType]): + """ + Regular `mmdet` dataloader but with classification added in + """ + images, bbox_labels, bboxes, img_metas = [], [], [], [] + classification_labels = defaultdict(list) + + for record in records: + images.append(_img_tensor(record)) + img_metas.append(_img_meta(record)) + bbox_labels.append(_labels(record)) + bboxes.append(_bboxes(record)) + + # Loop through and create classifier dict of inputs + for comp in record.components: + name = comp.task.name + if isinstance(comp, ClassificationLabelsRecordComponent): + if comp.is_multilabel: + labels = comp.one_hot_encoded() + classification_labels[name].append(labels) + else: + labels = comp.label_ids + classification_labels[name].extend(labels) + + classification_labels = {k: tensor(v) for k, v in classification_labels.items()} + + data = { + "img": torch.stack(images), + "img_metas": img_metas, + "gt_bboxes": bboxes, + "gt_bbox_labels": bbox_labels, + "gt_classification_labels": 
classification_labels, + } + + return data, records diff --git a/icevision/models/multitask/mmdet/prediction.py b/icevision/models/multitask/mmdet/prediction.py new file mode 100644 index 000000000..bc8968570 --- /dev/null +++ b/icevision/models/multitask/mmdet/prediction.py @@ -0,0 +1,258 @@ +# Modified from icevision.models.mmdet.common.bbox.prediction + +from icevision.all import * +from icevision.models.mmdet.common.bbox.prediction import _unpack_raw_bboxes + +from ..utils import * + + +__all__ = [ + "predict", + "predict_from_dl", + "convert_raw_prediction", + "convert_raw_predictions", + "finalize_classifier_preds", +] + +from icevision.imports import * +from icevision.utils import * +from icevision.core import * +from icevision.data import * +from icevision.core.tasks import Task +from icevision.models.utils import _predict_from_dl +from icevision.models.mmdet.common.utils import * +from icevision.models.mmdet.common.bbox.dataloaders import build_infer_batch +from icevision.models.mmdet.common.utils import convert_background_from_last_to_zero + + +def finalize_classifier_preds(pred, cfg: dict, record: RecordType, task: str) -> tuple: + """ + Analyse preds post-activations based on `cfg` arguments; return the + relevant scores and string labels derived from `record` + + Can compute the following: + * top-k (`cfg` defaults to 1 for single-label problems) + * filter preds by threshold + """ + + # pred = np.array(pred) + pred = pred.detach().cpu().numpy() + + if cfg.topk is not None: + index = np.argsort(pred)[-cfg.topk :] # argsort gives idxs in ascending order + value = pred[index] + + elif cfg.thresh is not None: + index = np.where(pred > cfg.thresh)[0] # index into the tuple + value = pred[index] + + labels = [getattr(record, task).class_map._id2class[i] for i in index] + scores = pred[index].tolist() + + return labels, scores + + +@torch.no_grad() +def _predict_batch( + model: nn.Module, + batch: Sequence[torch.Tensor], + records: Sequence[BaseRecord], + classification_configs: dict, + detection_threshold: float = 0.5, + keep_images: bool = False, + device: Optional[torch.device] = None, +): + device = device or model_device(model) + batch["img"] = [img.to(device) for img in batch["img"]] + + raw_preds = model(return_loss=False, rescale=False, **batch) + return convert_raw_predictions( + batch=batch, + raw_preds=raw_preds, + records=records, + classification_configs=classification_configs, + keep_images=keep_images, + detection_threshold=detection_threshold, + ) + + +def predict( + model: nn.Module, + dataset: Dataset, + classification_configs: dict, + detection_threshold: float = 0.5, + keep_images: bool = False, + device: Optional[torch.device] = None, +) -> List[Prediction]: + batch, records = build_infer_batch(dataset) + + return _predict_batch( + model=model, + batch=batch, + records=records, + classification_configs=classification_configs, + detection_threshold=detection_threshold, + keep_images=keep_images, + device=device, + ) + + +@torch.no_grad() +def _predict_from_dl( + predict_fn, + model: nn.Module, + infer_dl: DataLoader, + keep_images: bool = False, + show_pbar: bool = True, + **predict_kwargs, +) -> List[Prediction]: + all_preds = [] + for batch, records in pbar(infer_dl, show=show_pbar): + preds = predict_fn( + model=model, + batch=batch, + records=records, + keep_images=keep_images, + **predict_kwargs, + ) + all_preds.extend(preds) + + return all_preds + + +def predict_from_dl( + model: nn.Module, + infer_dl: DataLoader, + # classification_configs: dict, + 
show_pbar: bool = True, + keep_images: bool = False, + **predict_kwargs, +): + _predict_batch_fn = partial(_predict_batch, keep_images=keep_images) + # FIXME `classification_configs` needs to be passed in as **predict_kwargs + return _predict_from_dl( + predict_fn=_predict_batch_fn, + model=model, + # classification_configs=classification_configs, + infer_dl=infer_dl, + show_pbar=show_pbar, + keep_images=keep_images, + **predict_kwargs, + ) + + +def convert_raw_predictions( + batch, + raw_preds, + records: Sequence[BaseRecord], + classification_configs: dict, + detection_threshold: float, + keep_images: bool = False, +): + + # In inference, both "img" and "img_metas" are lists. Check out the `build_infer_batch()` definition + # We need to convert that to a batch similar to train and valid batches + if isinstance(batch["img"], list): + batch = { + "img": batch["img"][0], + "img_metas": batch["img_metas"][0], + } + bbox_preds, classification_preds = ( + raw_preds["bbox_results"], + raw_preds["classification_results"], + ) + + # Convert dicts of sequences into a form that we can iterate over in a for loop + # A test / infer dataloader will not have "gt_classification_labels" as a key + if "gt_classification_labels" in batch: + gt_classification_labels = [ + dict(zip(batch["gt_classification_labels"], t)) + for t in zipsafe(*batch["gt_classification_labels"].values()) + ] + batch["gt_classification_labels"] = gt_classification_labels + classification_preds = [ + dict(zip(classification_preds, t)) + for t in zipsafe(*classification_preds.values()) + ] + batch_list = [dict(zip(batch, t)) for t in zipsafe(*batch.values())] + + return [ + convert_raw_prediction( + sample=sample, + raw_bbox_pred=bbox_pred, + raw_classification_pred=classification_pred, + classification_configs=classification_configs, + record=record, + detection_threshold=detection_threshold, + keep_image=keep_images, + ) + for sample, bbox_pred, classification_pred, record in zip( + batch_list, bbox_preds, classification_preds, records + ) + ] + + +def convert_raw_prediction( + sample, + raw_bbox_pred: dict, + raw_classification_pred: TensorDict, + classification_configs: dict, + record: BaseRecord, + detection_threshold: float, + keep_image: bool = False, +): + # convert predictions + raw_bboxes = raw_bbox_pred + scores, labels, bboxes = _unpack_raw_bboxes(raw_bboxes) + + keep_mask = scores > detection_threshold + keep_scores = scores[keep_mask] + keep_labels = labels[keep_mask] + keep_bboxes = [BBox.from_xyxy(*o) for o in bboxes[keep_mask]] + + keep_labels = convert_background_from_last_to_zero( + label_ids=keep_labels, class_map=record.detection.class_map + ) + + pred = BaseRecord( + [ + FilepathRecordComponent(), + ScoresRecordComponent(), + ImageRecordComponent(), + InstancesLabelsRecordComponent(), + BBoxesRecordComponent(), + *[ScoresRecordComponent(Task(task)) for task in classification_configs], + *[ + ClassificationLabelsRecordComponent( + Task(task), is_multilabel=cfg.multilabel + ) + for task, cfg in classification_configs.items() + ], + ] + ) + pred.detection.set_class_map(record.detection.class_map) + pred.detection.set_scores(keep_scores) + pred.detection.set_labels_by_id(keep_labels) + pred.detection.set_bboxes(keep_bboxes) + pred.above_threshold = keep_mask + + # TODO: Refactor classification loop into `common` + for task, classification_pred in raw_classification_pred.items(): + labels, scores = finalize_classifier_preds( + pred=classification_pred, + cfg=classification_configs[task], + record=record, + 
task=task,
+        )
+        pred.set_filepath(record.filepath)
+        getattr(pred, task).set_class_map(getattr(record, task).class_map)
+        getattr(pred, task).set_scores(scores)
+        getattr(pred, task).set_labels(labels)
+
+    if keep_image:
+        image = mmdet_tensor_to_image(sample["img"])
+
+        pred.set_img(image)
+        record.set_img(image)
+
+    return Prediction(pred=pred, ground_truth=record)
diff --git a/icevision/models/multitask/mmdet/single_stage.py b/icevision/models/multitask/mmdet/single_stage.py
new file mode 100644
index 000000000..677f7ef45
--- /dev/null
+++ b/icevision/models/multitask/mmdet/single_stage.py
@@ -0,0 +1,329 @@
+from typing import Dict, List
+from collections import OrderedDict
+from icevision.models.multitask.data.dtypes import *
+from icevision.models.multitask.classification_heads import *
+
+
+import torch
+import torch.nn as nn
+import torch.distributed as dist
+from torch import Tensor
+
+from icevision.models.mmdet.utils import *
+from mmcv import Config, ConfigDict
+from mmdet.models.builder import DETECTORS
+from mmdet.models.builder import build_backbone, build_detector, build_head, build_neck
+from mmdet.models.detectors.single_stage import SingleStageDetector
+from mmdet.core.bbox import *
+from typing import Union, List, Dict, Tuple, Optional
+
+# NOTE: These types live in `data/dtypes.py`; the star import above already
+# brings them in, but they are re-imported explicitly for readability
+from ..data.dtypes import (
+    TensorDict,
+    ClassificationGroupDataDict,
+    DataDictClassification,
+    DataDictDetection,
+)
+import numpy as np
+from enum import Enum
+
+
+__all__ = [
+    "ForwardType",
+    "HybridSingleStageDetector",
+    "build_backbone",
+    "build_detector",
+    "build_head",
+    "build_neck",
+]
+
+
+class ForwardType(Enum):
+    TRAIN_MULTI_AUG = 1
+    TRAIN = 2
+    EVAL = 3
+
+
+@DETECTORS.register_module(name="HybridSingleStageDetector")
+class HybridSingleStageDetector(SingleStageDetector):
+    # TODO: Add weights for loss functions
+    def __init__(
+        self,
+        backbone: Union[dict, ConfigDict],
+        neck: Union[dict, ConfigDict],
+        bbox_head: Union[dict, ConfigDict],
+        classification_heads: Optional[Union[dict, ConfigDict]] = None,
+        # keypoint_heads=None,  # TODO Someday SOON.
+        train_cfg: Optional[Union[dict, ConfigDict]] = None,
+        test_cfg: Optional[Union[dict, ConfigDict]] = None,
+        pretrained=None,
+        init_cfg: Optional[Union[dict, ConfigDict]] = None,
+    ):
+        super(HybridSingleStageDetector, self).__init__(
+            # Use `init_cfg` post mmdet 2.12
+            # backbone, neck, bbox_head, train_cfg, test_cfg, pretrained, init_cfg
+            backbone=ConfigDict(backbone),
+            neck=ConfigDict(neck),
+            bbox_head=ConfigDict(bbox_head),
+            train_cfg=ConfigDict(train_cfg),
+            test_cfg=ConfigDict(test_cfg),
+            pretrained=pretrained,
+            init_cfg=ConfigDict(init_cfg),
+        )
+        if classification_heads is not None:
+            self.classifier_heads = build_classifier_heads(classification_heads)
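+
+    # A minimal sketch (illustrative task name & values) of the
+    # `classification_heads` config expected above -- each entry is an
+    # `mmdet`-style dict dispatched through the `CLASSIFICATION_HEADS`
+    # registry by `build_classifier_heads`:
+    #
+    #   classification_heads = dict(
+    #       shot_framing=dict(
+    #           type="ImageClassificationHead", out_classes=3, num_fpn_features=512
+    #       )
+    #   )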
+
+    def train_step(
+        self,
+        data: dict,
+        step_type: ForwardType = ForwardType.TRAIN,
+    ) -> Dict[str, Union[Tensor, TensorDict, int]]:
+        """
+        A single iteration step (over a batch)
+        Args:
+            data: The output of the dataloader, whose keys typically match
+                `self.fwd_train_data_keys` or `self.fwd_eval_data_keys`
+            step_type (Enum): ForwardType.TRAIN | ForwardType.EVAL | ForwardType.TRAIN_MULTI_AUG
+
+        Returns:
+            dict[str, Union[Tensor, TensorDict, int]]
+              * `loss` : summed losses for backprop
+              * `log_vars` : variables to be logged
+              * `num_samples` : batch size per GPU when using DDP
+        """
+        losses = self(data=data, forward_type=step_type)
+        loss, log_vars = self._parse_losses(losses)
+
+        outputs = dict(
+            loss=loss,
+            log_vars=log_vars,
+            num_samples=len(data["img_metas"])
+            if "img_metas" in data.keys()
+            else len(data["detection"]["img_metas"]),
+        )
+        return outputs
+
+    # @auto_fp16(apply_to=("img",))
+    def forward(self, data: dict, forward_type: ForwardType):
+        """
+        Calls either `self.forward_train`, `self.forward_eval` or
+        `self.forward_multi_aug_train` depending on the value of `forward_type`
+
+        No TTA supported unlike all other mmdet models
+        """
+        if forward_type.value == ForwardType.TRAIN_MULTI_AUG.value:
+            return self.forward_multi_aug_train(data)
+        elif forward_type.value == ForwardType.TRAIN.value:
+            return self.forward_train(data, gt_bboxes_ignore=None)
+        elif forward_type.value == ForwardType.EVAL.value:
+            return self.forward_eval(data, rescale=False)
+        else:
+            raise ValueError(
+                f"Expected `forward_type` to be a `ForwardType`, got {type(forward_type)}: {forward_type}"
+            )
+
+    fwd_multi_aug_train_data_keys = ["detection", "classification"]
+    fwd_train_data_keys = [
+        "img",
+        "gt_bboxes",
+        "gt_bbox_labels",
+        "gt_classification_labels",
+    ]
+    fwd_eval_data_keys = ["img", "img_metas"]
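+
+    # A sketch of how a training loop drives these modes (hypothetical `data`
+    # batch built by the dataloaders above):
+    #
+    #   outputs = model.train_step(data, step_type=ForwardType.TRAIN_MULTI_AUG)
+    #   outputs["loss"].backward()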
+
+    def forward_multi_aug_train(
+        self,
+        data: Dict[str, Union[DataDictClassification, DataDictDetection]],
+    ) -> Dict[str, Tensor]:
+        """
+        Forward method where multiple views of the same image are passed.
+        The model does a dedicated forward pass for the `detection` images
+        and dedicated forward passes for each `classification` group. See
+        the dataloader docs for more details
+        Args:
+            data : a dictionary with two keys - `detection` and `classification`.
+                See the dataloader docs for more details on the exact structure
+
+        Returns:
+            dict[str, Tensor]
+              * `loss_classification`: Dictionary of classification losses where each key
+                 corresponds to the classification head / task name
+              * `loss_cls`: Bbox classification loss
+              * `loss_bbox`: Bbox regression loss
+        """
+        assert set(data.keys()).issuperset(self.fwd_multi_aug_train_data_keys)
+        # detection_img, img_metas, gt_bboxes, gt_bbox_labels = data["detection"].values()
+        super(SingleStageDetector, self).forward_train(
+            data["detection"]["img"],
+            data["detection"]["img_metas"],
+        )
+        detection_features = self.extract_feat(data["detection"]["img"])
+
+        losses = self.bbox_head.forward_train(
+            x=detection_features,
+            img_metas=data["detection"]["img_metas"],
+            gt_bboxes=data["detection"]["gt_bboxes"],
+            gt_labels=data["detection"]["gt_bbox_labels"],
+            # NOTE we do not return `gt_bboxes_ignore` in the dataloader
+            gt_bboxes_ignore=data["detection"].get("gt_bboxes_ignore", None),
+        )
+
+        # Compute features per _group_, then do a forward pass through each
+        # classification head in that group to compute the loss
+        classification_losses = {}
+        # NOTE: the loop variable is named `group_data` so it doesn't shadow `data`
+        for group, group_data in data["classification"].items():
+            classification_features = self.extract_feat(group_data["images"])
+            for task in group_data["tasks"]:
+                head = self.classifier_heads[task]
+                classification_losses[task] = head.forward_train(
+                    x=classification_features,
+                    gt_label=group_data["classification_labels"][task],
+                )
+
+        losses["loss_classification"] = classification_losses
+        return losses
+
+    def forward_train(self, data: dict, gt_bboxes_ignore=None) -> Dict[str, Tensor]:
+        """
+        Forward pass
+        Args (keys of `data`):
+            img: Normalised input images of shape (N, C, H, W).
+            img_metas: A List of image info dict where each dict
+                has: 'img_shape', 'scale_factor', 'flip', and may also contain
+                'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
+                For details on the values of these keys see
+                :class:`mmdet.datasets.pipelines.Collect`.
+            gt_bboxes: List of gt bboxes in `xyxy` format for each image
+            gt_labels: Integer class indices corresponding to each box
+            gt_classification_labels: Dict of ground truths per classification task
+            gt_bboxes_ignore (None | list[Tensor]): Specify which bounding
+                boxes can be ignored when computing the loss.
+
+        Returns:
+            dict[str, Tensor]
+              * `loss_classification`: Dictionary of classification losses where each key
+                 corresponds to the classification head / task name
+              * `loss_cls`: Bbox classification loss
+              * `loss_bbox`: Bbox regression loss
+        """
+        assert set(data.keys()).issuperset(self.fwd_train_data_keys)
+        super(SingleStageDetector, self).forward_train(data["img"], data["img_metas"])
+        features = self.extract_feat(data["img"])
+        losses = self.bbox_head.forward_train(
+            x=features,
+            img_metas=data["img_metas"],
+            gt_bboxes=data["gt_bboxes"],
+            gt_labels=data["gt_bbox_labels"],
+            gt_bboxes_ignore=gt_bboxes_ignore,
+        )
+
+        classification_losses = {
+            name: head.forward_train(
+                x=features,
+                gt_label=data["gt_classification_labels"][name],
+            )
+            for name, head in self.classifier_heads.items()
+        }
+        losses["loss_classification"] = classification_losses
+        return losses
+
+    # Maintain API
+    # Placeholder in case we want to do TTA during eval?
+    def simple_test(self, *args):
+        return self.forward_eval(*args)
+
+    def forward_eval(
+        self, data: dict, rescale: bool = False
+    ) -> Dict[str, Union[TensorDict, List[np.ndarray]]]:
+        """
+        TODO Update mmdet docstring
+
+        Eval / test function on a single image (without TTA). 
Returns raw predictions of + the model that can be processed in `convert_raw_predictions` + + Args: + imgs: List of multiple images + img_metas: List of image metadata. + rescale: Whether to rescale the results. + + Returns: + { + "bbox_results": List[ArrayList], + "classification_results": TensorDict + } + + bbox_results: Nested list of BBox results The outer list corresponds + to each image. The inner list + corresponds to each class. + classification_results: Dictionary of activated outputs for each classification head + """ + assert set(data.keys()).issuperset(self.fwd_eval_data_keys) + # Raw outputs from network + img, img_metas = data["img"], data["img_metas"] + features = self.extract_feat(img) + bbox_outs = self.bbox_head(features) + classification_results = { + name: head.forward_activate(features) + for name, head in self.classifier_heads.items() + } + + # Get original input shape to support onnx dynamic shape + if torch.onnx.is_in_onnx_export(): + # get shape as tensor + img_shape = torch._shape_as_tensor(img)[2:] + img_metas[0]["img_shape_for_onnx"] = img_shape + + bbox_list = self.bbox_head.get_bboxes(*bbox_outs, img_metas, rescale=rescale) + + # Skip post-processing when exporting to ONNX + if torch.onnx.is_in_onnx_export(): + return bbox_list, classification_results + + bbox_results = [ + bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes) + for det_bboxes, det_labels in bbox_list + ] + return { + "bbox_results": bbox_results, + "classification_results": classification_results, + } + + def _parse_losses( + self, losses: Dict[str, Union[Tensor, TensorDict, TensorList]] + ) -> tuple: + # TODO: Pass weights into loss + # NOTE: This is where you can pass in weights for each loss function + r"""Parse the raw outputs (losses) of the network. + + Args: + losses (dict): Raw output of the network, coming typically from `self.train_step` + + Returns: + tuple[Tensor, dict]: (loss, log_vars), loss is the loss tensor \ + which may be a weighted sum of all losses, log_vars contains \ + all the variables to be sent to the logger. 
+ """ + log_vars = OrderedDict() + for loss_name, loss_value in losses.items(): + if isinstance(loss_value, torch.Tensor): + log_vars[loss_name] = loss_value.mean() + elif isinstance(loss_value, list): + log_vars[loss_name] = sum(_loss.mean() for _loss in loss_value) + elif isinstance(loss_value, dict): + # Unroll classification losses returned as a dict + for k, v in loss_value.items(): + log_vars[f"loss_classification_{k}"] = v + else: + raise TypeError( + f"{loss_name} is not a tensor or list or dict of tensors" + ) + + loss = sum(_value for _key, _value in log_vars.items() if "loss" in _key) + + log_vars["loss"] = loss + for loss_name, loss_value in log_vars.items(): + # reduce loss when distributed training + if dist.is_available() and dist.is_initialized(): + loss_value = loss_value.data.clone() + dist.all_reduce(loss_value.div_(dist.get_world_size())) + log_vars[loss_name] = loss_value.item() + + return loss, log_vars From ce033d582f0fde2115e35c79d6d1a59502f51c71 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Tue, 1 Jun 2021 00:28:48 +0530 Subject: [PATCH 005/122] lightning adapter --- .../models/multitask/mmdet/pl_adapter.py | 158 ++++++++++++++++++ 1 file changed, 158 insertions(+) create mode 100644 icevision/models/multitask/mmdet/pl_adapter.py diff --git a/icevision/models/multitask/mmdet/pl_adapter.py b/icevision/models/multitask/mmdet/pl_adapter.py new file mode 100644 index 000000000..7de005218 --- /dev/null +++ b/icevision/models/multitask/mmdet/pl_adapter.py @@ -0,0 +1,158 @@ +# Modified from `icevision.models.mmdet.lightning.model_adapter` +# NOTE `torchmetrics` comes installed with `pytorch-lightning` +# We could in theory also do `pl.metrics` + +import torchmetrics as tm +from icevision.all import * +from mmcv.utils import ConfigDict +from loguru import logger +from icevision.models.multitask.mmdet.single_stage import ForwardType +from icevision.models.multitask.mmdet.prediction import * + + +__all__ = ["HybridSingleStageDetectorLightningAdapter"] + + +class HybridSingleStageDetectorLightningAdapter(pl.LightningModule, ABC): + """Lightning module specialized for MultiTask training, with metrics support. + + The methods `forward`, `training_step`, `validation_step`, `validation_epoch_end` + are already overriden. + + # Arguments + model: The pytorch model to use. + metrics: `Sequence` of metrics to use. + debug: Whether to run in `debug` mode. Prints out useful info + + # Returns + A `LightningModule`. 
+ """ + + def __init__( + self, + model: nn.Module, + metrics: List[Metric] = None, + debug: bool = False, + ): + super().__init__() + self.metrics = metrics or [] + self.model = model + self.debug = debug + + # TODO: Make `self.classification_metrics` a `nn.ModuleDict` + # self.classification_metrics = {} + for name, head in model.classifier_heads.items(): + if head.multilabel: + thresh = head.thresh if head.thresh is not None else 0.5 + metric = tm.Accuracy(threshold=thresh, subset_accuracy=True) + else: + metric = tm.Accuracy(threshold=0.01, top_k=1) + setattr(self, f"{name}_accuracy", metric) + self.post_init() + + def post_init(self): + pass + + # ======================== TRAINING METHODS ======================== # + + def forward(self, *args, **kwargs): + return self.model(*args, **kwargs) + + def training_step(self, batch: Tuple[dict, Sequence[RecordType]], batch_idx): + # Unpack batch into dict + list of records + data, samples = batch + # Get model outputs - dict of losses and vars to log + step_type = ForwardType.TRAIN_MULTI_AUG + if "img_metas" in data.keys(): + step_type = ForwardType.TRAIN + + if self.debug: + logger.info(f"Training Step: {data.keys()}") + logger.info(f"Batch Idx: {batch_idx}") + logger.info(f"Training Mode: {step_type}") + + outputs = self.model.train_step(data=data, step_type=step_type) + + # Log losses + self._log_vars(outputs["log_vars"], "train") + + # Return loss for PL to do its thing + return outputs["loss"] + + def validation_step(self, batch, batch_idx): + data, records = batch + if self.debug: + logger.info(f"Validation Step: {data.keys()}") + logger.info(f"Batch Idx: {batch_idx}") + + self.model.eval() + with torch.no_grad(): + # get losses + outputs = self.model.train_step(data=data, step_type=ForwardType.TRAIN) + raw_preds = self.model(data=data, forward_type=ForwardType.EVAL) + self.log_classification_metrics( + classification_preds=raw_preds["classification_results"], + yb_classif=data["gt_classification_labels"], + ) + + preds = self.convert_raw_predictions( + batch=data, raw_preds=raw_preds, records=records + ) + self.accumulate_metrics(preds) + # self._log_vars(outputs["log_vars"], "valid") + + # TODO: is train and eval model automatically set by lighnting? 
+ self.model.train() + + # ======================== LOGGING METHODS ======================== # + + def convert_raw_predictions(self, batch, raw_preds, records): + classification_configs = { + name: ConfigDict( + multilabel=head.multilabel, topk=head.topk, thresh=head.thresh + ) + for name, head in self.model.classifier_heads.items() + } + return convert_raw_predictions( + batch=batch, + raw_preds=raw_preds, + records=records, + detection_threshold=0.0, + classification_configs=classification_configs, + ) + + def log_classification_metrics( + self, + classification_preds: Dict[str, Tensor], + yb_classif: Dict[str, Tensor], + on_step: bool = False, + prefix: str = "valid", + ): + prefix = f"{prefix}_" if not prefix == "" else "" + for name in self.model.classifier_heads.keys(): + # for name, metric in self.classification_metrics.items(): + metric = getattr(self, f"{name}_accuracy") + self.log( + f"{prefix}{metric.__class__.__name__.lower()}__{name}", # accuracy__shot_framing + # metric(classification_preds[name], yb_classif[name]), + metric(classification_preds[name], yb_classif[name].type(torch.int)), + on_step=on_step, + on_epoch=True, + ) + + def _log_vars(self, log_vars: dict, mode: str): + for k, v in log_vars.items(): + self.log(f"{mode}/{k}", v) + + def validation_epoch_end(self, outs): + self.finalize_metrics() + + def accumulate_metrics(self, preds): + for metric in self.metrics: + metric.accumulate(preds=preds) + + def finalize_metrics(self) -> None: + for metric in self.metrics: + metric_logs = metric.finalize() + for k, v in metric_logs.items(): + self.log(f"{metric.name}/{k}", v) From 0dabe85da13cfbd1086e85125fc604b229081b79 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Sat, 5 Jun 2021 12:46:10 +0530 Subject: [PATCH 006/122] add loss weight param --- .../models/multitask/classification_heads/head.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/icevision/models/multitask/classification_heads/head.py b/icevision/models/multitask/classification_heads/head.py index 0fcba1c83..996f9289f 100644 --- a/icevision/models/multitask/classification_heads/head.py +++ b/icevision/models/multitask/classification_heads/head.py @@ -41,6 +41,7 @@ class ClassifierConfig: activation: Optional[nn.Module] = None multilabel: bool = False loss_func_wts: Optional[Tensor] = None + loss_weight: float = 1.0 # Post activation processing thresh: Optional[float] = None topk: Optional[int] = None @@ -84,6 +85,7 @@ def __init__( activation: Optional[nn.Module] = None, multilabel: bool = False, loss_func_wts: Optional[Tensor] = None, + loss_weight: float = 1.0, # Final postprocessing args thresh: Optional[float] = None, topk: Optional[int] = None, @@ -91,12 +93,13 @@ def __init__( super().__init__() # Setup loss function & activation - self.multilabel, self.loss_func, self.loss_func_wts, self.activation = ( - multilabel, + self.multilabel = multilabel + self.loss_func, self.loss_func_wts, self.loss_weight = ( loss_func, loss_func_wts, - activation, + loss_weight, ) + self.activation = activation self.thresh, self.topk = thresh, topk # Setup head @@ -169,7 +172,7 @@ def forward(self, features: Union[Tensor, TensorDict, TensorList]): # TorchVision style API def compute_loss(self, predictions, targets): - return self.loss_func(predictions, targets) + return self.loss_weight * self.loss_func(predictions, targets) def postprocess(self, predictions): return self.activation(predictions) @@ -177,7 +180,7 @@ def postprocess(self, predictions): # MMDet style API def 
forward_train(self, x, gt_label) -> Tensor:
         preds = self(x)
-        return self.loss_func(preds, gt_label)
+        return self.loss_weight * self.loss_func(preds, gt_label)
 
     def forward_activate(self, x):
         "Run forward pass with activation function"

From 622e57f0faff4247c769f7601db00e3fded2bd51 Mon Sep 17 00:00:00 2001
From: Rahul Somani
Date: Sat, 12 Jun 2021 18:05:22 +0530
Subject: [PATCH 007/122] return activated preds in eval mode; import `ClassifierConfig`

---
 .../multitask/classification_heads/head.py    | 27 ++++++++++++++-----
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/icevision/models/multitask/classification_heads/head.py b/icevision/models/multitask/classification_heads/head.py
index 996f9289f..242a9e496 100644
--- a/icevision/models/multitask/classification_heads/head.py
+++ b/icevision/models/multitask/classification_heads/head.py
@@ -19,7 +19,7 @@
 MODELS = Registry("models", parent=MMCV_MODELS)
 CLASSIFICATION_HEADS = MODELS
 
-__all__ = ["ImageClassificationHead"]
+__all__ = ["ImageClassificationHead", "ClassifierConfig"]
 
 
 class Passthrough(nn.Module):
@@ -27,14 +27,22 @@ def forward(self, x):
         return x
 
 
-# NOTE: We aren't using `ClassifierConfig` anymore, and are sticking to `mmdet`'s
-# regular python `dict` convention
+"""
+`ClassifierConfig` is useful for instantiating `ImageClassificationHead`
+in different settings. If using `mmdet`, we don't use this, as the config
+is then a regular dictionary.
+
+When using yolov5, we can easily pass around this config to create the model.
+Often, it'll be used inside a dictionary of configs.
+"""
+
+
 @dataclass
 class ClassifierConfig:
     # classifier_name: str
     out_classes: int
-    fpn_keys: Union[List[str], List[int], None] = None
     num_fpn_features: int = 512
+    fpn_keys: Union[List[str], List[int], None] = None
     dropout: Optional[float] = 0.2
     # Loss function args
     loss_func: Optional[nn.Module] = None
@@ -47,6 +55,9 @@ class ClassifierConfig:
     topk: Optional[int] = None
 
     def __post_init__(self):
+        if isinstance(self.fpn_keys, int):
+            self.fpn_keys = [self.fpn_keys]
+
         if self.multilabel:
             if self.topk is None and self.thresh is None:
                 self.thresh = 0.5
@@ -167,8 +178,12 @@ def forward(self, features: Union[Tensor, TensorDict, TensorList]):
             raise TypeError(
                 f"Expected TensorList|TensorDict|Tensor|tuple, got {type(features)}"
             )
-
-        return self.classifier(pooled_features)
+        if self.training:
+            # Return raw predictions in training mode
+            return self.classifier(pooled_features)
+        else:
+            # Return predictions -> sigmoid / softmax in eval mode
+            return self.postprocess(self.classifier(pooled_features))
 
     # TorchVision style API
     def compute_loss(self, predictions, targets):
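
A small sketch (illustrative shapes; default config values) of the behaviour this
patch introduces -- raw logits in train mode, activated outputs in eval mode:

    config = ClassifierConfig(out_classes=5, num_fpn_features=512)  # single-label; topk defaults to 1
    head = ImageClassificationHead.from_config(config)

    features = torch.randn(2, 512, 8, 8)
    head.train()
    logits = head(features)  # raw logits, shape (2, 5)
    head.eval()
    probs = head(features)   # softmax-ed probabilities, each row sums to 1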
"build_classifier_heads_from_configs"] # Enter dict of dicts as `cfg` -def build_classifier_heads(cfg: Dict[str, Dict[str, dict]]): +def build_classifier_heads(configs: Dict[str, Dict[str, dict]]) -> nn.ModuleDict: """ - Build classification head from a config which is - a dict of dicts. A head is created for each key in the - input dictionary + Build classification head from a config which is a dict of dicts. + A head is created for each key in the input dictionary. - Returns a `nn.ModuleDict()` mapping keys from `cfg` to - classifier heads + Expected to be used with `mmdet` models as it uses the + `CLASSIFICATION_HEADS` registry internally + + Returns: + a `nn.ModuleDict()` mapping keys from `configs` to classifier heads """ heads = nn.ModuleDict() - # if cfg is not None: - for name, config in cfg.items(): + # if configs is not None: + for name, config in configs.items(): head = CLASSIFICATION_HEADS.build(config) heads.update({name: head}) return heads + + +def build_classifier_heads_from_configs( + configs: Dict[str, ClassifierConfig] = None +) -> nn.ModuleDict: + """ + Build a `nn.ModuleDict` of `ImageClassificationHead`s from a list of `ClassifierConfig`s + """ + if configs is None: + return nn.ModuleDict() + + assert isinstance(configs, dict), f"Expected a `dict`, got {type(configs)}" + if not all(isinstance(cfg, ClassifierConfig) for cfg in configs.values()): + raise ValueError( + f"Expected a `list` of `ClassifierConfig`s \n" + f"Either one or more elements in the list are not of type `ClassifierConfig`" + ) + + heads = nn.ModuleDict() + for name, config in configs.items(): + head = ImageClassificationHead.from_config(config) + heads.update({name: head}) + return heads From eeee30a42479060780c214a26c996b15a64dd627 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Sat, 12 Jun 2021 21:01:43 +0530 Subject: [PATCH 009/122] add experimental onnx forward method --- .../models/multitask/mmdet/single_stage.py | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/icevision/models/multitask/mmdet/single_stage.py b/icevision/models/multitask/mmdet/single_stage.py index 677f7ef45..aa0a8398f 100644 --- a/icevision/models/multitask/mmdet/single_stage.py +++ b/icevision/models/multitask/mmdet/single_stage.py @@ -286,6 +286,27 @@ def forward_eval( "classification_results": classification_results, } + # NOTE: This is experimental + def forward_onnx(self, one_img: Tensor, one_img_metas: List[ImgMetadataDict]): + """ """ + # assert torch.onnx.is_in_onnx_export() + assert len(one_img) == len(one_img_metas) == 1 + + img, img_metas = one_img, one_img_metas + + features = self.extract_feat(img) + bbox_outs = self.bbox_head(features) + classification_results = { + name: head.forward_activate(features) + for name, head in self.classifier_heads.items() + } + + img_shape = torch._shape_as_tensor(img)[2:] # Gets (H, W) + img_metas[0]["img_shape_for_onnx"] = img_shape + bbox_list = self.bbox_head.get_bboxes(*bbox_outs, img_metas, rescale=False) + + return bbox_list, list(classification_results.values()) + def _parse_losses( self, losses: Dict[str, Union[Tensor, TensorDict, TensorList]] ) -> tuple: From db46a491053b537fde480b619e550730b048e707 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Sun, 13 Jun 2021 09:27:09 +0530 Subject: [PATCH 010/122] rename for consistency with library --- .../models/multitask/mmdet/{dataloader.py => dataloaders.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename icevision/models/multitask/mmdet/{dataloader.py => dataloaders.py} (100%) diff --git 
a/icevision/models/multitask/mmdet/dataloader.py b/icevision/models/multitask/mmdet/dataloaders.py similarity index 100% rename from icevision/models/multitask/mmdet/dataloader.py rename to icevision/models/multitask/mmdet/dataloaders.py From 3dcfe4eb61350a699866b8220906cefe7ccca62f Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Sun, 13 Jun 2021 09:49:43 +0530 Subject: [PATCH 011/122] placeholders --- icevision/models/multitask/ultralytics/__init__.py | 0 icevision/models/multitask/ultralytics/pl_adapter.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 icevision/models/multitask/ultralytics/__init__.py create mode 100644 icevision/models/multitask/ultralytics/pl_adapter.py diff --git a/icevision/models/multitask/ultralytics/__init__.py b/icevision/models/multitask/ultralytics/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/icevision/models/multitask/ultralytics/pl_adapter.py b/icevision/models/multitask/ultralytics/pl_adapter.py new file mode 100644 index 000000000..e69de29bb From 9bbe4b3d26f1d1f59b662236b7ad043f264445e5 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Sun, 13 Jun 2021 09:49:55 +0530 Subject: [PATCH 012/122] implement hybrid yolov5 --- .../multitask/ultralytics/yolo_multitask.py | 229 ++++++++++++++++++ 1 file changed, 229 insertions(+) create mode 100644 icevision/models/multitask/ultralytics/yolo_multitask.py diff --git a/icevision/models/multitask/ultralytics/yolo_multitask.py b/icevision/models/multitask/ultralytics/yolo_multitask.py new file mode 100644 index 000000000..870d44a9d --- /dev/null +++ b/icevision/models/multitask/ultralytics/yolo_multitask.py @@ -0,0 +1,229 @@ +__all__ = ["HybridYOLOV5", "ClassifierConfig"] + +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np + +from pathlib import Path +from torch import Tensor +from icevision.models.multitask.classification_heads.head import ( + ClassifierConfig, + ImageClassificationHead, + Passthrough, +) +from icevision.models.multitask.classification_heads.builder import ( + build_classifier_heads_from_configs, +) + +# from .yolo import * +from yolov5.models.yolo import * + +from typing import Dict, Optional, List, Tuple +from copy import deepcopy +from loguru import logger + +logger = logger.opt(colors=True) + + +# fmt: off +YOLO_FEATURE_MAP_DIMS = { + # models/*yaml + "yolov5s": [128, 256, 512], # (128, 32, 32), (256, 16, 16), (512, 8, 8) + "yolov5m": [192, 384, 768], # (192, 32, 32), (384, 16, 16), (768, 8, 8) + "yolov5l": [256, 512, 1024], # (256, 32, 32), (512, 16, 16), (1024, 8, 8) + "yolov5x": [320, 640, 1280], # (320, 32, 32), (640, 16, 16), (1280, 8, 8) + + # models/hub/*yaml + "yolov3-spp": [256, 512, 1024], # (256, 32, 32), (512, 16, 16), (1024, 8, 8) + "yolov3-tiny": [256, 512], # (256, 16, 16), (512, 8, 8) + "yolov3": [256, 512, 1024], # (256, 32, 32), (512, 16, 16), (1024, 8, 8) + "yolov5-fpn": [256, 512, 1024], # (256, 32, 32), (512, 16, 16), (1024, 8, 8) + "yolov5-p2": [256, 512, 1024], # (256, 32, 32), (512, 16, 16), (1024, 8, 8) + "yolov5-p6": [256, 512, 768, 1024], # (256, 32, 32), (512, 16, 16), (768, 8, 8), (1024, 4, 4) + "yolov5-p7": [256, 512, 768, 1024, 1280], # (256, 32, 32), (512, 16, 16), (768, 8, 8), (1024, 4, 4), (1280, 2, 2) + "yolov5-panet": [256, 512, 1024], # (256, 32, 32), (512, 16, 16), (1024, 8, 8) + "yolov5l6": [256, 512, 768, 1024], # (256, 32, 32), (512, 16, 16), (768, 8, 8), (1024, 4, 4) + "yolov5m6": [192, 384, 576, 768], # (192, 32, 32), (384, 16, 16), (576, 8, 8), (768, 4, 4) + 
"yolov5s6": [128, 256, 384, 512], # (128, 32, 32), (256, 16, 16), (384, 8, 8), (512, 4, 4) + "yolov5x6": [320, 640, 960, 1280], # (320, 32, 32), (640, 16, 16), (960, 8, 8), (1280, 4, 4) + "yolov5s-transformer": [128, 256, 512], # (128, 32, 32), (256, 16, 16), (512, 8, 8) +} +# fmt: on + + +class HybridYOLOV5(nn.Module): + """ + Info: + Create a multitask variant of any YOLO model from ultralytics + Currently, multitasking detection + classification is supported. An + arbitrary number of classification heads can be created by passing + in a list of `ClassifierConfig`s + """ + + # HACK sort of... as subclassing is a bit problematic with super(...).__init__() + fuse = Model.fuse + nms = Model.nms + _initialize_biases = Model._initialize_biases + _print_biases = Model._print_biases + autoshape = Model.autoshape + info = Model.info + in_export_mode = False + + def __init__( + self, + cfg, # Path to `.yaml` config + ch=3, # Num. input channels (3 for RGB image) + nc=None, # Num. bbox classes + anchors=None, + classifier_configs: Dict[str, ClassifierConfig] = None, + ): + super(HybridYOLOV5, self).__init__() + + if isinstance(cfg, dict): + self.yaml = cfg # model dict + else: # is *.yaml + import yaml # for torch hub + + self.yaml_file = Path(cfg).name + with open(cfg) as f: + self.yaml = yaml.safe_load(f) # model dict + + self.classifier_configs = classifier_configs + self.build_classifier_heads() + + # Define model + ch = self.yaml["ch"] = self.yaml.get("ch", ch) # input channels + if nc and nc != self.yaml["nc"]: + logger.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}") + self.yaml["nc"] = nc # override yaml value + if anchors: + logger.info(f"Overriding model.yaml anchors with anchors={anchors}") + self.yaml["anchors"] = round(anchors) # override yaml value + self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) + self.names = [str(i) for i in range(self.yaml["nc"])] # default names + self.inplace = self.yaml.get("inplace", True) + # logger.info([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))]) + + # Build strides, anchors + m = self.model[-1] # Detect() + if isinstance(m, Detect): + s = 256 # 2x min stride + m.inplace = self.inplace + # NOTE: This is the only modified line before classifier heads + # because we are now returning 2 outputs, not one + m.stride = torch.tensor( + # Index into [0] because [1]th index is the classification preds + [s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))[0]] + # [s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))] + ) # forward + m.anchors /= m.stride.view(-1, 1, 1) + check_anchor_order(m) + self.stride = m.stride + self._initialize_biases() # only run once + # logger.info('Strides: %s' % m.stride.tolist()) + + # Init weights, biases + initialize_weights(self) + self.info() + logger.success(f"Built *{Path(self.yaml_file).stem}* model successfully") + + self.post_init() + + def post_init(self): + pass + + def set_export_mode(self, mode: bool): + self.in_export_mode = mode + + def build_classifier_heads(self): + """ + Description: + Build classifier heads from `self.classifier_configs`. 
+            Checks whether `num_fpn_features` is given and correct for each
+            classifier config, and corrects it if not
+        """
+        arch = Path(self.yaml_file).stem
+        fpn_dims = np.array(YOLO_FEATURE_MAP_DIMS[arch])
+
+        for task, cfg in self.classifier_configs.items():
+            num_fpn_features = (
+                sum(fpn_dims) if cfg.fpn_keys is None else sum(fpn_dims[cfg.fpn_keys])
+            )
+
+            if cfg.num_fpn_features is None:
+                cfg.num_fpn_features = num_fpn_features
+
+            elif cfg.num_fpn_features != num_fpn_features:
+                logger.warning(
+                    f"Incompatible `num_fpn_features={cfg.num_fpn_features}` detected in task '{task}'. "
+                    f"Replacing with the correct dimensions: {num_fpn_features}"
+                )
+                cfg.num_fpn_features = num_fpn_features
+
+        self.classifier_heads = build_classifier_heads_from_configs(
+            self.classifier_configs
+        )
+        logger.success(f"Built classifier heads successfully")
+
+    def forward(self, x, profile=False):
+        return self.forward_once(x=x, profile=profile)
+
+    # This is here for API compatibility with the main repo; will likely not be used
+    def forward_augment(self, x):
+        raise NotImplementedError
+
+    # TODO: multi-task multi-augmentation training
+    def forward_multi_augment(self, x: Dict[str, Tensor]):
+        raise NotImplementedError
+
+    def forward_once(self, x, profile=False) -> Tuple[Tensor, Dict[str, Tensor]]:
+        y, dt = [], []  # outputs
+        classification_preds: Dict[str, Tensor] = {}
+        for m in self.model:
+            if m.f != -1:  # if not from previous layer
+                x = (
+                    y[m.f]
+                    if isinstance(m.f, int)
+                    else [x if j == -1 else y[j] for j in m.f]
+                )  # from earlier layers
+
+            if profile:
+                o = (
+                    thop.profile(m, inputs=(x,), verbose=False)[0] / 1e9 * 2
+                    if thop
+                    else 0
+                )  # FLOPs
+                t = time_synchronized()
+                for _ in range(10):
+                    _ = m(x)
+                dt.append((time_synchronized() - t) * 100)
+                if m == self.model[0]:
+                    logger.info(
+                        f"{'time (ms)':>10s} {'GFLOPs':>10s} {'params':>10s} {'module'}"
+                    )
+                logger.info(f"{dt[-1]:10.2f} {o:10.2f} {m.np:10.0f} {m.type}")
+
+            """
+            This is where the feature maps are passed into the classification heads.
+            Is there a cleaner way to do this? It's tricky as the whole model is wrapped in an
+            `nn.Sequential` container and we can't access attributes like `.backbone` or `.neck`.
+            We know for certain that `Detect` is the last layer in the model, so this should be
+            safe to do.
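+
+            Illustratively, callers unpack the result of this method like so
+            (a sketch, not exact shapes):
+
+                detection_preds, classification_preds = model.forward_once(x)
+                # classification_preds: {task_name: logits of shape (N, out_classes)}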
+ """ + if isinstance(m, Detect): + for name, head in self.classifier_heads.items(): + classification_preds[name] = head(x) + + x = m(x) # run + y.append(x if m.i in self.save else None) # save output + + if profile: + logger.info("%.1fms total" % sum(dt)) + + # TODO: Replace with `torch.jit.is_scripting()` if that works for tracing too + if self.in_export_mode: + # Return tuple in export mode + return x, tuple(classification_preds.values()) + else: + return x, classification_preds From e56fa0e962f745f6be8b62a2a5154f961ab0e6ff Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Sun, 13 Jun 2021 09:51:55 +0530 Subject: [PATCH 013/122] add sample usage in docstring --- .../models/multitask/ultralytics/yolo_multitask.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/icevision/models/multitask/ultralytics/yolo_multitask.py b/icevision/models/multitask/ultralytics/yolo_multitask.py index 870d44a9d..8c64445a2 100644 --- a/icevision/models/multitask/ultralytics/yolo_multitask.py +++ b/icevision/models/multitask/ultralytics/yolo_multitask.py @@ -58,7 +58,16 @@ class HybridYOLOV5(nn.Module): Create a multitask variant of any YOLO model from ultralytics Currently, multitasking detection + classification is supported. An arbitrary number of classification heads can be created by passing - in a list of `ClassifierConfig`s + in a dictionary of `ClassifierConfig`s where the keys are names of tasks + + Sample Usage: + HybridYOLOV5( + cfg="models/yolov5s.yaml", + classifier_configs=dict( + classifier_head_1=ClassifierConfig(out_classes=10), + classifier_head_2=ClassifierConfig(out_classes=20, multilabel=True), + ), + ) """ # HACK sort of... as subclassing is a bit problematic with super(...).__init__() From bcc02569df07be96d6f57c7a7d7aa789fe1330be Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Sun, 13 Jun 2021 09:57:21 +0530 Subject: [PATCH 014/122] basic dataloader --- .../multitask/ultralytics/dataloader.py | 65 +++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 icevision/models/multitask/ultralytics/dataloader.py diff --git a/icevision/models/multitask/ultralytics/dataloader.py b/icevision/models/multitask/ultralytics/dataloader.py new file mode 100644 index 000000000..c601f0bc5 --- /dev/null +++ b/icevision/models/multitask/ultralytics/dataloader.py @@ -0,0 +1,65 @@ +from icevision.imports import * +from icevision.core import * +from icevision.models.utils import * +from icevision.models.ultralytics.yolov5.dataloaders import ( + _build_train_sample as _build_train_detection_sample, +) +from torch import Tensor +from icevision.models.multitask.data.dtypes import * + + +def build_single_aug_batch( + records: Sequence[RecordType], +) -> Tuple[TensorList, TensorDict, Sequence[RecordType]]: + """Builds a batch in the format required by the model when training. + + # Arguments + records: A `Sequence` of records. + + # Returns + A tuple with two items. The first will be a tuple like `(images, detection_targets, classification_targets)` + in the input format required by the model. The second will be an updated list + of the input records. + + # Examples + + Use the result of this function to feed the model. 
+ ```python + batch, records = build_train_batch(records) + outs = model(*batch) + ``` + """ + images, targets = [], [] + classification_targets = defaultdict(list) + + for i, record in enumerate(records): + image, target = _build_train_detection_sample(record) + images.append(image) + + if target.numel() > 0: + target[:, 0] = i + + targets.append(target) + + # Classification + for comp in record.components: + name = comp.task.name + if isinstance(comp, ClassificationLabelsRecordComponent): + if comp.is_multilabel: + labels = comp.one_hot_encoded() + classification_targets[name].append(labels) + else: + labels = comp.label_ids + classification_targets[name].extend(labels) + + classification_targets = {k: tensor(v) for k, v in classification_targets.items()} + + return ( + torch.stack(images, 0), + torch.cat(targets, 0), + classification_targets, + ), records + + +def build_multi_aug_batch(): + raise NotImplementedError From 0a76855a1114e17023d8f7aa52946d8c64ce44a3 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Sun, 13 Jun 2021 12:01:32 +0530 Subject: [PATCH 015/122] fix error --- icevision/models/multitask/classification_heads/head.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icevision/models/multitask/classification_heads/head.py b/icevision/models/multitask/classification_heads/head.py index 242a9e496..a97e4be68 100644 --- a/icevision/models/multitask/classification_heads/head.py +++ b/icevision/models/multitask/classification_heads/head.py @@ -56,7 +56,7 @@ class ClassifierConfig: def __post_init__(self): if isinstance(self.fpn_keys, int): - self.fpn_keys = [fpn_keys] + self.fpn_keys = [self.fpn_keys] if self.multilabel: if self.topk is None and self.thresh is None: From 5deddd866ec6e6b7ab68dc2126641486cc7e0151 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Sun, 13 Jun 2021 18:12:54 +0530 Subject: [PATCH 016/122] variable names --- .../models/multitask/ultralytics/dataloader.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/icevision/models/multitask/ultralytics/dataloader.py b/icevision/models/multitask/ultralytics/dataloader.py index c601f0bc5..21fc8ee4f 100644 --- a/icevision/models/multitask/ultralytics/dataloader.py +++ b/icevision/models/multitask/ultralytics/dataloader.py @@ -29,17 +29,15 @@ def build_single_aug_batch( outs = model(*batch) ``` """ - images, targets = [], [] + images, detection_targets = [], [] classification_targets = defaultdict(list) for i, record in enumerate(records): - image, target = _build_train_detection_sample(record) + image, detection_target = _build_train_detection_sample(record) images.append(image) - if target.numel() > 0: - target[:, 0] = i - - targets.append(target) + detection_target[:, 0] = i if detection_target.numel() > 0 else None + detection_targets.append(target) # Classification for comp in record.components: @@ -56,10 +54,6 @@ def build_single_aug_batch( return ( torch.stack(images, 0), - torch.cat(targets, 0), + torch.cat(detection_targets, 0), classification_targets, ), records - - -def build_multi_aug_batch(): - raise NotImplementedError From c72acfd4bf935c37fce861fccb29d0a3453a2b88 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Sun, 13 Jun 2021 18:40:48 +0530 Subject: [PATCH 017/122] fix --- icevision/utils/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/icevision/utils/utils.py b/icevision/utils/utils.py index bffb4f64a..65a97e4e9 100644 --- a/icevision/utils/utils.py +++ b/icevision/utils/utils.py @@ -116,6 +116,8 @@ def patch_class_to_main(cls): def 
flatten(x: Any) -> List[Any]: + import pandas as pd + flattened_list = [] for item in x: if isinstance(item, (tuple, list, np.ndarray, pd.Series)): From 2bcb094171b35685e31169456fdc745069571eda Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Sun, 13 Jun 2021 18:40:55 +0530 Subject: [PATCH 018/122] multi aug dataloader, fix typo --- .../multitask/ultralytics/dataloader.py | 50 ++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/icevision/models/multitask/ultralytics/dataloader.py b/icevision/models/multitask/ultralytics/dataloader.py index 21fc8ee4f..33c0863d1 100644 --- a/icevision/models/multitask/ultralytics/dataloader.py +++ b/icevision/models/multitask/ultralytics/dataloader.py @@ -37,7 +37,7 @@ def build_single_aug_batch( images.append(image) detection_target[:, 0] = i if detection_target.numel() > 0 else None - detection_targets.append(target) + detection_targets.append(detection_target) # Classification for comp in record.components: @@ -57,3 +57,51 @@ def build_single_aug_batch( torch.cat(detection_targets, 0), classification_targets, ), records + + +def build_multi_aug_batch( + records: Sequence[RecordType], classification_transform_groups: dict +): + detection_images = [] + detection_targets = [] + classification_data = defaultdict(lambda: defaultdict(list)) + classification_targets = defaultdict(list) + + for i, record in enumerate(records): + detection_image, detection_target = _build_train_detection_sample(record) + detection_images.append(detection_image) + + detection_target[:, 0] = i if detection_target.numel() > 0 else None + detection_targets.append(detection_target) + + for key, group in classification_transform_groups.items(): + task = getattr(record, group["tasks"][0]) + classification_data[key]["tasks"] = group["tasks"] + classification_data[key]["images"].append(im2tensor(task.img)) + + for comp in record.components: + name = comp.task.name + if isinstance(comp, ClassificationLabelsRecordComponent): + if comp.is_multilabel: + labels = comp.one_hot_encoded() + classification_targets[name].append(labels) + else: + labels = comp.label_ids + classification_targets[name].extend(labels) + + # Massage data + for group in classification_data.values(): + group["targets"] = { + task: tensor(classification_targets[task]) for task in group["tasks"] + } + group["images"] = torch.stack(group["images"]) + classification_data = {k: dict(v) for k, v in classification_data.items()} + + detection_data = dict( + images=torch.stack(detection_images, 0), + targets=torch.cat(detection_targets, 0), + ) + + return (detection_data, classification_data) + # return (detection_data, classification_data), records + # return dict(detection=detection_data, classification=classification_data) From 01ea8354a9a40775af947a87d67c186b54a85c93 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Mon, 14 Jun 2021 00:13:24 +0530 Subject: [PATCH 019/122] add links to discord where relevant --- icevision/models/multitask/ultralytics/dataloader.py | 12 ++++++++++++ .../models/multitask/ultralytics/yolo_multitask.py | 10 ++++++++++ 2 files changed, 22 insertions(+) diff --git a/icevision/models/multitask/ultralytics/dataloader.py b/icevision/models/multitask/ultralytics/dataloader.py index 33c0863d1..a5640c63f 100644 --- a/icevision/models/multitask/ultralytics/dataloader.py +++ b/icevision/models/multitask/ultralytics/dataloader.py @@ -1,3 +1,13 @@ +""" +YOLO-V5 dataloaders for multitask training. 
+
+The model uses a peculiar format for bounding box annotations where the
+length of the tensor is the total number of bounding boxes in that batch.
+The first dimension is the index of the image that the box belongs to.
+See https://discord.com/channels/735877944085446747/770279401791160400/853691059338084372
+for a more thorough explanation
+"""
+
 from icevision.imports import *
 from icevision.core import *
 from icevision.models.utils import *
@@ -36,6 +46,7 @@ def build_single_aug_batch(
         image, detection_target = _build_train_detection_sample(record)
         images.append(image)

+        # See file header for more info on why this is done
         detection_target[:, 0] = i if detection_target.numel() > 0 else None
         detection_targets.append(detection_target)

@@ -71,6 +82,7 @@ def build_multi_aug_batch(
         detection_image, detection_target = _build_train_detection_sample(record)
         detection_images.append(detection_image)

+        # See file header for more info on why this is done
         detection_target[:, 0] = i if detection_target.numel() > 0 else None
         detection_targets.append(detection_target)

diff --git a/icevision/models/multitask/ultralytics/yolo_multitask.py b/icevision/models/multitask/ultralytics/yolo_multitask.py
index 8c64445a2..d94d74a50 100644
--- a/icevision/models/multitask/ultralytics/yolo_multitask.py
+++ b/icevision/models/multitask/ultralytics/yolo_multitask.py
@@ -1,3 +1,13 @@
+"""
+Multitask implementation of YOLO-V5.
+Supports the following tasks:
+    * Object Detection
+    * Image Classification
+
+See https://discord.com/channels/735877944085446747/770279401791160400/853698548750745610
+    for a more detailed discussion
+"""
+
 __all__ = ["HybridYOLOV5", "ClassifierConfig"]

 import torch

From 6f4da157c4ad24d173faf758ef20699a68267882 Mon Sep 17 00:00:00 2001
From: Rahul Somani
Date: Mon, 14 Jun 2021 08:33:19 +0530
Subject: [PATCH 020/122] keep code consistent

---
 icevision/models/multitask/mmdet/dataloaders.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/icevision/models/multitask/mmdet/dataloaders.py b/icevision/models/multitask/mmdet/dataloaders.py
index 486d07e03..f26f3e403 100644
--- a/icevision/models/multitask/mmdet/dataloaders.py
+++ b/icevision/models/multitask/mmdet/dataloaders.py
@@ -133,9 +133,11 @@ def build_multi_aug_batch(
             name = comp.task.name
             if isinstance(comp, ClassificationLabelsRecordComponent):
                 if comp.is_multilabel:
-                    classification_labels[name].append(comp.one_hot_encoded())
+                    labels = comp.one_hot_encoded()
+                    classification_targets[name].append(labels)
                 else:
-                    classification_labels[name].extend(comp.label_ids)
+                    labels = comp.label_ids
+                    classification_targets[name].extend(labels)

     # Massage data
     for group in classification_data.values():

From c7bdfa2b073840761e4190ce1bbc54121474f3b0 Mon Sep 17 00:00:00 2001
From: Rahul Somani
Date: Mon, 14 Jun 2021 09:30:09 +0530
Subject: [PATCH 021/122] create `utils` module; move common code there

minor cleanup
---
 icevision/models/multitask/mmdet/pl_adapter.py   |  1 +
 icevision/models/multitask/mmdet/single_stage.py | 13 +++----------
 icevision/models/multitask/utils/__init__.py    |  2 ++
 .../models/multitask/{data => utils}/dtypes.py  |  0
 icevision/models/multitask/utils/model.py       |  9 +++++++++
 5 files changed, 15 insertions(+), 10 deletions(-)
 create mode 100644 icevision/models/multitask/utils/__init__.py
 rename icevision/models/multitask/{data => utils}/dtypes.py (100%)
 create mode 100644 icevision/models/multitask/utils/model.py

diff --git a/icevision/models/multitask/mmdet/pl_adapter.py
b/icevision/models/multitask/mmdet/pl_adapter.py index 7de005218..541144dde 100644 --- a/icevision/models/multitask/mmdet/pl_adapter.py +++ b/icevision/models/multitask/mmdet/pl_adapter.py @@ -2,6 +2,7 @@ # NOTE `torchmetrics` comes installed with `pytorch-lightning` # We could in theory also do `pl.metrics` +# import pytorch_lightning.metrics as tm import torchmetrics as tm from icevision.all import * from mmcv.utils import ConfigDict diff --git a/icevision/models/multitask/mmdet/single_stage.py b/icevision/models/multitask/mmdet/single_stage.py index aa0a8398f..fd1c0f946 100644 --- a/icevision/models/multitask/mmdet/single_stage.py +++ b/icevision/models/multitask/mmdet/single_stage.py @@ -1,6 +1,5 @@ from typing import Dict, List from collections import OrderedDict -from icevision.models.multitask.data.dtypes import * from icevision.models.multitask.classification_heads import * @@ -17,18 +16,18 @@ from mmdet.core.bbox import * from typing import Union, List, Dict, Tuple -from ...data.dataloader import ( +from icevision.models.multitask.mmdet.dataloaders import ( TensorDict, ClassificationGroupDataDict, DataDictClassification, DataDictDetection, ) import numpy as np -from enum import Enum +from icevision.models.multitask.utils.model import * +from icevision.models.multitask.utils.dtypes import * __all__ = [ - "ForwardType", "HybridSingleStageDetector", "build_backbone", "build_detector", @@ -37,12 +36,6 @@ ] -class ForwardType(Enum): - TRAIN_MULTI_AUG = 1 - TRAIN = 2 - EVAL = 3 - - @DETECTORS.register_module(name="HybridSingleStageDetector") class HybridSingleStageDetector(SingleStageDetector): # TODO: Add weights for loss functions diff --git a/icevision/models/multitask/utils/__init__.py b/icevision/models/multitask/utils/__init__.py new file mode 100644 index 000000000..3b4ead489 --- /dev/null +++ b/icevision/models/multitask/utils/__init__.py @@ -0,0 +1,2 @@ +from .dtypes import * +from .model import * diff --git a/icevision/models/multitask/data/dtypes.py b/icevision/models/multitask/utils/dtypes.py similarity index 100% rename from icevision/models/multitask/data/dtypes.py rename to icevision/models/multitask/utils/dtypes.py diff --git a/icevision/models/multitask/utils/model.py b/icevision/models/multitask/utils/model.py new file mode 100644 index 000000000..26700bb1c --- /dev/null +++ b/icevision/models/multitask/utils/model.py @@ -0,0 +1,9 @@ +from enum import Enum + +__all__ = ["ForwardType"] + + +class ForwardType(Enum): + TRAIN_MULTI_AUG = 1 + TRAIN = 2 + EVAL = 3 From 2e1e2d474b4d3bafed2db28af7f9796a756d246e Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Mon, 14 Jun 2021 09:34:20 +0530 Subject: [PATCH 022/122] update --- icevision/models/multitask/mmdet/pl_adapter.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/icevision/models/multitask/mmdet/pl_adapter.py b/icevision/models/multitask/mmdet/pl_adapter.py index 541144dde..f5c7d27cd 100644 --- a/icevision/models/multitask/mmdet/pl_adapter.py +++ b/icevision/models/multitask/mmdet/pl_adapter.py @@ -15,7 +15,7 @@ class HybridSingleStageDetectorLightningAdapter(pl.LightningModule, ABC): - """Lightning module specialized for MultiTask training, with metrics support. + """Lightning module specialized for EfficientDet, with metrics support. The methods `forward`, `training_step`, `validation_step`, `validation_epoch_end` are already overriden. @@ -23,7 +23,6 @@ class HybridSingleStageDetectorLightningAdapter(pl.LightningModule, ABC): # Arguments model: The pytorch model to use. 
metrics: `Sequence` of metrics to use. - debug: Whether to run in `debug` mode. Prints out useful info # Returns A `LightningModule`. @@ -40,8 +39,7 @@ def __init__( self.model = model self.debug = debug - # TODO: Make `self.classification_metrics` a `nn.ModuleDict` - # self.classification_metrics = {} + self.classification_metrics = {} for name, head in model.classifier_heads.items(): if head.multilabel: thresh = head.thresh if head.thresh is not None else 0.5 @@ -77,7 +75,8 @@ def training_step(self, batch: Tuple[dict, Sequence[RecordType]], batch_idx): # Log losses self._log_vars(outputs["log_vars"], "train") - # Return loss for PL to do its thing + # NOTE: outputs["loss"] is not scaled in distributed training... ? + # Maybe we should return `outputs["log_vars"]["loss"]` instead? return outputs["loss"] def validation_step(self, batch, batch_idx): @@ -100,7 +99,7 @@ def validation_step(self, batch, batch_idx): batch=data, raw_preds=raw_preds, records=records ) self.accumulate_metrics(preds) - # self._log_vars(outputs["log_vars"], "valid") + self._log_vars(outputs["log_vars"], "valid") # TODO: is train and eval model automatically set by lighnting? self.model.train() @@ -143,7 +142,7 @@ def log_classification_metrics( def _log_vars(self, log_vars: dict, mode: str): for k, v in log_vars.items(): - self.log(f"{mode}/{k}", v) + self.log(f"{mode}/{k}", v.item() if isinstance(v, torch.Tensor) else v) def validation_epoch_end(self, outs): self.finalize_metrics() From 35e0cec2747266c80258dab2f641a57d5522b590 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Mon, 14 Jun 2021 09:52:06 +0530 Subject: [PATCH 023/122] rename with lib naming scheme --- .../multitask/ultralytics/{dataloader.py => dataloaders.py} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename icevision/models/multitask/ultralytics/{dataloader.py => dataloaders.py} (98%) diff --git a/icevision/models/multitask/ultralytics/dataloader.py b/icevision/models/multitask/ultralytics/dataloaders.py similarity index 98% rename from icevision/models/multitask/ultralytics/dataloader.py rename to icevision/models/multitask/ultralytics/dataloaders.py index a5640c63f..9b1a0126a 100644 --- a/icevision/models/multitask/ultralytics/dataloader.py +++ b/icevision/models/multitask/ultralytics/dataloaders.py @@ -15,7 +15,7 @@ _build_train_sample as _build_train_detection_sample, ) from torch import Tensor -from icevision.models.multitask.data.dtypes import * +from icevision.models.multitask.utils.dtypes import * def build_single_aug_batch( From 4ce890f66d961bff416f40296d1a6fa81988c32a Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Mon, 14 Jun 2021 14:17:12 +0530 Subject: [PATCH 024/122] fix imports, docs --- icevision/models/multitask/mmdet/dataloaders.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/icevision/models/multitask/mmdet/dataloaders.py b/icevision/models/multitask/mmdet/dataloaders.py index f26f3e403..150ca5903 100644 --- a/icevision/models/multitask/mmdet/dataloaders.py +++ b/icevision/models/multitask/mmdet/dataloaders.py @@ -1,6 +1,11 @@ # from icevision.all import * +from icevision.imports import * from icevision.core import * -from icevision.models.multitask.data.dtypes import * +from icevision.models.multitask.utils.dtypes import * +from icevision.models.multitask.utils.dtypes import ( + DataDictClassification, # Not imported in __all__ as they are mmdet specific + DataDictDetection, +) from icevision.models.mmdet.common.utils import convert_background_from_zero_to_last from 
icevision.models.utils import unload_records from icevision.models.mmdet.common.bbox.dataloaders import ( @@ -93,14 +98,14 @@ def build_multi_aug_batch( "group1": dict( tasks = ["shot_composition"], images: Tensor = ..., - gt_classification_labels=dict( + classification_labels=dict( "shot_composition": Tensor = ..., ) ), "group2": dict( tasks = ["color_saturation", "shot_framing"], images: Tensor = ..., - gt_classification_labels=dict( + classification_labels=dict( "color_saturation": Tensor = ..., "shot_framing": Tensor = ..., ) From a7b374c1a09cb16a95d6215cd02bb2f1a8d50b2f Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Mon, 14 Jun 2021 14:17:39 +0530 Subject: [PATCH 025/122] forward api (wip) --- .../multitask/ultralytics/yolo_multitask.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/icevision/models/multitask/ultralytics/yolo_multitask.py b/icevision/models/multitask/ultralytics/yolo_multitask.py index d94d74a50..0b2376a13 100644 --- a/icevision/models/multitask/ultralytics/yolo_multitask.py +++ b/icevision/models/multitask/ultralytics/yolo_multitask.py @@ -25,6 +25,7 @@ from icevision.models.multitask.classification_heads.builder import ( build_classifier_heads_from_configs, ) +from icevision.models.multitask.utils.model import ForwardType # from .yolo import * from yolov5.models.yolo import * @@ -185,8 +186,18 @@ def build_classifier_heads(self): ) logger.success(f"Built classifier heads successfully") - def forward(self, x, profile=False): - return self.forward_once(x=x, profile=profile) + def forward(self, x, profile=False, step_type=ForwardType.TRAIN): + if step_type is ForwardType.TRAIN: + return self.forward_once(x=x, profile=profile) + elif step_type is ForwardType.TRAIN_MULTI_AUG: + return self.forward_multi_augment(x=x, profile=profile) + elif step_type is ForwardType.EVAL: + self.eval() + return self.forward_once(x=x, profile=False) + else: + raise RuntimeError( + f"Invalid `step_type`. Received: {type(step_type.__class__)}; Expected: {ForwardType.__class__}" + ) # This is here for API compatibility with the main repo; will likely not be used def forward_augment(self, x): From 8a8704d97ed849ab6d71c41ca0c1f5e988774716 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Mon, 14 Jun 2021 14:18:07 +0530 Subject: [PATCH 026/122] fix return format, add docs for multi-aug dataloader --- .../multitask/ultralytics/dataloaders.py | 74 ++++++++++++++++++- 1 file changed, 71 insertions(+), 3 deletions(-) diff --git a/icevision/models/multitask/ultralytics/dataloaders.py b/icevision/models/multitask/ultralytics/dataloaders.py index 9b1a0126a..2aafa9a2e 100644 --- a/icevision/models/multitask/ultralytics/dataloaders.py +++ b/icevision/models/multitask/ultralytics/dataloaders.py @@ -73,6 +73,76 @@ def build_single_aug_batch( def build_multi_aug_batch( records: Sequence[RecordType], classification_transform_groups: dict ): + """ + Docs: + Take as inputs `records` and `classification_transform_groups` and return + a tuple of dictionaries, one for detection data and the other for classification. + + Each `record` is expected to have a specific structure. 
For example: + + BaseRecord + + common: + - Image ID: 4 + - Filepath: sample_image.png + - Image: 640x640x3 Image + - Image size ImgSize(width=640, height=640) + color_saturation: + - Image: 640x640x3 Image + - Class Map: + - Labels: [1] + shot_composition: + - Class Map: + - Labels: [1] + - Image: 640x640x3 Image + detection: + - BBoxes: [] + - Image: 640x640x3 Image + - Class Map: + - Labels: [1] + shot_framing: + - Class Map: + - Labels: [3] + - Image: 640x640x3 Image + + `classification_transform_groups` describes how to group classification data. For example: + { + "group1": dict(tasks=["shot_composition"]), + "group2": dict(tasks=["color_saturation", "shot_framing"]) + } + + + Returns: + A tuple with two items: + 1. A tuple with two dictionaries - (`detection_data`, `classification_data`) + `detection_data`: + { + "detection": dict( + images: Tensor = ..., # (N,C,H,W) + targets: Tensor = ..., # of shape (num_boxes, 6) + # (img_idx, box_class_idx, **bbox_relative_coords) + ) + } + `classification_data`: + { + "group1": dict( + tasks = ["shot_composition"], + images: Tensor = ..., + targets=dict( + "shot_composition": Tensor = ..., + ) + ), + "group2": dict( + tasks = ["color_saturation", "shot_framing"], + images: Tensor = ..., + targets=dict( + "color_saturation": Tensor = ..., + "shot_framing": Tensor = ..., + ) + ) + } + 2. Loaded records + """ detection_images = [] detection_targets = [] classification_data = defaultdict(lambda: defaultdict(list)) @@ -114,6 +184,4 @@ def build_multi_aug_batch( targets=torch.cat(detection_targets, 0), ) - return (detection_data, classification_data) - # return (detection_data, classification_data), records - # return dict(detection=detection_data, classification=classification_data) + return (detection_data, classification_data), records From 516edcc6482741ca2357618ff90e99d9f1971c4f Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Mon, 14 Jun 2021 14:18:21 +0530 Subject: [PATCH 027/122] add icevision style model API --- .../models/multitask/ultralytics/model.py | 119 ++++++++++++++++++ .../models/multitask/ultralytics/utils.py | 29 +++++ 2 files changed, 148 insertions(+) create mode 100644 icevision/models/multitask/ultralytics/model.py create mode 100644 icevision/models/multitask/ultralytics/utils.py diff --git a/icevision/models/multitask/ultralytics/model.py b/icevision/models/multitask/ultralytics/model.py new file mode 100644 index 000000000..8ea034478 --- /dev/null +++ b/icevision/models/multitask/ultralytics/model.py @@ -0,0 +1,119 @@ +""" +Largely copied over from `icevision.models.ultralytics.yolov5.model` +The only aspect added is the ability to pass in a `Dict[str, ClassifierCongig]` to + create the classification heads +""" + + +__all__ = ["model"] + +from icevision.imports import * +from icevision.utils import * + +import yaml +import yolov5 +from yolov5.utils.google_utils import attempt_download +from yolov5.utils.torch_utils import intersect_dicts +from yolov5.utils.general import check_img_size + +# from icevision.models.ultralytics.yolov5.utils import * +from icevision.models.multitask.ultralytics.utils import * +from icevision.models.ultralytics.yolov5.backbones import * + +from icevision.models.multitask.ultralytics.yolo_multitask import HybridYOLOV5 +from icevision.models.multitask.classification_heads import ClassifierConfig + +yolo_dir = get_root_dir() / "yolo" +yolo_dir.mkdir(exist_ok=True) + + +def model( + backbone: YoloV5BackboneConfig, + num_classes: int, + img_size: int, # must be multiple of 32 + device: 
Optional[torch.device] = None, + classifier_configs: Dict[str, ClassifierConfig] = None, +) -> nn.Module: + model_name = backbone.model_name + pretrained = backbone.pretrained + + # this is to remove background from ClassMap as discussed + # here: https://github.com/ultralytics/yolov5/issues/2950 + # and here: https://discord.com/channels/735877944085446747/782062040168267777/836692604224536646 + # so we should pass `num_classes=parser.class_map.num_classes` + num_classes -= 1 + + device = ( + torch.device("cuda" if torch.cuda.is_available() else "cpu") + if device is None + else device + ) + + cfg_filepath = Path(yolov5.__file__).parent / f"models/{model_name}.yaml" + if pretrained: + weights_path = yolo_dir / f"{model_name}.pt" + + with open(Path(yolov5.__file__).parent / "data/hyp.finetune.yaml") as f: + hyp = yaml.load(f, Loader=yaml.SafeLoader) + + attempt_download(weights_path) # download if not found locally + sys.path.insert(0, str(Path(yolov5.__file__).parent)) + ckpt = torch.load(weights_path, map_location=device) # load checkpoint + sys.path.remove(str(Path(yolov5.__file__).parent)) + if hyp.get("anchors"): + ckpt["model"].yaml["anchors"] = round(hyp["anchors"]) # force autoanchor + model = HybridYOLOV5( + cfg_filepath or ckpt["model"].yaml, + ch=3, + nc=num_classes, + classifier_configs=classifier_configs, + ).to(device) + exclude = [] # exclude keys + state_dict = ckpt["model"].float().state_dict() # to FP32 + state_dict = intersect_dicts( + state_dict, model.state_dict(), exclude=exclude + ) # intersect + model.load_state_dict(state_dict, strict=False) # load + else: + with open(Path(yolov5.__file__).parent / "data/hyp.scratch.yaml") as f: + hyp = yaml.load(f, Loader=yaml.SafeLoader) + + model = HybridYOLOV5( + cfg_filepath, + ch=3, + nc=num_classes, + anchors=hyp.get("anchors"), + classifier_configs=classifier_configs, + ).to(device) + + gs = int(model.stride.max()) # grid size (max stride) + nl = model.model[-1].nl # number of detection layers (used for scaling hyp['obj']) + imgsz = check_img_size(img_size, gs) # verify imgsz are gs-multiples + + hyp["box"] *= 3.0 / nl # scale to layers + hyp["cls"] *= num_classes / 80.0 * 3.0 / nl # scale to classes and layers + hyp["obj"] *= (imgsz / 640) ** 2 * 3.0 / nl # scale to image size and layers + model.nc = num_classes # attach number of classes to model + model.hyp = hyp # attach hyperparameters to model + model.gr = 1.0 # iou loss ratio (obj_loss = 1.0 or iou) + + def param_groups_fn(model: nn.Module) -> List[List[nn.Parameter]]: + spp_index = [ + i + 2 + for i, layer in enumerate(model.model.children()) + if layer._get_name() == "SPP" + ][0] + backbone = list(model.model.children())[:spp_index] + neck = list(model.model.children())[spp_index:-1] + head = list(model.model.children())[-1] + + layers = [nn.Sequential(*backbone), nn.Sequential(*neck), nn.Sequential(head)] + + param_groups = [list(group.parameters()) for group in layers] + check_all_model_params_in_groups2(model.model, param_groups) + + return param_groups + + model.param_groups = MethodType(param_groups_fn, model) + + return model diff --git a/icevision/models/multitask/ultralytics/utils.py b/icevision/models/multitask/ultralytics/utils.py new file mode 100644 index 000000000..5ae9ea83c --- /dev/null +++ b/icevision/models/multitask/ultralytics/utils.py @@ -0,0 +1,29 @@ +""" +This file is redundant in terms of code as it uses the exact same code +as `icevision.models.ultralytics.yolov5.utils.YoloV5BackboneConfig` + +We're keeping it to maintain structure, and in 
case we want to change
+something in the future that is multitask model specific
+"""
+
+from icevision.models.ultralytics.yolov5.utils import YoloV5BackboneConfig
+
+__all__ = ["YoloV5BackboneConfig"]
+
+# from icevision.imports import *
+# from icevision.backbones import BackboneConfig
+
+
+# class YoloV5MultitaskBackboneConfig(BackboneConfig):
+#     def __init__(self, model_name: str):
+#         self.model_name = model_name
+#         self.pretrained: bool
+
+#     def __call__(self, pretrained: bool = True) -> "YoloV5MultitaskBackboneConfig":
+#         """Completes the configuration of the backbone
+
+#         # Arguments
+#             pretrained: If True, use a pretrained backbone (on COCO).
+#         """
+#         self.pretrained = pretrained
+#         return self

From 1a8857f8206ef61cb9e1bcf590d931c057f83577 Mon Sep 17 00:00:00 2001
From: Rahul Somani
Date: Mon, 14 Jun 2021 14:18:40 +0530
Subject: [PATCH 028/122] lightning adapter w/ train step (TODO val)

---
 .../multitask/ultralytics/pl_adapter.py | 116 ++++++++++++++++++
 1 file changed, 116 insertions(+)

diff --git a/icevision/models/multitask/ultralytics/pl_adapter.py b/icevision/models/multitask/ultralytics/pl_adapter.py
index e69de29bb..39688ece5 100644
--- a/icevision/models/multitask/ultralytics/pl_adapter.py
+++ b/icevision/models/multitask/ultralytics/pl_adapter.py
@@ -0,0 +1,116 @@
+# Modified from `icevision.models.mmdet.lightning.model_adapter`
+# NOTE `torchmetrics` comes installed with `pytorch-lightning`
+# We could in theory also do `pl.metrics`
+
+from icevision.models.multitask.classification_heads.head import TensorDict
+import torchmetrics as tm
+import pytorch_lightning as pl
+
+from icevision.imports import *
+from icevision.metrics import *
+from icevision.core import *
+
+from loguru import logger
+from icevision.models.multitask.ultralytics.yolo_multitask import HybridYOLOV5
+from icevision.models.multitask.utils.model import ForwardType
+from yolov5.utils.loss import ComputeLoss
+
+
+class HybridYOLOV5LightningAdapter(pl.LightningModule, ABC):
+    """ """
+
+    def __init__(
+        self,
+        model: HybridYOLOV5,
+        metrics: List[Metric] = None,
+        debug: bool = False,
+    ):
+        super().__init__()
+        self.metrics = metrics or []
+        self.model = model
+        self.debug = debug
+        self.compute_loss = ComputeLoss(model)
+
+        self.classification_metrics = nn.ModuleDict()
+        for name, head in model.classifier_heads.items():
+            if head.multilabel:
+                thresh = head.thresh if head.thresh is not None else 0.5
+                metric = tm.Accuracy(threshold=thresh, subset_accuracy=True)
+            else:
+                metric = tm.Accuracy(threshold=0.01, top_k=1)
+            self.classification_metrics[name] = metric
+            setattr(self, f"{name}_accuracy", metric)
+        self.post_init()
+
+    def post_init(self):
+        pass
+
+    def training_step(self, batch: Tuple[dict, Sequence[RecordType]], batch_idx):
+        batch, _ = batch
+        if isinstance(batch[0], torch.Tensor):
+            (xb, detection_targets, classification_targets) = batch
+            step_type = ForwardType.TRAIN
+
+        elif isinstance(batch[0], dict):
+            (detection_data, classification_data) = batch
+            detection_targets = detection_data["targets"]
+
+            step_type = ForwardType.TRAIN_MULTI_AUG
+            raise RuntimeError
+
+        detection_preds, classification_preds = self(xb, step_type=step_type)
+        detection_loss = self.compute_loss(detection_preds, detection_targets)[0]
+
+        # Iterate through each head and compute classification losses
+        classification_losses = {
+            name: head.compute_loss(
+                predictions=classification_preds[name],
+                targets=classification_targets[name],
+            )
+            for name, head in self.model.classifier_heads.items()
+        }
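+
+        # NOTE: summing the per-task classification losses below gives every
+        # task equal weight; per-task loss weights would slot in here if needed
+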
total_classification_loss = sum(classification_losses.values()) + + self.log_losses( + "train", detection_loss, total_classification_loss, classification_losses + ) + + return detection_loss + total_classification_loss + + def log_losses( + self, + mode: str, + detection_loss: Tensor, + classification_total_loss: Tensor, + classification_losses: TensorDict, + ): + log_vars = dict( + detection_loss=detection_loss, + classification_total_loss=classification_total_loss, + **{ + f"classification_{name}": loss + for name, loss in classification_losses.items() + }, + ) + for k, v in log_vars.items(): + self.log(f"{mode}/{k}", v.item() if isinstance(v, torch.Tensor) else v) + + def validation_step(self, batch, batch_idx): + raise NotImplementedError + + # ======================== TRAINING METHODS ======================== # + + def forward(self, *args, **kwargs): + return self.model(*args, **kwargs) + + # ======================== LOGGING METHODS ======================== # + + def accumulate_metrics(self, preds): + for metric in self.metrics: + metric.accumulate(preds=preds) + + def finalize_metrics(self) -> None: + for metric in self.metrics: + metric_logs = metric.finalize() + for k, v in metric_logs.items(): + self.log(f"{metric.name}/{k}", v) From 73796a5c6c9da8a787eac6378893b5d14c043125 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Mon, 14 Jun 2021 15:23:27 +0530 Subject: [PATCH 029/122] reorganise module to mimic the rest of the library --- .../multitask/ultralytics/yolov5/__init__.py | 33 +++++++++++++++++++ .../multitask/ultralytics/yolov5/backbones.py | 17 ++++++++++ .../ultralytics/{ => yolov5}/dataloaders.py | 0 .../ultralytics/yolov5/fastai/__init__.py | 0 .../ultralytics/yolov5/lightning/__init__.py | 1 + .../lightning/model_adapter.py} | 2 +- .../ultralytics/{ => yolov5}/model.py | 4 +-- .../ultralytics/yolov5/prediction.py | 0 .../ultralytics/{ => yolov5}/utils.py | 0 .../yolo_hybrid.py} | 0 10 files changed, 54 insertions(+), 3 deletions(-) create mode 100644 icevision/models/multitask/ultralytics/yolov5/__init__.py create mode 100644 icevision/models/multitask/ultralytics/yolov5/backbones.py rename icevision/models/multitask/ultralytics/{ => yolov5}/dataloaders.py (100%) create mode 100644 icevision/models/multitask/ultralytics/yolov5/fastai/__init__.py create mode 100644 icevision/models/multitask/ultralytics/yolov5/lightning/__init__.py rename icevision/models/multitask/ultralytics/{pl_adapter.py => yolov5/lightning/model_adapter.py} (97%) rename icevision/models/multitask/ultralytics/{ => yolov5}/model.py (96%) create mode 100644 icevision/models/multitask/ultralytics/yolov5/prediction.py rename icevision/models/multitask/ultralytics/{ => yolov5}/utils.py (100%) rename icevision/models/multitask/ultralytics/{yolo_multitask.py => yolov5/yolo_hybrid.py} (100%) diff --git a/icevision/models/multitask/ultralytics/yolov5/__init__.py b/icevision/models/multitask/ultralytics/yolov5/__init__.py new file mode 100644 index 000000000..7db056ccf --- /dev/null +++ b/icevision/models/multitask/ultralytics/yolov5/__init__.py @@ -0,0 +1,33 @@ +""" +The following imports are not strictly necessary if you're only using the high level API, + but are nice to have for quick dev / debugging, and to use some of the lower level APIs. 
+ They must be imported before the rest of the imports to respect namespaces +""" + +import icevision.tfms as tfms +from icevision.models.multitask.classification_heads import * +from icevision.models.multitask.utils import * +from icevision.data.dataset import Dataset +from yolov5.utils.loss import ComputeLoss + + +""" +The following imports are what are essential for the high level API, and in line with + the way modules are imported with the rest of the library +""" + +from icevision.models.multitask.ultralytics.yolov5.dataloaders import * +from icevision.models.multitask.ultralytics.yolov5.model import * +from icevision.models.multitask.ultralytics.yolov5.prediction import * +from icevision.models.multitask.ultralytics.yolov5.utils import * +from icevision.models.multitask.ultralytics.yolov5.backbones import * +from icevision.models.multitask.ultralytics.yolov5.yolo_hybrid import * + + +from icevision.soft_dependencies import SoftDependencies + +if SoftDependencies.fastai: + from icevision.models.multitask.ultralytics.yolov5 import fastai + +if SoftDependencies.pytorch_lightning: + from icevision.models.multitask.ultralytics.yolov5 import lightning diff --git a/icevision/models/multitask/ultralytics/yolov5/backbones.py b/icevision/models/multitask/ultralytics/yolov5/backbones.py new file mode 100644 index 000000000..0ee5f34b8 --- /dev/null +++ b/icevision/models/multitask/ultralytics/yolov5/backbones.py @@ -0,0 +1,17 @@ +""" +This file is redundant in terms of code as it uses the exact same code +as `icevision.models.ultralytics.yolov5.backbones` + +We're keeping it to maintain structure, and in case we want to change +something in the future that is multitask model specific +""" + +from icevision.models.multitask.ultralytics.yolov5.utils import * +from icevision.models.ultralytics.yolov5.backbones import * + +__all__ = [ + "small", + "medium", + "large", + "extra_large", +] diff --git a/icevision/models/multitask/ultralytics/dataloaders.py b/icevision/models/multitask/ultralytics/yolov5/dataloaders.py similarity index 100% rename from icevision/models/multitask/ultralytics/dataloaders.py rename to icevision/models/multitask/ultralytics/yolov5/dataloaders.py diff --git a/icevision/models/multitask/ultralytics/yolov5/fastai/__init__.py b/icevision/models/multitask/ultralytics/yolov5/fastai/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/icevision/models/multitask/ultralytics/yolov5/lightning/__init__.py b/icevision/models/multitask/ultralytics/yolov5/lightning/__init__.py new file mode 100644 index 000000000..33a818e05 --- /dev/null +++ b/icevision/models/multitask/ultralytics/yolov5/lightning/__init__.py @@ -0,0 +1 @@ +from icevision.models.multitask.ultralytics.yolov5.lightning.model_adapter import * diff --git a/icevision/models/multitask/ultralytics/pl_adapter.py b/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py similarity index 97% rename from icevision/models/multitask/ultralytics/pl_adapter.py rename to icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py index 39688ece5..7b579ed2c 100644 --- a/icevision/models/multitask/ultralytics/pl_adapter.py +++ b/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py @@ -11,7 +11,7 @@ from icevision.core import * from loguru import logger -from icevision.models.multitask.ultralytics.yolo_multitask import HybridYOLOV5 +from icevision.models.multitask.ultralytics.yolov5.yolo_hybrid import HybridYOLOV5 from icevision.models.multitask.utils.model import 
ForwardType from yolov5.utils.loss import ComputeLoss diff --git a/icevision/models/multitask/ultralytics/model.py b/icevision/models/multitask/ultralytics/yolov5/model.py similarity index 96% rename from icevision/models/multitask/ultralytics/model.py rename to icevision/models/multitask/ultralytics/yolov5/model.py index 8ea034478..fa5c7adb5 100644 --- a/icevision/models/multitask/ultralytics/model.py +++ b/icevision/models/multitask/ultralytics/yolov5/model.py @@ -17,10 +17,10 @@ from yolov5.utils.general import check_img_size # from icevision.models.ultralytics.yolov5.utils import * -from icevision.models.multitask.ultralytics.utils import * +from icevision.models.multitask.ultralytics.yolov5.utils import * from icevision.models.ultralytics.yolov5.backbones import * -from icevision.models.multitask.ultralytics.yolo_multitask import HybridYOLOV5 +from icevision.models.multitask.ultralytics.yolov5.yolo_hybrid import HybridYOLOV5 from icevision.models.multitask.classification_heads import ClassifierConfig yolo_dir = get_root_dir() / "yolo" diff --git a/icevision/models/multitask/ultralytics/yolov5/prediction.py b/icevision/models/multitask/ultralytics/yolov5/prediction.py new file mode 100644 index 000000000..e69de29bb diff --git a/icevision/models/multitask/ultralytics/utils.py b/icevision/models/multitask/ultralytics/yolov5/utils.py similarity index 100% rename from icevision/models/multitask/ultralytics/utils.py rename to icevision/models/multitask/ultralytics/yolov5/utils.py diff --git a/icevision/models/multitask/ultralytics/yolo_multitask.py b/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py similarity index 100% rename from icevision/models/multitask/ultralytics/yolo_multitask.py rename to icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py From 4ad3c08050956777edfc3757a633d18491a9966c Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Mon, 14 Jun 2021 23:39:32 +0530 Subject: [PATCH 030/122] documentation. --- icevision/models/multitask/data/dataset.py | 112 ++++++++++++++++-- .../models/multitask/mmdet/dataloaders.py | 40 +------ .../ultralytics/yolov5/dataloaders.py | 36 +----- 3 files changed, 111 insertions(+), 77 deletions(-) diff --git a/icevision/models/multitask/data/dataset.py b/icevision/models/multitask/data/dataset.py index a17078556..8fe0bb485 100644 --- a/icevision/models/multitask/data/dataset.py +++ b/icevision/models/multitask/data/dataset.py @@ -13,22 +13,116 @@ class HybridAugmentationsRecordDataset(Dataset): """ - Dataset that stores records internally and dynamically attaches an `img` component - to each task when being fetched + A Dataset that allows you to apply different augmentations to different tasks in your + record. `detection_transforms` are applied to the `detection` task specifically, and + `classification_transforms_groups` describe how to group and apply augmentations to + the classification tasks in the record. + + This object stores the records internally and dynamically attaches an `img` component + to each task when being fetched. Some basic validation is done on init to ensure that + the given transforms cover all tasks described in the record. + + Important NOTE: All images are returned as normalised numpy arrays upon fetching. If + running in `debug` mode, normalisation is skipped and PIL Images are returned inside + the record instead. This is done to facilitate visual inspection of the transforms + applied to the images Arguments: - * records: A list of records. 
+ * records: A list of records where only the `common` attribute has an `img`. Upon fetching, + _each_ task in the record will have an `img` attribute added to it based on the + `classification_transforms_groups` * classification_transforms_groups - Icevision albumentations adapter for detection transforms + forward pass in the network. See below for an example. + * detection_transforms - Icevision albumentations adapter for detection transforms. * norm_mean : norm mean stats * norm_std : norm stdev stats * debug : If true, prints info & unnormalised `PIL.Image`s are returned on fetching items + + Usage: + Sample record: + BaseRecord + + common: + - Image ID: 4 + - Filepath: sample_image.png + - Image: 640x640x3 Image + - Image size ImgSize(width=640, height=640) + color_saturation: + - Class Map: + - Labels: [1] + shot_composition: + - Class Map: + - Labels: [1] + detection: + - BBoxes: [] + - Class Map: + - Labels: [1] + shot_framing: + - Class Map: + - Labels: [3] + + classification_transforms_groups = { + "group1": dict( + tasks=["shot_composition"], + transforms=Tfms.Compose([ + Tfms.Resize((IMG_HEIGHT, IMG_WIDTH)), + Tfms.RandomPerspective(), + ]) + ), + "group2": dict( + tasks=["color_saturation", "shot_framing"], + transforms=Tfms.Compose([ + Tfms.Resize((IMG_HEIGHT, IMG_WIDTH)), + Tfms.RandomPerspective(), + Tfms.RandomHorizontalFlip(), + Tfms.RandomVerticalFlip(), + ]) + ) + } + import icevision.tfms as tfms + detection_transforms = tfms.A.Adapter([ + tfms.A.Normalize(), + tfms.A.Resize(height=IMG_HEIGHT, width=IMG_WIDTH), + tfms.A.PadIfNeeded(img_H, img_W, border_mode=cv2.BORDER_CONSTANT), + ]) + + dset = HybridAugmentationsRecordDataset( + records=records, + classification_transforms_groups=classification_transforms_groups, + detection_transforms=detection_transforms, + ) + + Returned Record Example: + Note that unlike the input record, each task has an `Image` attribute which + is after the transforms have been applied. In the dataloader, these task specific + images must be used, and the `record.common.img` is just the original image + untransformed that shouldn't be used to train the model + + BaseRecord + + common: + - Image ID: 4 + - Filepath: sample_image.png + - Image: 640x640x3 Image + - Image size ImgSize(width=640, height=640) + color_saturation: + - Image: 640x640x3 Image + - Class Map: + - Labels: [1] + shot_composition: + - Class Map: + - Labels: [1] + - Image: 640x640x3 Image + detection: + - BBoxes: [] + - Image: 640x640x3 Image + - Class Map: + - Labels: [1] + shot_framing: + - Class Map: + - Labels: [3] + - Image: 640x640x3 Image """ def __init__( diff --git a/icevision/models/multitask/mmdet/dataloaders.py b/icevision/models/multitask/mmdet/dataloaders.py index 150ca5903..907c35940 100644 --- a/icevision/models/multitask/mmdet/dataloaders.py +++ b/icevision/models/multitask/mmdet/dataloaders.py @@ -42,41 +42,11 @@ def build_multi_aug_batch( Take as inputs `records` and `classification_transform_groups` and return a tuple of dictionaries, one for detection data and the other for classification. - Each `record` is expected to have a specific structure. 
For example: - - BaseRecord - - common: - - Image ID: 4 - - Filepath: sample_image.png - - Image: 640x640x3 Image - - Image size ImgSize(width=640, height=640) - color_saturation: - - Image: 640x640x3 Image - - Class Map: - - Labels: [1] - shot_composition: - - Class Map: - - Labels: [1] - - Image: 640x640x3 Image - detection: - - BBoxes: [] - - Image: 640x640x3 Image - - Class Map: - - Labels: [1] - shot_framing: - - Class Map: - - Labels: [3] - - Image: 640x640x3 Image - - `classification_transform_groups` describes how to group classification data. For example: - { - "group1": dict(tasks=["shot_composition"]), - "group2": dict(tasks=["color_saturation", "shot_framing"]) - } - - - And return a nested data dictionary - (`detection_data`, `classification_data`) and + See `icevision.models.multitask.data.dataset.HybridAugmentationsRecordDataset` + for example of what `records` and `classification_transform_groups` look like + + Returns: + A nested data dictionary - (`detection_data`, `classification_data`) and the loaded records { `detection_data`: diff --git a/icevision/models/multitask/ultralytics/yolov5/dataloaders.py b/icevision/models/multitask/ultralytics/yolov5/dataloaders.py index 2aafa9a2e..4822e282f 100644 --- a/icevision/models/multitask/ultralytics/yolov5/dataloaders.py +++ b/icevision/models/multitask/ultralytics/yolov5/dataloaders.py @@ -78,38 +78,8 @@ def build_multi_aug_batch( Take as inputs `records` and `classification_transform_groups` and return a tuple of dictionaries, one for detection data and the other for classification. - Each `record` is expected to have a specific structure. For example: - - BaseRecord - - common: - - Image ID: 4 - - Filepath: sample_image.png - - Image: 640x640x3 Image - - Image size ImgSize(width=640, height=640) - color_saturation: - - Image: 640x640x3 Image - - Class Map: - - Labels: [1] - shot_composition: - - Class Map: - - Labels: [1] - - Image: 640x640x3 Image - detection: - - BBoxes: [] - - Image: 640x640x3 Image - - Class Map: - - Labels: [1] - shot_framing: - - Class Map: - - Labels: [3] - - Image: 640x640x3 Image - - `classification_transform_groups` describes how to group classification data. For example: - { - "group1": dict(tasks=["shot_composition"]), - "group2": dict(tasks=["color_saturation", "shot_framing"]) - } + See `icevision.models.multitask.data.dataset.HybridAugmentationsRecordDataset` + for example of what `records` and `classification_transform_groups` look like Returns: @@ -141,7 +111,7 @@ def build_multi_aug_batch( ) ) } - 2. Loaded records + 2. Loaded records (same ones passed as inputs) """ detection_images = [] detection_targets = [] From eac9ed49652dba57a3c9534780fb8b2077914acc Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Tue, 15 Jun 2021 08:51:31 +0530 Subject: [PATCH 031/122] move dataloading util to common module --- .../multitask/data/dataloading_utils.py | 28 +++++++++++++++++++ .../models/multitask/mmdet/dataloaders.py | 20 ++----------- 2 files changed, 31 insertions(+), 17 deletions(-) create mode 100644 icevision/models/multitask/data/dataloading_utils.py diff --git a/icevision/models/multitask/data/dataloading_utils.py b/icevision/models/multitask/data/dataloading_utils.py new file mode 100644 index 000000000..89f593bf8 --- /dev/null +++ b/icevision/models/multitask/data/dataloading_utils.py @@ -0,0 +1,28 @@ +""" +This may be a temporary file that may eventually be removed, +as it only slightly modifies an existing function. 
+""" + +__all__ = ["unload_records"] + + +from typing import Dict, Optional, Callable + +from numpy.lib.arraysetops import isin + + +def unload_records(build_batch: Callable, build_batch_kwargs: Optional[Dict] = None): + """ + This decorator function unloads records to not carry them around after batch creation + and will also accept any additional args required by the `build_batch`` function + """ + build_batch_kwargs = build_batch_kwargs or {} + assert isinstance(build_batch_kwargs, dict) + + def inner(records): + tupled_output, records = build_batch(records, **build_batch_kwargs) + for record in records: + record.unload() + return tupled_output, records + + return inner diff --git a/icevision/models/multitask/mmdet/dataloaders.py b/icevision/models/multitask/mmdet/dataloaders.py index 907c35940..204f2a92e 100644 --- a/icevision/models/multitask/mmdet/dataloaders.py +++ b/icevision/models/multitask/mmdet/dataloaders.py @@ -14,24 +14,10 @@ _labels, _bboxes, ) +from icevision.models.multitask.data.dataloading_utils import * from collections import defaultdict -def unload_records(build_batch, **build_batch_kwargs): - """ - This decorator function unloads records to not carry them around after batch creation - and will also accept any additional args required by the `build_batch`` function - """ - - def inner(records): - tupled_output, records = build_batch(records, **build_batch_kwargs) - for record in records: - record.unload() - return tupled_output, records - - return inner - - def build_multi_aug_batch( records: Sequence[RecordType], classification_transform_groups: dict ) -> Tuple[ @@ -109,10 +95,10 @@ def build_multi_aug_batch( if isinstance(comp, ClassificationLabelsRecordComponent): if comp.is_multilabel: labels = comp.one_hot_encoded() - classification_targets[name].append(labels) + classification_labels[name].append(labels) else: labels = comp.label_ids - classification_targets[name].extend(labels) + classification_labels[name].extend(labels) # Massage data for group in classification_data.values(): From 0817f0716825cfe1a0fb6d309516621e12954aad Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Tue, 15 Jun 2021 08:52:54 +0530 Subject: [PATCH 032/122] add `addict` safe Dictionary --- icevision/utils/utils.py | 11 +++++------ setup.cfg | 1 + 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/icevision/utils/utils.py b/icevision/utils/utils.py index 65a97e4e9..7d426b582 100644 --- a/icevision/utils/utils.py +++ b/icevision/utils/utils.py @@ -17,12 +17,11 @@ "denormalize_mask", "patch_class_to_main", "flatten", - # "Dictionary", + "Dictionary", ] from icevision.imports import * - -# from addict import Dict as _Dict +from addict import Dict as _Dict def notnone(x): @@ -127,6 +126,6 @@ def flatten(x: Any) -> List[Any]: return flattened_list -# class Dictionary(_Dict): -# def __missing__(self, key): -# raise KeyError(key) +class Dictionary(_Dict): + def __missing__(self, key): + raise KeyError(key) diff --git a/setup.cfg b/setup.cfg index 0edd535a5..6789f67e0 100644 --- a/setup.cfg +++ b/setup.cfg @@ -37,6 +37,7 @@ install_requires = loguru >=0.5.3 pillow > 8.0.0 importlib-metadata>=1;python_version<"3.8" + addict [options.extras_require] all = From 7f66f71fc87fd234ad41bf60246f4411c12d30e7 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Tue, 15 Jun 2021 08:53:33 +0530 Subject: [PATCH 033/122] remove accidental import --- icevision/models/multitask/data/dataloading_utils.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/icevision/models/multitask/data/dataloading_utils.py 
b/icevision/models/multitask/data/dataloading_utils.py
index 89f593bf8..7f7d9246d 100644
--- a/icevision/models/multitask/data/dataloading_utils.py
+++ b/icevision/models/multitask/data/dataloading_utils.py
@@ -8,8 +8,6 @@
 
 from typing import Dict, Optional, Callable
 
-from numpy.lib.arraysetops import isin
-
 
 def unload_records(build_batch: Callable, build_batch_kwargs: Optional[Dict] = None):
     """

From d20ed4537c77eb3ff6ea988566c5d3de62ac7b8f Mon Sep 17 00:00:00 2001
From: Rahul Somani
Date: Tue, 15 Jun 2021 09:01:55 +0530
Subject: [PATCH 034/122] doc, type anno

---
 .../multitask/data/dataloading_utils.py      | 22 +++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/icevision/models/multitask/data/dataloading_utils.py b/icevision/models/multitask/data/dataloading_utils.py
index 7f7d9246d..2bd2e4b60 100644
--- a/icevision/models/multitask/data/dataloading_utils.py
+++ b/icevision/models/multitask/data/dataloading_utils.py
@@ -6,13 +6,27 @@
 __all__ = ["unload_records"]
 
 
-from typing import Dict, Optional, Callable
+from icevision.core.record_type import RecordType
+from typing import Any, Dict, Optional, Callable, Sequence, Tuple
 
 
-def unload_records(build_batch: Callable, build_batch_kwargs: Optional[Dict] = None):
+def unload_records(
+    build_batch: Callable, build_batch_kwargs: Optional[Dict] = None
+) -> Callable[[Sequence[RecordType]], Tuple[Tuple[Any, ...], Sequence[RecordType]]]:
     """
-    This decorator function unloads records to not carry them around after batch creation
-    and will also accept any additional args required by the `build_batch`` function
+    This decorator function unloads records to not carry them around after batch creation.
+    It also optionally accepts `build_batch_kwargs` that are to be passed into
+    `build_batch`. These aren't accepted as keyword arguments, as those are reserved
+    for PyTorch's DataLoader class, which is used later in this chain of function calls
+
+    Args:
+        build_batch (Callable): A collate function that describes how to mash records
+            into a batch of inputs for a model
+        build_batch_kwargs (Optional[Dict], optional): Keyword arguments to pass into
+            `build_batch`. Defaults to None.
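+
+    Example:
+        A minimal usage sketch (`my_build_batch` is a hypothetical collate function
+        with a `(records, **kwargs) -> (tupled_output, records)` signature):
+
+            >>> collate_fn = unload_records(build_batch=my_build_batch)
+            >>> dl = DataLoader(dataset, collate_fn=collate_fn, batch_size=8)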
+
+    Returns:
+        Callable: A collate function that takes `records`, calls
+            `build_batch(records, **build_batch_kwargs)`, unloads each record,
+            and returns the `(tupled_output, records)` pair
     """
    build_batch_kwargs = build_batch_kwargs or {}
     assert isinstance(build_batch_kwargs, dict)

From d40edae1e8802399ff855f6052b55c3cd62ea637 Mon Sep 17 00:00:00 2001
From: Rahul Somani
Date: Tue, 15 Jun 2021 09:57:36 +0530
Subject: [PATCH 035/122] higher level dataloading functions

---
 .../ultralytics/yolov5/dataloaders.py         | 84 ++++++++++++++++++-
 1 file changed, 83 insertions(+), 1 deletion(-)

diff --git a/icevision/models/multitask/ultralytics/yolov5/dataloaders.py b/icevision/models/multitask/ultralytics/yolov5/dataloaders.py
index 4822e282f..4a3b4ea66 100644
--- a/icevision/models/multitask/ultralytics/yolov5/dataloaders.py
+++ b/icevision/models/multitask/ultralytics/yolov5/dataloaders.py
@@ -14,8 +14,23 @@
 from icevision.models.ultralytics.yolov5.dataloaders import (
     _build_train_sample as _build_train_detection_sample,
 )
-from torch import Tensor
+from icevision.models.ultralytics.yolov5.dataloaders import build_infer_batch, infer_dl
 from icevision.models.multitask.utils.dtypes import *
+from icevision.models.multitask.data.dataset import HybridAugmentationsRecordDataset
+from icevision.models.multitask.data.dataloading_utils import *
+from torch.utils.data import Dataset
+
+transform_dl()
+
+__all__ = [
+    "build_single_aug_batch",  # <- build_train_batch, build_valid_batch
+    "build_multi_aug_batch",  # <- build_train_batch
+    "build_infer_batch",
+    "train_dl",
+    "train_dl_multi_aug",
+    "valid_dl",
+    "infer_dl",
+]
 
 
 def build_single_aug_batch(
@@ -155,3 +170,70 @@ def build_multi_aug_batch(
     )
 
     return (detection_data, classification_data), records
+
+
+def train_dl(dataset: Dataset, batch_tfms=None, **dataloader_kwargs) -> DataLoader:
+    """
+    A `DataLoader` with a custom `collate_fn` that batches records as required for feeding a YOLO-V5 model.
+
+    Args:
+        dataset (Dataset): A `Dataset` that returns a transformed record upon indexing
+        batch_tfms: ...  # TODO
+        **dataloader_kwargs: Keyword arguments that will be internally passed to a PyTorch `DataLoader`.
+            The parameter `collate_fn` is already defined internally and cannot be passed here.
+
+    Returns:
+        DataLoader: A PyTorch `DataLoader`
+    """
+    return transform_dl(
+        dataset=dataset,
+        build_batch=build_single_aug_batch,
+        batch_tfms=batch_tfms,
+        **dataloader_kwargs,
+    )
+
+
+def valid_dl(dataset: Dataset, batch_tfms=None, **dataloader_kwargs) -> DataLoader:
+    """
+    A `DataLoader` with a custom `collate_fn` that batches items as required for validating the YOLO-V5 model.
+
+    Args:
+        dataset (Dataset): A `Dataset` that returns a transformed record upon indexing
+        batch_tfms: ...  # TODO
+        **dataloader_kwargs: Keyword arguments that will be internally passed to a PyTorch `DataLoader`.
+            The parameter `collate_fn` is already defined internally and cannot be passed here.
+
+    Returns:
+        DataLoader: A PyTorch `DataLoader`
+    """
+    return train_dl(dataset=dataset, batch_tfms=batch_tfms, **dataloader_kwargs)
+
+
+def train_dl_multi_aug(
+    dataset: HybridAugmentationsRecordDataset,
+    classification_transform_groups: dict,
+    **dataloader_kwargs,
+) -> DataLoader:
+    """
+    A `DataLoader` meant to work with `HybridAugmentationsRecordDataset`, a multitasking
+    dataset where individual tasks or groups of tasks receive their own unique transforms.
+    `batch_tfms` is not yet implemented for this DataLoader.
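+
+    A rough usage sketch (assuming `dset` and `groups` are built as shown in
+    `HybridAugmentationsRecordDataset`'s docs):
+
+        >>> dl = train_dl_multi_aug(dset, groups, batch_size=8, num_workers=4)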
+ + Args: + dataset (HybridAugmentationsRecordDataset): A custom dataset that groups tasks and returns + records where _each_ task has its own `img` + + classification_transform_groups (dict): The exact same dictionary that is passed to + HybridAugmentationsRecordDataset`, describing how to group and transform classification tasks. + See the dataset's docs for more details. + + Returns: + DataLoader: A PyTorch `DataLoader` + """ + collate_fn = unload_records( + build_batch=build_multi_aug_batch, + build_batch_kwargs=dict( + classification_transform_groups=classification_transform_groups + ), + ) + return DataLoader(dataset=dataset, collate_fn=collate_fn, **dataloader_kwargs) From 2acc5169fdbed0f3232e890e9c38db56d9a37292 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Tue, 15 Jun 2021 10:26:14 +0530 Subject: [PATCH 036/122] TODO val step --- .../yolov5/lightning/model_adapter.py | 47 +++++++++++++++---- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py b/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py index 7b579ed2c..63281adef 100644 --- a/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py +++ b/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py @@ -45,6 +45,11 @@ def __init__( def post_init(self): pass + # ======================== TRAINING METHODS ======================== # + + def forward(self, *args, **kwargs): + return self.model(*args, **kwargs) + def training_step(self, batch: Tuple[dict, Sequence[RecordType]], batch_idx): batch, _ = batch if isinstance(batch[0], torch.Tensor): @@ -52,8 +57,10 @@ def training_step(self, batch: Tuple[dict, Sequence[RecordType]], batch_idx): step_type = ForwardType.TRAIN elif isinstance(batch[0], dict): + # TODO: Model method not yet implemented (detection_data, classification_data) = batch detection_targets = detection_data["targets"] + classification_targets = classification_data["targets"] step_type = ForwardType.TRAIN_MULTI_AUG raise RuntimeError @@ -77,6 +84,36 @@ def training_step(self, batch: Tuple[dict, Sequence[RecordType]], batch_idx): return detection_loss + total_classification_loss + def validation_step(self, batch, batch_idx): + batch, records = batch + (xb, detection_targets, classification_targets) = batch + + with torch.no_grad(): + inference_out, (detection_preds, classification_preds) = self( + xb, step_type=ForwardType.EVAL + ) + # preds = convert_raw_predictions(...) 
+ detection_loss = self.compute_loss(detection_preds, yb)[0] + classification_losses = { + head.compute_loss( + predictions=classification_preds[name], + targets=classification_targets[name], + ) + for name, head in self.model.classifier_heads.items() + } + total_classification_loss = sum(classification_losses.values()) + + # self.accumulate_metrics(preds) + self.log_losses( + "valid", detection_loss, total_classification_loss, classification_losses + ) + + def validation_epoch_end(self, outs): + pass + # self.finalize_metrics() + + # ======================== LOGGING METHODS ======================== # + def log_losses( self, mode: str, @@ -95,16 +132,6 @@ def log_losses( for k, v in log_vars.items(): self.log(f"{mode}/{k}", v.item() if isinstance(v, torch.Tensor) else v) - def validation_step(self, batch, batch_idx): - raise NotImplementedError - - # ======================== TRAINING METHODS ======================== # - - def forward(self, *args, **kwargs): - return self.model(*args, **kwargs) - - # ======================== LOGGING METHODS ======================== # - def accumulate_metrics(self, preds): for metric in self.metrics: metric.accumulate(preds=preds) From d73d41813a933bfcb872138971ad5327a8ef95b7 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Tue, 15 Jun 2021 10:29:22 +0530 Subject: [PATCH 037/122] -___- --- icevision/models/multitask/ultralytics/yolov5/dataloaders.py | 1 - 1 file changed, 1 deletion(-) diff --git a/icevision/models/multitask/ultralytics/yolov5/dataloaders.py b/icevision/models/multitask/ultralytics/yolov5/dataloaders.py index 4a3b4ea66..bae5a4a92 100644 --- a/icevision/models/multitask/ultralytics/yolov5/dataloaders.py +++ b/icevision/models/multitask/ultralytics/yolov5/dataloaders.py @@ -20,7 +20,6 @@ from icevision.models.multitask.data.dataloading_utils import * from torch.utils.data import Dataset -transform_dl() __all__ = [ "build_single_aug_batch", # <- build_train_batch, build_valid_batch From 7bc3604462b9043a56a7e2814389246f29bd7077 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Tue, 15 Jun 2021 10:45:00 +0530 Subject: [PATCH 038/122] add doc for high level `model` creator --- .../multitask/ultralytics/yolov5/model.py | 30 ++++++++++++++----- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/icevision/models/multitask/ultralytics/yolov5/model.py b/icevision/models/multitask/ultralytics/yolov5/model.py index fa5c7adb5..e8c9b360b 100644 --- a/icevision/models/multitask/ultralytics/yolov5/model.py +++ b/icevision/models/multitask/ultralytics/yolov5/model.py @@ -29,19 +29,33 @@ def model( backbone: YoloV5BackboneConfig, - num_classes: int, + num_detection_classes: int, img_size: int, # must be multiple of 32 device: Optional[torch.device] = None, classifier_configs: Dict[str, ClassifierConfig] = None, -) -> nn.Module: +) -> HybridYOLOV5: + """ + Build a `HybridYOLOV5` Multitask Model with detection & classification heads. + + Args: + backbone (YoloV5BackboneConfig): Config from `icevision.models.ultralytics.yolov5.backbones.{}` + num_detection_classes (int): Number of object detection classes (including background) + img_size (int): Size of input images (assumes square inputs) + classifier_configs (Dict[str, ClassifierConfig], optional): A dictionary mapping of `ClassifierConfig`s + where each key corresponds to the name of the task in the input records. Defaults to None. 
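+
+    Example:
+        A sketch of a `classifier_configs` mapping (task names and class counts here
+        are assumptions; use the tasks present in your own records):
+
+            classifier_configs = {
+                "color_saturation": ClassifierConfig(out_classes=3),
+                "shot_framing": ClassifierConfig(out_classes=4),
+            }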
+
+    Returns:
+        HybridYOLOV5: A multitask YOLO-V5 model with one detection head and
+            `len(classifier_configs)` classification heads
+    """
     model_name = backbone.model_name
     pretrained = backbone.pretrained
 
     # this is to remove background from ClassMap as discussed
     # here: https://github.com/ultralytics/yolov5/issues/2950
     # and here: https://discord.com/channels/735877944085446747/782062040168267777/836692604224536646
-    # so we should pass `num_classes=parser.class_map.num_classes`
-    num_classes -= 1
+    # so we should pass `num_detection_classes=parser.class_map.num_classes`
+    num_detection_classes -= 1
 
     device = (
         torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -65,7 +79,7 @@ def model(
     model = HybridYOLOV5(
         cfg_filepath or ckpt["model"].yaml,
         ch=3,
-        nc=num_classes,
+        nc=num_detection_classes,
         classifier_configs=classifier_configs,
     ).to(device)
     exclude = []  # exclude keys
@@ -81,7 +95,7 @@ def model(
     model = HybridYOLOV5(
         cfg_filepath,
         ch=3,
-        nc=num_classes,
+        nc=num_detection_classes,
         anchors=hyp.get("anchors"),
         classifier_configs=classifier_configs,
     ).to(device)
@@ -91,9 +105,9 @@ def model(
     imgsz = check_img_size(img_size, gs)  # verify imgsz are gs-multiples
 
     hyp["box"] *= 3.0 / nl  # scale to layers
-    hyp["cls"] *= num_classes / 80.0 * 3.0 / nl  # scale to classes and layers
+    hyp["cls"] *= num_detection_classes / 80.0 * 3.0 / nl  # scale to classes and layers
     hyp["obj"] *= (imgsz / 640) ** 2 * 3.0 / nl  # scale to image size and layers
-    model.nc = num_classes  # attach number of classes to model
+    model.nc = num_detection_classes  # attach number of classes to model
     model.hyp = hyp  # attach hyperparameters to model
     model.gr = 1.0  # iou loss ratio (obj_loss = 1.0 or iou)

From 08bc1be375de057eaba1479232fedc19456f7ca8 Mon Sep 17 00:00:00 2001
From: Rahul Somani
Date: Tue, 15 Jun 2021 11:12:45 +0530
Subject: [PATCH 039/122] fix bug where I forgot to enter the dict key, making
 the output a `set` object instead...

---
 .../multitask/ultralytics/yolov5/lightning/model_adapter.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py b/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py
index 63281adef..c358b5dd1 100644
--- a/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py
+++ b/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py
@@ -70,7 +70,7 @@ def training_step(self, batch: Tuple[dict, Sequence[RecordType]], batch_idx):
 
         # Iterate through each head and compute classification losses
         classification_losses = {
-            head.compute_loss(
+            name: head.compute_loss(
                 predictions=classification_preds[name],
                 targets=classification_targets[name],
             )
@@ -95,7 +95,7 @@ def validation_step(self, batch, batch_idx):
             # preds = convert_raw_predictions(...)
detection_loss = self.compute_loss(detection_preds, yb)[0] classification_losses = { - head.compute_loss( + name: head.compute_loss( predictions=classification_preds[name], targets=classification_targets[name], ) From 83f1768b6c7591c5cfcf16bd384311760f62caec Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Tue, 15 Jun 2021 14:48:55 +0530 Subject: [PATCH 040/122] return same outputs regardless of mode; it's good to have consistent `forward` outputs, and use the other `forward_*` methods for other stuff --- icevision/models/multitask/classification_heads/head.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/icevision/models/multitask/classification_heads/head.py b/icevision/models/multitask/classification_heads/head.py index a97e4be68..a875c5cd1 100644 --- a/icevision/models/multitask/classification_heads/head.py +++ b/icevision/models/multitask/classification_heads/head.py @@ -178,12 +178,8 @@ def forward(self, features: Union[Tensor, TensorDict, TensorList]): raise TypeError( f"Expected TensorList|TensorDict|Tensor|tuple, got {type(features)}" ) - if self.training: - # Return raw predictions in training mode - return self.classifier(pooled_features) - else: - # Return predictions -> sigmoid / softmax in eval mode - return self.postprocess(self.classifier(pooled_features)) + + return self.classifier(pooled_features) # TorchVision style API def compute_loss(self, predictions, targets): From 3fb8a4b374eb68e209860d4a1e49d40f4719f716 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Tue, 15 Jun 2021 14:51:25 +0530 Subject: [PATCH 041/122] correct tuple unpacking --- .../ultralytics/yolov5/lightning/model_adapter.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py b/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py index c358b5dd1..c6e1f02d6 100644 --- a/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py +++ b/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py @@ -39,7 +39,6 @@ def __init__( else: metric = tm.Accuracy(threshold=0.01, top_k=1) self.classification_metrics[name] = metric - setattr(self, f"{name}_accuracy", metric) self.post_init() def post_init(self): @@ -89,11 +88,18 @@ def validation_step(self, batch, batch_idx): (xb, detection_targets, classification_targets) = batch with torch.no_grad(): - inference_out, (detection_preds, classification_preds) = self( + (inference_det_preds, training_det_preds), classification_preds = self( xb, step_type=ForwardType.EVAL ) - # preds = convert_raw_predictions(...) 
- detection_loss = self.compute_loss(detection_preds, yb)[0] + + # Use head.postprocess(classificatio_preds) here + # classification_preds_postprocessed = { + # name: head.postprocess(classification_preds[name]) + # for name, head in self.model.classifier_heads.items() + # } + # preds = convert_raw_predictions(inference_det_preds) + + detection_loss = self.compute_loss(training_det_preds, detection_targets)[0] classification_losses = { name: head.compute_loss( predictions=classification_preds[name], From 213de999f34030af7845e86eccd5952e454d626e Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Tue, 15 Jun 2021 14:51:51 +0530 Subject: [PATCH 042/122] add `forward_export`, different `step_type`s for exporting --- .../ultralytics/yolov5/yolo_hybrid.py | 54 +++++++++++++++++-- icevision/models/multitask/utils/model.py | 3 ++ 2 files changed, 52 insertions(+), 5 deletions(-) diff --git a/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py b/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py index 0b2376a13..f30fd3c42 100644 --- a/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py +++ b/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py @@ -22,6 +22,7 @@ ImageClassificationHead, Passthrough, ) +from icevision.models.multitask.utils.dtypes import * from icevision.models.multitask.classification_heads.builder import ( build_classifier_heads_from_configs, ) @@ -30,7 +31,7 @@ # from .yolo import * from yolov5.models.yolo import * -from typing import Dict, Optional, List, Tuple +from typing import Dict, Optional, List, Tuple, Union from copy import deepcopy from loguru import logger @@ -187,13 +188,25 @@ def build_classifier_heads(self): logger.success(f"Built classifier heads successfully") def forward(self, x, profile=False, step_type=ForwardType.TRAIN): - if step_type is ForwardType.TRAIN: + if step_type is ForwardType.TRAIN or step_type is ForwardType.EVAL: + # Assume that model is set to `.eval()` mode before calling this function return self.forward_once(x=x, profile=profile) + elif step_type is ForwardType.TRAIN_MULTI_AUG: return self.forward_multi_augment(x=x, profile=profile) - elif step_type is ForwardType.EVAL: + + elif step_type is ForwardType.EXPORT_COREML: + self.train() + self.classifier_heads.eval() + return self.forward_export(x=x) + + elif ( + step_type is ForwardType.EXPORT_ONNX + or step_type is ForwardType.EXPORT_TORCHSCRIPT + ): self.eval() - return self.forward_once(x=x, profile=False) + self.forward_export(x=x) + else: raise RuntimeError( f"Invalid `step_type`. 
Received: {type(step_type.__class__)}; Expected: {ForwardType.__class__}" @@ -207,7 +220,38 @@ def forward_augment(self, x): def forward_multi_augment(self, x: Dict[str, Tensor]): raise NotImplementedError - def forward_once(self, x, profile=False) -> Tuple[Tensor, Dict[str, Tensor]]: + def forward_export(self, x: Tensor): + "No nonsense forward method for inference / when exporting the model" + y = [] + classification_preds: Dict[str, Tensor] = {} + for m in self.model: + if m.f != -1: # if not from previous layer + x = ( + y[m.f] + if isinstance(m.f, int) + else [x if j == -1 else y[j] for j in m.f] + ) # from earlier layers + + if isinstance(m, Detect): + for name, head in self.classifier_heads.items(): + classification_preds[name] = head.forward_activate(x) + + x = m(x) + y.append(x if m.i in self.save else None) # save output + + return x, classification_preds + + def forward_once( + self, x, profile=False + ) -> Tuple[Union[TensorList, Tuple[Tensor, TensorList]], TensorDict]: + """ + Returns: + A tuple of 2 elements: + 1) A TensorList in training mode, and a Tuple[Tensor, TensorList] in + eval mode where the first element (Tensor) is the inference output and + second is the training output (for loss computation) + 2) A TensorDict of classification predictions + """ y, dt = [], [] # outputs classification_preds: Dict[str, Tensor] = {} for m in self.model: diff --git a/icevision/models/multitask/utils/model.py b/icevision/models/multitask/utils/model.py index 26700bb1c..d7188c252 100644 --- a/icevision/models/multitask/utils/model.py +++ b/icevision/models/multitask/utils/model.py @@ -7,3 +7,6 @@ class ForwardType(Enum): TRAIN_MULTI_AUG = 1 TRAIN = 2 EVAL = 3 + EXPORT_ONNX = 4 + EXPORT_TORCHSCRIPT = 5 + EXPORT_COREML = 6 From af7cc0538ef61dcab0496a38ef6db4b28f19f5e1 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Tue, 15 Jun 2021 14:54:17 +0530 Subject: [PATCH 043/122] WIP notebook - move to GPU and re-run. 
--- notebooks/multitask.ipynb | 344 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 344 insertions(+) create mode 100644 notebooks/multitask.ipynb diff --git a/notebooks/multitask.ipynb b/notebooks/multitask.ipynb new file mode 100644 index 000000000..d3bcc5245 --- /dev/null +++ b/notebooks/multitask.ipynb @@ -0,0 +1,344 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "married-network", + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "colored-commercial", + "metadata": {}, + "outputs": [], + "source": [ + "from icevision.models.multitask.ultralytics.yolov5 import *\n", + "from icevision.data.data_splitter import *\n", + "from icevision.visualize import *\n", + "from icevision.imports import *\n", + "import icedata.datasets.exdark_trimmed as exdark" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "mobile-enterprise", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "dc584f4264b2495aa1a62060ba044b31", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/4626 [00:00)\u001b[0m | \u001b[36micevision.utils.logger_utils\u001b[0m:\u001b[36mautofix_log\u001b[0m:\u001b[36m17\u001b[0m\n", + "\u001b[32m\u001b[1mAUTOFIX-SUCCESS\u001b[0m\u001b[32m\u001b[0m - \u001b[32m\u001b[1m\u001b[31m(record_id: 512)\u001b[0m\u001b[32m\u001b[1m\u001b[0m\u001b[32m - Clipping bbox xmax from 501 to image width 500 (Before: )\u001b[0m | \u001b[36micevision.utils.logger_utils\u001b[0m:\u001b[36mautofix_log\u001b[0m:\u001b[36m17\u001b[0m\n", + "\u001b[32m\u001b[1mAUTOFIX-SUCCESS\u001b[0m\u001b[32m\u001b[0m - \u001b[32m\u001b[1m\u001b[31m(record_id: 1237)\u001b[0m\u001b[32m\u001b[1m\u001b[0m\u001b[32m - Clipping bbox ymax from 536 to image height 528 (Before: )\u001b[0m | \u001b[36micevision.utils.logger_utils\u001b[0m:\u001b[36mautofix_log\u001b[0m:\u001b[36m17\u001b[0m\n", + "\u001b[32m\u001b[1mAUTOFIX-SUCCESS\u001b[0m\u001b[32m\u001b[0m - \u001b[32m\u001b[1m\u001b[31m(record_id: 2063)\u001b[0m\u001b[32m\u001b[1m\u001b[0m\u001b[32m - Clipping bbox ymax from 801 to image height 800 (Before: )\u001b[0m | \u001b[36micevision.utils.logger_utils\u001b[0m:\u001b[36mautofix_log\u001b[0m:\u001b[36m17\u001b[0m\n", + "\u001b[32m\u001b[1mAUTOFIX-SUCCESS\u001b[0m\u001b[32m\u001b[0m - \u001b[32m\u001b[1m\u001b[31m(record_id: 323)\u001b[0m\u001b[32m\u001b[1m\u001b[0m\u001b[32m - Clipping bbox xmax from 501 to image width 500 (Before: )\u001b[0m | \u001b[36micevision.utils.logger_utils\u001b[0m:\u001b[36mautofix_log\u001b[0m:\u001b[36m17\u001b[0m\n", + "\u001b[32m\u001b[1mAUTOFIX-SUCCESS\u001b[0m\u001b[32m\u001b[0m - \u001b[32m\u001b[1m\u001b[31m(record_id: 199)\u001b[0m\u001b[32m\u001b[1m\u001b[0m\u001b[32m - Clipping bbox ymax from 481 to image height 480 (Before: )\u001b[0m | \u001b[36micevision.utils.logger_utils\u001b[0m:\u001b[36mautofix_log\u001b[0m:\u001b[36m17\u001b[0m\n", + "\u001b[32m\u001b[1mAUTOFIX-SUCCESS\u001b[0m\u001b[32m\u001b[0m - \u001b[32m\u001b[1m\u001b[31m(record_id: 919)\u001b[0m\u001b[32m\u001b[1m\u001b[0m\u001b[32m - Clipping bbox ymax from 453 to image height 450 (Before: )\u001b[0m | \u001b[36micevision.utils.logger_utils\u001b[0m:\u001b[36mautofix_log\u001b[0m:\u001b[36m17\u001b[0m\n", + "\u001b[32m\u001b[1mAUTOFIX-SUCCESS\u001b[0m\u001b[32m\u001b[0m - \u001b[32m\u001b[1m\u001b[31m(record_id: 361)\u001b[0m\u001b[32m\u001b[1m\u001b[0m\u001b[32m - Clipping 
bbox ymax from 428 to image height 427 (Before: )\u001b[0m | \u001b[36micevision.utils.logger_utils\u001b[0m:\u001b[36mautofix_log\u001b[0m:\u001b[36m17\u001b[0m\n", + "\u001b[32m\u001b[1mAUTOFIX-SUCCESS\u001b[0m\u001b[32m\u001b[0m - \u001b[32m\u001b[1m\u001b[31m(record_id: 761)\u001b[0m\u001b[32m\u001b[1m\u001b[0m\u001b[32m - Clipping bbox ymax from 694 to image height 679 (Before: )\u001b[0m | \u001b[36micevision.utils.logger_utils\u001b[0m:\u001b[36mautofix_log\u001b[0m:\u001b[36m17\u001b[0m\n", + "\u001b[32m\u001b[1mAUTOFIX-SUCCESS\u001b[0m\u001b[32m\u001b[0m - \u001b[32m\u001b[1m\u001b[31m(record_id: 2048)\u001b[0m\u001b[32m\u001b[1m\u001b[0m\u001b[32m - Clipping bbox xmax from 1028 to image width 1024 (Before: )\u001b[0m | \u001b[36micevision.utils.logger_utils\u001b[0m:\u001b[36mautofix_log\u001b[0m:\u001b[36m17\u001b[0m\n", + "\u001b[32m\u001b[1mAUTOFIX-SUCCESS\u001b[0m\u001b[32m\u001b[0m - \u001b[32m\u001b[1m\u001b[31m(record_id: 2298)\u001b[0m\u001b[32m\u001b[1m\u001b[0m\u001b[32m - Clipping bbox xmax from 1281 to image width 1280 (Before: )\u001b[0m | \u001b[36micevision.utils.logger_utils\u001b[0m:\u001b[36mautofix_log\u001b[0m:\u001b[36m17\u001b[0m\n", + "\u001b[32m\u001b[1mAUTOFIX-SUCCESS\u001b[0m\u001b[32m\u001b[0m - \u001b[32m\u001b[1m\u001b[31m(record_id: 976)\u001b[0m\u001b[32m\u001b[1m\u001b[0m\u001b[32m - Clipping bbox ymax from 376 to image height 375 (Before: )\u001b[0m | \u001b[36micevision.utils.logger_utils\u001b[0m:\u001b[36mautofix_log\u001b[0m:\u001b[36m17\u001b[0m\n", + "\u001b[32m\u001b[1mAUTOFIX-SUCCESS\u001b[0m\u001b[32m\u001b[0m - \u001b[32m\u001b[1m\u001b[31m(record_id: 180)\u001b[0m\u001b[32m\u001b[1m\u001b[0m\u001b[32m - Clipping bbox xmax from 641 to image width 640 (Before: )\u001b[0m | \u001b[36micevision.utils.logger_utils\u001b[0m:\u001b[36mautofix_log\u001b[0m:\u001b[36m17\u001b[0m\n", + "\u001b[32m\u001b[1mAUTOFIX-SUCCESS\u001b[0m\u001b[32m\u001b[0m - \u001b[32m\u001b[1m\u001b[31m(record_id: 896)\u001b[0m\u001b[32m\u001b[1m\u001b[0m\u001b[32m - Clipping bbox ymax from 775 to image height 774 (Before: )\u001b[0m | \u001b[36micevision.utils.logger_utils\u001b[0m:\u001b[36mautofix_log\u001b[0m:\u001b[36m17\u001b[0m\n", + "\u001b[32m\u001b[1mAUTOFIX-SUCCESS\u001b[0m\u001b[32m\u001b[0m - \u001b[32m\u001b[1m\u001b[31m(record_id: 2082)\u001b[0m\u001b[32m\u001b[1m\u001b[0m\u001b[32m - Clipping bbox xmin from -1 to 0 (Before: )\u001b[0m | \u001b[36micevision.utils.logger_utils\u001b[0m:\u001b[36mautofix_log\u001b[0m:\u001b[36m17\u001b[0m\n", + "\u001b[32m\u001b[1mAUTOFIX-SUCCESS\u001b[0m\u001b[32m\u001b[0m - \u001b[32m\u001b[1m\u001b[31m(record_id: 2138)\u001b[0m\u001b[32m\u001b[1m\u001b[0m\u001b[32m - Clipping bbox xmax from 1030 to image width 1024 (Before: )\u001b[0m | \u001b[36micevision.utils.logger_utils\u001b[0m:\u001b[36mautofix_log\u001b[0m:\u001b[36m17\u001b[0m\n", + "\u001b[32m\u001b[1mAUTOFIX-SUCCESS\u001b[0m\u001b[32m\u001b[0m - \u001b[32m\u001b[1m\u001b[31m(record_id: 1102)\u001b[0m\u001b[32m\u001b[1m\u001b[0m\u001b[32m - Clipping bbox xmin from -2 to 0 (Before: )\u001b[0m | \u001b[36micevision.utils.logger_utils\u001b[0m:\u001b[36mautofix_log\u001b[0m:\u001b[36m17\u001b[0m\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "772188a3021642ee98e755a529a5f8fb", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/463 [00:00)\u001b[0m | \u001b[36micevision.utils.logger_utils\u001b[0m:\u001b[36mautofix_log\u001b[0m:\u001b[36m17\u001b[0m\n", + 
"\u001b[32m\u001b[1mAUTOFIX-SUCCESS\u001b[0m\u001b[32m\u001b[0m - \u001b[32m\u001b[1m\u001b[31m(record_id: 2098)\u001b[0m\u001b[32m\u001b[1m\u001b[0m\u001b[32m - Clipping bbox xmax from 501 to image width 500 (Before: )\u001b[0m | \u001b[36micevision.utils.logger_utils\u001b[0m:\u001b[36mautofix_log\u001b[0m:\u001b[36m17\u001b[0m\n", + "\u001b[32m\u001b[1mAUTOFIX-SUCCESS\u001b[0m\u001b[32m\u001b[0m - \u001b[32m\u001b[1m\u001b[31m(record_id: 2098)\u001b[0m\u001b[32m\u001b[1m\u001b[0m\u001b[32m - Clipping bbox xmax from 501 to image width 500 (Before: )\u001b[0m | \u001b[36micevision.utils.logger_utils\u001b[0m:\u001b[36mautofix_log\u001b[0m:\u001b[36m17\u001b[0m\n", + "\u001b[32m\u001b[1mAUTOFIX-SUCCESS\u001b[0m\u001b[32m\u001b[0m - \u001b[32m\u001b[1m\u001b[31m(record_id: 93)\u001b[0m\u001b[32m\u001b[1m\u001b[0m\u001b[32m - Clipping bbox ymax from 376 to image height 375 (Before: )\u001b[0m | \u001b[36micevision.utils.logger_utils\u001b[0m:\u001b[36mautofix_log\u001b[0m:\u001b[36m17\u001b[0m\n", + "\u001b[32m\u001b[1mAUTOFIX-SUCCESS\u001b[0m\u001b[32m\u001b[0m - \u001b[32m\u001b[1m\u001b[31m(record_id: 971)\u001b[0m\u001b[32m\u001b[1m\u001b[0m\u001b[32m - Clipping bbox ymax from 376 to image height 375 (Before: )\u001b[0m | \u001b[36micevision.utils.logger_utils\u001b[0m:\u001b[36mautofix_log\u001b[0m:\u001b[36m17\u001b[0m\n", + "\u001b[32m\u001b[1mAUTOFIX-SUCCESS\u001b[0m\u001b[32m\u001b[0m - \u001b[32m\u001b[1m\u001b[31m(record_id: 2182)\u001b[0m\u001b[32m\u001b[1m\u001b[0m\u001b[32m - Clipping bbox xmax from 501 to image width 500 (Before: )\u001b[0m | \u001b[36micevision.utils.logger_utils\u001b[0m:\u001b[36mautofix_log\u001b[0m:\u001b[36m17\u001b[0m\n" + ] + } + ], + "source": [ + "IMG_SIZE=384\n", + "# data_dir = exdark.load_data()\n", + "data_dir = Path(\"/Users/rahulsomani/datasets/ExDark-Trimmed/\")\n", + "parser = exdark.parser(data_dir)\n", + "train_ds, valid_ds = exdark.dataset(\n", + " data_dir = data_dir,\n", + " size = IMG_SIZE,\n", + " data_splitter = RandomSplitter([0.8, 0.2])\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "aggregate-protocol", + "metadata": {}, + "outputs": [], + "source": [ + "dl_train = train_dl(train_ds)\n", + "dl_valid = valid_dl(valid_ds)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "textile-reproduction", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[33m\u001b[1m\u001b[1mWARNING \u001b[0m\u001b[33m\u001b[1m\u001b[0m - \u001b[33m\u001b[1mIncompatible `num_fpn_features=512` detected in task 'lighting'. Replacing with the correct dimensions: 896\u001b[0m | \u001b[36micevision.models.multitask.ultralytics.yolov5.yolo_hybrid\u001b[0m:\u001b[36mbuild_classifier_heads\u001b[0m:\u001b[36m180\u001b[0m\n", + "\u001b[33m\u001b[1m\u001b[1mWARNING \u001b[0m\u001b[33m\u001b[1m\u001b[0m - \u001b[33m\u001b[1mIncompatible `num_fpn_features=512` detected in task 'location'. 
Replacing with the correct dimensions: 896\u001b[0m | \u001b[36micevision.models.multitask.ultralytics.yolov5.yolo_hybrid\u001b[0m:\u001b[36mbuild_classifier_heads\u001b[0m:\u001b[36m180\u001b[0m\n", + "\u001b[32m\u001b[1m\u001b[1mSUCCESS \u001b[0m\u001b[32m\u001b[1m\u001b[0m - \u001b[32m\u001b[1mBuilt classifier heads successfully\u001b[0m | \u001b[36micevision.models.multitask.ultralytics.yolov5.yolo_hybrid\u001b[0m:\u001b[36mbuild_classifier_heads\u001b[0m:\u001b[36m188\u001b[0m\n", + "\u001b[1m\u001b[1mINFO \u001b[0m\u001b[1m\u001b[0m - \u001b[1mOverriding model.yaml nc=80 with nc=12\u001b[0m | \u001b[36micevision.models.multitask.ultralytics.yolov5.yolo_hybrid\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m119\u001b[0m\n", + "\u001b[32m\u001b[1m\u001b[1mSUCCESS \u001b[0m\u001b[32m\u001b[1m\u001b[0m - \u001b[32m\u001b[1mBuilt *yolov5s* model successfully\u001b[0m | \u001b[36micevision.models.multitask.ultralytics.yolov5.yolo_hybrid\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m150\u001b[0m\n" + ] + } + ], + "source": [ + "hybrid_model = model(\n", + " backbone=backbones.small(),\n", + " num_detection_classes=len(parser.CLASS_MAPS['detection']),\n", + " classifier_configs={\n", + " name: ClassifierConfig(out_classes=len(cm))\n", + " for name, cm in parser.CLASS_MAPS.items() if not name==\"detection\"\n", + " },\n", + " img_size=IMG_SIZE,\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "noticed-variation", + "metadata": {}, + "outputs": [], + "source": [ + "from torch import optim\n", + "import pytorch_lightning as pl\n", + "\n", + "class LightModel(lightning.HybridYOLOV5LightningAdapter):\n", + " def configure_optimizers(self):\n", + " return optim.Adam(self.parameters(), lr=3e-4)\n", + "\n", + "pl_model = LightModel(\n", + " model=hybrid_model,\n", + " metrics=None,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "reflected-section", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: False, used: False\n", + "TPU available: False, using: 0 TPU cores\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "trainer = pl.Trainer(max_epochs=3, gpus=[0])\n", + "trainer = pl.Trainer(\n", + " max_epochs=3,\n", + " # limit_train_batches=10,\n", + " # limit_val_batches=2,\n", + ")\n", + "trainer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "foreign-munich", + "metadata": {}, + "outputs": [], + "source": [ + "trainer.fit(pl_model, dl_train, dl_valid)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "geological-immune", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "australian-fantasy", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "political-referral", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "suspended-angle", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "structured-plate", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "northern-parcel", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"indirect-savannah", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "det", + "language": "python", + "name": "det" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.9" + }, + "varInspector": { + "cols": { + "lenName": 16, + "lenType": 16, + "lenVar": 40 + }, + "kernels_config": { + "python": { + "delete_cmd_postfix": "", + "delete_cmd_prefix": "del ", + "library": "var_list.py", + "varRefreshCmd": "print(var_dic_list())" + }, + "r": { + "delete_cmd_postfix": ") ", + "delete_cmd_prefix": "rm(", + "library": "var_list.r", + "varRefreshCmd": "cat(var_dic_list()) " + } + }, + "types_to_exclude": [ + "module", + "function", + "builtin_function_or_method", + "instance", + "_Feature" + ], + "window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 00aec8856b88fdf1a4936d450180598e83356908 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Tue, 15 Jun 2021 17:36:44 +0530 Subject: [PATCH 044/122] create common prediction utils for classification --- .../models/multitask/mmdet/prediction.py | 31 +---- .../models/multitask/utils/prediction.py | 110 ++++++++++++++++++ 2 files changed, 113 insertions(+), 28 deletions(-) create mode 100644 icevision/models/multitask/utils/prediction.py diff --git a/icevision/models/multitask/mmdet/prediction.py b/icevision/models/multitask/mmdet/prediction.py index bc8968570..1f0daa108 100644 --- a/icevision/models/multitask/mmdet/prediction.py +++ b/icevision/models/multitask/mmdet/prediction.py @@ -23,33 +23,7 @@ from icevision.models.mmdet.common.utils import * from icevision.models.mmdet.common.bbox.dataloaders import build_infer_batch from icevision.models.mmdet.common.utils import convert_background_from_last_to_zero - - -def finalize_classifier_preds(pred, cfg: dict, record: RecordType, task: str) -> tuple: - """ - Analyse preds post-activations based on `cfg` arguments; return the - relevant scores and string labels derived from `record` - - Can compute the following: - * top-k (`cfg` defaults to 1 for single-label problems) - * filter preds by threshold - """ - - # pred = np.array(pred) - pred = pred.detach().cpu().numpy() - - if cfg.topk is not None: - index = np.argsort(pred)[-cfg.topk :] # argsort gives idxs in ascending order - value = pred[index] - - elif cfg.thresh is not None: - index = np.where(pred > cfg.thresh)[0] # index into the tuple - value = pred[index] - - labels = [getattr(record, task).class_map._id2class[i] for i in index] - scores = pred[index].tolist() - - return labels, scores +from icevision.models.multitask.utils.prediction import finalize_classifier_preds @torch.no_grad() @@ -214,6 +188,7 @@ def convert_raw_prediction( label_ids=keep_labels, class_map=record.detection.class_map ) + # TODO: Refactor with functions from `...multitask.utils.prediction` pred = BaseRecord( [ FilepathRecordComponent(), @@ -236,7 +211,7 @@ def convert_raw_prediction( pred.detection.set_bboxes(keep_bboxes) pred.above_threshold = keep_mask - # TODO: Refactor classification loop into `common` + # TODO: Refactor with functions from `...multitask.utils.prediction` for task, classification_pred in raw_classification_pred.items(): labels, scores = finalize_classifier_preds( pred=classification_pred, diff --git a/icevision/models/multitask/utils/prediction.py 
b/icevision/models/multitask/utils/prediction.py
new file mode 100644
index 000000000..93f1bc465
--- /dev/null
+++ b/icevision/models/multitask/utils/prediction.py
@@ -0,0 +1,110 @@
+from icevision.imports import *
+from icevision.core import *
+from icevision.utils import Dictionary
+from icevision.models.multitask.classification_heads.head import (
+    ImageClassificationHead,
+    ClassifierConfig,
+    TensorDict,
+)
+from icevision.core.tasks import Task
+
+
+# __all__ = ["finalize_classifier_preds"]
+
+
+def finalize_classifier_preds(
+    pred, cfg: Dictionary, record: RecordType, task: str
+) -> tuple:
+    """
+    Analyse activated preds based on `cfg` arguments; return the
+    relevant scores and string labels derived from `record`
+
+    Can compute the following:
+      * top-k (`cfg` defaults to 1 for single-label problems)
+      * filter preds by threshold
+    """
+
+    # pred = np.array(pred)
+    pred = pred.detach().cpu().numpy()
+
+    if cfg.topk is not None:
+        index = np.argsort(pred)[-cfg.topk :]  # argsort gives idxs in ascending order
+        value = pred[index]
+
+    elif cfg.thresh is not None:
+        index = np.where(pred > cfg.thresh)[0]  # index into the tuple
+        value = pred[index]
+
+    labels = [getattr(record, task).class_map._id2class[i] for i in index]
+    scores = pred[index].tolist()
+
+    return labels, scores
+
+
+def extract_classifier_pred_cfgs(model: nn.Module):
+    return {
+        name: Dictionary(multilabel=head.multilabel, topk=head.topk, thresh=head.thresh)
+        for name, head in model.classifier_heads.items()
+    }
+
+
+def add_classification_components_to_pred_record(
+    pred_record: RecordType, classification_configs: dict
+):
+    """
+    Adds `ClassificationLabelsRecordComponent` and `ScoresRecordComponent` to `pred_record`
+    for each task, where the keys of `classification_configs` are the names of the tasks
+
+    Args:
+        pred_record (RecordType)
+        classification_configs (dict)
+
+    Returns:
+        RecordType: The same `pred_record`, with the new components added
+    """
+    r = pred_record
+    for name, cfg in classification_configs.items():
+        r.add_component(ScoresRecordComponent(Task(name=name)))
+        r.add_component(
+            ClassificationLabelsRecordComponent(
+                Task(name=name), is_multilabel=cfg.multilabel
+            )
+        )
+    return r
+
+
+def postprocess_and_add_classification_preds_to_record(
+    gt_record: RecordType,
+    pred_record: RecordType,
+    classification_configs: dict,
+    raw_classification_pred: TensorDict,
+):
+    """
+    Postprocesses predictions based on `classification_configs` and adds the results
+    to `pred_record`. Uses `gt_record` to set the `pred_record`'s class maps
+
+    Args:
+        gt_record (RecordType)
+        pred_record (RecordType)
+
+        classification_configs (dict): A dict that describes how to postprocess raw
+            classification preds. Note that the raw preds are assumed to have already gone
+            through an activation function like Softmax or Sigmoid. For example:
+                dict(
+                    multilabel=False, topk=1, thresh=None
+                )
+
+        raw_classification_pred (TensorDict): Container whose preds will be processed;
+            expected to have the exact same keys as `classification_configs`
+    """
+    for task, classification_pred in raw_classification_pred.items():
+        labels, scores = finalize_classifier_preds(
+            pred=classification_pred,
+            cfg=classification_configs[task],
+            record=gt_record,
+            task=task,
+        )
+        # sub_record = getattr(pred_record, task)
+        getattr(pred_record, task).set_class_map(getattr(gt_record, task).class_map)
+        getattr(pred_record, task).set_labels(labels)
+        getattr(pred_record, task).set_scores(scores)

From 26b6b3034e4c00517fa21bff6b84fc6865cd1cd1 Mon Sep 17 00:00:00 2001
From: Rahul Somani
Date: Tue, 15 Jun 2021 17:36:51 +0530
Subject: [PATCH 045/122] add `unroll_dict`

---
 icevision/utils/utils.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/icevision/utils/utils.py b/icevision/utils/utils.py
index 7d426b582..039c3ae3a 100644
--- a/icevision/utils/utils.py
+++ b/icevision/utils/utils.py
@@ -126,6 +126,30 @@ def flatten(x: Any) -> List[Any]:
     return flattened_list
 
 
+def unroll_dict(x: dict) -> List[dict]:
+    """
+    Unroll a dictionary into a list of dictionaries where the key is repeated.
+    Useful when you want to throw a dictionary into a for loop
+
+    Args:
+        x (dict)
+
+    Returns:
+        List[dict]
+
+    Example:
+        x = dict(
+            location=[[0.8, 0.2], [0.9, 0.1]],
+            lighting=[[0.6, 0.4], [0.2, 0.8]]
+        )
+        unroll_dict(x) == [
+            {"location": [0.8, 0.2], "lighting": [0.6, 0.4]},
+            {"location": [0.9, 0.1], "lighting": [0.2, 0.8]},
+        ]
+    """
+    return [dict(zip(x, t)) for t in zipsafe(*x.values())]
+
+
 class Dictionary(_Dict):
     def __missing__(self, key):
         raise KeyError(key)

From d092e98053234d4762a5470d46e6653762157a42 Mon Sep 17 00:00:00 2001
From: Rahul Somani
Date: Tue, 15 Jun 2021 17:37:10 +0530
Subject: [PATCH 046/122] add yolov5 multitask raw predictions converter

---
 .../ultralytics/yolov5/prediction.py          | 52 +++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/icevision/models/multitask/ultralytics/yolov5/prediction.py b/icevision/models/multitask/ultralytics/yolov5/prediction.py
index e69de29bb..2787792d9 100644
--- a/icevision/models/multitask/ultralytics/yolov5/prediction.py
+++ b/icevision/models/multitask/ultralytics/yolov5/prediction.py
@@ -0,0 +1,52 @@
+"""
+Largely copied over from `icevision.models.ultralytics.yolov5.prediction`, but with
+classification added
+"""
+
+from icevision.utils.utils import unroll_dict
+from icevision.imports import *
+from icevision.utils import *
+from icevision.core import *
+from icevision.data import *
+from icevision.models.utils import _predict_from_dl
+
+# from icevision.models.ultralytics.yolov5.dataloaders import *
+from icevision.models.ultralytics.yolov5.prediction import (
+    convert_raw_predictions as convert_raw_detection_predictions,
+)
+from icevision.models.multitask.utils.prediction import *
+
+
+def convert_raw_predictions(
+    batch,
+    raw_detection_preds: Tensor,
+    raw_classification_preds: TensorDict,
+    records: Sequence[BaseRecord],
+    classification_configs: dict,
+    detection_threshold: float = 0.4,
+    nms_iou_threshold: float = 0.6,
+    keep_images: bool = False,
+):
+    preds = convert_raw_detection_predictions(
+        batch=batch,
+        raw_preds=raw_detection_preds,
+        records=records,
+        detection_threshold=detection_threshold,
+        nms_iou_threshold=nms_iou_threshold,
+        keep_images=keep_images,
+    )
+    for pred, raw_classification_pred in zipsafe(
+        preds, unroll_dict(raw_classification_preds)
+    
): + add_classification_components_to_pred_record( + pred_record=pred.pred, + classification_configs=classification_configs, + ) + postprocess_and_add_classification_preds_to_record( + gt_record=pred.ground_truth, + pred_record=pred.pred, + classification_configs=classification_configs, + raw_classification_pred=raw_classification_pred, + ) + + return preds From b50309bb9d099b9368321f691989b4e54a465676 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Tue, 15 Jun 2021 18:10:03 +0530 Subject: [PATCH 047/122] rename `forward_export` -> `forward_eval`; minor changes --- .../multitask/ultralytics/yolov5/yolo_hybrid.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py b/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py index f30fd3c42..54c34cb2f 100644 --- a/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py +++ b/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py @@ -188,24 +188,27 @@ def build_classifier_heads(self): logger.success(f"Built classifier heads successfully") def forward(self, x, profile=False, step_type=ForwardType.TRAIN): - if step_type is ForwardType.TRAIN or step_type is ForwardType.EVAL: - # Assume that model is set to `.eval()` mode before calling this function + if step_type is ForwardType.TRAIN: return self.forward_once(x=x, profile=profile) + elif step_type is ForwardType.EVAL: + # Assume that model is set to `.eval()` mode before calling this function...? + return self.forward_eval(x) + elif step_type is ForwardType.TRAIN_MULTI_AUG: return self.forward_multi_augment(x=x, profile=profile) elif step_type is ForwardType.EXPORT_COREML: self.train() self.classifier_heads.eval() - return self.forward_export(x=x) + return self.forward_eval(x) elif ( step_type is ForwardType.EXPORT_ONNX or step_type is ForwardType.EXPORT_TORCHSCRIPT ): self.eval() - self.forward_export(x=x) + self.forward_eval(x) else: raise RuntimeError( @@ -220,7 +223,7 @@ def forward_augment(self, x): def forward_multi_augment(self, x: Dict[str, Tensor]): raise NotImplementedError - def forward_export(self, x: Tensor): + def forward_eval(self, x: Tensor): "No nonsense forward method for inference / when exporting the model" y = [] classification_preds: Dict[str, Tensor] = {} From 63385d8651eb0d0429899ef2ec8d49df31aabe8a Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Tue, 15 Jun 2021 18:10:20 +0530 Subject: [PATCH 048/122] add higher level pred funs to yolov5... will this work? 
--- .../ultralytics/yolov5/prediction.py | 80 ++++++++++++++++++- 1 file changed, 79 insertions(+), 1 deletion(-) diff --git a/icevision/models/multitask/ultralytics/yolov5/prediction.py b/icevision/models/multitask/ultralytics/yolov5/prediction.py index 2787792d9..5f7068a68 100644 --- a/icevision/models/multitask/ultralytics/yolov5/prediction.py +++ b/icevision/models/multitask/ultralytics/yolov5/prediction.py @@ -3,6 +3,7 @@ classification added """ +from icevision.models.multitask.utils.model import ForwardType from icevision.utils.utils import unroll_dict from icevision.imports import * from icevision.utils import * @@ -10,13 +11,90 @@ from icevision.data import * from icevision.models.utils import _predict_from_dl -# from icevision.models.ultralytics.yolov5.dataloaders import * +from icevision.models.multitask.ultralytics.yolov5.dataloaders import * from icevision.models.ultralytics.yolov5.prediction import ( convert_raw_predictions as convert_raw_detection_predictions, ) from icevision.models.multitask.utils.prediction import * +@torch.no_grad() +def _predict_batch( + model: nn.Module, + batch: Sequence[Tensor], + records: Sequence[BaseRecord], + detection_threshold: float = 0.25, + nms_iou_threshold: float = 0.45, + keep_images: bool = False, + device: Optional[torch.device] = None, +) -> List[Prediction]: + # device issue addressed on discord: https://discord.com/channels/735877944085446747/770279401791160400/832361687855923250 + if device is not None: + raise ValueError( + "For YOLOv5 device can only be specified during model creation, " + "for more info take a look at the discussion here: " + "https://discord.com/channels/735877944085446747/770279401791160400/832361687855923250" + ) + grid = model.model[-1].grid[-1] + # if `grid.numel() == 1` it means the grid isn't initialized yet and we can't + # trust it's device (will always be CPU) + device = grid.device if grid.numel() > 1 else model_device(model) + + batch = batch[0].to(device) + model = model.eval().to(device) + + (det_preds, _), classif_preds = model(batch, step_type=ForwardType.EVAL) + classification_configs = extract_classifier_pred_cfgs(model) + + return convert_raw_predictions( + batch=batch, + raw_detection_preds=det_preds, + raw_classification_preds=classif_preds, + records=records, + classification_configs=classification_configs, + detection_threshold=detection_threshold, + nms_iou_threshold=nms_iou_threshold, + keep_images=keep_images, + ) + + +def predict( + model: nn.Module, + dataset: Dataset, + detection_threshold: float = 0.25, + nms_iou_threshold: float = 0.45, + keep_images: bool = False, + device: Optional[torch.device] = None, +) -> List[Prediction]: + batch, records = build_infer_batch(dataset) + return _predict_batch( + model=model, + batch=batch, + records=records, + detection_threshold=detection_threshold, + nms_iou_threshold=nms_iou_threshold, + keep_images=keep_images, + device=device, + ) + + +def predict_from_dl( + model: nn.Module, + infer_dl: DataLoader, + show_pbar: bool = True, + keep_images: bool = False, + **predict_kwargs, +): + return _predict_from_dl( + predict_fn=_predict_batch, + model=model, + infer_dl=infer_dl, + show_pbar=show_pbar, + keep_images=keep_images, + **predict_kwargs, + ) + + def convert_raw_predictions( batch, raw_detection_preds: Tensor, From 648cfe75bda5147f6ae60561714577f281b0399f Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Tue, 15 Jun 2021 18:19:53 +0530 Subject: [PATCH 049/122] add validation code. 
lets gooo --- .../yolov5/lightning/model_adapter.py | 28 +++++++++++-------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py b/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py index c6e1f02d6..2c7540dbc 100644 --- a/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py +++ b/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py @@ -2,6 +2,7 @@ # NOTE `torchmetrics` comes installed with `pytorch-lightning` # We could in theory also do `pl.metrics` + from icevision.models.multitask.classification_heads.head import TensorDict import torchmetrics as tm import pytorch_lightning as pl @@ -12,6 +13,10 @@ from loguru import logger from icevision.models.multitask.ultralytics.yolov5.yolo_hybrid import HybridYOLOV5 +from icevision.models.multitask.utils.prediction import * +from icevision.models.multitask.ultralytics.yolov5.prediction import ( + convert_raw_predictions, +) from icevision.models.multitask.utils.model import ForwardType from yolov5.utils.loss import ComputeLoss @@ -91,13 +96,15 @@ def validation_step(self, batch, batch_idx): (inference_det_preds, training_det_preds), classification_preds = self( xb, step_type=ForwardType.EVAL ) - - # Use head.postprocess(classificatio_preds) here - # classification_preds_postprocessed = { - # name: head.postprocess(classification_preds[name]) - # for name, head in self.model.classifier_heads.items() - # } - # preds = convert_raw_predictions(inference_det_preds) + preds = convert_raw_predictions( + batch=xb, + raw_detection_preds=inference_det_preds, + raw_classification_preds=inference_det_preds, + classification_configs=extract_classifier_pred_cfgs(self.model), + detection_threshold=0.001, + nms_iou_threshold=0.6, + keep_images=False, + ) detection_loss = self.compute_loss(training_det_preds, detection_targets)[0] classification_losses = { @@ -109,14 +116,13 @@ def validation_step(self, batch, batch_idx): } total_classification_loss = sum(classification_losses.values()) - # self.accumulate_metrics(preds) + self.accumulate_metrics(preds) self.log_losses( "valid", detection_loss, total_classification_loss, classification_losses ) def validation_epoch_end(self, outs): - pass - # self.finalize_metrics() + self.finalize_metrics() # ======================== LOGGING METHODS ======================== # @@ -131,7 +137,7 @@ def log_losses( detection_loss=detection_loss, classification_total_loss=classification_total_loss, **{ - f"classification_{name}": loss + f"classification_loss_{name}": loss for name, loss in classification_losses.items() }, ) From 4dafd9165db27a7d649749a5a2b74a03a0c38e8e Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Tue, 15 Jun 2021 18:57:29 +0530 Subject: [PATCH 050/122] revert to common fwd method for train/eval mode; fix val loop --- .../yolov5/lightning/model_adapter.py | 26 ++++++++++++------- .../ultralytics/yolov5/prediction.py | 2 +- .../ultralytics/yolov5/yolo_hybrid.py | 13 ++++------ 3 files changed, 23 insertions(+), 18 deletions(-) diff --git a/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py b/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py index 2c7540dbc..86ad410e1 100644 --- a/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py +++ b/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py @@ -93,18 +93,10 @@ def validation_step(self, batch, batch_idx): (xb, detection_targets, 
classification_targets) = batch with torch.no_grad(): + # Get bbox preds and unactivated classifier preds, ready to feed to loss funcs (inference_det_preds, training_det_preds), classification_preds = self( xb, step_type=ForwardType.EVAL ) - preds = convert_raw_predictions( - batch=xb, - raw_detection_preds=inference_det_preds, - raw_classification_preds=inference_det_preds, - classification_configs=extract_classifier_pred_cfgs(self.model), - detection_threshold=0.001, - nms_iou_threshold=0.6, - keep_images=False, - ) detection_loss = self.compute_loss(training_det_preds, detection_targets)[0] classification_losses = { @@ -116,6 +108,22 @@ def validation_step(self, batch, batch_idx): } total_classification_loss = sum(classification_losses.values()) + # Run activation function on classification predictions + classification_preds = { + name: head.postprocess(classification_preds[name]) + for name, head in self.model.classifier_heads.items() + } + + preds = convert_raw_predictions( + batch=xb, + raw_detection_preds=inference_det_preds, + raw_classification_preds=classification_preds, + classification_configs=extract_classifier_pred_cfgs(self.model), + detection_threshold=0.001, + nms_iou_threshold=0.6, + keep_images=False, + ) + self.accumulate_metrics(preds) self.log_losses( "valid", detection_loss, total_classification_loss, classification_losses diff --git a/icevision/models/multitask/ultralytics/yolov5/prediction.py b/icevision/models/multitask/ultralytics/yolov5/prediction.py index 5f7068a68..6abb0b634 100644 --- a/icevision/models/multitask/ultralytics/yolov5/prediction.py +++ b/icevision/models/multitask/ultralytics/yolov5/prediction.py @@ -43,7 +43,7 @@ def _predict_batch( batch = batch[0].to(device) model = model.eval().to(device) - (det_preds, _), classif_preds = model(batch, step_type=ForwardType.EVAL) + (det_preds, _), classif_preds = model(batch, step_type=ForwardType.EXPORT_ONNX) classification_configs = extract_classifier_pred_cfgs(model) return convert_raw_predictions( diff --git a/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py b/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py index 54c34cb2f..2de73b339 100644 --- a/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py +++ b/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py @@ -188,12 +188,9 @@ def build_classifier_heads(self): logger.success(f"Built classifier heads successfully") def forward(self, x, profile=False, step_type=ForwardType.TRAIN): - if step_type is ForwardType.TRAIN: - return self.forward_once(x=x, profile=profile) - - elif step_type is ForwardType.EVAL: + if step_type is ForwardType.TRAIN or step_type is ForwardType.EVAL: # Assume that model is set to `.eval()` mode before calling this function...? 
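# A minimal sketch of the calling contract assumed here, mirroring
# `validation_step` elsewhere in this patch series (`xb` is a batched
# image tensor):
#
#     model.eval()  # the caller, not the model, toggles train/eval mode
#     with torch.no_grad():
#         (inference_det_preds, training_det_preds), classification_preds = model(
#             xb, step_type=ForwardType.EVAL
#         )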
- return self.forward_eval(x) + return self.forward_once(x=x, profile=profile) elif step_type is ForwardType.TRAIN_MULTI_AUG: return self.forward_multi_augment(x=x, profile=profile) @@ -201,14 +198,14 @@ def forward(self, x, profile=False, step_type=ForwardType.TRAIN): elif step_type is ForwardType.EXPORT_COREML: self.train() self.classifier_heads.eval() - return self.forward_eval(x) + return self.forward_inference(x) elif ( step_type is ForwardType.EXPORT_ONNX or step_type is ForwardType.EXPORT_TORCHSCRIPT ): self.eval() - self.forward_eval(x) + self.forward_inference(x) else: raise RuntimeError( @@ -223,7 +220,7 @@ def forward_augment(self, x): def forward_multi_augment(self, x: Dict[str, Tensor]): raise NotImplementedError - def forward_eval(self, x: Tensor): + def forward_inference(self, x: Tensor): "No nonsense forward method for inference / when exporting the model" y = [] classification_preds: Dict[str, Tensor] = {} From c4204ae9ab314597f6b3647d927a7f85476be81a Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Tue, 15 Jun 2021 19:06:40 +0530 Subject: [PATCH 051/122] add todos --- icevision/models/multitask/mmdet/pl_adapter.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/icevision/models/multitask/mmdet/pl_adapter.py b/icevision/models/multitask/mmdet/pl_adapter.py index f5c7d27cd..ff6d01b6d 100644 --- a/icevision/models/multitask/mmdet/pl_adapter.py +++ b/icevision/models/multitask/mmdet/pl_adapter.py @@ -39,6 +39,7 @@ def __init__( self.model = model self.debug = debug + # TODO: Convert to nn.ModuleDict self.classification_metrics = {} for name, head in model.classifier_heads.items(): if head.multilabel: @@ -121,6 +122,8 @@ def convert_raw_predictions(self, batch, raw_preds, records): classification_configs=classification_configs, ) + # TODO rename to `compute_and_log_classification_metrics` + # TODO refactor with dict, zip def log_classification_metrics( self, classification_preds: Dict[str, Tensor], From 6e0e99f58c0754b5d9b92b2e050d6f1496203326 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Tue, 15 Jun 2021 19:06:45 +0530 Subject: [PATCH 052/122] add classification metrics --- .../yolov5/lightning/model_adapter.py | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py b/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py index 86ad410e1..86b774b15 100644 --- a/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py +++ b/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py @@ -133,6 +133,35 @@ def validation_epoch_end(self, outs): self.finalize_metrics() # ======================== LOGGING METHODS ======================== # + def compute_and_log_classification_metrics( + self, + classification_preds: TensorDict, # activated predictions + yb: TensorDict, + on_step: bool = False, + # prefix: str = "valid", + ): + # prefix = f"{prefix}/" if not prefix == "" else "" + prefix = "valid/" + for (name, metric), (_, preds) in zip( + self.classification_metrics.items(), classification_preds.items() + ): + self.log( + f"{prefix}{metric.__class__.__name__.lower()}_{name}", # accuracy_{task_name} + metric(preds, yb.type(torch.int)), + on_step=on_step, + on_epoch=True, + ) + + for name in self.model.classifier_heads.keys(): + # for name, metric in self.classification_metrics.items(): + metric = getattr(self, f"{name}_accuracy") + self.log( + f"{prefix}{metric.__class__.__name__.lower()}__{name}", # accuracy__shot_framing + # 
metric(classification_preds[name], yb_classif[name]), + metric(classification_preds[name], yb_classif[name].type(torch.int)), + on_step=on_step, + on_epoch=True, + ) def log_losses( self, From 71056ae1f7570288f0563dd2f6fe1b464dad460c Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Tue, 15 Jun 2021 19:10:54 +0530 Subject: [PATCH 053/122] bugfix --- .../multitask/ultralytics/yolov5/lightning/model_adapter.py | 1 + 1 file changed, 1 insertion(+) diff --git a/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py b/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py index 86b774b15..0083d9845 100644 --- a/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py +++ b/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py @@ -116,6 +116,7 @@ def validation_step(self, batch, batch_idx): preds = convert_raw_predictions( batch=xb, + records=records, raw_detection_preds=inference_det_preds, raw_classification_preds=classification_preds, classification_configs=extract_classifier_pred_cfgs(self.model), From 08895b3aa262357b68bbd10003e3ca9a5d12d78d Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Tue, 15 Jun 2021 19:14:12 +0530 Subject: [PATCH 054/122] forgot to log metrics.... --- .../multitask/ultralytics/yolov5/lightning/model_adapter.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py b/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py index 0083d9845..cdb7ae2f8 100644 --- a/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py +++ b/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py @@ -107,6 +107,10 @@ def validation_step(self, batch, batch_idx): for name, head in self.model.classifier_heads.items() } total_classification_loss = sum(classification_losses.values()) + self.compute_and_log_classification_metrics( + classification_preds=classification_preds, + yb=classification_targets, + ) # Run activation function on classification predictions classification_preds = { From 701766186617cf2cf26190fb28109ec4de1f7c98 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Tue, 15 Jun 2021 12:14:00 -0400 Subject: [PATCH 055/122] bugfixxxeessss --- .../yolov5/lightning/model_adapter.py | 21 +++++-------------- .../ultralytics/yolov5/yolo_hybrid.py | 2 +- 2 files changed, 6 insertions(+), 17 deletions(-) diff --git a/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py b/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py index cdb7ae2f8..ca869ad8b 100644 --- a/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py +++ b/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py @@ -107,16 +107,16 @@ def validation_step(self, batch, batch_idx): for name, head in self.model.classifier_heads.items() } total_classification_loss = sum(classification_losses.values()) - self.compute_and_log_classification_metrics( - classification_preds=classification_preds, - yb=classification_targets, - ) # Run activation function on classification predictions classification_preds = { name: head.postprocess(classification_preds[name]) for name, head in self.model.classifier_heads.items() } + self.compute_and_log_classification_metrics( + classification_preds=classification_preds, + yb=classification_targets, + ) preds = convert_raw_predictions( batch=xb, @@ -152,18 +152,7 @@ def compute_and_log_classification_metrics( ): self.log( 
f"{prefix}{metric.__class__.__name__.lower()}_{name}", # accuracy_{task_name} - metric(preds, yb.type(torch.int)), - on_step=on_step, - on_epoch=True, - ) - - for name in self.model.classifier_heads.keys(): - # for name, metric in self.classification_metrics.items(): - metric = getattr(self, f"{name}_accuracy") - self.log( - f"{prefix}{metric.__class__.__name__.lower()}__{name}", # accuracy__shot_framing - # metric(classification_preds[name], yb_classif[name]), - metric(classification_preds[name], yb_classif[name].type(torch.int)), + metric(preds, yb[name].type(torch.int)), on_step=on_step, on_epoch=True, ) diff --git a/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py b/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py index 2de73b339..5c80318fd 100644 --- a/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py +++ b/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py @@ -205,7 +205,7 @@ def forward(self, x, profile=False, step_type=ForwardType.TRAIN): or step_type is ForwardType.EXPORT_TORCHSCRIPT ): self.eval() - self.forward_inference(x) + return self.forward_inference(x) else: raise RuntimeError( From 6726b85b74c3b3df144f8d076df438af22be1e8a Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Tue, 15 Jun 2021 12:14:16 -0400 Subject: [PATCH 056/122] successful training example with lightning --- notebooks/multitask.ipynb | 278 ++++++++++++++++---------------------- 1 file changed, 114 insertions(+), 164 deletions(-) diff --git a/notebooks/multitask.ipynb b/notebooks/multitask.ipynb index d3bcc5245..3ca4d0bf7 100644 --- a/notebooks/multitask.ipynb +++ b/notebooks/multitask.ipynb @@ -2,8 +2,8 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, - "id": "married-network", + "execution_count": null, + "id": "9aa70ed4", "metadata": {}, "outputs": [], "source": [ @@ -13,114 +13,49 @@ }, { "cell_type": "code", - "execution_count": 2, - "id": "colored-commercial", + "execution_count": null, + "id": "45afd0ba", "metadata": {}, "outputs": [], "source": [ + "from icevision.imports import *\n", "from icevision.models.multitask.ultralytics.yolov5 import *\n", "from icevision.data.data_splitter import *\n", "from icevision.visualize import *\n", - "from icevision.imports import *\n", + "from icevision.metrics import *\n", + "\n", "import icedata.datasets.exdark_trimmed as exdark" ] }, { "cell_type": "code", - "execution_count": 3, - "id": "mobile-enterprise", + "execution_count": null, + "id": "718faafa", "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "dc584f4264b2495aa1a62060ba044b31", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/4626 [00:00)\u001b[0m | \u001b[36micevision.utils.logger_utils\u001b[0m:\u001b[36mautofix_log\u001b[0m:\u001b[36m17\u001b[0m\n", - "\u001b[32m\u001b[1mAUTOFIX-SUCCESS\u001b[0m\u001b[32m\u001b[0m - \u001b[32m\u001b[1m\u001b[31m(record_id: 512)\u001b[0m\u001b[32m\u001b[1m\u001b[0m\u001b[32m - Clipping bbox xmax from 501 to image width 500 (Before: )\u001b[0m | \u001b[36micevision.utils.logger_utils\u001b[0m:\u001b[36mautofix_log\u001b[0m:\u001b[36m17\u001b[0m\n", - "\u001b[32m\u001b[1mAUTOFIX-SUCCESS\u001b[0m\u001b[32m\u001b[0m - \u001b[32m\u001b[1m\u001b[31m(record_id: 1237)\u001b[0m\u001b[32m\u001b[1m\u001b[0m\u001b[32m - Clipping bbox ymax from 536 to image height 528 (Before: )\u001b[0m | \u001b[36micevision.utils.logger_utils\u001b[0m:\u001b[36mautofix_log\u001b[0m:\u001b[36m17\u001b[0m\n", - 
"\u001b[32m\u001b[1mAUTOFIX-SUCCESS\u001b[0m\u001b[32m\u001b[0m - \u001b[32m\u001b[1m\u001b[31m(record_id: 2063)\u001b[0m\u001b[32m\u001b[1m\u001b[0m\u001b[32m - Clipping bbox ymax from 801 to image height 800 (Before: )\u001b[0m | \u001b[36micevision.utils.logger_utils\u001b[0m:\u001b[36mautofix_log\u001b[0m:\u001b[36m17\u001b[0m\n", - "\u001b[32m\u001b[1mAUTOFIX-SUCCESS\u001b[0m\u001b[32m\u001b[0m - \u001b[32m\u001b[1m\u001b[31m(record_id: 323)\u001b[0m\u001b[32m\u001b[1m\u001b[0m\u001b[32m - Clipping bbox xmax from 501 to image width 500 (Before: )\u001b[0m | \u001b[36micevision.utils.logger_utils\u001b[0m:\u001b[36mautofix_log\u001b[0m:\u001b[36m17\u001b[0m\n", - "\u001b[32m\u001b[1mAUTOFIX-SUCCESS\u001b[0m\u001b[32m\u001b[0m - \u001b[32m\u001b[1m\u001b[31m(record_id: 199)\u001b[0m\u001b[32m\u001b[1m\u001b[0m\u001b[32m - Clipping bbox ymax from 481 to image height 480 (Before: )\u001b[0m | \u001b[36micevision.utils.logger_utils\u001b[0m:\u001b[36mautofix_log\u001b[0m:\u001b[36m17\u001b[0m\n", - "\u001b[32m\u001b[1mAUTOFIX-SUCCESS\u001b[0m\u001b[32m\u001b[0m - \u001b[32m\u001b[1m\u001b[31m(record_id: 919)\u001b[0m\u001b[32m\u001b[1m\u001b[0m\u001b[32m - Clipping bbox ymax from 453 to image height 450 (Before: )\u001b[0m | \u001b[36micevision.utils.logger_utils\u001b[0m:\u001b[36mautofix_log\u001b[0m:\u001b[36m17\u001b[0m\n", - "\u001b[32m\u001b[1mAUTOFIX-SUCCESS\u001b[0m\u001b[32m\u001b[0m - \u001b[32m\u001b[1m\u001b[31m(record_id: 361)\u001b[0m\u001b[32m\u001b[1m\u001b[0m\u001b[32m - Clipping bbox ymax from 428 to image height 427 (Before: )\u001b[0m | \u001b[36micevision.utils.logger_utils\u001b[0m:\u001b[36mautofix_log\u001b[0m:\u001b[36m17\u001b[0m\n", - "\u001b[32m\u001b[1mAUTOFIX-SUCCESS\u001b[0m\u001b[32m\u001b[0m - \u001b[32m\u001b[1m\u001b[31m(record_id: 761)\u001b[0m\u001b[32m\u001b[1m\u001b[0m\u001b[32m - Clipping bbox ymax from 694 to image height 679 (Before: )\u001b[0m | \u001b[36micevision.utils.logger_utils\u001b[0m:\u001b[36mautofix_log\u001b[0m:\u001b[36m17\u001b[0m\n", - "\u001b[32m\u001b[1mAUTOFIX-SUCCESS\u001b[0m\u001b[32m\u001b[0m - \u001b[32m\u001b[1m\u001b[31m(record_id: 2048)\u001b[0m\u001b[32m\u001b[1m\u001b[0m\u001b[32m - Clipping bbox xmax from 1028 to image width 1024 (Before: )\u001b[0m | \u001b[36micevision.utils.logger_utils\u001b[0m:\u001b[36mautofix_log\u001b[0m:\u001b[36m17\u001b[0m\n", - "\u001b[32m\u001b[1mAUTOFIX-SUCCESS\u001b[0m\u001b[32m\u001b[0m - \u001b[32m\u001b[1m\u001b[31m(record_id: 2298)\u001b[0m\u001b[32m\u001b[1m\u001b[0m\u001b[32m - Clipping bbox xmax from 1281 to image width 1280 (Before: )\u001b[0m | \u001b[36micevision.utils.logger_utils\u001b[0m:\u001b[36mautofix_log\u001b[0m:\u001b[36m17\u001b[0m\n", - "\u001b[32m\u001b[1mAUTOFIX-SUCCESS\u001b[0m\u001b[32m\u001b[0m - \u001b[32m\u001b[1m\u001b[31m(record_id: 976)\u001b[0m\u001b[32m\u001b[1m\u001b[0m\u001b[32m - Clipping bbox ymax from 376 to image height 375 (Before: )\u001b[0m | \u001b[36micevision.utils.logger_utils\u001b[0m:\u001b[36mautofix_log\u001b[0m:\u001b[36m17\u001b[0m\n", - "\u001b[32m\u001b[1mAUTOFIX-SUCCESS\u001b[0m\u001b[32m\u001b[0m - \u001b[32m\u001b[1m\u001b[31m(record_id: 180)\u001b[0m\u001b[32m\u001b[1m\u001b[0m\u001b[32m - Clipping bbox xmax from 641 to image width 640 (Before: )\u001b[0m | \u001b[36micevision.utils.logger_utils\u001b[0m:\u001b[36mautofix_log\u001b[0m:\u001b[36m17\u001b[0m\n", - "\u001b[32m\u001b[1mAUTOFIX-SUCCESS\u001b[0m\u001b[32m\u001b[0m - \u001b[32m\u001b[1m\u001b[31m(record_id: 896)\u001b[0m\u001b[32m\u001b[1m\u001b[0m\u001b[32m - Clipping 
bbox ymax from 775 to image height 774 (Before: )\u001b[0m | \u001b[36micevision.utils.logger_utils\u001b[0m:\u001b[36mautofix_log\u001b[0m:\u001b[36m17\u001b[0m\n", - "\u001b[32m\u001b[1mAUTOFIX-SUCCESS\u001b[0m\u001b[32m\u001b[0m - \u001b[32m\u001b[1m\u001b[31m(record_id: 2082)\u001b[0m\u001b[32m\u001b[1m\u001b[0m\u001b[32m - Clipping bbox xmin from -1 to 0 (Before: )\u001b[0m | \u001b[36micevision.utils.logger_utils\u001b[0m:\u001b[36mautofix_log\u001b[0m:\u001b[36m17\u001b[0m\n", - "\u001b[32m\u001b[1mAUTOFIX-SUCCESS\u001b[0m\u001b[32m\u001b[0m - \u001b[32m\u001b[1m\u001b[31m(record_id: 2138)\u001b[0m\u001b[32m\u001b[1m\u001b[0m\u001b[32m - Clipping bbox xmax from 1030 to image width 1024 (Before: )\u001b[0m | \u001b[36micevision.utils.logger_utils\u001b[0m:\u001b[36mautofix_log\u001b[0m:\u001b[36m17\u001b[0m\n", - "\u001b[32m\u001b[1mAUTOFIX-SUCCESS\u001b[0m\u001b[32m\u001b[0m - \u001b[32m\u001b[1m\u001b[31m(record_id: 1102)\u001b[0m\u001b[32m\u001b[1m\u001b[0m\u001b[32m - Clipping bbox xmin from -2 to 0 (Before: )\u001b[0m | \u001b[36micevision.utils.logger_utils\u001b[0m:\u001b[36mautofix_log\u001b[0m:\u001b[36m17\u001b[0m\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "772188a3021642ee98e755a529a5f8fb", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/463 [00:00)\u001b[0m | \u001b[36micevision.utils.logger_utils\u001b[0m:\u001b[36mautofix_log\u001b[0m:\u001b[36m17\u001b[0m\n", - "\u001b[32m\u001b[1mAUTOFIX-SUCCESS\u001b[0m\u001b[32m\u001b[0m - \u001b[32m\u001b[1m\u001b[31m(record_id: 2098)\u001b[0m\u001b[32m\u001b[1m\u001b[0m\u001b[32m - Clipping bbox xmax from 501 to image width 500 (Before: )\u001b[0m | \u001b[36micevision.utils.logger_utils\u001b[0m:\u001b[36mautofix_log\u001b[0m:\u001b[36m17\u001b[0m\n", - "\u001b[32m\u001b[1mAUTOFIX-SUCCESS\u001b[0m\u001b[32m\u001b[0m - \u001b[32m\u001b[1m\u001b[31m(record_id: 2098)\u001b[0m\u001b[32m\u001b[1m\u001b[0m\u001b[32m - Clipping bbox xmax from 501 to image width 500 (Before: )\u001b[0m | \u001b[36micevision.utils.logger_utils\u001b[0m:\u001b[36mautofix_log\u001b[0m:\u001b[36m17\u001b[0m\n", - "\u001b[32m\u001b[1mAUTOFIX-SUCCESS\u001b[0m\u001b[32m\u001b[0m - \u001b[32m\u001b[1m\u001b[31m(record_id: 93)\u001b[0m\u001b[32m\u001b[1m\u001b[0m\u001b[32m - Clipping bbox ymax from 376 to image height 375 (Before: )\u001b[0m | \u001b[36micevision.utils.logger_utils\u001b[0m:\u001b[36mautofix_log\u001b[0m:\u001b[36m17\u001b[0m\n", - "\u001b[32m\u001b[1mAUTOFIX-SUCCESS\u001b[0m\u001b[32m\u001b[0m - \u001b[32m\u001b[1m\u001b[31m(record_id: 971)\u001b[0m\u001b[32m\u001b[1m\u001b[0m\u001b[32m - Clipping bbox ymax from 376 to image height 375 (Before: )\u001b[0m | \u001b[36micevision.utils.logger_utils\u001b[0m:\u001b[36mautofix_log\u001b[0m:\u001b[36m17\u001b[0m\n", - "\u001b[32m\u001b[1mAUTOFIX-SUCCESS\u001b[0m\u001b[32m\u001b[0m - \u001b[32m\u001b[1m\u001b[31m(record_id: 2182)\u001b[0m\u001b[32m\u001b[1m\u001b[0m\u001b[32m - Clipping bbox xmax from 501 to image width 500 (Before: )\u001b[0m | \u001b[36micevision.utils.logger_utils\u001b[0m:\u001b[36mautofix_log\u001b[0m:\u001b[36m17\u001b[0m\n" - ] - } - ], + "outputs": [], "source": [ - "IMG_SIZE=384\n", - "# data_dir = exdark.load_data()\n", - "data_dir = Path(\"/Users/rahulsomani/datasets/ExDark-Trimmed/\")\n", + "IMG_SIZE=512\n", + "data_dir = exdark.load_data()\n", "parser = exdark.parser(data_dir)\n", + "\n", + "train_records, valid_records = parser.parse(data_splitter=RandomSplitter([0.8, 0.2]))\n", + "train_tfms = 
tfms.A.Adapter(\n", + " [\n", + " *tfms.A.aug_tfms(size=IMG_SIZE, lightning=None),\n", + " tfms.A.Normalize(),\n", + " ]\n", + ")\n", + "valid_tfms = tfms.A.Adapter([*tfms.A.resize_and_pad(IMG_SIZE), tfms.A.Normalize()])\n", + "\n", + "train_ds = Dataset(train_records, tfm=train_tfms)\n", + "valid_ds = Dataset(valid_records, tfm=valid_tfms)" + ] + }, + { + "cell_type": "raw", + "id": "eaf8842c", + "metadata": {}, + "source": [ "train_ds, valid_ds = exdark.dataset(\n", " data_dir = data_dir,\n", " size = IMG_SIZE,\n", @@ -130,36 +65,24 @@ }, { "cell_type": "code", - "execution_count": 4, - "id": "aggregate-protocol", + "execution_count": null, + "id": "cdfcbe51", "metadata": {}, "outputs": [], "source": [ - "dl_train = train_dl(train_ds)\n", - "dl_valid = valid_dl(valid_ds)" + "dl_train = train_dl(train_ds, batch_size=32)\n", + "dl_valid = valid_dl(valid_ds, batch_size=64)" ] }, { "cell_type": "code", - "execution_count": 5, - "id": "textile-reproduction", + "execution_count": null, + "id": "8a9171f9", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[33m\u001b[1m\u001b[1mWARNING \u001b[0m\u001b[33m\u001b[1m\u001b[0m - \u001b[33m\u001b[1mIncompatible `num_fpn_features=512` detected in task 'lighting'. Replacing with the correct dimensions: 896\u001b[0m | \u001b[36micevision.models.multitask.ultralytics.yolov5.yolo_hybrid\u001b[0m:\u001b[36mbuild_classifier_heads\u001b[0m:\u001b[36m180\u001b[0m\n", - "\u001b[33m\u001b[1m\u001b[1mWARNING \u001b[0m\u001b[33m\u001b[1m\u001b[0m - \u001b[33m\u001b[1mIncompatible `num_fpn_features=512` detected in task 'location'. Replacing with the correct dimensions: 896\u001b[0m | \u001b[36micevision.models.multitask.ultralytics.yolov5.yolo_hybrid\u001b[0m:\u001b[36mbuild_classifier_heads\u001b[0m:\u001b[36m180\u001b[0m\n", - "\u001b[32m\u001b[1m\u001b[1mSUCCESS \u001b[0m\u001b[32m\u001b[1m\u001b[0m - \u001b[32m\u001b[1mBuilt classifier heads successfully\u001b[0m | \u001b[36micevision.models.multitask.ultralytics.yolov5.yolo_hybrid\u001b[0m:\u001b[36mbuild_classifier_heads\u001b[0m:\u001b[36m188\u001b[0m\n", - "\u001b[1m\u001b[1mINFO \u001b[0m\u001b[1m\u001b[0m - \u001b[1mOverriding model.yaml nc=80 with nc=12\u001b[0m | \u001b[36micevision.models.multitask.ultralytics.yolov5.yolo_hybrid\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m119\u001b[0m\n", - "\u001b[32m\u001b[1m\u001b[1mSUCCESS \u001b[0m\u001b[32m\u001b[1m\u001b[0m - \u001b[32m\u001b[1mBuilt *yolov5s* model successfully\u001b[0m | \u001b[36micevision.models.multitask.ultralytics.yolov5.yolo_hybrid\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m150\u001b[0m\n" - ] - } - ], + "outputs": [], "source": [ "hybrid_model = model(\n", - " backbone=backbones.small(),\n", + " backbone=backbones.large(pretrained=True),\n", " num_detection_classes=len(parser.CLASS_MAPS['detection']),\n", " classifier_configs={\n", " name: ClassifierConfig(out_classes=len(cm))\n", @@ -171,8 +94,8 @@ }, { "cell_type": "code", - "execution_count": 6, - "id": "noticed-variation", + "execution_count": null, + "id": "25f8d008", "metadata": {}, "outputs": [], "source": [ @@ -181,53 +104,29 @@ "\n", "class LightModel(lightning.HybridYOLOV5LightningAdapter):\n", " def configure_optimizers(self):\n", - " return optim.Adam(self.parameters(), lr=3e-4)\n", + " return optim.Adam(self.parameters(), lr=1e-4)\n", "\n", "pl_model = LightModel(\n", " model=hybrid_model,\n", - " metrics=None,\n", + " metrics=[COCOMetric(metric_type=COCOMetricType.bbox)],\n", ")" ] }, { "cell_type": "code", - 
"execution_count": 9, - "id": "reflected-section", + "execution_count": null, + "id": "317995e3", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "GPU available: False, used: False\n", - "TPU available: False, using: 0 TPU cores\n" - ] - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "trainer = pl.Trainer(max_epochs=3, gpus=[0])\n", - "trainer = pl.Trainer(\n", - " max_epochs=3,\n", - " # limit_train_batches=10,\n", - " # limit_val_batches=2,\n", - ")\n", + "trainer = pl.Trainer(max_epochs=20, gpus=[0])\n", "trainer" ] }, { "cell_type": "code", "execution_count": null, - "id": "foreign-munich", + "id": "eeb20f86", "metadata": {}, "outputs": [], "source": [ @@ -237,55 +136,106 @@ { "cell_type": "code", "execution_count": null, - "id": "geological-immune", + "id": "ad93fcdd", "metadata": {}, "outputs": [], "source": [] }, + { + "cell_type": "markdown", + "id": "ce718acc", + "metadata": {}, + "source": [ + "---" + ] + }, { "cell_type": "code", "execution_count": null, - "id": "australian-fantasy", + "id": "c1d7c7e9", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "from icevision.models.multitask.ultralytics.yolov5.prediction import *" + ] }, { "cell_type": "code", "execution_count": null, - "id": "political-referral", + "id": "a5ff993a", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "valid_ds = Dataset(valid_records[:20], tfm=valid_tfms)" + ] }, { "cell_type": "code", "execution_count": null, - "id": "suspended-angle", + "id": "4ed84e62", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "preds = predict(\n", + " model=pl_model.model,\n", + " dataset=valid_ds,\n", + " detection_threshold=0.4,\n", + " keep_images=True,\n", + ")" + ] }, { "cell_type": "code", "execution_count": null, - "id": "structured-plate", + "id": "0782f8f9", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "draw_sample = partial(draw_sample, denormalize_fn=denormalize_imagenet, return_as_pil_img=True)" + ] }, { "cell_type": "code", "execution_count": null, - "id": "northern-parcel", + "id": "6f2a8ec7", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "import fastcore.all as fastcore\n", + "import PIL\n", + "import PIL.Image\n", + "\n", + "@fastcore.patch\n", + "def __or__(self: PIL.Image.Image, other: PIL.Image.Image):\n", + " \"Horizontally stack two PIL Images\"\n", + " assert isinstance(other, PIL.Image.Image)\n", + " widths, heights = zip(*(i.size for i in [self, other]))\n", + "\n", + " new_img = PIL.Image.new(\"RGB\", (sum(widths), max(heights)))\n", + " x_offset = 0\n", + " for img in [self, other]:\n", + " new_img.paste(img, (x_offset, 0))\n", + " x_offset += img.size[0]\n", + " return new_img" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5f826796", + "metadata": {}, + "outputs": [], + "source": [ + "pred = preds[19]\n", + "p, gt = pred.pred, pred.ground_truth\n", + "\n", + "draw_sample(gt) | draw_sample(p)" + ] }, { "cell_type": "code", "execution_count": null, - "id": "indirect-savannah", + "id": "f7c50992", "metadata": {}, "outputs": [], "source": [] @@ -293,9 +243,9 @@ ], "metadata": { "kernelspec": { - "display_name": "det", + "display_name": "Python 3", "language": "python", - "name": "det" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -307,7 +257,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": 
"ipython3", - "version": "3.7.9" + "version": "3.8.8" }, "varInspector": { "cols": { From 3f62a73a93c6f0dbd177abefa1bf11b27e4835d2 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Tue, 15 Jun 2021 22:06:24 +0530 Subject: [PATCH 057/122] minor polishing --- .../models/multitask/mmdet/pl_adapter.py | 47 +++++++++---------- .../models/multitask/mmdet/single_stage.py | 29 +++++++----- 2 files changed, 40 insertions(+), 36 deletions(-) diff --git a/icevision/models/multitask/mmdet/pl_adapter.py b/icevision/models/multitask/mmdet/pl_adapter.py index ff6d01b6d..525a77bcb 100644 --- a/icevision/models/multitask/mmdet/pl_adapter.py +++ b/icevision/models/multitask/mmdet/pl_adapter.py @@ -7,8 +7,12 @@ from icevision.all import * from mmcv.utils import ConfigDict from loguru import logger -from icevision.models.multitask.mmdet.single_stage import ForwardType +from icevision.models.multitask.mmdet.single_stage import ( + ForwardType, + HybridSingleStageDetector, +) from icevision.models.multitask.mmdet.prediction import * +from icevision.models.multitask.utils.dtypes import * __all__ = ["HybridSingleStageDetectorLightningAdapter"] @@ -30,7 +34,7 @@ class HybridSingleStageDetectorLightningAdapter(pl.LightningModule, ABC): def __init__( self, - model: nn.Module, + model: HybridSingleStageDetector, metrics: List[Metric] = None, debug: bool = False, ): @@ -39,15 +43,14 @@ def __init__( self.model = model self.debug = debug - # TODO: Convert to nn.ModuleDict - self.classification_metrics = {} + self.classification_metrics = nn.ModuleDict() for name, head in model.classifier_heads.items(): if head.multilabel: thresh = head.thresh if head.thresh is not None else 0.5 metric = tm.Accuracy(threshold=thresh, subset_accuracy=True) else: metric = tm.Accuracy(threshold=0.01, top_k=1) - setattr(self, f"{name}_accuracy", metric) + self.classification_metrics[name] = metric self.post_init() def post_init(self): @@ -76,8 +79,6 @@ def training_step(self, batch: Tuple[dict, Sequence[RecordType]], batch_idx): # Log losses self._log_vars(outputs["log_vars"], "train") - # NOTE: outputs["loss"] is not scaled in distributed training... ? - # Maybe we should return `outputs["log_vars"]["loss"]` instead? return outputs["loss"] def validation_step(self, batch, batch_idx): @@ -91,16 +92,16 @@ def validation_step(self, batch, batch_idx): # get losses outputs = self.model.train_step(data=data, step_type=ForwardType.TRAIN) raw_preds = self.model(data=data, forward_type=ForwardType.EVAL) - self.log_classification_metrics( + self.compute_and_log_classification_metrics( classification_preds=raw_preds["classification_results"], - yb_classif=data["gt_classification_labels"], + yb=data["gt_classification_labels"], ) preds = self.convert_raw_predictions( batch=data, raw_preds=raw_preds, records=records ) self.accumulate_metrics(preds) - self._log_vars(outputs["log_vars"], "valid") + self.log_losses(outputs["log_vars"], "valid") # TODO: is train and eval model automatically set by lighnting? 
self.model.train() @@ -122,28 +123,26 @@ def convert_raw_predictions(self, batch, raw_preds, records): classification_configs=classification_configs, ) - # TODO rename to `compute_and_log_classification_metrics` - # TODO refactor with dict, zip - def log_classification_metrics( + def compute_and_log_classification_metrics( self, - classification_preds: Dict[str, Tensor], - yb_classif: Dict[str, Tensor], + classification_preds: TensorDict, # activated predictions + yb: TensorDict, on_step: bool = False, - prefix: str = "valid", + # prefix: str = "valid", ): - prefix = f"{prefix}_" if not prefix == "" else "" - for name in self.model.classifier_heads.keys(): - # for name, metric in self.classification_metrics.items(): - metric = getattr(self, f"{name}_accuracy") + # prefix = f"{prefix}/" if not prefix == "" else "" + prefix = "valid/" + for (name, metric), (_, preds) in zip( + self.classification_metrics.items(), classification_preds.items() + ): self.log( - f"{prefix}{metric.__class__.__name__.lower()}__{name}", # accuracy__shot_framing - # metric(classification_preds[name], yb_classif[name]), - metric(classification_preds[name], yb_classif[name].type(torch.int)), + f"{prefix}{metric.__class__.__name__.lower()}_{name}", # accuracy_{task_name} + metric(preds, yb[name].type(torch.int)), on_step=on_step, on_epoch=True, ) - def _log_vars(self, log_vars: dict, mode: str): + def log_losses(self, log_vars: dict, mode: str): for k, v in log_vars.items(): self.log(f"{mode}/{k}", v.item() if isinstance(v, torch.Tensor) else v) diff --git a/icevision/models/multitask/mmdet/single_stage.py b/icevision/models/multitask/mmdet/single_stage.py index fd1c0f946..e496b4a15 100644 --- a/icevision/models/multitask/mmdet/single_stage.py +++ b/icevision/models/multitask/mmdet/single_stage.py @@ -14,7 +14,7 @@ from mmdet.models.builder import build_backbone, build_detector, build_head, build_neck from mmdet.models.detectors.single_stage import SingleStageDetector from mmdet.core.bbox import * -from typing import Union, List, Dict, Tuple +from typing import Union, List, Dict, Tuple, Optional from icevision.models.multitask.mmdet.dataloaders import ( TensorDict, @@ -44,12 +44,12 @@ def __init__( backbone: Union[dict, ConfigDict], neck: Union[dict, ConfigDict], bbox_head: Union[dict, ConfigDict], - classification_heads: Optional[dict, ConfigDict] = None, + classification_heads: Union[None, dict, ConfigDict] = None, # keypoint_heads=None, # TODO Someday SOON. 
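# The `Optional[dict, ConfigDict]` annotations replaced below are a real bug,
# not just polish: `typing.Optional` accepts exactly one parameter, so
# evaluating the old signatures raises a TypeError at import time. The correct
# spelling for "dict, ConfigDict, or None" is:
#
#     train_cfg: Union[None, dict, ConfigDict] = None
#     # equivalent to Optional[Union[dict, ConfigDict]]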
- train_cfg: Optional[dict, ConfigDict] = None, - test_cfg: Optional[dict, ConfigDict] = None, + train_cfg: Union[None, dict, ConfigDict] = None, + test_cfg: Union[None, dict, ConfigDict] = None, pretrained=None, - init_cfg: Optional[dict, ConfigDict] = None, + init_cfg: Union[None, dict, ConfigDict] = None, ): super(HybridSingleStageDetector, self).__init__( # Use `init_cfg` post mmdet 2.12 @@ -83,7 +83,7 @@ def train_step( * `log_vars` : variables to be logged * `num_samples` : batch size per GPU when using DDP """ - losses = self(data=data, forward_type=step_type) + losses = self(data=data, step_type=step_type) loss, log_vars = self._parse_losses(losses) outputs = dict( @@ -96,21 +96,26 @@ def train_step( return outputs # @auto_fp16(apply_to=("img",)) - def forward(self, data: dict, forward_type: ForwardType): + def forward(self, data: dict, step_type: ForwardType): """ Calls either `self.forward_train`, `self.forward_eval` or - `self.forward_multi_aug_train` depending on the value of `forward_type` + `self.forward_multi_aug_train` depending on the value of `step_type` No TTA supported unlike all other mmdet models """ - if forward_type.value == ForwardType.TRAIN_MULTI_AUG.value: + if step_type is ForwardType.TRAIN_MULTI_AUG: return self.forward_multi_aug_train(data) - elif forward_type.value == ForwardType.TRAIN.value: + + elif step_type is ForwardType.TRAIN: return self.forward_train(data, gt_bboxes_ignore=None) - elif forward_type.value == ForwardType.EVAL.value: + + elif step_type is ForwardType.EVAL: return self.forward_eval(data, rescale=False) + else: - raise ValueError(f"{type(ForwardType)}, {type(forward_type)}") + raise RuntimeError( + f"Invalid `step_type`. Received: {type(step_type.__class__)}; Expected: {ForwardType.__class__}" + ) fwd_multi_aug_train_data_keys = ["detection", "classification"] fwd_train_data_keys = [ From 8eb9cff71a2c507464cd1e1e8d25af6a0522f4ab Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Wed, 16 Jun 2021 11:24:01 +0530 Subject: [PATCH 058/122] add tensortuple dtype --- icevision/models/multitask/utils/dtypes.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/icevision/models/multitask/utils/dtypes.py b/icevision/models/multitask/utils/dtypes.py index ecb25432d..da4012ec8 100644 --- a/icevision/models/multitask/utils/dtypes.py +++ b/icevision/models/multitask/utils/dtypes.py @@ -3,11 +3,19 @@ import numpy as np import torch -__all__ = ["ImgMetadataDict", "TensorList", "TensorDict", "ArrayList", "ArrayDict"] +__all__ = [ + "ImgMetadataDict", + "TensorList", + "TensorTuple", + "TensorDict", + "ArrayList", + "ArrayDict", +] ImgMetadataDict = Dict[str, Union[Tuple[int], np.ndarray]] TensorList = List[Tensor] TensorDict = Dict[str, Tensor] +TensorTuple = Tuple[Tensor] ArrayList = List[np.ndarray] ArrayDict = Dict[str, np.ndarray] From 5d12378a539c242b74041e368457ede37b1532fb Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Wed, 16 Jun 2021 11:25:53 +0530 Subject: [PATCH 059/122] * modularise `forward` to skip classif / detection specific parts of the forward pass * simplify export forward method, return correct output types * some type annos, docstrings --- .../ultralytics/yolov5/yolo_hybrid.py | 76 +++++++++++++------ 1 file changed, 54 insertions(+), 22 deletions(-) diff --git a/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py b/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py index 5c80318fd..424e17dfe 100644 --- a/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py +++ 
b/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py @@ -89,7 +89,6 @@ class HybridYOLOV5(nn.Module): _print_biases = Model._print_biases autoshape = Model.autoshape info = Model.info - in_export_mode = False def __init__( self, @@ -154,9 +153,6 @@ def __init__( def post_init(self): pass - def set_export_mode(self, mode: bool): - self.in_export_mode = mode - def build_classifier_heads(self): """ Description: @@ -187,10 +183,23 @@ def build_classifier_heads(self): ) logger.success(f"Built classifier heads successfully") - def forward(self, x, profile=False, step_type=ForwardType.TRAIN): + def forward( + self, + x: Tensor, + profile=False, + forward_detection: bool = True, + forward_classification: bool = True, + step_type=ForwardType.TRAIN, + ) -> Tuple[Union[Tensor, TensorList], TensorDict]: + if step_type is ForwardType.TRAIN or step_type is ForwardType.EVAL: # Assume that model is set to `.eval()` mode before calling this function...? - return self.forward_once(x=x, profile=profile) + return self.forward_once( + x, + profile=profile, + forward_detection=forward_detection, + forward_classification=forward_classification, + ) elif step_type is ForwardType.TRAIN_MULTI_AUG: return self.forward_multi_augment(x=x, profile=profile) @@ -217,11 +226,26 @@ def forward_augment(self, x): raise NotImplementedError # TODO: multi-task multi-augmentation training - def forward_multi_augment(self, x: Dict[str, Tensor]): + def forward_multi_augment(self, x: TensorDict) -> Tuple[TensorList, TensorDict]: raise NotImplementedError - def forward_inference(self, x: Tensor): - "No nonsense forward method for inference / when exporting the model" + def forward_inference( + self, x: Tensor + ) -> Tuple[Union[Tensor, TensorList], TensorTuple]: + """ + No nonsense method for inference / exporting a model. Returns ONNX / CoreML / + TorchScript friendly outputs. + + Args: + x (Tensor): Input (N,C,H,W) tensor + + Returns: + Tuple[Union[Tensor, TensorList], TensorTuple]: A tuple of two elements - + `(detection_preds, classification_preds)` + 1) `detection_preds`: A TensorList if in training mode, else a Tuple[Tensor, TensorList] + where the first element is the inference output and the second the training output + 2) `classification_preds`: A TensorTuple of all the classification heads' predictions + """ y = [] classification_preds: Dict[str, Tensor] = {} for m in self.model: @@ -239,18 +263,25 @@ def forward_inference(self, x: Tensor): x = m(x) y.append(x if m.i in self.save else None) # save output - return x, classification_preds + return x, tuple(classification_preds.values()) def forward_once( - self, x, profile=False + self, + x, + profile=False, # Will fail + forward_detection: bool = True, + forward_classification: bool = True, ) -> Tuple[Union[TensorList, Tuple[Tensor, TensorList]], TensorDict]: """ Returns: - A tuple of 2 elements: + A tuple of two elements `(detection_preds, classification_preds)`: 1) A TensorList in training mode, and a Tuple[Tensor, TensorList] in eval mode where the first element (Tensor) is the inference output and - second is the training output (for loss computation) - 2) A TensorDict of classification predictions + second is the training output (for loss computation). If `forward_detection` is + False, the list of FPN features are returned right before feeding into the bbox + head i.e. the `Detect` module which can be accessed via `self.model[-1]` + 2) A TensorDict of classification predictions. 
If `forward_classification` is + False, an empty dictionary is returned """ y, dt = [], [] # outputs classification_preds: Dict[str, Tensor] = {} @@ -286,8 +317,14 @@ def forward_once( safe to do. """ if isinstance(m, Detect): - for name, head in self.classifier_heads.items(): - classification_preds[name] = head(x) + if forward_classification: + for name, head in self.classifier_heads.items(): + classification_preds[name] = head(x) + + if not forward_detection: + if profile: + logger.info("%.1fms total" % sum(dt)) + return x, classification_preds x = m(x) # run y.append(x if m.i in self.save else None) # save output @@ -295,9 +332,4 @@ def forward_once( if profile: logger.info("%.1fms total" % sum(dt)) - # TODO: Replace with `torch.jit.is_scripting()` if that works for tracing too - if self.in_export_mode: - # Return tuple in export mode - return x, tuple(classification_preds.values()) - else: - return x, classification_preds + return x, classification_preds From a09f981ad4009b6f6b3c5ee4748ee80237d4c533 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Wed, 16 Jun 2021 13:33:50 +0530 Subject: [PATCH 060/122] multi aug forward for yolov5 --- .../yolov5/lightning/model_adapter.py | 15 ++--- .../ultralytics/yolov5/yolo_hybrid.py | 62 +++++++++++++++++-- 2 files changed, 65 insertions(+), 12 deletions(-) diff --git a/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py b/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py index ca869ad8b..623b29041 100644 --- a/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py +++ b/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py @@ -58,18 +58,19 @@ def training_step(self, batch: Tuple[dict, Sequence[RecordType]], batch_idx): batch, _ = batch if isinstance(batch[0], torch.Tensor): (xb, detection_targets, classification_targets) = batch - step_type = ForwardType.TRAIN + detection_preds, classification_preds = self( + xb, step_type=ForwardType.TRAIN + ) elif isinstance(batch[0], dict): # TODO: Model method not yet implemented - (detection_data, classification_data) = batch - detection_targets = detection_data["targets"] - classification_targets = classification_data["targets"] + detection_targets = batch["detection"]["targets"] + classification_targets = batch["classification"]["targets"] - step_type = ForwardType.TRAIN_MULTI_AUG - raise RuntimeError + detection_preds, classification_preds = self( + batch, step_type=ForwardType.TRAIN_MULTI_AUG + ) - detection_preds, classification_preds = self(xb, step_type=step_type) detection_loss = self.compute_loss(detection_preds, detection_targets)[0] # Iterate through each head and compute classification losses diff --git a/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py b/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py index 424e17dfe..e164bffc6 100644 --- a/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py +++ b/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py @@ -185,7 +185,7 @@ def build_classifier_heads(self): def forward( self, - x: Tensor, + x: Union[Tensor, dict], profile=False, forward_detection: bool = True, forward_classification: bool = True, @@ -202,7 +202,7 @@ def forward( ) elif step_type is ForwardType.TRAIN_MULTI_AUG: - return self.forward_multi_augment(x=x, profile=profile) + return self.forward_multi_augment(x) elif step_type is ForwardType.EXPORT_COREML: self.train() @@ -225,9 +225,61 @@ def forward( def forward_augment(self, x): raise NotImplementedError - # TODO: 
multi-task multi-augmentation training - def forward_multi_augment(self, x: TensorDict) -> Tuple[TensorList, TensorDict]: - raise NotImplementedError + def forward_multi_augment(self, data: dict) -> Tuple[TensorList, TensorDict]: + """ + Description: + Multi augmentation training where we do multiple forward passes over the + same batch, going through different parts of the network each time. + + Detection and classification are treated separately, and within classification, + you can group together different tasks. A `group` has multiple `tasks`, so we + extract features once per group, then iterate over each head for that group's + `tasks`, and compute the outputs from these features + + Args: + data (dict): Input container with the following structure: + ```python + xb = torch.zeros(1, 3, 224, 224) + multi_aug_data = dict( + detection={"images": xb}, + classification={ + "group_1": dict( + tasks=["framing", "saturation"], + images=x, + ) + } + ) + ``` + Each group in data["classification"]'s `tasks` must correspond to + a key in `self.classifier_heads` + + Raises: + RuntimeError: If model is not in `training` mode (as a safety check) + + Returns: + Tuple[TensorList, TensorDict]: Tuple of `(detection_preds, classification_preds)` + """ + if not self.training: + raise RuntimeError(f"Can only run `forward_multi_augment` in training mode") + + # Detection forward pass + xb = data["detection"]["images"] + detection_preds, _ = self.forward_once( + xb, forward_detection=True, forward_classification=False + ) + + # Classification forward pass + classification_preds = {} + for group, data in data["classification"].items(): + xb = data["images"] + features, _ = self.forward_once( + xb, forward_detection=False, forward_classification=False + ) + for name in data["tasks"]: + head = self.classifier_heads[name] + classification_preds[name] = head(features) + + return detection_preds, classification_preds def forward_inference( self, x: Tensor From d06fa630bad7d22c4928c5c487a2aa1e8a432583 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Wed, 16 Jun 2021 17:41:08 +0530 Subject: [PATCH 061/122] minor __repr__ bugfix --- icevision/models/multitask/data/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icevision/models/multitask/data/dataset.py b/icevision/models/multitask/data/dataset.py index 8fe0bb485..892971f06 100644 --- a/icevision/models/multitask/data/dataset.py +++ b/icevision/models/multitask/data/dataset.py @@ -224,4 +224,4 @@ def __getitem__(self, i): return record def __repr__(self): - return f"<{self.__class__.__name__} with {len(self.records)} items and {len(self.group_tfms)+1} groups>" + return f"<{self.__class__.__name__} with {len(self.records)} items and {len(self.classification_transforms_groups)+1} groups>" From 01231eab77b363511e30b13fc85d846b5fad2ccd Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Wed, 16 Jun 2021 17:41:41 +0530 Subject: [PATCH 062/122] properly unpack multi aug data --- .../yolov5/lightning/model_adapter.py | 25 ++++++++++++------- .../ultralytics/yolov5/yolo_hybrid.py | 6 ++--- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py b/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py index 623b29041..ef508f228 100644 --- a/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py +++ b/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py @@ -55,20 +55,27 @@ def forward(self, *args, **kwargs): 
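# The hunk below assumes the multi-augmentation dataloader yields
# `(tupled_inputs, records)` where `tupled_inputs` is a pair of dicts. A sketch
# of the expected structure, with group/task names borrowed from the notebook
# later in this series (illustrative only):
#
#     tupled_inputs = (
#         # detection: images with bbox-safe transforms, plus bbox targets
#         dict(images=detection_images, targets=detection_targets),
#         # classification: one entry per transform group, each group
#         # serving one or more tasks
#         {
#             "group_1": dict(
#                 tasks=["lighting"],
#                 images=group_1_images,
#                 targets={"lighting": lighting_labels},
#             ),
#             "group_2": dict(
#                 tasks=["location"],
#                 images=group_2_images,
#                 targets={"location": location_labels},
#             ),
#         },
#     )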
return self.model(*args, **kwargs) def training_step(self, batch: Tuple[dict, Sequence[RecordType]], batch_idx): - batch, _ = batch - if isinstance(batch[0], torch.Tensor): - (xb, detection_targets, classification_targets) = batch + # batch will ALWAYS return a tuple of 2 elements - batched inputs, records + tupled_inputs, _ = batch + if isinstance(tupled_inputs[0], torch.Tensor): + (xb, detection_targets, classification_targets) = tupled_inputs detection_preds, classification_preds = self( xb, step_type=ForwardType.TRAIN ) - elif isinstance(batch[0], dict): + elif isinstance(tupled_inputs[0], dict): # TODO: Model method not yet implemented - detection_targets = batch["detection"]["targets"] - classification_targets = batch["classification"]["targets"] + data = dict(detection=tupled_inputs[0], classification=tupled_inputs[1]) + detection_targets = data["detection"]["targets"] + + # Go through (a nested dict) each task inside each group and fetch targets + classification_targets = {} + for group, datum in data["classification"].items(): + for task in datum["tasks"]: + classification_targets[task] = datum["targets"] detection_preds, classification_preds = self( - batch, step_type=ForwardType.TRAIN_MULTI_AUG + data, step_type=ForwardType.TRAIN_MULTI_AUG ) detection_loss = self.compute_loss(detection_preds, detection_targets)[0] @@ -90,8 +97,8 @@ def training_step(self, batch: Tuple[dict, Sequence[RecordType]], batch_idx): return detection_loss + total_classification_loss def validation_step(self, batch, batch_idx): - batch, records = batch - (xb, detection_targets, classification_targets) = batch + tupled_inputs, records = batch + (xb, detection_targets, classification_targets) = tupled_inputs with torch.no_grad(): # Get bbox preds and unactivated classifier preds, ready to feed to loss funcs diff --git a/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py b/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py index e164bffc6..55e90b945 100644 --- a/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py +++ b/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py @@ -270,12 +270,12 @@ def forward_multi_augment(self, data: dict) -> Tuple[TensorList, TensorDict]: # Classification forward pass classification_preds = {} - for group, data in data["classification"].items(): - xb = data["images"] + for group, datum in data["classification"].items(): + xb = datum["images"] features, _ = self.forward_once( xb, forward_detection=False, forward_classification=False ) - for name in data["tasks"]: + for name in datum["tasks"]: head = self.classifier_heads[name] classification_preds[name] = head(features) From 472e128c684bd3a327d6322d44456057a5d75bd7 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Wed, 16 Jun 2021 17:47:03 +0530 Subject: [PATCH 063/122] update w/ multi aug example --- notebooks/multitask.ipynb | 182 ++++++++++++++++++++++++++++++++++---- 1 file changed, 167 insertions(+), 15 deletions(-) diff --git a/notebooks/multitask.ipynb b/notebooks/multitask.ipynb index 3ca4d0bf7..b587a47d8 100644 --- a/notebooks/multitask.ipynb +++ b/notebooks/multitask.ipynb @@ -3,7 +3,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9aa70ed4", + "id": "3a909b36", "metadata": {}, "outputs": [], "source": [ @@ -14,7 +14,7 @@ { "cell_type": "code", "execution_count": null, - "id": "45afd0ba", + "id": "9bcfe126", "metadata": {}, "outputs": [], "source": [ @@ -27,15 +27,24 @@ "import icedata.datasets.exdark_trimmed as exdark" ] }, + { + "cell_type": "markdown", + "id": 
"ce2db1e1", + "metadata": {}, + "source": [ + "#### Regular Dataset" + ] + }, { "cell_type": "code", "execution_count": null, - "id": "718faafa", + "id": "02713d8d", "metadata": {}, "outputs": [], "source": [ "IMG_SIZE=512\n", "data_dir = exdark.load_data()\n", + "data_dir = Path(\"/Users/rahulsomani/datasets/ExDark-Trimmed/\")\n", "parser = exdark.parser(data_dir)\n", "\n", "train_records, valid_records = parser.parse(data_splitter=RandomSplitter([0.8, 0.2]))\n", @@ -52,37 +61,180 @@ ] }, { - "cell_type": "raw", - "id": "eaf8842c", + "cell_type": "code", + "execution_count": null, + "id": "7bd020e0", + "metadata": {}, + "outputs": [], + "source": [ + "dl_train = train_dl(train_ds, batch_size=32)\n", + "dl_valid = valid_dl(valid_ds, batch_size=64)" + ] + }, + { + "cell_type": "markdown", + "id": "4f577bcc", + "metadata": {}, + "source": [ + "#### Multi Augmentation Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6af5e084", + "metadata": {}, + "outputs": [], + "source": [ + "from icevision.models.multitask.data.dataset import HybridAugmentationsRecordDataset\n", + "import torchvision.transforms as Tfms" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "60d54941", + "metadata": {}, + "outputs": [], + "source": [ + "detection_train_transforms = tfms.A.Adapter(\n", + " [\n", + " # tfms.A.Normalize(), # NOTE: Normalizing happens inside the `Dataset` itself\n", + " tfms.A.Resize(height=IMG_SIZE, width=IMG_SIZE),\n", + " tfms.A.RandomSizedBBoxSafeCrop(\n", + " width=IMG_SIZE, height=IMG_SIZE, erosion_rate=0.2\n", + " ),\n", + " # tfms.A.PadIfNeeded(IMG_HEIGHT, IMG_WIDTH, border_mode=cv2.BORDER_CONSTANT),\n", + " tfms.A.ChannelDropout(p=0.05),\n", + " tfms.A.HorizontalFlip(p=0.5),\n", + " tfms.A.VerticalFlip(p=0.2),\n", + " tfms.A.ColorJitter(p=0.3), # This may destroy some information for lighting\n", + " tfms.A.JpegCompression(p=0.1),\n", + " ]\n", + ")\n", + "\n", + "valid_transforms = tfms.A.Adapter(\n", + " [\n", + " tfms.A.Normalize(),\n", + " tfms.A.Resize(height=IMG_SIZE, width=IMG_SIZE),\n", + " ]\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "99c68ac0", "metadata": {}, + "outputs": [], "source": [ - "train_ds, valid_ds = exdark.dataset(\n", - " data_dir = data_dir,\n", - " size = IMG_SIZE,\n", - " data_splitter = RandomSplitter([0.8, 0.2])\n", + "classification_tfms = dict(\n", + " group_1=dict(\n", + " tasks=[\"lighting\"],\n", + " transforms=Tfms.Compose(\n", + " [\n", + " Tfms.RandomPerspective(),\n", + " Tfms.Resize((IMG_SIZE, IMG_SIZE)),\n", + " Tfms.RandomHorizontalFlip(),\n", + " Tfms.RandomVerticalFlip(),\n", + " Tfms.RandomAffine(degrees=20),\n", + " Tfms.RandomAutocontrast(),\n", + " ]\n", + " )\n", + " ),\n", + " group_2=dict(\n", + " tasks=[\"location\"],\n", + " transforms=Tfms.Compose(\n", + " [\n", + " Tfms.RandomPerspective(),\n", + " Tfms.Resize((IMG_SIZE, IMG_SIZE)),\n", + " Tfms.RandomHorizontalFlip(),\n", + " Tfms.RandomVerticalFlip(),\n", + " Tfms.RandomAffine(degrees=20),\n", + " Tfms.RandomAutocontrast(),\n", + " Tfms.RandomChoice(\n", + " [Tfms.ColorJitter(), Tfms.RandomGrayscale(), Tfms.RandomEqualize()]\n", + " ),\n", + " ]\n", + " )\n", + " )\n", ")" ] }, { "cell_type": "code", "execution_count": null, - "id": "cdfcbe51", + "id": "8f64157d", "metadata": {}, "outputs": [], "source": [ - "dl_train = train_dl(train_ds, batch_size=32)\n", - "dl_valid = valid_dl(valid_ds, batch_size=64)" + "train_ds = HybridAugmentationsRecordDataset(\n", + " records=train_records,\n", + " 
classification_transforms_groups=classification_tfms,\n", + " detection_transforms=detection_train_transforms,\n", + ")\n", + "valid_ds = Dataset(valid_records, tfm=valid_transforms)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8257640d", + "metadata": {}, + "outputs": [], + "source": [ + "train_ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2659baa6", + "metadata": {}, + "outputs": [], + "source": [ + "train_ds[2]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "119791cf", + "metadata": {}, + "outputs": [], + "source": [ + "valid_ds[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c0064494", + "metadata": {}, + "outputs": [], + "source": [ + "dl_train = train_dl_multi_aug(train_ds, classification_tfms, batch_size=8)\n", + "dl_valid = valid_dl(valid_ds, batch_size=8)" + ] + }, + { + "cell_type": "markdown", + "id": "3840e81e", + "metadata": {}, + "source": [ + "### Model" ] }, { "cell_type": "code", "execution_count": null, - "id": "8a9171f9", + "id": "cac65c58", "metadata": {}, "outputs": [], "source": [ "hybrid_model = model(\n", - " backbone=backbones.large(pretrained=True),\n", + " backbone=backbones.small(pretrained=True),\n", + " # backbone=backbones.large(pretrained=True),\n", " num_detection_classes=len(parser.CLASS_MAPS['detection']),\n", " classifier_configs={\n", " name: ClassifierConfig(out_classes=len(cm))\n", @@ -257,7 +409,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.8" + "version": "3.7.10" }, "varInspector": { "cols": { From 52e27b2c8ee0f3450229e79d3591d42bf0f0c6dc Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Thu, 17 Jun 2021 08:31:25 +0530 Subject: [PATCH 064/122] bugfix --- .../multitask/ultralytics/yolov5/lightning/model_adapter.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py b/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py index ef508f228..158c08ca8 100644 --- a/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py +++ b/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py @@ -71,8 +71,7 @@ def training_step(self, batch: Tuple[dict, Sequence[RecordType]], batch_idx): # Go through (a nested dict) each task inside each group and fetch targets classification_targets = {} for group, datum in data["classification"].items(): - for task in datum["tasks"]: - classification_targets[task] = datum["targets"] + classification_targets.update(datum["targets"]) detection_preds, classification_preds = self( data, step_type=ForwardType.TRAIN_MULTI_AUG From 32dd165f76e944d646f8acbc32b7e463059106fe Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Thu, 17 Jun 2021 16:15:45 +0530 Subject: [PATCH 065/122] simplify forward method --- .../ultralytics/yolov5/yolo_hybrid.py | 88 +++++++------------ 1 file changed, 32 insertions(+), 56 deletions(-) diff --git a/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py b/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py index 55e90b945..9906b1776 100644 --- a/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py +++ b/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py @@ -31,7 +31,7 @@ # from .yolo import * from yolov5.models.yolo import * -from typing import Dict, Optional, List, Tuple, Union +from typing import Collection, Dict, Optional, List, Tuple, Union from copy import deepcopy from 
loguru import logger @@ -124,6 +124,7 @@ def __init__( self.names = [str(i) for i in range(self.yaml["nc"])] # default names self.inplace = self.yaml.get("inplace", True) # logger.info([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))]) + self.post_layers_init() # Build strides, anchors m = self.model[-1] # Detect() @@ -150,6 +151,13 @@ def __init__( self.post_init() + def post_layers_init(self): + """ + Run before doing test forward passes for determining the `Detect` (bbox_head) hparams. + If you want to inject custom modules into the model, this is the place to do it + """ + pass + def post_init(self): pass @@ -187,34 +195,33 @@ def forward( self, x: Union[Tensor, dict], profile=False, - forward_detection: bool = True, - forward_classification: bool = True, + # forward_detection: bool = True, + # forward_classification: bool = True, + # activate_classification: bool = False, step_type=ForwardType.TRAIN, ) -> Tuple[Union[Tensor, TensorList], TensorDict]: if step_type is ForwardType.TRAIN or step_type is ForwardType.EVAL: # Assume that model is set to `.eval()` mode before calling this function...? - return self.forward_once( - x, - profile=profile, - forward_detection=forward_detection, - forward_classification=forward_classification, - ) + return self.forward_once(x, profile=profile) + + elif step_type is ForwardType.INFERENCE: + return self.forward_once(x, activate_classification=True) elif step_type is ForwardType.TRAIN_MULTI_AUG: return self.forward_multi_augment(x) - elif step_type is ForwardType.EXPORT_COREML: - self.train() - self.classifier_heads.eval() - return self.forward_inference(x) + # elif step_type is ForwardType.EXPORT_COREML: + # self.train() + # self.classifier_heads.eval() + # return self.forward_inference(x) - elif ( - step_type is ForwardType.EXPORT_ONNX - or step_type is ForwardType.EXPORT_TORCHSCRIPT - ): - self.eval() - return self.forward_inference(x) + # elif ( + # step_type is ForwardType.EXPORT_ONNX + # or step_type is ForwardType.EXPORT_TORCHSCRIPT + # ): + # self.eval() + # return self.forward_inference(x) else: raise RuntimeError( @@ -281,48 +288,13 @@ def forward_multi_augment(self, data: dict) -> Tuple[TensorList, TensorDict]: return detection_preds, classification_preds - def forward_inference( - self, x: Tensor - ) -> Tuple[Union[Tensor, TensorList], TensorTuple]: - """ - No nonsense method for inference / exporting a model. Returns ONNX / CoreML / - TorchScript friendly outputs. 
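The `post_layers_init` hook above runs after `self.model` is assembled but before the test forward passes that derive the `Detect` head's strides and biases. A minimal sketch of how a subclass might use it (the subclass name and the injected module are illustrative, not part of this patch):

    import torch.nn as nn

    class HybridYOLOV5WithExtras(HybridYOLOV5):  # assumes HybridYOLOV5 is importable
        def post_layers_init(self):
            # Anything attached here exists before strides, anchors and
            # biases are computed at the end of __init__
            self.extra_dropout = nn.Dropout(0.1)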
- - Args: - x (Tensor): Input (N,C,H,W) tensor - - Returns: - Tuple[Union[Tensor, TensorList], TensorTuple]: A tuple of two elements - - `(detection_preds, classification_preds)` - 1) `detection_preds`: A TensorList if in training mode, else a Tuple[Tensor, TensorList] - where the first element is the inference output and the second the training output - 2) `classification_preds`: A TensorTuple of all the classification heads' predictions - """ - y = [] - classification_preds: Dict[str, Tensor] = {} - for m in self.model: - if m.f != -1: # if not from previous layer - x = ( - y[m.f] - if isinstance(m.f, int) - else [x if j == -1 else y[j] for j in m.f] - ) # from earlier layers - - if isinstance(m, Detect): - for name, head in self.classifier_heads.items(): - classification_preds[name] = head.forward_activate(x) - - x = m(x) - y.append(x if m.i in self.save else None) # save output - - return x, tuple(classification_preds.values()) - def forward_once( self, x, profile=False, # Will fail forward_detection: bool = True, forward_classification: bool = True, + activate_classification: bool = False, ) -> Tuple[Union[TensorList, Tuple[Tensor, TensorList]], TensorDict]: """ Returns: @@ -371,7 +343,11 @@ def forward_once( if isinstance(m, Detect): if forward_classification: for name, head in self.classifier_heads.items(): - classification_preds[name] = head(x) + classification_preds[name] = ( + head.forward_activate(x) + if activate_classification + else head(x) + ) if not forward_detection: if profile: From dcbe55c0ef5db9a777d874ca59f57a5ff59eec2d Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Thu, 17 Jun 2021 17:08:50 +0530 Subject: [PATCH 066/122] param freezing scheme --- .../multitask/ultralytics/yolov5/model.py | 19 ----- .../ultralytics/yolov5/yolo_hybrid.py | 72 +++++++++++++++++++ 2 files changed, 72 insertions(+), 19 deletions(-) diff --git a/icevision/models/multitask/ultralytics/yolov5/model.py b/icevision/models/multitask/ultralytics/yolov5/model.py index e8c9b360b..a8d46b927 100644 --- a/icevision/models/multitask/ultralytics/yolov5/model.py +++ b/icevision/models/multitask/ultralytics/yolov5/model.py @@ -111,23 +111,4 @@ def model( model.hyp = hyp # attach hyperparameters to model model.gr = 1.0 # iou loss ratio (obj_loss = 1.0 or iou) - def param_groups_fn(model: nn.Module) -> List[List[nn.Parameter]]: - spp_index = [ - i + 2 - for i, layer in enumerate(model.model.children()) - if layer._get_name() == "SPP" - ][0] - backbone = list(model.model.children())[:spp_index] - neck = list(model.model.children())[spp_index:-1] - head = list(model.model.children())[-1] - - layers = [nn.Sequential(*backbone), nn.Sequential(*neck), nn.Sequential(head)] - - param_groups = [list(group.parameters()) for group in layers] - check_all_model_params_in_groups2(model.model, param_groups) - - return param_groups - - model.param_groups = MethodType(param_groups_fn, model) - return model diff --git a/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py b/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py index 9906b1776..d3417ef26 100644 --- a/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py +++ b/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py @@ -10,6 +10,7 @@ __all__ = ["HybridYOLOV5", "ClassifierConfig"] + import torch import torch.nn as nn import torch.nn.functional as F @@ -17,6 +18,7 @@ from pathlib import Path from torch import Tensor +from torch.nn.parameter import Parameter from icevision.models.multitask.classification_heads.head import ( 
ClassifierConfig, ImageClassificationHead, @@ -27,6 +29,8 @@ build_classifier_heads_from_configs, ) from icevision.models.multitask.utils.model import ForwardType +from icevision.utils.torch_utils import params, check_all_model_params_in_groups2 +from icevision.utils.utils import flatten # from .yolo import * from yolov5.models.yolo import * @@ -191,6 +195,74 @@ def build_classifier_heads(self): ) logger.success(f"Built classifier heads successfully") + def param_groups(self) -> List[List[Parameter]]: + param_groups = [ + flatten(self._get_params_stem()), + flatten(self._get_params_backbone()), + flatten(self._get_params_neck()), + flatten(self._get_params_bbox_head()), + flatten(self._get_params_classifier_heads()), + ] + check_all_model_params_in_groups2(self, param_groups=param_groups) + return param_groups + + def _get_params_stem(self) -> List[nn.Parameter]: + return params(self.model[0]) + + def _get_params_backbone(self) -> List[List[Parameter]]: + return [params(m) for m in self.model[1:10]] + + def _get_params_neck(self) -> List[List[Parameter]]: + return [params(m) for m in self.model[10:][:-1]] + + def _get_params_bbox_head(self) -> List[List[Parameter]]: + return params(self.model[-1]) + + def _get_params_classifier_heads(self) -> List[List[Parameter]]: + return [params(self.classifier_heads)] + + def freeze( + self, + freeze_stem: bool = True, + freeze_bbone_blocks_until: int = 0, # between 0-9 + freeze_neck: bool = False, + freeze_bbox_head: bool = False, + freeze_classifier_heads: bool = False, + ): + """ + Freeze selected parts of the network + + Args: + freeze_stem (bool, optional): Freeze the first conv layer. Defaults to True. + freeze_bbone_blocks_until (int, optional): Number of blocks to freeze. If 0, none are frozen; if 9, all are frozen. Defaults to 0. + freeze_neck (bool, optional): Freeze the neck (FPN). Defaults to False. + freeze_bbox_head (bool, optional): Freeze the bounding box head (the `Detect` module). Defaults to False. + freeze_classifier_heads (bool, optional): Freeze all the classification heads. Defaults to False. 
+        """
+        if freeze_stem:
+            for p in flatten(self._get_params_stem()):
+                p.requires_grad = False
+
+        assert 1 <= freeze_bbone_blocks_until <= 9
+        for i, pg in enumerate(self._get_params_backbone(), start=1):
+            if i > freeze_bbone_blocks_until:
+                break
+            else:
+                for p in pg:
+                    p.requires_grad = False
+
+        if freeze_neck:
+            for p in flatten(self._get_params_neck()):
+                p.requires_grad = False
+
+        if freeze_bbox_head:
+            for p in flatten(self._get_params_bbox_head()):
+                p.requires_grad = False
+
+        if freeze_classifier_heads:
+            for p in flatten(self._get_params_classifier_heads()):
+                p.requires_grad = False
+
     def forward(
         self,
         x: Union[Tensor, dict],

From c9d310d3b13aa6495a4ebc582897ef9403725d23 Mon Sep 17 00:00:00 2001
From: Rahul Somani
Date: Fri, 18 Jun 2021 10:28:45 +0530
Subject: [PATCH 067/122] simplify forward modes

---
 icevision/models/multitask/utils/model.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/icevision/models/multitask/utils/model.py b/icevision/models/multitask/utils/model.py
index d7188c252..2e4fad13b 100644
--- a/icevision/models/multitask/utils/model.py
+++ b/icevision/models/multitask/utils/model.py
@@ -7,6 +7,7 @@ class ForwardType(Enum):
     TRAIN_MULTI_AUG = 1
     TRAIN = 2
     EVAL = 3
-    EXPORT_ONNX = 4
-    EXPORT_TORCHSCRIPT = 5
-    EXPORT_COREML = 6
+    INFERENCE = 4
+    # EXPORT_ONNX = 5
+    # EXPORT_TORCHSCRIPT = 6
+    # EXPORT_COREML = 7

From 620ee516e3596b964b57b9fff650b8d5ac89b2b8 Mon Sep 17 00:00:00 2001
From: Rahul Somani
Date: Fri, 18 Jun 2021 10:29:59 +0530
Subject: [PATCH 068/122] add `extract_features`; minor cleanup

---
 .../multitask/ultralytics/yolov5/yolo_hybrid.py | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py b/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py
index d3417ef26..50af449ea 100644
--- a/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py
+++ b/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py
@@ -243,7 +243,7 @@ def freeze(
         for p in flatten(self._get_params_stem()):
             p.requires_grad = False
 
-        assert 1 <= freeze_bbone_blocks_until <= 9
+        assert 0 <= freeze_bbone_blocks_until <= 9, "Num blocks must be between 0-9"
         for i, pg in enumerate(self._get_params_backbone(), start=1):
             if i > freeze_bbone_blocks_until:
                 break
@@ -272,13 +272,21 @@ def forward(
         # activate_classification: bool = False,
         step_type=ForwardType.TRAIN,
     ) -> Tuple[Union[Tensor, TensorList], TensorDict]:
+        "Forward method that is dispatched based on `step_type`"
         if step_type is ForwardType.TRAIN or step_type is ForwardType.EVAL:
             # Assume that model is set to `.eval()` mode before calling this function...?
             return self.forward_once(x, profile=profile)
 
         elif step_type is ForwardType.INFERENCE:
-            return self.forward_once(x, activate_classification=True)
+            # You may export model in training mode?
+            if not self.training:
+                (det_out, _), clf_out = self.forward_once(
+                    x, activate_classification=True
+                )
+            if self.training:
+                det_out, clf_out = self.forward_once(x, activate_classification=True)
+            return det_out, tuple(clf_out.values())
 
         elif step_type is ForwardType.TRAIN_MULTI_AUG:
             return self.forward_multi_augment(x)
 
@@ -304,6 +312,11 @@ def forward(
     def forward_augment(self, x):
         raise NotImplementedError
 
+    def extract_features(self, x: Tensor):
+        return self.forward_once(
+            x, forward_detection=False, forward_classification=False
+        )[0]
+
     def forward_multi_augment(self, data: dict) -> Tuple[TensorList, TensorDict]:
         """
         Description:

From 8033f517311b54f4cf4e9ed51f186234ddec5a96 Mon Sep 17 00:00:00 2001
From: Rahul Somani
Date: Fri, 18 Jun 2021 10:30:18 +0530
Subject: [PATCH 069/122] super awkward test scaffolding

---
 tests/multitask/__init__.py                   |  0
 tests/multitask/ultralytics/__init__.py       |  0
 .../multitask/ultralytics/yolov5/__init__.py  |  0
 .../ultralytics/yolov5/test_yolo_hybrid.py    | 48 +++++++++++++++++++
 4 files changed, 48 insertions(+)
 create mode 100644 tests/multitask/__init__.py
 create mode 100644 tests/multitask/ultralytics/__init__.py
 create mode 100644 tests/multitask/ultralytics/yolov5/__init__.py
 create mode 100644 tests/multitask/ultralytics/yolov5/test_yolo_hybrid.py

diff --git a/tests/multitask/__init__.py b/tests/multitask/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/multitask/ultralytics/__init__.py b/tests/multitask/ultralytics/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/multitask/ultralytics/yolov5/__init__.py b/tests/multitask/ultralytics/yolov5/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/multitask/ultralytics/yolov5/test_yolo_hybrid.py b/tests/multitask/ultralytics/yolov5/test_yolo_hybrid.py
new file mode 100644
index 000000000..f4314cedf
--- /dev/null
+++ b/tests/multitask/ultralytics/yolov5/test_yolo_hybrid.py
@@ -0,0 +1,48 @@
+# from numpy.lib.arraysetops import isin
+# import pytest
+# from icevision.imports import *
+# from icevision.models.multitask.ultralytics.yolov5.yolo_hybrid import *
+# from icevision.models.multitask.utils import *
+
+
+# @pytest.fixture
+# def model():
+#     return HybridYOLOV5(
+#         cfg="models/yolov5m.yaml",
+#         classifier_configs=dict(
+#             framing=ClassifierConfig(out_classes=10, num_fpn_features=10),
+#             saturation=ClassifierConfig(out_classes=20, num_fpn_features=None),
+#         ),
+#     )
+
+
+# def x():
+#     return torch.rand(1, 3, 224, 224)
+
+
+# def test_forward(model, x):
+#     det_out, clf_out = model.forward_once(x)
+#     assert isinstance(det_out, TensorList)
+#     assert isinstance(clf_out, TensorDict)
+#     assert det_out[0].ndim == 5
+
+
+# def test_forward_eval(model, x):
+#     det_out, clf_out = model.forward_once(x)
+
+#     assert len(det_out == 2)
+#     assert isinstance(det_out[0], Tensor)
+#     assert isinstance(det_out[1], TensorList)
+
+
+# def test_feature_extraction(model, x):
+#     det_out, clf_out = model.forward_once(
+#         forward_detection=False, forward_classification=False
+#     )
+#     assert det_out[0].ndim == 3
+#     assert clf_out == {}
+
+
+# def test_fwd_inference(model, x):
+#     det_out, clf_out = model.forward_once(activate_classification=True)
+#     torch.allclose(clf_out["framing"].sum(), tensor(1.0))

From 3195385791c48deabe2c3106316e7e60f34f9ddc Mon Sep 17 00:00:00 2001
From: Rahul Somani
Date: Fri, 18 Jun 2021 11:29:08 +0530
Subject: [PATCH 070/122] rename `build_classifier_heads` ->
 `build_classification_modules`

---
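Taken together, the trimmed-down `ForwardType` and the new `extract_features` helper give call sites a small surface area. A usage sketch (assumes `model` is a constructed `HybridYOLOV5`; the input shape is a placeholder):

    import torch
    from icevision.models.multitask.utils.model import ForwardType

    x = torch.rand(2, 3, 384, 384)
    model.eval()
    with torch.no_grad():
        # Raw detection + classification head outputs
        det_out, clf_out = model(x, step_type=ForwardType.EVAL)
        # Detection output plus activated (sigmoid/softmax) classifier outputs
        det_inf, clf_act = model(x, step_type=ForwardType.INFERENCE)
        # Backbone/FPN feature maps only; both heads are skipped
        feats = model.extract_features(x)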
icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py b/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py index 50af449ea..440d7a43c 100644 --- a/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py +++ b/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py @@ -114,7 +114,7 @@ def __init__( self.yaml = yaml.safe_load(f) # model dict self.classifier_configs = classifier_configs - self.build_classifier_heads() + self.build_classification_modules() # Define model ch = self.yaml["ch"] = self.yaml.get("ch", ch) # input channels @@ -165,7 +165,7 @@ def post_layers_init(self): def post_init(self): pass - def build_classifier_heads(self): + def build_classification_modules(self): """ Description: Build classifier heads from `self.classifier_configs`. From 3d820bae71694fc7ffb39bad285d320310ac4756 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Fri, 18 Jun 2021 17:12:39 +0530 Subject: [PATCH 071/122] move classifiers init location; add verbose option --- .../ultralytics/yolov5/yolo_hybrid.py | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py b/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py index 440d7a43c..08e541de1 100644 --- a/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py +++ b/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py @@ -113,9 +113,6 @@ def __init__( with open(cfg) as f: self.yaml = yaml.safe_load(f) # model dict - self.classifier_configs = classifier_configs - self.build_classification_modules() - # Define model ch = self.yaml["ch"] = self.yaml.get("ch", ch) # input channels if nc and nc != self.yaml["nc"]: @@ -128,6 +125,9 @@ def __init__( self.names = [str(i) for i in range(self.yaml["nc"])] # default names self.inplace = self.yaml.get("inplace", True) # logger.info([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))]) + + self.classifier_configs = classifier_configs + self.build_classification_modules() self.post_layers_init() # Build strides, anchors @@ -165,7 +165,7 @@ def post_layers_init(self): def post_init(self): pass - def build_classification_modules(self): + def build_classification_modules(self, verbose: bool = True): """ Description: Build classifier heads from `self.classifier_configs`. @@ -184,16 +184,18 @@ def build_classification_modules(self): cfg.num_fpn_features = num_fpn_features elif cfg.num_fpn_features != num_fpn_features: - logger.warning( - f"Incompatible `num_fpn_features={cfg.num_fpn_features}` detected in task '{task}'. " - f"Replacing with the correct dimensions: {num_fpn_features}" - ) + if verbose: + logger.warning( + f"Incompatible `num_fpn_features={cfg.num_fpn_features}` detected in task '{task}'. 
" + f"Replacing with the correct dimensions: {num_fpn_features}" + ) cfg.num_fpn_features = num_fpn_features self.classifier_heads = build_classifier_heads_from_configs( self.classifier_configs ) - logger.success(f"Built classifier heads successfully") + if verbose: + logger.success(f"Built classifier heads successfully") def param_groups(self) -> List[List[Parameter]]: param_groups = [ From 899369470518b7dde5f6c7f9107669ac3ec0be1e Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Fri, 18 Jun 2021 17:34:10 +0530 Subject: [PATCH 072/122] modularise `forward_inference` --- .../ultralytics/yolov5/yolo_hybrid.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py b/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py index 08e541de1..3a8915cf9 100644 --- a/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py +++ b/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py @@ -281,14 +281,7 @@ def forward( return self.forward_once(x, profile=profile) elif step_type is ForwardType.INFERENCE: - # You may export model in training mode? - if not self.training: - (det_out, _), clf_out = self.forward_once( - x, activate_classification=True - ) - if self.training: - det_out, clf_out = self.forward_once(x, activate_classification=True) - return det_out, tuple(clf_out.values()) + return self.forward_inference(x) elif step_type is ForwardType.TRAIN_MULTI_AUG: return self.forward_multi_augment(x) @@ -310,6 +303,15 @@ def forward( f"Invalid `step_type`. Received: {type(step_type.__class__)}; Expected: {ForwardType.__class__}" ) + def forward_inference(self, x): + # You may export model in training mode? + if not self.training: + (det_out, _), clf_out = self.forward_once(x, activate_classification=True) + if self.training: + self.classifier_heads.eval() # Turn off dropout + det_out, clf_out = self.forward_once(x, activate_classification=True) + return det_out, tuple(clf_out.values()) + # This is here for API compatibility with the main repo; will likely not be used def forward_augment(self, x): raise NotImplementedError From 0d3e08ac9dc2a529c87b38bf8bdf55af47fa309a Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Sat, 19 Jun 2021 12:49:26 +0530 Subject: [PATCH 073/122] make pooling inputs optional when not using fpn inputs --- .../multitask/classification_heads/head.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/icevision/models/multitask/classification_heads/head.py b/icevision/models/multitask/classification_heads/head.py index a875c5cd1..0cd352ca1 100644 --- a/icevision/models/multitask/classification_heads/head.py +++ b/icevision/models/multitask/classification_heads/head.py @@ -44,6 +44,7 @@ class ClassifierConfig: num_fpn_features: int = 512 fpn_keys: Union[List[str], List[int], None] = None dropout: Optional[float] = 0.2 + pool_inputs: bool = True # Loss function args loss_func: Optional[nn.Module] = None activation: Optional[nn.Module] = None @@ -91,6 +92,7 @@ def __init__( num_fpn_features: int, fpn_keys: Union[List[str], List[int], None] = None, dropout: Optional[float] = 0.2, + pool_inputs: bool = True, # ONLY for advanced use cases where input feature maps are already pooled # Loss function args loss_func: Optional[nn.Module] = None, activation: Optional[nn.Module] = None, @@ -111,15 +113,19 @@ def __init__( loss_weight, ) self.activation = activation + self.pool_inputs = pool_inputs self.thresh, self.topk = thresh, topk # Setup head 
self.fpn_keys = fpn_keys - self.classifier = nn.Sequential( - nn.Flatten(1), + + layers = [ nn.Dropout(dropout) if dropout else Passthrough(), nn.Linear(num_fpn_features, out_classes), - ) + ] + layers.insert(0, nn.Flatten(1)) if self.pool_inputs else None + self.classifier = nn.Sequential(*layers) + self.setup_loss_function() self.setup_postprocessing() @@ -173,7 +179,9 @@ def forward(self, features: Union[Tensor, TensorDict, TensorList]): # If doing regular (non-FPN) feature extraction, we don't need `fpn_keys` and # just avg. pool the last layer's features elif isinstance(features, Tensor): - pooled_features = F.adaptive_avg_pool2d(features, 1) + pooled_features = ( + F.adaptive_avg_pool2d(features, 1) if self.pool_inputs else features + ) else: raise TypeError( f"Expected TensorList|TensorDict|Tensor|tuple, got {type(features)}" From 054a1cc4b6bfa013b0a64ba93c646651be24d6de Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Sun, 20 Jun 2021 18:45:12 +0530 Subject: [PATCH 074/122] move to `arch` folder --- icevision/models/multitask/ultralytics/yolov5/arch/__init__.py | 0 .../multitask/ultralytics/yolov5/{ => arch}/yolo_hybrid.py | 0 icevision/models/multitask/ultralytics/yolov5/model.py | 2 +- 3 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 icevision/models/multitask/ultralytics/yolov5/arch/__init__.py rename icevision/models/multitask/ultralytics/yolov5/{ => arch}/yolo_hybrid.py (100%) diff --git a/icevision/models/multitask/ultralytics/yolov5/arch/__init__.py b/icevision/models/multitask/ultralytics/yolov5/arch/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py b/icevision/models/multitask/ultralytics/yolov5/arch/yolo_hybrid.py similarity index 100% rename from icevision/models/multitask/ultralytics/yolov5/yolo_hybrid.py rename to icevision/models/multitask/ultralytics/yolov5/arch/yolo_hybrid.py diff --git a/icevision/models/multitask/ultralytics/yolov5/model.py b/icevision/models/multitask/ultralytics/yolov5/model.py index a8d46b927..9e923ad17 100644 --- a/icevision/models/multitask/ultralytics/yolov5/model.py +++ b/icevision/models/multitask/ultralytics/yolov5/model.py @@ -20,7 +20,7 @@ from icevision.models.multitask.ultralytics.yolov5.utils import * from icevision.models.ultralytics.yolov5.backbones import * -from icevision.models.multitask.ultralytics.yolov5.yolo_hybrid import HybridYOLOV5 +from icevision.models.multitask.ultralytics.yolov5.arch.yolo_hybrid import HybridYOLOV5 from icevision.models.multitask.classification_heads import ClassifierConfig yolo_dir = get_root_dir() / "yolo" From 462f85b0e09ef47643283c4ed16e8a10e007deeb Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Sun, 20 Jun 2021 19:26:16 +0530 Subject: [PATCH 075/122] add unfreezing; modularise freezing, param groups as pseudo mixins - I call them "Extensions" --- .../ultralytics/yolov5/arch/model_freezing.py | 113 ++++++++++++++++++ .../ultralytics/yolov5/arch/param_groups.py | 43 +++++++ .../ultralytics/yolov5/arch/yolo_hybrid.py | 90 ++------------ 3 files changed, 163 insertions(+), 83 deletions(-) create mode 100644 icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py create mode 100644 icevision/models/multitask/ultralytics/yolov5/arch/param_groups.py diff --git a/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py b/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py new file mode 100644 index 000000000..b1e98d372 --- /dev/null +++ 
b/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py @@ -0,0 +1,113 @@ +import torch +import torch.nn as nn + +from torch import Tensor +from torch.nn import Parameter + +from typing import Union, List +from icevision.utils.torch_utils import params +from icevision.utils.utils import flatten +from loguru import logger + +logger = logger.opt(colors=True) + +__all__ = ["FreezingInterfaceExtension"] + + +class FreezingInterfaceExtension: + """ + Model freezing and unfreezing extensions for `HybridYOLOV5` + """ + + def _get_params_stem(self) -> List[nn.Parameter]: + return params(self.model[0]) + + def _get_params_backbone(self) -> List[List[Parameter]]: + return [params(m) for m in self.model[1:10]] + + def _get_params_neck(self) -> List[List[Parameter]]: + return [params(m) for m in self.model[10:][:-1]] + + def _get_params_bbox_head(self) -> List[List[Parameter]]: + return params(self.model[-1]) + + def _get_params_classifier_heads(self) -> List[List[Parameter]]: + return [params(self.classifier_heads)] + + def freeze( + self, + freeze_stem: bool = True, + freeze_bbone_blocks_until: int = 0, # between 0-9 + freeze_neck: bool = False, + freeze_bbox_head: bool = False, + freeze_classifier_heads: bool = False, + _grad: bool = False, # Don't modify. + ): + """ + Freeze selected parts of the network + + Args: + freeze_stem (bool, optional): Freeze the first conv layer. Defaults to True. + freeze_bbone_blocks_until (int, optional): Number of blocks to freeze. If 0, none are frozen; if 9, all are frozen. Defaults to 0. + freeze_neck (bool, optional): Freeze the neck (FPN). Defaults to False. + freeze_bbox_head (bool, optional): Freeze the bounding box head (the `Detect` module). Defaults to False. + freeze_classifier_heads (bool, optional): Freeze all the classification heads. Defaults to False. 
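A possible freezing schedule built on this interface (illustrative; assumes `model` is a built `HybridYOLOV5`):

    # Phase 1: freeze the stem and all 9 backbone blocks, train neck + heads
    model.freeze(freeze_stem=True, freeze_bbone_blocks_until=9)

Note that frozen BatchNorm layers keep updating their running statistics unless they are also put in eval mode; a later patch in this series addresses exactly that with `set_bn_eval`.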
+ """ + if freeze_stem: + for p in flatten(self._get_params_stem()): + p.requires_grad = _grad + + assert 0 <= freeze_bbone_blocks_until <= 9, "Num blocks must be between 0-9" + for i, pg in enumerate(self._get_params_backbone(), start=1): + if i > freeze_bbone_blocks_until: + break + else: + for p in pg: + p.requires_grad = _grad + + if freeze_neck: + for p in flatten(self._get_params_neck()): + p.requires_grad = _grad + + if freeze_bbox_head: + for p in flatten(self._get_params_bbox_head()): + p.requires_grad = _grad + + if freeze_classifier_heads: + for p in flatten(self._get_params_classifier_heads()): + p.requires_grad = _grad + + def unfreeze( + self, + freeze_stem: bool = True, + freeze_bbone_blocks_until: int = 0, # between 0-9 + freeze_neck: bool = False, + freeze_bbox_head: bool = False, + freeze_classifier_heads: bool = False, + ): + self.freeze( + freeze_stem=freeze_stem, + freeze_bbone_blocks_until=freeze_bbone_blocks_until, + freeze_neck=freeze_neck, + freeze_bbox_head=freeze_bbox_head, + freeze_classifier_heads=freeze_classifier_heads, + _grad=True, + ) + + def freeze_specific_classifier_heads( + self, names: Union[str, List[str], None] = None, _grad: bool = False + ): + "Freeze all, one or a few classifier heads" + if isinstance(names, str): + names = [] + if names is None: + names = list(self.classifier_heads.keys()) + + for name in names: + for p in flatten(params(self.classifier_heads[name])): + p.requires_grad = _grad + + def unfreeze_specific_classifier_heads( + self, names: Union[str, List[str], None] = None + ): + self.freeze_specific_classifier_heads(names=names, _grad=True) diff --git a/icevision/models/multitask/ultralytics/yolov5/arch/param_groups.py b/icevision/models/multitask/ultralytics/yolov5/arch/param_groups.py new file mode 100644 index 000000000..1845c441f --- /dev/null +++ b/icevision/models/multitask/ultralytics/yolov5/arch/param_groups.py @@ -0,0 +1,43 @@ +""" +This file defines how to get parameter groups from the `HybridYOLOV5` +model. It is expected to be used along with the other classes in this +submodule, but is defined in a distinct file for easier referencing +and if one wanted to define a custom param_groups functions +""" + +from typing import List +from torch.nn import Parameter +from icevision.utils.utils import flatten +from icevision.utils.torch_utils import check_all_model_params_in_groups2 + +__all__ = ["ParamGroupsExtension"] + + +class ParamGroupsExtension: + """ + Splits the model into distinct parameter groups to pass differential + learning rates to. Given the structure of the model, you must note + that the param groups are not returned sequentially. The last returned + group is the classifier heads, and the second last is bbox head, and you + may want to apply the same LR to both. The `lr=slice(1e-3)` syntax will not + work for that and you'd have to manually pass in a sequence of + `len(param_groups)` (5) learning rates instead + + Param Groups: + 1. Stem - The first conv layer + 2. Backbone - Layers 1:10 + 3. Neck - The FPN layers i.e. layers 10:23 (24?) + 4. BBox Head - The `Detect` module, which is the last layer in `self.model` + 5. 
Classifier Heads + """ + + def param_groups(self) -> List[List[Parameter]]: + param_groups = [ + flatten(self._get_params_stem()), + flatten(self._get_params_backbone()), + flatten(self._get_params_neck()), + flatten(self._get_params_bbox_head()), + flatten(self._get_params_classifier_heads()), + ] + check_all_model_params_in_groups2(self, param_groups=param_groups) + return param_groups diff --git a/icevision/models/multitask/ultralytics/yolov5/arch/yolo_hybrid.py b/icevision/models/multitask/ultralytics/yolov5/arch/yolo_hybrid.py index 3a8915cf9..6e155dd46 100644 --- a/icevision/models/multitask/ultralytics/yolov5/arch/yolo_hybrid.py +++ b/icevision/models/multitask/ultralytics/yolov5/arch/yolo_hybrid.py @@ -29,8 +29,8 @@ build_classifier_heads_from_configs, ) from icevision.models.multitask.utils.model import ForwardType -from icevision.utils.torch_utils import params, check_all_model_params_in_groups2 -from icevision.utils.utils import flatten +from icevision.models.multitask.ultralytics.yolov5.arch.model_freezing import * +from icevision.models.multitask.ultralytics.yolov5.arch.param_groups import * # from .yolo import * from yolov5.models.yolo import * @@ -68,7 +68,11 @@ # fmt: on -class HybridYOLOV5(nn.Module): +class HybridYOLOV5( + nn.Module, + FreezingInterfaceExtension, + ParamGroupsExtension, +): """ Info: Create a multitask variant of any YOLO model from ultralytics @@ -197,74 +201,6 @@ def build_classification_modules(self, verbose: bool = True): if verbose: logger.success(f"Built classifier heads successfully") - def param_groups(self) -> List[List[Parameter]]: - param_groups = [ - flatten(self._get_params_stem()), - flatten(self._get_params_backbone()), - flatten(self._get_params_neck()), - flatten(self._get_params_bbox_head()), - flatten(self._get_params_classifier_heads()), - ] - check_all_model_params_in_groups2(self, param_groups=param_groups) - return param_groups - - def _get_params_stem(self) -> List[nn.Parameter]: - return params(self.model[0]) - - def _get_params_backbone(self) -> List[List[Parameter]]: - return [params(m) for m in self.model[1:10]] - - def _get_params_neck(self) -> List[List[Parameter]]: - return [params(m) for m in self.model[10:][:-1]] - - def _get_params_bbox_head(self) -> List[List[Parameter]]: - return params(self.model[-1]) - - def _get_params_classifier_heads(self) -> List[List[Parameter]]: - return [params(self.classifier_heads)] - - def freeze( - self, - freeze_stem: bool = True, - freeze_bbone_blocks_until: int = 0, # between 0-9 - freeze_neck: bool = False, - freeze_bbox_head: bool = False, - freeze_classifier_heads: bool = False, - ): - """ - Freeze selected parts of the network - - Args: - freeze_stem (bool, optional): Freeze the first conv layer. Defaults to True. - freeze_bbone_blocks_until (int, optional): Number of blocks to freeze. If 0, none are frozen; if 9, all are frozen. Defaults to 0. - freeze_neck (bool, optional): Freeze the neck (FPN). Defaults to False. - freeze_bbox_head (bool, optional): Freeze the bounding box head (the `Detect` module). Defaults to False. - freeze_classifier_heads (bool, optional): Freeze all the classification heads. Defaults to False. 
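Because the five groups come back in the order stem, backbone, neck, bbox head, classifier heads, a plain optimizer wiring might look like this sketch (learning rates are illustrative):

    import torch

    pgs = model.param_groups()  # assumes a built `HybridYOLOV5`
    lrs = [1e-5, 1e-4, 1e-4, 1e-3, 1e-3]  # one LR per group, heads the hottest
    optimizer = torch.optim.SGD(
        [{"params": pg, "lr": lr} for pg, lr in zip(pgs, lrs)],
        lr=1e-3,
        momentum=0.9,
    )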
- """ - if freeze_stem: - for p in flatten(self._get_params_stem()): - p.requires_grad = False - - assert 0 <= freeze_bbone_blocks_until <= 9, "Num blocks must be between 0-9" - for i, pg in enumerate(self._get_params_backbone(), start=1): - if i > freeze_bbone_blocks_until: - break - else: - for p in pg: - p.requires_grad = False - - if freeze_neck: - for p in flatten(self._get_params_neck()): - p.requires_grad = False - - if freeze_bbox_head: - for p in flatten(self._get_params_bbox_head()): - p.requires_grad = False - - if freeze_classifier_heads: - for p in flatten(self._get_params_classifier_heads()): - p.requires_grad = False - def forward( self, x: Union[Tensor, dict], @@ -286,18 +222,6 @@ def forward( elif step_type is ForwardType.TRAIN_MULTI_AUG: return self.forward_multi_augment(x) - # elif step_type is ForwardType.EXPORT_COREML: - # self.train() - # self.classifier_heads.eval() - # return self.forward_inference(x) - - # elif ( - # step_type is ForwardType.EXPORT_ONNX - # or step_type is ForwardType.EXPORT_TORCHSCRIPT - # ): - # self.eval() - # return self.forward_inference(x) - else: raise RuntimeError( f"Invalid `step_type`. Received: {type(step_type.__class__)}; Expected: {ForwardType.__class__}" From d5037b54f30900dd61f9551cbb9273aee9408867 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Mon, 21 Jun 2021 11:06:25 +0530 Subject: [PATCH 076/122] fix import path --- .../multitask/ultralytics/yolov5/lightning/model_adapter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py b/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py index 158c08ca8..cdb901032 100644 --- a/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py +++ b/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py @@ -12,7 +12,7 @@ from icevision.core import * from loguru import logger -from icevision.models.multitask.ultralytics.yolov5.yolo_hybrid import HybridYOLOV5 +from icevision.models.multitask.ultralytics.yolov5.arch.yolo_hybrid import HybridYOLOV5 from icevision.models.multitask.utils.prediction import * from icevision.models.multitask.ultralytics.yolov5.prediction import ( convert_raw_predictions, From f271a38c3a6a04e247c081f7dfbda64738a47798 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Mon, 21 Jun 2021 11:30:40 +0530 Subject: [PATCH 077/122] fix import path... 
again --- icevision/models/multitask/ultralytics/yolov5/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icevision/models/multitask/ultralytics/yolov5/__init__.py b/icevision/models/multitask/ultralytics/yolov5/__init__.py index 7db056ccf..d2cabe4f0 100644 --- a/icevision/models/multitask/ultralytics/yolov5/__init__.py +++ b/icevision/models/multitask/ultralytics/yolov5/__init__.py @@ -21,7 +21,7 @@ from icevision.models.multitask.ultralytics.yolov5.prediction import * from icevision.models.multitask.ultralytics.yolov5.utils import * from icevision.models.multitask.ultralytics.yolov5.backbones import * -from icevision.models.multitask.ultralytics.yolov5.yolo_hybrid import * +from icevision.models.multitask.ultralytics.yolov5.arch.yolo_hybrid import * from icevision.soft_dependencies import SoftDependencies From ffca2f0c2454dc74e5a3a75f8bfb0e1a142864e6 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Mon, 21 Jun 2021 15:50:55 +0530 Subject: [PATCH 078/122] batchnorm freezing --- .../ultralytics/yolov5/arch/model_freezing.py | 4 +++- .../ultralytics/yolov5/arch/yolo_hybrid.py | 7 ++++++- icevision/models/multitask/utils/model.py | 13 ++++++++++++- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py b/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py index b1e98d372..03944481d 100644 --- a/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py +++ b/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py @@ -17,6 +17,8 @@ class FreezingInterfaceExtension: """ Model freezing and unfreezing extensions for `HybridYOLOV5` + Note that the BatchNorm layers are also frozen, but that part is not + defined here, but in the main module's `.train()` method directly """ def _get_params_stem(self) -> List[nn.Parameter]: @@ -99,7 +101,7 @@ def freeze_specific_classifier_heads( ): "Freeze all, one or a few classifier heads" if isinstance(names, str): - names = [] + names = [names] if names is None: names = list(self.classifier_heads.keys()) diff --git a/icevision/models/multitask/ultralytics/yolov5/arch/yolo_hybrid.py b/icevision/models/multitask/ultralytics/yolov5/arch/yolo_hybrid.py index 6e155dd46..326ab69b2 100644 --- a/icevision/models/multitask/ultralytics/yolov5/arch/yolo_hybrid.py +++ b/icevision/models/multitask/ultralytics/yolov5/arch/yolo_hybrid.py @@ -28,7 +28,7 @@ from icevision.models.multitask.classification_heads.builder import ( build_classifier_heads_from_configs, ) -from icevision.models.multitask.utils.model import ForwardType +from icevision.models.multitask.utils.model import ForwardType, set_bn_eval from icevision.models.multitask.ultralytics.yolov5.arch.model_freezing import * from icevision.models.multitask.ultralytics.yolov5.arch.param_groups import * @@ -169,6 +169,11 @@ def post_layers_init(self): def post_init(self): pass + def train(self, mode: bool = True): + "Set model to training mode, while freezing non trainable layers' BN statistics" + super(HybridYOLOV5, self).train(mode) + set_bn_eval(self) + def build_classification_modules(self, verbose: bool = True): """ Description: diff --git a/icevision/models/multitask/utils/model.py b/icevision/models/multitask/utils/model.py index 2e4fad13b..dff65d5af 100644 --- a/icevision/models/multitask/utils/model.py +++ b/icevision/models/multitask/utils/model.py @@ -1,6 +1,8 @@ from enum import Enum +from torch.nn.modules.batchnorm import _BatchNorm +from torch import nn -__all__ = 
["ForwardType"] +__all__ = ["ForwardType", "set_bn_eval"] class ForwardType(Enum): @@ -11,3 +13,12 @@ class ForwardType(Enum): # EXPORT_ONNX = 5 # EXPORT_TORCHSCRIPT = 6 # EXPORT_COREML = 7 + + +# Taken from from https://github.com/fastai/fastai/blob/4decc673ba811a41c6e3ab648aab96dd27244ff7/fastai/callback/training.py#L43-L49 +def set_bn_eval(m: nn.Module, use_eval=True) -> None: + "Set bn layers in eval mode for all recursive, non-trainable children of `m`." + for l in m.children(): + if isinstance(l, _BatchNorm) and not next(l.parameters()).requires_grad: + l.eval() + set_bn_eval(l) \ No newline at end of file From a63136b4c71bb3348197b159273a3286ab9eeaf4 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Mon, 21 Jun 2021 15:58:22 +0530 Subject: [PATCH 079/122] add warning --- .../models/multitask/ultralytics/yolov5/arch/model_freezing.py | 1 + 1 file changed, 1 insertion(+) diff --git a/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py b/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py index 03944481d..3afc0771c 100644 --- a/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py +++ b/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py @@ -54,6 +54,7 @@ def freeze( freeze_neck (bool, optional): Freeze the neck (FPN). Defaults to False. freeze_bbox_head (bool, optional): Freeze the bounding box head (the `Detect` module). Defaults to False. freeze_classifier_heads (bool, optional): Freeze all the classification heads. Defaults to False. + _grad (bool): DO NOT MODIFY this argument. It is used internally for `.unfreeze()` """ if freeze_stem: for p in flatten(self._get_params_stem()): From 8948a0eb46b981fb1e1f15c806a53f14429ee84f Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Tue, 22 Jun 2021 14:29:39 +0530 Subject: [PATCH 080/122] store fpn dims as an attribute --- .../models/multitask/ultralytics/yolov5/arch/yolo_hybrid.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/icevision/models/multitask/ultralytics/yolov5/arch/yolo_hybrid.py b/icevision/models/multitask/ultralytics/yolov5/arch/yolo_hybrid.py index 326ab69b2..d183e601e 100644 --- a/icevision/models/multitask/ultralytics/yolov5/arch/yolo_hybrid.py +++ b/icevision/models/multitask/ultralytics/yolov5/arch/yolo_hybrid.py @@ -152,6 +152,10 @@ def __init__( self._initialize_biases() # only run once # logger.info('Strides: %s' % m.stride.tolist()) + self.fpn_dims = YOLO_FEATURE_MAP_DIMS[Path(model.yaml_file).stem] + # self.fpn_dims = self.extract_features(torch.rand(1, 3, 224, 224)) + self.num_fpn_dims = len(self.fpn_dims) + # Init weights, biases initialize_weights(self) self.info() From d08d4a66596eabc03c651eb21ae6fb6050bd3f42 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Tue, 22 Jun 2021 15:38:41 +0530 Subject: [PATCH 081/122] safety mechanism --- .../multitask/ultralytics/yolov5/lightning/model_adapter.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py b/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py index cdb901032..63b1c8f9d 100644 --- a/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py +++ b/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py @@ -152,6 +152,11 @@ def compute_and_log_classification_metrics( on_step: bool = False, # prefix: str = "valid", ): + if not set(classification_preds.keys()) == set(yb.keys()): + raise RuntimeError( + f"Mismatch between prediction and target items. 
Predictions have " + f"{classification_preds.keys} keys and targets have {yb.keys()} keys" + ) # prefix = f"{prefix}/" if not prefix == "" else "" prefix = "valid/" for (name, metric), (_, preds) in zip( From ecd693b95db1b5ecdb9ca26bf834275f70905742 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Tue, 22 Jun 2021 15:40:57 +0530 Subject: [PATCH 082/122] dumb bugfix --- .../models/multitask/ultralytics/yolov5/arch/yolo_hybrid.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icevision/models/multitask/ultralytics/yolov5/arch/yolo_hybrid.py b/icevision/models/multitask/ultralytics/yolov5/arch/yolo_hybrid.py index d183e601e..cb8119474 100644 --- a/icevision/models/multitask/ultralytics/yolov5/arch/yolo_hybrid.py +++ b/icevision/models/multitask/ultralytics/yolov5/arch/yolo_hybrid.py @@ -152,7 +152,7 @@ def __init__( self._initialize_biases() # only run once # logger.info('Strides: %s' % m.stride.tolist()) - self.fpn_dims = YOLO_FEATURE_MAP_DIMS[Path(model.yaml_file).stem] + self.fpn_dims = YOLO_FEATURE_MAP_DIMS[Path(self.yaml_file).stem] # self.fpn_dims = self.extract_features(torch.rand(1, 3, 224, 224)) self.num_fpn_dims = len(self.fpn_dims) From 19ea13e2ab7d691ffb7475c31b608ec1d6241591 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Tue, 22 Jun 2021 15:42:54 +0530 Subject: [PATCH 083/122] klsvbaolskfbvjklfb WTF --- .../multitask/ultralytics/yolov5/lightning/model_adapter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py b/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py index 63b1c8f9d..cf4c69617 100644 --- a/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py +++ b/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py @@ -155,7 +155,7 @@ def compute_and_log_classification_metrics( if not set(classification_preds.keys()) == set(yb.keys()): raise RuntimeError( f"Mismatch between prediction and target items. Predictions have " - f"{classification_preds.keys} keys and targets have {yb.keys()} keys" + f"{classification_preds.keys()} keys and targets have {yb.keys()} keys" ) # prefix = f"{prefix}/" if not prefix == "" else "" prefix = "valid/" From 2a198fb4b3655011f97d38157ec001854a876cd1 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Tue, 22 Jun 2021 17:27:13 +0530 Subject: [PATCH 084/122] model unfreezing bugfix --- .../ultralytics/yolov5/arch/model_freezing.py | 26 ++++++++++++------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py b/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py index 3afc0771c..f7c8a617c 100644 --- a/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py +++ b/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py @@ -82,18 +82,24 @@ def freeze( def unfreeze( self, - freeze_stem: bool = True, - freeze_bbone_blocks_until: int = 0, # between 0-9 - freeze_neck: bool = False, - freeze_bbox_head: bool = False, - freeze_classifier_heads: bool = False, + unfreeze_stem: bool = False, + unfreeze_bbone_blocks_until: int = 9, # either 0-9 TODO FIXME + unfreeze_neck: bool = True, + unfreeze_bbox_head: bool = True, + unfreeze_classifier_heads: bool = True, ): + "Unfreeze specific parts of the model. By default all parts but the stem are unfrozen" + if not unfreeze_bbone_blocks_until in [0, 9]: + raise RuntimeError( + f"Currently we can only unfreeze all or no blocks at once. 
Pass `unfreeze_bbone_blocks_until=9 | 0` to do so" + ) + self.freeze( - freeze_stem=freeze_stem, - freeze_bbone_blocks_until=freeze_bbone_blocks_until, - freeze_neck=freeze_neck, - freeze_bbox_head=freeze_bbox_head, - freeze_classifier_heads=freeze_classifier_heads, + freeze_stem=not unfreeze_stem, + freeze_bbone_blocks_until=unfreeze_bbone_blocks_until, + freeze_neck=not unfreeze_neck, + freeze_bbox_head=not unfreeze_bbox_head, + freeze_classifier_heads=not unfreeze_classifier_heads, _grad=True, ) From a65c8be029cbbd5b7cda5433ababb5461958bf75 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Tue, 22 Jun 2021 17:34:06 +0530 Subject: [PATCH 085/122] **hangs head in shame** --- .../multitask/ultralytics/yolov5/arch/model_freezing.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py b/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py index f7c8a617c..911ac597a 100644 --- a/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py +++ b/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py @@ -95,11 +95,11 @@ def unfreeze( ) self.freeze( - freeze_stem=not unfreeze_stem, + freeze_stem=unfreeze_stem, freeze_bbone_blocks_until=unfreeze_bbone_blocks_until, - freeze_neck=not unfreeze_neck, - freeze_bbox_head=not unfreeze_bbox_head, - freeze_classifier_heads=not unfreeze_classifier_heads, + freeze_neck=unfreeze_neck, + freeze_bbox_head=unfreeze_bbox_head, + freeze_classifier_heads=unfreeze_classifier_heads, _grad=True, ) From 1200f58a70859069c388f897a6033c8493448e5f Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Sat, 26 Jun 2021 11:45:35 +0530 Subject: [PATCH 086/122] freezing interface --- .../ultralytics/yolov5/arch/model_freezing.py | 121 ++++++++++-------- 1 file changed, 68 insertions(+), 53 deletions(-) diff --git a/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py b/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py index 911ac597a..fa8e68d37 100644 --- a/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py +++ b/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py @@ -1,10 +1,11 @@ import torch import torch.nn as nn +import numpy as np from torch import Tensor from torch.nn import Parameter -from typing import Union, List +from typing import Collection, Union, List, Tuple from icevision.utils.torch_utils import params from icevision.utils.utils import flatten from loguru import logger @@ -36,71 +37,85 @@ def _get_params_bbox_head(self) -> List[List[Parameter]]: def _get_params_classifier_heads(self) -> List[List[Parameter]]: return [params(self.classifier_heads)] - def freeze( + def set_param_grad_state( self, - freeze_stem: bool = True, - freeze_bbone_blocks_until: int = 0, # between 0-9 - freeze_neck: bool = False, - freeze_bbox_head: bool = False, - freeze_classifier_heads: bool = False, - _grad: bool = False, # Don't modify. 
+ stem: bool, + bbone_blocks: Tuple[Collection[int], bool], + neck: bool, + bbox_head: bool, + classifier_heads: bool, ): + error_msg = f""" + `bbone_blocks` must be a list|tuple where the second value is the gradient state to be set, and the + first value is a List[int] between 0-9 specifying which blocks to set this state for """ - Freeze selected parts of the network + if not (isinstance(bbone_blocks, (list, tuple)) and len(bbone_blocks) == 2): + raise TypeError(error_msg) + if not isinstance(bbone_blocks[0], (list, tuple)): + raise TypeError(error_msg) + if not all(isinstance(x, int) for x in bbone_blocks[0]): + raise TypeError(error_msg) + if not 0 <= bbone_blocks[0] <= 9: + raise ValueError(error_msg) - Args: - freeze_stem (bool, optional): Freeze the first conv layer. Defaults to True. - freeze_bbone_blocks_until (int, optional): Number of blocks to freeze. If 0, none are frozen; if 9, all are frozen. Defaults to 0. - freeze_neck (bool, optional): Freeze the neck (FPN). Defaults to False. - freeze_bbox_head (bool, optional): Freeze the bounding box head (the `Detect` module). Defaults to False. - freeze_classifier_heads (bool, optional): Freeze all the classification heads. Defaults to False. - _grad (bool): DO NOT MODIFY this argument. It is used internally for `.unfreeze()` - """ - if freeze_stem: - for p in flatten(self._get_params_stem()): - p.requires_grad = _grad + for p in flatten(self._get_params_stem()): + p.requires_grad = stem - assert 0 <= freeze_bbone_blocks_until <= 9, "Num blocks must be between 0-9" - for i, pg in enumerate(self._get_params_backbone(), start=1): - if i > freeze_bbone_blocks_until: - break - else: - for p in pg: - p.requires_grad = _grad + target_blocks, grad_state = bbone_blocks + pgs = np.array(self._get_params_backbone()) + for p in flatten(pgs[target_blocks]): + p.requires_grad = grad_state - if freeze_neck: - for p in flatten(self._get_params_neck()): - p.requires_grad = _grad + for p in flatten(self._get_params_neck()): + p.requires_grad = neck - if freeze_bbox_head: - for p in flatten(self._get_params_bbox_head()): - p.requires_grad = _grad + for p in flatten(self._get_params_bbox_head()): + p.requires_grad = bbox_head - if freeze_classifier_heads: - for p in flatten(self._get_params_classifier_heads()): - p.requires_grad = _grad + for p in flatten(self._get_params_classifier_heads()): + p.requires_grad = classifier_heads + + def freeze( + self, + stem: bool = True, + bbone_blocks: int = 0, # between 0-9 + neck: bool = False, + bbox_head: bool = False, + classifier_heads: bool = False, + ): + """ + Freeze selected parts of the network + + Args: + stem (bool, optional): Freeze the first conv layer. Defaults to True. + bbone_blocks (int, optional): Number of blocks to freeze. If 0, none are frozen; if 9, all are frozen. If 3, the first 3 blocks are frozen + neck (bool, optional): Freeze the neck (FPN). Defaults to False. + bbox_head (bool, optional): Freeze the bounding box head (the `Detect` module). Defaults to False. + classifier_heads (bool, optional): Freeze all the classification heads. Defaults to False. 
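With the `(blocks, state)` plumbing above, the higher-level calls reduce to one-liners (illustrative; assumes a built model):

    # Freeze the stem and the first 3 backbone blocks; the rest stays trainable
    model.freeze(stem=True, bbone_blocks=3)

    # Later: the defaults unfreeze every part except the stem
    model.unfreeze()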
+ """ + self.set_param_grad_state( + stem=not stem, # If `stem==True`, set requires_grad to False + bbone_blocks=([i for i in range(bbone_blocks)], False), + neck=not neck, + bbox_head=not bbox_head, + classifier_heads=not classifier_heads, + ) def unfreeze( self, - unfreeze_stem: bool = False, - unfreeze_bbone_blocks_until: int = 9, # either 0-9 TODO FIXME - unfreeze_neck: bool = True, - unfreeze_bbox_head: bool = True, - unfreeze_classifier_heads: bool = True, + stem: bool = False, + bbone_blocks: int = 9, + neck: bool = True, + bbox_head: bool = True, + classifier_heads: bool = True, ): "Unfreeze specific parts of the model. By default all parts but the stem are unfrozen" - if not unfreeze_bbone_blocks_until in [0, 9]: - raise RuntimeError( - f"Currently we can only unfreeze all or no blocks at once. Pass `unfreeze_bbone_blocks_until=9 | 0` to do so" - ) - - self.freeze( - freeze_stem=unfreeze_stem, - freeze_bbone_blocks_until=unfreeze_bbone_blocks_until, - freeze_neck=unfreeze_neck, - freeze_bbox_head=unfreeze_bbox_head, - freeze_classifier_heads=unfreeze_classifier_heads, - _grad=True, + self.set_param_grad_state( + stem=stem, + bbone_blocks=([i for i in range(9 - bbone_blocks, 9)], True), + neck=neck, + bbox_head=bbox_head, + classifier_heads=classifier_heads, ) def freeze_specific_classifier_heads( From f932b100448a6646c26df3df3dcdc22c708b78c9 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Sat, 26 Jun 2021 13:04:01 +0530 Subject: [PATCH 087/122] rename func --- .../multitask/ultralytics/yolov5/arch/model_freezing.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py b/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py index fa8e68d37..8c44b8904 100644 --- a/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py +++ b/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py @@ -37,7 +37,7 @@ def _get_params_bbox_head(self) -> List[List[Parameter]]: def _get_params_classifier_heads(self) -> List[List[Parameter]]: return [params(self.classifier_heads)] - def set_param_grad_state( + def set_param_requires_grad( self, stem: bool, bbone_blocks: Tuple[Collection[int], bool], @@ -93,7 +93,7 @@ def freeze( bbox_head (bool, optional): Freeze the bounding box head (the `Detect` module). Defaults to False. classifier_heads (bool, optional): Freeze all the classification heads. Defaults to False. """ - self.set_param_grad_state( + self.set_param_requires_grad( stem=not stem, # If `stem==True`, set requires_grad to False bbone_blocks=([i for i in range(bbone_blocks)], False), neck=not neck, @@ -110,7 +110,7 @@ def unfreeze( classifier_heads: bool = True, ): "Unfreeze specific parts of the model. 
By default all parts but the stem are unfrozen" - self.set_param_grad_state( + self.set_param_requires_grad( stem=stem, bbone_blocks=([i for i in range(9 - bbone_blocks, 9)], True), neck=neck, From 69e4f82d7563fa240eea2332b8903459311b4585 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Sun, 27 Jun 2021 20:47:27 +0530 Subject: [PATCH 088/122] bugfix --- .../ultralytics/yolov5/arch/model_freezing.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py b/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py index 8c44b8904..808b43acd 100644 --- a/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py +++ b/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py @@ -55,14 +55,15 @@ def set_param_requires_grad( raise TypeError(error_msg) if not all(isinstance(x, int) for x in bbone_blocks[0]): raise TypeError(error_msg) - if not 0 <= bbone_blocks[0] <= 9: - raise ValueError(error_msg) + if not bbone_blocks[0] == []: + if not 0 <= bbone_blocks[0][0] <= 9: + raise ValueError(error_msg) for p in flatten(self._get_params_stem()): p.requires_grad = stem target_blocks, grad_state = bbone_blocks - pgs = np.array(self._get_params_backbone()) + pgs = np.array(self._get_params_backbone(), dtype="object") for p in flatten(pgs[target_blocks]): p.requires_grad = grad_state @@ -109,7 +110,11 @@ def unfreeze( bbox_head: bool = True, classifier_heads: bool = True, ): - "Unfreeze specific parts of the model. By default all parts but the stem are unfrozen" + """ + Unfreeze specific parts of the model. By default all parts but the stem are unfrozen. + Note that `bbone_blocks` works differently from `.freeze()`. `bbone_blocks=3` will unfreeze + the _last 3_ blocks, and `bbone_blocks=9` will unfreeze _all_ the blocks + """ self.set_param_requires_grad( stem=stem, bbone_blocks=([i for i in range(9 - bbone_blocks, 9)], True), From 31dddac6e424a88ff5ac0c80736dd40adefa3e3c Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Sun, 27 Jun 2021 20:53:24 +0530 Subject: [PATCH 089/122] * higher level freeze/unfreeze detector * better defaults --- .../multitask/ultralytics/yolov5/arch/model_freezing.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py b/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py index 808b43acd..34c795fcb 100644 --- a/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py +++ b/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py @@ -79,7 +79,7 @@ def set_param_requires_grad( def freeze( self, stem: bool = True, - bbone_blocks: int = 0, # between 0-9 + bbone_blocks: int = 1, # between 0-9 neck: bool = False, bbox_head: bool = False, classifier_heads: bool = False, @@ -123,6 +123,12 @@ def unfreeze( classifier_heads=classifier_heads, ) + def freeze_detector(self): + self.freeze(stem=True, bbone_blocks=9, neck=True, bbox_head=True) + + def unfreeze_detector(self): + self.unfreeze(stem=True, bbone_blocks=9, neck=True, bbox_head=True) + def freeze_specific_classifier_heads( self, names: Union[str, List[str], None] = None, _grad: bool = False ): From 64282455813281b100f3920d4ce287cb6ab17eb6 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Mon, 28 Jun 2021 08:23:05 +0530 Subject: [PATCH 090/122] move wts to gpu if available (reqd) --- icevision/models/multitask/classification_heads/head.py | 4 ++++ 1 file changed, 4 insertions(+) 
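The hunk below moves user-supplied class weights onto the GPU at config time, so a plain CPU tensor can be passed in (values are illustrative):

    import torch
    from icevision.models.multitask.classification_heads.head import ClassifierConfig

    cfg = ClassifierConfig(
        out_classes=3,
        # `__post_init__` moves this to CUDA when a GPU is available
        loss_func_wts=torch.tensor([1.0, 2.5, 0.7]),
    )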
diff --git a/icevision/models/multitask/classification_heads/head.py b/icevision/models/multitask/classification_heads/head.py index 0cd352ca1..68689f5df 100644 --- a/icevision/models/multitask/classification_heads/head.py +++ b/icevision/models/multitask/classification_heads/head.py @@ -59,6 +59,10 @@ def __post_init__(self): if isinstance(self.fpn_keys, int): self.fpn_keys = [self.fpn_keys] + if self.loss_func_wts is not None: + if torch.cuda.is_available(): + self.loss_func_wts = self.loss_func_wts.cuda() + if self.multilabel: if self.topk is None and self.thresh is None: self.thresh = 0.5 From f3b94f8b1e3c29c8981d6f35acd29d81dd22c9d6 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Mon, 28 Jun 2021 08:52:44 +0530 Subject: [PATCH 091/122] fix formatting --- icevision/models/multitask/utils/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icevision/models/multitask/utils/model.py b/icevision/models/multitask/utils/model.py index dff65d5af..a8f46f359 100644 --- a/icevision/models/multitask/utils/model.py +++ b/icevision/models/multitask/utils/model.py @@ -21,4 +21,4 @@ def set_bn_eval(m: nn.Module, use_eval=True) -> None: for l in m.children(): if isinstance(l, _BatchNorm) and not next(l.parameters()).requires_grad: l.eval() - set_bn_eval(l) \ No newline at end of file + set_bn_eval(l) From 89b4fec7765717a8cd5a56e73fc5c273341ee0ef Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Mon, 28 Jun 2021 11:00:20 +0530 Subject: [PATCH 092/122] iterate on freezing interface --- .../ultralytics/yolov5/arch/model_freezing.py | 40 +++++++++++++++---- 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py b/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py index 34c795fcb..0b4c74b53 100644 --- a/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py +++ b/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py @@ -78,14 +78,16 @@ def set_param_requires_grad( def freeze( self, - stem: bool = True, - bbone_blocks: int = 1, # between 0-9 + stem: bool = False, + bbone_blocks: int = 0, # between 0-9 neck: bool = False, bbox_head: bool = False, classifier_heads: bool = False, ): """ - Freeze selected parts of the network + Freeze selected parts of the network. By default, none of the parts are frozen, you need + to manually set each arg's value to `True` if you want to freeze it. If you don't want + this fine grained control, see `.freeze_detector()`, `.freeze_backbone()`, `.freeze_classifier_heads()` Args: stem (bool, optional): Freeze the first conv layer. Defaults to True. @@ -105,13 +107,17 @@ def freeze( def unfreeze( self, stem: bool = False, - bbone_blocks: int = 9, - neck: bool = True, - bbox_head: bool = True, - classifier_heads: bool = True, + bbone_blocks: int = 0, + neck: bool = False, + bbox_head: bool = False, + classifier_heads: bool = False, ): """ - Unfreeze specific parts of the model. By default all parts but the stem are unfrozen. + Unfreeze specific parts of the model. By default all parts are kept frozen. + You need to manually set whichever part you want to unfreeze by passing that arg as `True`. + See `.unfreeze_detector()`, `.unfreeze_backbone()`, `.unfreeze_classifier_heads()` methods if you + don't want this fine grained control. + Note that `bbone_blocks` works differently from `.freeze()`. 
`bbone_blocks=3` will unfreeze the _last 3_ blocks, and `bbone_blocks=9` will unfreeze _all_ the blocks """ @@ -124,11 +130,29 @@ def unfreeze( ) def freeze_detector(self): + "Freezes the entire detector i.e. stem, bbone, neck, bbox head" self.freeze(stem=True, bbone_blocks=9, neck=True, bbox_head=True) def unfreeze_detector(self): + "Unfreezes the entire detector i.e. stem, bbone, neck, bbox head" self.unfreeze(stem=True, bbone_blocks=9, neck=True, bbox_head=True) + def freeze_backbone(self, fpn=True): + "Freezes the entire backbone, optionally without the neck/fpn" + self.freeze(stem=True, bbone_blocks=9, neck=True if fpn else False) + + def unfreeze_backbone(self, fpn=True): + "Unfreezes the entire backbone, optionally without the neck/fpn" + self.unfreeze(stem=True, bbone_blocks=9, neck=True if fpn else False) + + def freeze_classifier_heads(self): + "Freezes just the classification heads" + self.freeze(classifier_heads=True) + + def unfreeze_classifier_heads(self): + "Unfreezes just the classification heads" + self.unfreeze(classifier_heads=True) + def freeze_specific_classifier_heads( self, names: Union[str, List[str], None] = None, _grad: bool = False ): From a253d5848b0f08140d490f4e384b5ae5da404068 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Mon, 28 Jun 2021 20:08:09 +0530 Subject: [PATCH 093/122] cast loss func weights to fp32 (double by default) --- icevision/models/multitask/classification_heads/head.py | 1 + 1 file changed, 1 insertion(+) diff --git a/icevision/models/multitask/classification_heads/head.py b/icevision/models/multitask/classification_heads/head.py index 68689f5df..dd1104550 100644 --- a/icevision/models/multitask/classification_heads/head.py +++ b/icevision/models/multitask/classification_heads/head.py @@ -60,6 +60,7 @@ def __post_init__(self): self.fpn_keys = [self.fpn_keys] if self.loss_func_wts is not None: + self.loss_func_wts = self.loss_func_wts.to(torch.float32) if torch.cuda.is_available(): self.loss_func_wts = self.loss_func_wts.cuda() From b60ee28c7991a542b6bcce935e1c2109cf42b05c Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Mon, 28 Jun 2021 22:21:51 +0530 Subject: [PATCH 094/122] try moving away from functional approach to avoid cryptic errors --- .../multitask/classification_heads/head.py | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/icevision/models/multitask/classification_heads/head.py b/icevision/models/multitask/classification_heads/head.py index dd1104550..0b987de26 100644 --- a/icevision/models/multitask/classification_heads/head.py +++ b/icevision/models/multitask/classification_heads/head.py @@ -60,7 +60,8 @@ def __post_init__(self): self.fpn_keys = [self.fpn_keys] if self.loss_func_wts is not None: - self.loss_func_wts = self.loss_func_wts.to(torch.float32) + if not self.multilabel: + self.loss_func_wts = self.loss_func_wts.to(torch.float32) if torch.cuda.is_available(): self.loss_func_wts = self.loss_func_wts.cuda() @@ -145,15 +146,18 @@ def setup_postprocessing(self): def setup_loss_function(self): if self.loss_func is None: if self.multilabel: - # self.loss_func = nn.BCEWithLogitsLoss(self.loss_func_wts) - self.loss_func = partial( - F.binary_cross_entropy_with_logits, pos_weight=self.loss_func_wts - ) - self.activation = torch.sigmoid # nn.Sigmoid() + self.loss_func = nn.BCEWithLogitsLoss(pos_weight=self.loss_func_wts) + # self.loss_func = partial( + # F.binary_cross_entropy_with_logits, pos_weight=self.loss_func_wts + # ) + self.activation = nn.Sigmoid() + # self.activation = 
torch.sigmoid # nn.Sigmoid() else: # self.loss_func = nn.CrossEntropyLoss(self.loss_func_wts) - self.loss_func = partial(F.cross_entropy, weight=self.loss_func_wts) - self.activation = partial(F.softmax, dim=-1) # nn.Softmax(-1) + self.loss_func = nn.CrossEntropyLoss(weight=self.loss_func_wts) + # self.loss_func = partial(F.cross_entropy, weight=self.loss_func_wts) + self.activation = nn.Softmax(-1) + # self.activation = partial(F.softmax, dim=-1) # nn.Softmax(-1) @classmethod def from_config(cls, config: ClassifierConfig): From 18e7737b24ba71aec4c0ba9f9c47c3daa7eed101 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Mon, 28 Jun 2021 22:57:42 +0530 Subject: [PATCH 095/122] bugfix --- icevision/models/multitask/ultralytics/yolov5/dataloaders.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/icevision/models/multitask/ultralytics/yolov5/dataloaders.py b/icevision/models/multitask/ultralytics/yolov5/dataloaders.py index bae5a4a92..fa58fa9f6 100644 --- a/icevision/models/multitask/ultralytics/yolov5/dataloaders.py +++ b/icevision/models/multitask/ultralytics/yolov5/dataloaders.py @@ -137,7 +137,9 @@ def build_multi_aug_batch( detection_images.append(detection_image) # See file header for more info on why this is done - detection_target[:, 0] = i if detection_target.numel() > 0 else None + if detection_target.numel() > 0: + detection_target[:, 0] = i + # detection_target[:, 0] = i if detection_target.numel() > 0 else None detection_targets.append(detection_target) for key, group in classification_transform_groups.items(): From ee5ef8e15828a0f5d991e5dfc7bb2b3aff2935de Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Mon, 28 Jun 2021 23:02:30 +0530 Subject: [PATCH 096/122] ... --- icevision/models/multitask/ultralytics/yolov5/dataloaders.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/icevision/models/multitask/ultralytics/yolov5/dataloaders.py b/icevision/models/multitask/ultralytics/yolov5/dataloaders.py index fa58fa9f6..5c698865f 100644 --- a/icevision/models/multitask/ultralytics/yolov5/dataloaders.py +++ b/icevision/models/multitask/ultralytics/yolov5/dataloaders.py @@ -61,7 +61,9 @@ def build_single_aug_batch( images.append(image) # See file header for more info on why this is done - detection_target[:, 0] = i if detection_target.numel() > 0 else None + if detection_target.numel() > 0: + detection_target[:, 0] = i + # detection_target[:, 0] = i if detection_target.numel() > 0 else None detection_targets.append(detection_target) # Classification From 4002c4d101baff16a73aa8fe18004197c1595a56 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Mon, 28 Jun 2021 23:44:58 +0530 Subject: [PATCH 097/122] patch --- .../models/multitask/ultralytics/yolov5/arch/yolo_hybrid.py | 1 + icevision/models/multitask/ultralytics/yolov5/prediction.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/icevision/models/multitask/ultralytics/yolov5/arch/yolo_hybrid.py b/icevision/models/multitask/ultralytics/yolov5/arch/yolo_hybrid.py index cb8119474..8c951da29 100644 --- a/icevision/models/multitask/ultralytics/yolov5/arch/yolo_hybrid.py +++ b/icevision/models/multitask/ultralytics/yolov5/arch/yolo_hybrid.py @@ -177,6 +177,7 @@ def train(self, mode: bool = True): "Set model to training mode, while freezing non trainable layers' BN statistics" super(HybridYOLOV5, self).train(mode) set_bn_eval(self) + return self def build_classification_modules(self, verbose: bool = True): """ diff --git a/icevision/models/multitask/ultralytics/yolov5/prediction.py 
b/icevision/models/multitask/ultralytics/yolov5/prediction.py index 6abb0b634..e984c0c20 100644 --- a/icevision/models/multitask/ultralytics/yolov5/prediction.py +++ b/icevision/models/multitask/ultralytics/yolov5/prediction.py @@ -43,7 +43,7 @@ def _predict_batch( batch = batch[0].to(device) model = model.eval().to(device) - (det_preds, _), classif_preds = model(batch, step_type=ForwardType.EXPORT_ONNX) + (det_preds, _), classif_preds = model(batch, step_type=ForwardType.INFERENCE) classification_configs = extract_classifier_pred_cfgs(model) return convert_raw_predictions( From ca52794ba726d1b8c28c2d3f925c8bc8701c46ab Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Wed, 30 Jun 2021 11:23:36 +0530 Subject: [PATCH 098/122] log `total_loss` for easier model checkpointing --- .../multitask/ultralytics/yolov5/lightning/model_adapter.py | 1 + 1 file changed, 1 insertion(+) diff --git a/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py b/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py index cf4c69617..bd43bb99a 100644 --- a/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py +++ b/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py @@ -177,6 +177,7 @@ def log_losses( classification_losses: TensorDict, ): log_vars = dict( + total_loss=detection_loss + classification_total_loss, detection_loss=detection_loss, classification_total_loss=classification_total_loss, **{ From 0cdfdcb931defe654bfc47561d30a9f0a351df06 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Wed, 30 Jun 2021 17:13:05 +0530 Subject: [PATCH 099/122] mystical bugfix --- icevision/models/multitask/data/dataset.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/icevision/models/multitask/data/dataset.py b/icevision/models/multitask/data/dataset.py index 892971f06..8b54e88f4 100644 --- a/icevision/models/multitask/data/dataset.py +++ b/icevision/models/multitask/data/dataset.py @@ -197,13 +197,12 @@ def __getitem__(self, i): print(f" Group: {group['tasks']}, ID: {id(tfmd_img)}") # NOTE: - # * We need to add the img component dynamically here to - # play nice with the albumentations adapter 🤬 - # * Setting the same img twice (to diff parts in memory), - # but it's ok cuz we will unload the record in DataLoader + # Setting the same img twice (to diff parts in memory) but it's ok cuz we will unload the record later for task in group["tasks"]: - record.add_component(ImageRecordComponent(Task(task))) - getattr(record, task).set_img(tfmd_img) + # record.add_component(ImageRecordComponent(Task(task))) # TODO FIXME: This throws a weird error idk why + comp = getattr(record, task) + comp.add_component(ImageRecordComponent()) + comp.set_img(tfmd_img) if self.debug: print(f" - Task: {task}, ID: {id(tfmd_img)}") From a2740a078c201486602eac01e69fbfea8622f73d Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Wed, 30 Jun 2021 17:29:29 +0530 Subject: [PATCH 100/122] flexibility to define custom record loading logic --- icevision/models/multitask/data/dataset.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/icevision/models/multitask/data/dataset.py b/icevision/models/multitask/data/dataset.py index 8b54e88f4..a1f2eb20d 100644 --- a/icevision/models/multitask/data/dataset.py +++ b/icevision/models/multitask/data/dataset.py @@ -171,8 +171,15 @@ def validate(self): def __len__(self): return len(self.records) + def load_record(self, i: int): + """ + Simple record loader. 
Externalised for easy subclassing for custom behavior
+        like loading cached records from disk
+        """
+        return self.records[i].load()
+
     def __getitem__(self, i):
-        record = self.records[i].load()
+        record = self.load_record(i)
 
         # Keep a copy of the orig img as it gets modified by albu
         original_img = deepcopy(record.img)

From 1ba47e8b2dca4a711b155d8b9b4cccd47ea18563 Mon Sep 17 00:00:00 2001
From: Rahul Somani
Date: Wed, 30 Jun 2021 22:11:32 +0530
Subject: [PATCH 101/122] fix model freezing bugs (i hope)

---
 .../ultralytics/yolov5/arch/model_freezing.py | 78 +++++++++----------
 1 file changed, 39 insertions(+), 39 deletions(-)

diff --git a/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py b/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py
index 0b4c74b53..f61577b66 100644
--- a/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py
+++ b/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py
@@ -37,44 +37,38 @@ def _get_params_bbox_head(self) -> List[List[Parameter]]:
     def _get_params_classifier_heads(self) -> List[List[Parameter]]:
         return [params(self.classifier_heads)]
 
-    def set_param_requires_grad(
-        self,
-        stem: bool,
-        bbone_blocks: Tuple[Collection[int], bool],
-        neck: bool,
-        bbox_head: bool,
-        classifier_heads: bool,
-    ):
+    def _set_param_grad_stem(self, mode: bool):
+        for p in flatten(self._get_params_stem()):
+            p.requires_grad = mode
+
+    def _set_param_grad_backbone(self, mode: bool, bbone_blocks: Collection[int]):
         error_msg = f"""
-        `bbone_blocks` must be a list|tuple where the second value is the gradient state to be set, and the
-        first value is a List[int] between 0-9 specifying which blocks to set this state for
+        `bbone_blocks` must be a list|tuple of values between 0-9 specifying which blocks to set this state for
         """
-        if not (isinstance(bbone_blocks, (list, tuple)) and len(bbone_blocks) == 2):
-            raise TypeError(error_msg)
-        if not isinstance(bbone_blocks[0], (list, tuple)):
+
+        if not isinstance(bbone_blocks, (list, tuple)):
             raise TypeError(error_msg)
-        if not all(isinstance(x, int) for x in bbone_blocks[0]):
+        if not all(isinstance(x, int) for x in bbone_blocks):
             raise TypeError(error_msg)
-        if not bbone_blocks[0] == []:
-            if not 0 <= bbone_blocks[0][0] <= 9:
+        if not bbone_blocks == []:
+            if not 0 <= bbone_blocks[0] <= 9:
                 raise ValueError(error_msg)
 
-        for p in flatten(self._get_params_stem()):
-            p.requires_grad = stem
-
-        target_blocks, grad_state = bbone_blocks
         pgs = np.array(self._get_params_backbone(), dtype="object")
-        for p in flatten(pgs[target_blocks]):
-            p.requires_grad = grad_state
+        for p in flatten(pgs[bbone_blocks]):
+            p.requires_grad = mode
 
+    def _set_param_grad_neck(self, mode: bool):
         for p in flatten(self._get_params_neck()):
-            p.requires_grad = neck
+            p.requires_grad = mode
 
+    def _set_param_grad_bbox_head(self, mode: bool):
         for p in flatten(self._get_params_bbox_head()):
-            p.requires_grad = bbox_head
+            p.requires_grad = mode
 
+    def _set_param_grad_classifier_heads(self, mode: bool):
         for p in flatten(self._get_params_classifier_heads()):
-            p.requires_grad = classifier_heads
+            p.requires_grad = mode
 
     def freeze(
         self,
@@ -96,13 +90,16 @@ def freeze(
         bbox_head (bool, optional): Freeze the bounding box head (the `Detect` module). Defaults to False.
         classifier_heads (bool, optional): Freeze all the classification heads. Defaults to False.
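 
         Example (an illustrative sketch, assuming `model` is a constructed
         HybridYOLOV5 instance):
 
            model.freeze(stem=True, bbone_blocks=3)  # freeze stem + first 3 bbone blocks
            model.freeze(classifier_heads=True)      # additionally freeze all clf heads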
""" - self.set_param_requires_grad( - stem=not stem, # If `stem==True`, set requires_grad to False - bbone_blocks=([i for i in range(bbone_blocks)], False), - neck=not neck, - bbox_head=not bbox_head, - classifier_heads=not classifier_heads, - ) + if stem: + self._set_param_grad_stem(False) + if bbone_blocks: + self._set_param_grad_backbone(False, [i for i in range(bbone_blocks)]) + if neck: + self._set_param_grad_neck(False) + if bbox_head: + self._set_param_grad_bbox_head(False) + if classifier_heads: + self._set_param_grad_classifier_heads(False) def unfreeze( self, @@ -121,13 +118,16 @@ def unfreeze( Note that `bbone_blocks` works differently from `.freeze()`. `bbone_blocks=3` will unfreeze the _last 3_ blocks, and `bbone_blocks=9` will unfreeze _all_ the blocks """ - self.set_param_requires_grad( - stem=stem, - bbone_blocks=([i for i in range(9 - bbone_blocks, 9)], True), - neck=neck, - bbox_head=bbox_head, - classifier_heads=classifier_heads, - ) + if stem: + self._set_param_grad_stem(True) + if bbone_blocks: + self._set_param_grad_backbone(True, [i for i in range(9 - bbone_blocks, 9)]) + if neck: + self._set_param_grad_neck(True) + if bbox_head: + self._set_param_grad_bbox_head(True) + if classifier_heads: + self._set_param_grad_classifier_heads(True) def freeze_detector(self): "Freezes the entire detector i.e. stem, bbone, neck, bbox head" From f4a59d7dcaf679a40c5bb4c214696b8a16040bb6 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Thu, 1 Jul 2021 21:14:24 +0530 Subject: [PATCH 102/122] use `load_record` for data validation --- icevision/models/multitask/data/dataset.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/icevision/models/multitask/data/dataset.py b/icevision/models/multitask/data/dataset.py index a1f2eb20d..a862daa3b 100644 --- a/icevision/models/multitask/data/dataset.py +++ b/icevision/models/multitask/data/dataset.py @@ -157,10 +157,11 @@ def validate(self): ), f"Invalid keys in `classification_transforms_groups`" missing_tasks = [] + record = self.load_record(0) for attr in flatten( [g["tasks"] for g in self.classification_transforms_groups.values()] ): - if not hasattr(self.records[0], attr): + if not hasattr(records, attr): missing_tasks += [attr] if not missing_tasks == []: raise ValueError( From 67bf4633dbdc9521a60f3449538bb84bd44fcd2a Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Thu, 1 Jul 2021 21:17:34 +0530 Subject: [PATCH 103/122] typo --- icevision/models/multitask/data/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icevision/models/multitask/data/dataset.py b/icevision/models/multitask/data/dataset.py index a862daa3b..e74aefc47 100644 --- a/icevision/models/multitask/data/dataset.py +++ b/icevision/models/multitask/data/dataset.py @@ -161,7 +161,7 @@ def validate(self): for attr in flatten( [g["tasks"] for g in self.classification_transforms_groups.values()] ): - if not hasattr(records, attr): + if not hasattr(record, attr): missing_tasks += [attr] if not missing_tasks == []: raise ValueError( From 9900a38cb0c39a9542a78570dbad75b4e49f9fb6 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Sat, 3 Jul 2021 08:21:56 +0530 Subject: [PATCH 104/122] extra safe record unloading (experiment) --- icevision/models/multitask/ultralytics/yolov5/dataloaders.py | 1 + 1 file changed, 1 insertion(+) diff --git a/icevision/models/multitask/ultralytics/yolov5/dataloaders.py b/icevision/models/multitask/ultralytics/yolov5/dataloaders.py index 5c698865f..b212cdd09 100644 --- 
a/icevision/models/multitask/ultralytics/yolov5/dataloaders.py +++ b/icevision/models/multitask/ultralytics/yolov5/dataloaders.py @@ -158,6 +158,7 @@ def build_multi_aug_batch( else: labels = comp.label_ids classification_targets[name].extend(labels) + record.unload() # NOTE: Safety mechanism # Massage data for group in classification_data.values(): From 8de99b2909f5f1919b161e47e052370eb6c7829a Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Tue, 6 Jul 2021 11:18:27 +0530 Subject: [PATCH 105/122] generalise to all yolo architectures --- .../multitask/ultralytics/yolov5/arch/model_freezing.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py b/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py index f61577b66..b9cf501f3 100644 --- a/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py +++ b/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py @@ -26,10 +26,10 @@ def _get_params_stem(self) -> List[nn.Parameter]: return params(self.model[0]) def _get_params_backbone(self) -> List[List[Parameter]]: - return [params(m) for m in self.model[1:10]] + return [params(m) for m in self.model[1 : len(self.yaml["backbone"])]] def _get_params_neck(self) -> List[List[Parameter]]: - return [params(m) for m in self.model[10:][:-1]] + return [params(m) for m in self.model[len(self.yaml["backbone"]) :][:-1]] def _get_params_bbox_head(self) -> List[List[Parameter]]: return params(self.model[-1]) @@ -51,7 +51,7 @@ def _set_param_grad_backbone(self, mode: bool, bbone_blocks: Collection[int]): if not all(isinstance(x, int) for x in bbone_blocks): raise TypeError(error_msg) if not bbone_blocks == []: - if not 0 <= bbone_blocks[0] <= 9: + if not 0 <= bbone_blocks[0] <= len(self.yaml["backbone"]) - 1: raise ValueError(error_msg) pgs = np.array(self._get_params_backbone(), dtype="object") From fc52a6ffc35a8a7fe9895b9417fa88bae0026c60 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Mon, 12 Jul 2021 13:17:07 +0530 Subject: [PATCH 106/122] parametrise `num_bbone_blocks` --- .../ultralytics/yolov5/arch/model_freezing.py | 40 +++++++++++++------ .../ultralytics/yolov5/arch/yolo_hybrid.py | 4 ++ 2 files changed, 32 insertions(+), 12 deletions(-) diff --git a/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py b/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py index b9cf501f3..50ea215fb 100644 --- a/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py +++ b/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py @@ -26,10 +26,10 @@ def _get_params_stem(self) -> List[nn.Parameter]: return params(self.model[0]) def _get_params_backbone(self) -> List[List[Parameter]]: - return [params(m) for m in self.model[1 : len(self.yaml["backbone"])]] + return [params(m) for m in self.model[1 : self.num_bbone_blocks]] def _get_params_neck(self) -> List[List[Parameter]]: - return [params(m) for m in self.model[len(self.yaml["backbone"]) :][:-1]] + return [params(m) for m in self.model[self.num_bbone_blocks :][:-1]] def _get_params_bbox_head(self) -> List[List[Parameter]]: return params(self.model[-1]) @@ -43,7 +43,7 @@ def _set_param_grad_stem(self, mode: bool): def _set_param_grad_backbone(self, mode: bool, bbone_blocks: Collection[int]): error_msg = f""" - `bbone_blocks` must be a list|tuple of values between 0-9 specifying which blocks to set this state for + `bbone_blocks` must be a list|tuple of values between 
0-{self.num_bbone_blocks} specifying which blocks to set this state for """ if not isinstance(bbone_blocks, (list, tuple)): @@ -51,7 +51,7 @@ def _set_param_grad_backbone(self, mode: bool, bbone_blocks: Collection[int]): if not all(isinstance(x, int) for x in bbone_blocks): raise TypeError(error_msg) if not bbone_blocks == []: - if not 0 <= bbone_blocks[0] <= len(self.yaml["backbone"]) - 1: + if not 0 <= bbone_blocks[0] <= self.num_bbone_blocks - 1: raise ValueError(error_msg) pgs = np.array(self._get_params_backbone(), dtype="object") @@ -73,7 +73,7 @@ def _set_param_grad_classifier_heads(self, mode: bool): def freeze( self, stem: bool = False, - bbone_blocks: int = 0, # between 0-9 + bbone_blocks: int = 0, # between 0 to self.num_bbone_blocks neck: bool = False, bbox_head: bool = False, classifier_heads: bool = False, @@ -85,7 +85,7 @@ def freeze( Args: stem (bool, optional): Freeze the first conv layer. Defaults to True. - bbone_blocks (int, optional): Number of blocks to freeze. If 0, none are frozen; if 9, all are frozen. If 3, the first 3 blocks are frozen + bbone_blocks (int, optional): Number of blocks to freeze. If 0, none are frozen; if ==self.num_bbone_blocks, all are frozen. neck (bool, optional): Freeze the neck (FPN). Defaults to False. bbox_head (bool, optional): Freeze the bounding box head (the `Detect` module). Defaults to False. classifier_heads (bool, optional): Freeze all the classification heads. Defaults to False. @@ -116,12 +116,20 @@ def unfreeze( don't want this fine grained control. Note that `bbone_blocks` works differently from `.freeze()`. `bbone_blocks=3` will unfreeze - the _last 3_ blocks, and `bbone_blocks=9` will unfreeze _all_ the blocks + the _last 3_ blocks, and `bbone_blocks=self.num_bbone_blocks` will unfreeze _all_ the blocks """ if stem: self._set_param_grad_stem(True) if bbone_blocks: - self._set_param_grad_backbone(True, [i for i in range(9 - bbone_blocks, 9)]) + self._set_param_grad_backbone( + True, + [ + i + for i in range( + self.num_bbone_blocks - bbone_blocks, self.num_bbone_blocks + ) + ], + ) if neck: self._set_param_grad_neck(True) if bbox_head: @@ -131,19 +139,27 @@ def unfreeze( def freeze_detector(self): "Freezes the entire detector i.e. stem, bbone, neck, bbox head" - self.freeze(stem=True, bbone_blocks=9, neck=True, bbox_head=True) + self.freeze( + stem=True, bbone_blocks=self.num_bbone_blocks, neck=True, bbox_head=True + ) def unfreeze_detector(self): "Unfreezes the entire detector i.e. 
stem, bbone, neck, bbox head"
+        self.unfreeze(
+            stem=True, bbone_blocks=self.num_bbone_blocks, neck=True, bbox_head=True
+        )
 
     def freeze_backbone(self, fpn=True):
         "Freezes the entire backbone, optionally without the neck/fpn"
-        self.freeze(stem=True, bbone_blocks=9, neck=True if fpn else False)
+        self.freeze(
+            stem=True, bbone_blocks=self.num_bbone_blocks, neck=True if fpn else False
+        )
 
     def unfreeze_backbone(self, fpn=True):
         "Unfreezes the entire backbone, optionally without the neck/fpn"
-        self.unfreeze(stem=True, bbone_blocks=9, neck=True if fpn else False)
+        self.unfreeze(
+            stem=True, bbone_blocks=self.num_bbone_blocks, neck=True if fpn else False
+        )
 
     def freeze_classifier_heads(self):
         "Freezes just the classification heads"
diff --git a/icevision/models/multitask/ultralytics/yolov5/arch/yolo_hybrid.py b/icevision/models/multitask/ultralytics/yolov5/arch/yolo_hybrid.py
index 8c951da29..3519abb70 100644
--- a/icevision/models/multitask/ultralytics/yolov5/arch/yolo_hybrid.py
+++ b/icevision/models/multitask/ultralytics/yolov5/arch/yolo_hybrid.py
@@ -179,6 +179,10 @@ def train(self, mode: bool = True):
         set_bn_eval(self)
         return self
 
+    @property
+    def num_bbone_blocks(self) -> int:
+        return len(self.yaml["backbone"])
+
     def build_classification_modules(self, verbose: bool = True):
         """
         Description:

From aa4b51589802f92bddaccf1d5aefd2fd38da8d71 Mon Sep 17 00:00:00 2001
From: Rahul Somani
Date: Mon, 12 Jul 2021 13:49:06 +0530
Subject: [PATCH 107/122] fix subtle bbone block idxs bug

---
 .../multitask/ultralytics/yolov5/arch/model_freezing.py | 2 +-
 .../multitask/ultralytics/yolov5/arch/yolo_hybrid.py    | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py b/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py
index 50ea215fb..979d91c92 100644
--- a/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py
+++ b/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py
@@ -26,7 +26,7 @@ def _get_params_stem(self) -> List[nn.Parameter]:
         return params(self.model[0])
 
     def _get_params_backbone(self) -> List[List[Parameter]]:
-        return [params(m) for m in self.model[1 : self.num_bbone_blocks]]
+        return [params(m) for m in self.model[1 : self.bbone_blocks_end_idx]]
 
     def _get_params_neck(self) -> List[List[Parameter]]:
         return [params(m) for m in self.model[self.num_bbone_blocks :][:-1]]
diff --git a/icevision/models/multitask/ultralytics/yolov5/arch/yolo_hybrid.py b/icevision/models/multitask/ultralytics/yolov5/arch/yolo_hybrid.py
index 3519abb70..10f1b983d 100644
--- a/icevision/models/multitask/ultralytics/yolov5/arch/yolo_hybrid.py
+++ b/icevision/models/multitask/ultralytics/yolov5/arch/yolo_hybrid.py
@@ -181,6 +181,14 @@ def train(self, mode: bool = True):
 
     @property
     def num_bbone_blocks(self) -> int:
+        return len(self.yaml["backbone"]) - 1
+
+    @property
+    def bbone_blocks_start_idx(self) -> int:
+        return 1
+
+    @property
+    def bbone_blocks_end_idx(self) -> int:
         return len(self.yaml["backbone"])
 
From cfe306bc3276ee5be5f4a92e10a0d9b0a4936d7b Mon Sep 17 00:00:00 2001
From: Rahul Somani
Date: Mon, 12 Jul 2021 13:55:12 +0530
Subject: [PATCH 108/122] fix neck param indexing; 
really need some tests --- .../models/multitask/ultralytics/yolov5/arch/model_freezing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py b/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py index 979d91c92..82eeedf6a 100644 --- a/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py +++ b/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py @@ -29,7 +29,7 @@ def _get_params_backbone(self) -> List[List[Parameter]]: return [params(m) for m in self.model[1 : self.bbone_blocks_end_idx]] def _get_params_neck(self) -> List[List[Parameter]]: - return [params(m) for m in self.model[self.num_bbone_blocks :][:-1]] + return [params(m) for m in self.model[self.bbone_blocks_end_idx :][:-1]] def _get_params_bbox_head(self) -> List[List[Parameter]]: return params(self.model[-1]) From 56913dd5de15b8f590a4e5eeabb06a6186bb4a9d Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Sat, 24 Jul 2021 13:17:06 +0530 Subject: [PATCH 109/122] add `freeze_neck/fpn` --- .../multitask/ultralytics/yolov5/arch/model_freezing.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py b/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py index 82eeedf6a..dd13c6211 100644 --- a/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py +++ b/icevision/models/multitask/ultralytics/yolov5/arch/model_freezing.py @@ -155,6 +155,14 @@ def freeze_backbone(self, fpn=True): stem=True, bbone_blocks=self.num_bbone_blocks, neck=True if fpn else False ) + def freeze_neck(self): + "Freeze the FPN/Neck" + self.freeze(neck=True) + + def freeze_fpn(self): + "Freeze the FPN/Neck" + self.freeze_neck() + def unfreeze_backbone(self, fpn=True): "Unfreezes the entire backbone, optionally without the neck/fpn" self.unfreeze( From fcf2d3dc63a3b06e5d1bf1bb538ab80f3df98770 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Mon, 26 Jul 2021 10:49:31 +0530 Subject: [PATCH 110/122] refactor classification dataloading --- .../multitask/data/dataloading_utils.py | 15 +++++++- .../models/multitask/mmdet/dataloaders.py | 38 ++++++++++--------- .../ultralytics/yolov5/dataloaders.py | 38 ++++++++++--------- 3 files changed, 54 insertions(+), 37 deletions(-) diff --git a/icevision/models/multitask/data/dataloading_utils.py b/icevision/models/multitask/data/dataloading_utils.py index 2bd2e4b60..9be12ca50 100644 --- a/icevision/models/multitask/data/dataloading_utils.py +++ b/icevision/models/multitask/data/dataloading_utils.py @@ -3,11 +3,12 @@ as it only slightly modifies an existing function. 
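 
 An illustrative sketch of the new helper added below (assumes a loaded `record`;
 "color" is a hypothetical task name):
 
     targets = defaultdict(list)
     assign_classification_targets_from_record(targets, record)
     # targets["color"] now holds label ids, or one-hot rows for multilabel tasks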
""" -__all__ = ["unload_records"] +__all__ = ["unload_records", "assign_classification_targets_from_record"] from icevision.core.record_type import RecordType from typing import Any, Dict, Optional, Callable, Sequence, Tuple +from icevision.core.record_components import ClassificationLabelsRecordComponent def unload_records( @@ -38,3 +39,15 @@ def inner(records): return tupled_output, records return inner + + +def assign_classification_targets_from_record(classification_labels: dict, record): + for comp in record.components: + name = comp.task.name + if isinstance(comp, ClassificationLabelsRecordComponent): + if comp.is_multilabel: + labels = comp.one_hot_encoded() + classification_labels[name].append(labels) + else: + labels = comp.label_ids + classification_labels[name].extend(labels) diff --git a/icevision/models/multitask/mmdet/dataloaders.py b/icevision/models/multitask/mmdet/dataloaders.py index 204f2a92e..e0dfb9112 100644 --- a/icevision/models/multitask/mmdet/dataloaders.py +++ b/icevision/models/multitask/mmdet/dataloaders.py @@ -90,15 +90,16 @@ def build_multi_aug_batch( classification_data[key]["images"].append(_img_tensor(task)) # Get classification labels for each group - for comp in record.components: - name = comp.task.name - if isinstance(comp, ClassificationLabelsRecordComponent): - if comp.is_multilabel: - labels = comp.one_hot_encoded() - classification_labels[name].append(labels) - else: - labels = comp.label_ids - classification_labels[name].extend(labels) + assign_classification_targets_from_record(classification_labels, record) + # for comp in record.components: + # name = comp.task.name + # if isinstance(comp, ClassificationLabelsRecordComponent): + # if comp.is_multilabel: + # labels = comp.one_hot_encoded() + # classification_labels[name].append(labels) + # else: + # labels = comp.label_ids + # classification_labels[name].extend(labels) # Massage data for group in classification_data.values(): @@ -134,15 +135,16 @@ def build_single_aug_batch(records: Sequence[RecordType]): bboxes.append(_bboxes(record)) # Loop through and create classifier dict of inputs - for comp in record.components: - name = comp.task.name - if isinstance(comp, ClassificationLabelsRecordComponent): - if comp.is_multilabel: - labels = comp.one_hot_encoded() - classification_labels[name].append(labels) - else: - labels = comp.label_ids - classification_labels[name].extend(labels) + assign_classification_targets_from_record(classification_labels, record) + # for comp in record.components: + # name = comp.task.name + # if isinstance(comp, ClassificationLabelsRecordComponent): + # if comp.is_multilabel: + # labels = comp.one_hot_encoded() + # classification_labels[name].append(labels) + # else: + # labels = comp.label_ids + # classification_labels[name].extend(labels) classification_labels = {k: tensor(v) for k, v in classification_labels.items()} diff --git a/icevision/models/multitask/ultralytics/yolov5/dataloaders.py b/icevision/models/multitask/ultralytics/yolov5/dataloaders.py index b212cdd09..22ebc0373 100644 --- a/icevision/models/multitask/ultralytics/yolov5/dataloaders.py +++ b/icevision/models/multitask/ultralytics/yolov5/dataloaders.py @@ -67,15 +67,16 @@ def build_single_aug_batch( detection_targets.append(detection_target) # Classification - for comp in record.components: - name = comp.task.name - if isinstance(comp, ClassificationLabelsRecordComponent): - if comp.is_multilabel: - labels = comp.one_hot_encoded() - classification_targets[name].append(labels) - else: - labels = 
comp.label_ids - classification_targets[name].extend(labels) + assign_classification_targets_from_record(classification_targets, record) + # for comp in record.components: + # name = comp.task.name + # if isinstance(comp, ClassificationLabelsRecordComponent): + # if comp.is_multilabel: + # labels = comp.one_hot_encoded() + # classification_targets[name].append(labels) + # else: + # labels = comp.label_ids + # classification_targets[name].extend(labels) classification_targets = {k: tensor(v) for k, v in classification_targets.items()} @@ -149,15 +150,16 @@ def build_multi_aug_batch( classification_data[key]["tasks"] = group["tasks"] classification_data[key]["images"].append(im2tensor(task.img)) - for comp in record.components: - name = comp.task.name - if isinstance(comp, ClassificationLabelsRecordComponent): - if comp.is_multilabel: - labels = comp.one_hot_encoded() - classification_targets[name].append(labels) - else: - labels = comp.label_ids - classification_targets[name].extend(labels) + assign_classification_targets_from_record(classification_targets, record) + # for comp in record.components: + # name = comp.task.name + # if isinstance(comp, ClassificationLabelsRecordComponent): + # if comp.is_multilabel: + # labels = comp.one_hot_encoded() + # classification_targets[name].append(labels) + # else: + # labels = comp.label_ids + # classification_targets[name].extend(labels) record.unload() # NOTE: Safety mechanism # Massage data From 2c34448c74d640bdacb46e0bc82e00e513e0abfd Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Mon, 26 Jul 2021 11:04:15 +0530 Subject: [PATCH 111/122] some more refactoring. TODO: test --- .../multitask/data/dataloading_utils.py | 20 ++++++++++++++++++- .../models/multitask/mmdet/dataloaders.py | 15 ++++++++------ .../ultralytics/yolov5/dataloaders.py | 15 ++++++++------ 3 files changed, 37 insertions(+), 13 deletions(-) diff --git a/icevision/models/multitask/data/dataloading_utils.py b/icevision/models/multitask/data/dataloading_utils.py index 9be12ca50..0665e0a1c 100644 --- a/icevision/models/multitask/data/dataloading_utils.py +++ b/icevision/models/multitask/data/dataloading_utils.py @@ -3,12 +3,18 @@ as it only slightly modifies an existing function. 
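 
 A sketch of the shape contract for `massage_multi_aug_classification_data`
 added below (all names illustrative):
 
     data = {"g1": {"tasks": ["color"], "images": [img0, img1]}}
     targets = {"color": [0, 1]}
     out = massage_multi_aug_classification_data(data, targets, target_key="targets")
     # out["g1"]["images"] == torch.stack([img0, img1])
     # out["g1"]["targets"] == {"color": tensor([0, 1])}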
""" -__all__ = ["unload_records", "assign_classification_targets_from_record"] +__all__ = [ + "unload_records", + "assign_classification_targets_from_record", + "massage_multi_aug_classification_data", +] +import torch from icevision.core.record_type import RecordType from typing import Any, Dict, Optional, Callable, Sequence, Tuple from icevision.core.record_components import ClassificationLabelsRecordComponent +from torch import tensor def unload_records( @@ -51,3 +57,15 @@ def assign_classification_targets_from_record(classification_labels: dict, recor else: labels = comp.label_ids classification_labels[name].extend(labels) + + +def massage_multi_aug_classification_data( + classification_data, classification_targets, target_key: str +): + for group in classification_data.values(): + group[target_key] = { + task: tensor(classification_targets[task]) for task in group["tasks"] + } + group["images"] = torch.stack(group["images"]) + + return {k: dict(v) for k, v in classification_data.items()} diff --git a/icevision/models/multitask/mmdet/dataloaders.py b/icevision/models/multitask/mmdet/dataloaders.py index e0dfb9112..6bf6588dc 100644 --- a/icevision/models/multitask/mmdet/dataloaders.py +++ b/icevision/models/multitask/mmdet/dataloaders.py @@ -102,12 +102,15 @@ def build_multi_aug_batch( # classification_labels[name].extend(labels) # Massage data - for group in classification_data.values(): - group["classification_labels"] = { - task: tensor(classification_labels[task]) for task in group["tasks"] - } - group["images"] = torch.stack(group["images"]) - classification_data = {k: dict(v) for k, v in classification_data.items()} + classification_data = massage_multi_aug_classification_data( + classification_data, classification_labels, "classification_labels" + ) + # for group in classification_data.values(): + # group["classification_labels"] = { + # task: tensor(classification_labels[task]) for task in group["tasks"] + # } + # group["images"] = torch.stack(group["images"]) + # classification_data = {k: dict(v) for k, v in classification_data.items()} detection_data = { "img": torch.stack(det_images), diff --git a/icevision/models/multitask/ultralytics/yolov5/dataloaders.py b/icevision/models/multitask/ultralytics/yolov5/dataloaders.py index 22ebc0373..b679caddc 100644 --- a/icevision/models/multitask/ultralytics/yolov5/dataloaders.py +++ b/icevision/models/multitask/ultralytics/yolov5/dataloaders.py @@ -163,12 +163,15 @@ def build_multi_aug_batch( record.unload() # NOTE: Safety mechanism # Massage data - for group in classification_data.values(): - group["targets"] = { - task: tensor(classification_targets[task]) for task in group["tasks"] - } - group["images"] = torch.stack(group["images"]) - classification_data = {k: dict(v) for k, v in classification_data.items()} + classification_data = massage_multi_aug_classification_data( + classification_data, classification_targets, "targets" + ) + # for group in classification_data.values(): + # group["targets"] = { + # task: tensor(classification_targets[task]) for task in group["tasks"] + # } + # group["images"] = torch.stack(group["images"]) + # classification_data = {k: dict(v) for k, v in classification_data.items()} detection_data = dict( images=torch.stack(detection_images, 0), From 4561be1344a546ce730b3d1aa278f09497a43204 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Mon, 26 Jul 2021 19:12:01 +0530 Subject: [PATCH 112/122] refactor logging methods --- .../models/multitask/engines/__init__.py | 0 
.../multitask/engines/lightning/__init__.py | 1 + .../lightning/lightning_model_adapter.py | 55 +++++++++++++++++ .../models/multitask/mmdet/pl_adapter.py | 49 ++------------- .../yolov5/lightning/model_adapter.py | 60 +------------------ 5 files changed, 62 insertions(+), 103 deletions(-) create mode 100644 icevision/models/multitask/engines/__init__.py create mode 100644 icevision/models/multitask/engines/lightning/__init__.py create mode 100644 icevision/models/multitask/engines/lightning/lightning_model_adapter.py diff --git a/icevision/models/multitask/engines/__init__.py b/icevision/models/multitask/engines/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/icevision/models/multitask/engines/lightning/__init__.py b/icevision/models/multitask/engines/lightning/__init__.py new file mode 100644 index 000000000..9a5eeb3d9 --- /dev/null +++ b/icevision/models/multitask/engines/lightning/__init__.py @@ -0,0 +1 @@ +from .lightning_model_adapter import * diff --git a/icevision/models/multitask/engines/lightning/lightning_model_adapter.py b/icevision/models/multitask/engines/lightning/lightning_model_adapter.py new file mode 100644 index 000000000..aae6d06e3 --- /dev/null +++ b/icevision/models/multitask/engines/lightning/lightning_model_adapter.py @@ -0,0 +1,55 @@ +__all__ = ["MultiTaskLightningModelAdapter"] + +import pytorch_lightning as pl +from icevision.imports import * +from icevision.metrics import * +from icevision.engines.lightning import LightningModelAdapter +from icevision.models.multitask.utils.dtypes import * + + +class MultiTaskLightningModelAdapter(LightningModelAdapter): + def compute_and_log_classification_metrics( + self, + classification_preds: TensorDict, # activated predictions + yb: TensorDict, + on_step: bool = False, + # prefix: str = "valid", + ): + if not set(classification_preds.keys()) == set(yb.keys()): + raise RuntimeError( + f"Mismatch between prediction and target items. 
Predictions have " + f"{classification_preds.keys()} keys and targets have {yb.keys()} keys" + ) + # prefix = f"{prefix}/" if not prefix == "" else "" + prefix = "valid/" + for (name, metric), (_, preds) in zip( + self.classification_metrics.items(), classification_preds.items() + ): + self.log( + f"{prefix}{metric.__class__.__name__.lower()}_{name}", # accuracy_{task_name} + metric(preds, yb[name].type(torch.int)), + on_step=on_step, + on_epoch=True, + ) + + def log_losses( + self, + mode: str, + detection_loss: Tensor, + classification_total_loss: Tensor, + classification_losses: TensorDict, + ): + log_vars = dict( + total_loss=detection_loss + classification_total_loss, + detection_loss=detection_loss, + classification_total_loss=classification_total_loss, + **{ + f"classification_loss_{name}": loss + for name, loss in classification_losses.items() + }, + ) + for k, v in log_vars.items(): + self.log(f"{mode}/{k}", v.item() if isinstance(v, torch.Tensor) else v) + + def validation_epoch_end(self, outs): + self.finalize_metrics() diff --git a/icevision/models/multitask/mmdet/pl_adapter.py b/icevision/models/multitask/mmdet/pl_adapter.py index 525a77bcb..8bfc08727 100644 --- a/icevision/models/multitask/mmdet/pl_adapter.py +++ b/icevision/models/multitask/mmdet/pl_adapter.py @@ -3,6 +3,7 @@ # We could in theory also do `pl.metrics` # import pytorch_lightning.metrics as tm +from icevision.models.multitask.utils.prediction import extract_classifier_pred_cfgs import torchmetrics as tm from icevision.all import * from mmcv.utils import ConfigDict @@ -13,12 +14,12 @@ ) from icevision.models.multitask.mmdet.prediction import * from icevision.models.multitask.utils.dtypes import * - +from icevision.models.multitask.engines.lightning import MultiTaskLightningModelAdapter __all__ = ["HybridSingleStageDetectorLightningAdapter"] -class HybridSingleStageDetectorLightningAdapter(pl.LightningModule, ABC): +class HybridSingleStageDetectorLightningAdapter(MultiTaskLightningModelAdapter): """Lightning module specialized for EfficientDet, with metrics support. 
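 
     An illustrative sketch of intended usage (assuming a built `model` and a
     `metrics` list): `adapter = HybridSingleStageDetectorLightningAdapter(model,
     metrics=metrics)`, after which `adapter` is handed to a `pl.Trainer` as usual.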
The methods `forward`, `training_step`, `validation_step`, `validation_epoch_end` @@ -109,52 +110,10 @@ def validation_step(self, batch, batch_idx): # ======================== LOGGING METHODS ======================== # def convert_raw_predictions(self, batch, raw_preds, records): - classification_configs = { - name: ConfigDict( - multilabel=head.multilabel, topk=head.topk, thresh=head.thresh - ) - for name, head in self.model.classifier_heads.items() - } return convert_raw_predictions( batch=batch, raw_preds=raw_preds, records=records, detection_threshold=0.0, - classification_configs=classification_configs, + classification_configs=extract_classifier_pred_cfgs(self.model), ) - - def compute_and_log_classification_metrics( - self, - classification_preds: TensorDict, # activated predictions - yb: TensorDict, - on_step: bool = False, - # prefix: str = "valid", - ): - # prefix = f"{prefix}/" if not prefix == "" else "" - prefix = "valid/" - for (name, metric), (_, preds) in zip( - self.classification_metrics.items(), classification_preds.items() - ): - self.log( - f"{prefix}{metric.__class__.__name__.lower()}_{name}", # accuracy_{task_name} - metric(preds, yb[name].type(torch.int)), - on_step=on_step, - on_epoch=True, - ) - - def log_losses(self, log_vars: dict, mode: str): - for k, v in log_vars.items(): - self.log(f"{mode}/{k}", v.item() if isinstance(v, torch.Tensor) else v) - - def validation_epoch_end(self, outs): - self.finalize_metrics() - - def accumulate_metrics(self, preds): - for metric in self.metrics: - metric.accumulate(preds=preds) - - def finalize_metrics(self) -> None: - for metric in self.metrics: - metric_logs = metric.finalize() - for k, v in metric_logs.items(): - self.log(f"{metric.name}/{k}", v) diff --git a/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py b/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py index bd43bb99a..233e7c9b8 100644 --- a/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py +++ b/icevision/models/multitask/ultralytics/yolov5/lightning/model_adapter.py @@ -18,10 +18,11 @@ convert_raw_predictions, ) from icevision.models.multitask.utils.model import ForwardType +from icevision.models.multitask.engines.lightning import MultiTaskLightningModelAdapter from yolov5.utils.loss import ComputeLoss -class HybridYOLOV5LightningAdapter(pl.LightningModule, ABC): +class HybridYOLOV5LightningAdapter(MultiTaskLightningModelAdapter): """ """ def __init__( @@ -140,60 +141,3 @@ def validation_step(self, batch, batch_idx): self.log_losses( "valid", detection_loss, total_classification_loss, classification_losses ) - - def validation_epoch_end(self, outs): - self.finalize_metrics() - - # ======================== LOGGING METHODS ======================== # - def compute_and_log_classification_metrics( - self, - classification_preds: TensorDict, # activated predictions - yb: TensorDict, - on_step: bool = False, - # prefix: str = "valid", - ): - if not set(classification_preds.keys()) == set(yb.keys()): - raise RuntimeError( - f"Mismatch between prediction and target items. 
Predictions have " - f"{classification_preds.keys()} keys and targets have {yb.keys()} keys" - ) - # prefix = f"{prefix}/" if not prefix == "" else "" - prefix = "valid/" - for (name, metric), (_, preds) in zip( - self.classification_metrics.items(), classification_preds.items() - ): - self.log( - f"{prefix}{metric.__class__.__name__.lower()}_{name}", # accuracy_{task_name} - metric(preds, yb[name].type(torch.int)), - on_step=on_step, - on_epoch=True, - ) - - def log_losses( - self, - mode: str, - detection_loss: Tensor, - classification_total_loss: Tensor, - classification_losses: TensorDict, - ): - log_vars = dict( - total_loss=detection_loss + classification_total_loss, - detection_loss=detection_loss, - classification_total_loss=classification_total_loss, - **{ - f"classification_loss_{name}": loss - for name, loss in classification_losses.items() - }, - ) - for k, v in log_vars.items(): - self.log(f"{mode}/{k}", v.item() if isinstance(v, torch.Tensor) else v) - - def accumulate_metrics(self, preds): - for metric in self.metrics: - metric.accumulate(preds=preds) - - def finalize_metrics(self) -> None: - for metric in self.metrics: - metric_logs = metric.finalize() - for k, v in metric_logs.items(): - self.log(f"{metric.name}/{k}", v) From 72092cda2503010c5f24c1c9cd7b207a88f9f3d7 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Tue, 27 Jul 2021 12:39:14 +0530 Subject: [PATCH 113/122] add speedup --- .../multitask/engines/lightning/lightning_model_adapter.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/icevision/models/multitask/engines/lightning/lightning_model_adapter.py b/icevision/models/multitask/engines/lightning/lightning_model_adapter.py index aae6d06e3..e3eb0424c 100644 --- a/icevision/models/multitask/engines/lightning/lightning_model_adapter.py +++ b/icevision/models/multitask/engines/lightning/lightning_model_adapter.py @@ -53,3 +53,7 @@ def log_losses( def validation_epoch_end(self, outs): self.finalize_metrics() + + # Modest speedup (See https://pytorch-lightning.readthedocs.io/en/stable/benchmarking/performance.html#zero-grad-set-to-none-true) + def optimizer_zero_grad(self, epoch, batch_idx, optimizer, optimizer_idx): + optimizer.zero_grad(set_to_none=True) From 718fb52aca2c7d124d72b7ada5e0070cc6d601fe Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Wed, 28 Jul 2021 05:23:54 +0530 Subject: [PATCH 114/122] move stuff around --- icevision/models/multitask/mmdet/dataloaders.py | 5 +---- icevision/models/multitask/mmdet/dtypes.py | 8 ++++++++ icevision/models/multitask/mmdet/fastai/__init__.py | 0 .../models/multitask/mmdet/lightning/__init__.py | 0 icevision/models/multitask/mmdet/models/__init__.py | 0 icevision/models/multitask/mmdet/pl_adapter.py | 13 +------------ icevision/models/multitask/utils/dtypes.py | 6 ------ 7 files changed, 10 insertions(+), 22 deletions(-) create mode 100644 icevision/models/multitask/mmdet/dtypes.py create mode 100644 icevision/models/multitask/mmdet/fastai/__init__.py create mode 100644 icevision/models/multitask/mmdet/lightning/__init__.py create mode 100644 icevision/models/multitask/mmdet/models/__init__.py diff --git a/icevision/models/multitask/mmdet/dataloaders.py b/icevision/models/multitask/mmdet/dataloaders.py index 6bf6588dc..e2f2b02d3 100644 --- a/icevision/models/multitask/mmdet/dataloaders.py +++ b/icevision/models/multitask/mmdet/dataloaders.py @@ -2,10 +2,7 @@ from icevision.imports import * from icevision.core import * from icevision.models.multitask.utils.dtypes import * -from 
icevision.models.multitask.utils.dtypes import ( - DataDictClassification, # Not imported in __all__ as they are mmdet specific - DataDictDetection, -) +from icevision.models.multitask.mmdet.dtypes import * from icevision.models.mmdet.common.utils import convert_background_from_zero_to_last from icevision.models.utils import unload_records from icevision.models.mmdet.common.bbox.dataloaders import ( diff --git a/icevision/models/multitask/mmdet/dtypes.py b/icevision/models/multitask/mmdet/dtypes.py new file mode 100644 index 000000000..feac21d0b --- /dev/null +++ b/icevision/models/multitask/mmdet/dtypes.py @@ -0,0 +1,8 @@ +from icevision.imports import * +from icevision.models.multitask.utils.dtypes import * + +ClassificationGroupDataDict = Dict[str, Union[List[str], Tensor, TensorDict]] +DataDictClassification = Dict[str, ClassificationGroupDataDict] +DataDictDetection = Union[ + TensorDict, ArrayDict, Dict[str, Union[Tuple[int], ImgMetadataDict]] +] diff --git a/icevision/models/multitask/mmdet/fastai/__init__.py b/icevision/models/multitask/mmdet/fastai/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/icevision/models/multitask/mmdet/lightning/__init__.py b/icevision/models/multitask/mmdet/lightning/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/icevision/models/multitask/mmdet/models/__init__.py b/icevision/models/multitask/mmdet/models/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/icevision/models/multitask/mmdet/pl_adapter.py b/icevision/models/multitask/mmdet/pl_adapter.py index 8bfc08727..a795c88e4 100644 --- a/icevision/models/multitask/mmdet/pl_adapter.py +++ b/icevision/models/multitask/mmdet/pl_adapter.py @@ -20,18 +20,7 @@ class HybridSingleStageDetectorLightningAdapter(MultiTaskLightningModelAdapter): - """Lightning module specialized for EfficientDet, with metrics support. - - The methods `forward`, `training_step`, `validation_step`, `validation_epoch_end` - are already overriden. - - # Arguments - model: The pytorch model to use. - metrics: `Sequence` of metrics to use. - - # Returns - A `LightningModule`. 
- """ + """""" def __init__( self, diff --git a/icevision/models/multitask/utils/dtypes.py b/icevision/models/multitask/utils/dtypes.py index da4012ec8..bc305fa6f 100644 --- a/icevision/models/multitask/utils/dtypes.py +++ b/icevision/models/multitask/utils/dtypes.py @@ -18,9 +18,3 @@ TensorTuple = Tuple[Tensor] ArrayList = List[np.ndarray] ArrayDict = Dict[str, np.ndarray] - -ClassificationGroupDataDict = Dict[str, Union[List[str], Tensor, TensorDict]] -DataDictClassification = Dict[str, ClassificationGroupDataDict] -DataDictDetection = Union[ - TensorDict, ArrayDict, Dict[str, Union[Tuple[int], ImgMetadataDict]] -] From c2b958c100bd8ae3ccdf468bb6cc7bf11932a71e Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Wed, 28 Jul 2021 05:45:42 +0530 Subject: [PATCH 115/122] remove debug mode --- icevision/models/multitask/mmdet/pl_adapter.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/icevision/models/multitask/mmdet/pl_adapter.py b/icevision/models/multitask/mmdet/pl_adapter.py index a795c88e4..8b2e7d831 100644 --- a/icevision/models/multitask/mmdet/pl_adapter.py +++ b/icevision/models/multitask/mmdet/pl_adapter.py @@ -22,16 +22,10 @@ class HybridSingleStageDetectorLightningAdapter(MultiTaskLightningModelAdapter): """""" - def __init__( - self, - model: HybridSingleStageDetector, - metrics: List[Metric] = None, - debug: bool = False, - ): + def __init__(self, model: HybridSingleStageDetector, metrics: List[Metric] = None): super().__init__() self.metrics = metrics or [] self.model = model - self.debug = debug self.classification_metrics = nn.ModuleDict() for name, head in model.classifier_heads.items(): @@ -54,16 +48,12 @@ def forward(self, *args, **kwargs): def training_step(self, batch: Tuple[dict, Sequence[RecordType]], batch_idx): # Unpack batch into dict + list of records data, samples = batch + # Get model outputs - dict of losses and vars to log step_type = ForwardType.TRAIN_MULTI_AUG if "img_metas" in data.keys(): step_type = ForwardType.TRAIN - if self.debug: - logger.info(f"Training Step: {data.keys()}") - logger.info(f"Batch Idx: {batch_idx}") - logger.info(f"Training Mode: {step_type}") - outputs = self.model.train_step(data=data, step_type=step_type) # Log losses From 5b50c4bfef96d21210a4f9a5dc7302b173411a97 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Fri, 30 Jul 2021 10:28:00 +0530 Subject: [PATCH 116/122] temp `im2tensor` patch while we discuss on Discord --- icevision/imports.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/icevision/imports.py b/icevision/imports.py index 1f19a4669..6ee9b658d 100644 --- a/icevision/imports.py +++ b/icevision/imports.py @@ -51,7 +51,7 @@ CosineAnnealingWarmRestarts, ) -from torchvision.transforms.functional import to_tensor as im2tensor +from torchvision.transforms.functional import to_tensor from loguru import logger @@ -92,3 +92,14 @@ def __str__(self): def __repr__(self): return str(self) + + +def im2tensor(pic: Union[np.ndarray, PIL.Image.Image, torch.Tensor]): + if isinstance(pic, torch.Tensor): + return pic + elif isinstance(pic, (np.ndarray, PIL.Image.Image)): + return to_tensor(pic) + else: + raise TypeError( + f"Expected {np.ndarray} | {PIL.Image.Image} | {torch.Tensor}, got {type(pic)}" + ) From e33f9a1d44772d6fd8a6089dee98fa1af97e6367 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Fri, 30 Jul 2021 10:28:19 +0530 Subject: [PATCH 117/122] vastly simplified pipeline --- icevision/models/multitask/data/dataset.py | 62 +++++++++------------- 1 file changed, 24 
From e33f9a1d44772d6fd8a6089dee98fa1af97e6367 Mon Sep 17 00:00:00 2001
From: Rahul Somani
Date: Fri, 30 Jul 2021 10:28:19 +0530
Subject: [PATCH 117/122] vastly simplified pipeline

---
 icevision/models/multitask/data/dataset.py | 62 +++++++++-------------
 1 file changed, 24 insertions(+), 38 deletions(-)

diff --git a/icevision/models/multitask/data/dataset.py b/icevision/models/multitask/data/dataset.py
index e74aefc47..3ffa085f4 100644
--- a/icevision/models/multitask/data/dataset.py
+++ b/icevision/models/multitask/data/dataset.py
@@ -130,16 +130,16 @@ def __init__(
         self,
         records: List[dict],
         classification_transforms_groups: dict,
         detection_transforms: Optional[tfms.Transform] = None,
-        norm_mean: Collection[float] = [0.485, 0.456, 0.406],
-        norm_std: Collection[float] = [0.229, 0.224, 0.225],
+        # norm_mean: Collection[float] = [0.485, 0.456, 0.406],
+        # norm_std: Collection[float] = [0.229, 0.224, 0.225],
         debug: bool = False,
     ):
         "Return `PIL.Image` when `debug=True`"
         self.records = records
         self.classification_transforms_groups = classification_transforms_groups
         self.detection_transforms = detection_transforms
-        self.norm_mean = norm_mean
-        self.norm_std = norm_std
+        # self.norm_mean = norm_mean
+        # self.norm_std = norm_std
         self.debug = debug
 
         self.validate()
@@ -183,50 +183,36 @@ def __getitem__(self, i):
         record = self.load_record(i)
 
         # Keep a copy of the orig img as it gets modified by albu
-        original_img = deepcopy(record.img)
-        if isinstance(original_img, np.ndarray):
-            original_img = PIL.Image.fromarray(original_img)
+        original_img: PIL.Image.Image = deepcopy(record.img)
 
         # Do detection transform and assign it to the detection task
         if self.detection_transforms is not None:
             record = self.detection_transforms(record)
-
-        record.add_component(ImageRecordComponent(Task("detection")))
-        record.detection.set_img(record.img)
+            record.add_component(ImageRecordComponent(Task("detection")))
+            record.detection.set_img(record.img)
 
         if self.debug:
             print(f"Fetching Item #{i}")
 
         # Do classification transforms
-        for group in self.classification_transforms_groups.values():
-            img_tfms = group["transforms"]
-            tfmd_img = img_tfms(original_img)
-            if self.debug:
-                print(f"  Group: {group['tasks']}, ID: {id(tfmd_img)}")
-
-            # NOTE:
-            # Setting the same img twice (to diff parts in memory) but it's ok cuz we will unload the record later
-            for task in group["tasks"]:
-                # record.add_component(ImageRecordComponent(Task(task)))  # TODO FIXME: This throws a weird error idk why
-                comp = getattr(record, task)
-                comp.add_component(ImageRecordComponent())
-                comp.set_img(tfmd_img)
+        if self.classification_transforms_groups is not None:
+            for group in self.classification_transforms_groups.values():
+                img_tfms = group["transforms"]
+                tfmd_img = img_tfms(original_img)
                 if self.debug:
-                    print(f"    - Task: {task}, ID: {id(tfmd_img)}")
-
-        # This is a bit verbose, but allows us to return PIL images for easy debugging.
-        # Else, it returns normalized numpy arrays, like usual icevision datasets
-        for comp in record.components:
-            if isinstance(comp, ImageRecordComponent):
-                # Convert to `np.ndarray` if it isn't already
-                if isinstance(comp.img, PIL.Image.Image):
-                    comp.set_img(np.array(comp.img))
-                if self.debug:  # for debugging only
-                    comp.set_img(PIL.Image.fromarray(comp.img))
-                else:
-                    comp.set_img(
-                        normalize(comp.img, mean=self.norm_mean, std=self.norm_std)
-                    )
+                    print(f"  Group: {group['tasks']}, ID: {id(tfmd_img)}")
+
+                # NOTE:
+                # Setting the same img twice (to diff parts in memory) but it's ok cuz we will unload the record later
+                for task in group["tasks"]:
+                    # TODO FIXME: This adds a component but doesn't display it when printing
+                    # Also, doing `set_img` overrides the base `record.img`
+                    # record.add_component(ImageRecordComponent(Task(task)))
+                    comp = getattr(record, task)
+                    comp.add_component(ImageRecordComponent())
+                    comp.set_img(tfmd_img)
+                    if self.debug:
+                        print(f"    - Task: {task}, ID: {id(tfmd_img)}")
 
         return record
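For readers following along, the simplified `__getitem__` above expects `classification_transforms_groups` shaped roughly as below. The group keys and task names here are invented for illustration; each group applies one transform pipeline to every task listed under its `"tasks"` key:

```python
# Hypothetical `classification_transforms_groups` for the dataset above.
# "group1"/"group2" and the task names are illustrative only.
import torchvision.transforms as Tfms

classification_transforms_groups = {
    "group1": {
        "tasks": ["shot_composition"],
        "transforms": Tfms.Compose(
            [Tfms.Resize((224, 224)), Tfms.RandomHorizontalFlip()]
        ),
    },
    "group2": {
        "tasks": ["color_saturation", "shot_framing"],
        "transforms": Tfms.Compose([Tfms.Resize((224, 224))]),
    },
}
```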
From 2539faf8fb1ae028d4aa1014e1a62aead32fc5c9 Mon Sep 17 00:00:00 2001
From: Rahul Somani
Date: Fri, 30 Jul 2021 10:40:00 +0530
Subject: [PATCH 118/122] patch `set_img` to include `torch.Tensor`

---
 icevision/core/record_components.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/icevision/core/record_components.py b/icevision/core/record_components.py
index 911bb1d1f..3df79ab25 100644
--- a/icevision/core/record_components.py
+++ b/icevision/core/record_components.py
@@ -111,14 +111,16 @@ def __init__(self, task=tasks.common):
         super().__init__(task=task)
         self.img = None
 
-    def set_img(self, img: Union[PIL.Image.Image, np.ndarray]):
-        assert isinstance(img, (PIL.Image.Image, np.ndarray))
+    def set_img(self, img: Union[PIL.Image.Image, np.ndarray, torch.Tensor]):
+        assert isinstance(img, (PIL.Image.Image, np.ndarray, torch.Tensor))
         self.img = img
         if isinstance(img, PIL.Image.Image):
             width, height = img.size
         elif isinstance(img, np.ndarray):
             # else:
             height, width, _ = self.img.shape
+        elif isinstance(img, torch.Tensor):
+            _, height, width = self.img.shape
 
         # this should set on SizeRecordComponent
         self.composite.set_img_size(ImgSize(width=width, height=height), original=True)

From 6b17cb89f8d6b51d257cd36ee6f309c203fae0c7 Mon Sep 17 00:00:00 2001
From: Rahul Somani
Date: Fri, 30 Jul 2021 13:25:27 +0530
Subject: [PATCH 119/122] proper tfm dispatching

---
 icevision/models/multitask/data/dataset.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/icevision/models/multitask/data/dataset.py b/icevision/models/multitask/data/dataset.py
index 3ffa085f4..67c8cab61 100644
--- a/icevision/models/multitask/data/dataset.py
+++ b/icevision/models/multitask/data/dataset.py
@@ -7,6 +7,7 @@
 
 import icevision.tfms as tfms
 import torchvision.transforms as Tfms
+import albumentations as A
 
 __all__ = ["HybridAugmentationsRecordDataset", "RecordDataset"]
 
@@ -179,6 +180,19 @@ def load_record(self, i: int):
         """
         return self.records[i].load()
 
+    @staticmethod
+    def dispatch_classification_tfms(
+        tfm: Union[A.Compose, Tfms.Compose], image: PIL.Image.Image
+    ):
+        "Dispatch albu / torchvision transforms with appropriate inp / out formats"
+        assert isinstance(image, PIL.Image.Image)
+        if isinstance(tfm, A.Compose):
+            return tfm(image=np.array(image))["image"]
+        elif isinstance(tfm, Tfms.Compose):
+            return tfm(image)
+        else:
+            raise TypeError(f"Only Albu | Torchvision transforms supported")
+
     def __getitem__(self, i):
         record = self.load_record(i)
 
@@ -198,7 +212,7 @@ def __getitem__(self, i):
         if self.classification_transforms_groups is not None:
             for group in self.classification_transforms_groups.values():
                 img_tfms = group["transforms"]
-                tfmd_img = img_tfms(original_img)
+                tfmd_img = self.dispatch_classification_tfms(img_tfms, original_img)
                 if self.debug:
                     print(f"  Group: {group['tasks']}, ID: {id(tfmd_img)}")
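A quick sanity check of the dispatcher introduced in this patch; it assumes `albumentations` and `torchvision` are installed and that the dataset module imports cleanly on its own:

```python
# Both pipelines take the same PIL input; the dispatcher unwraps albu's
# dict output and leaves torchvision's output as-is.
import albumentations as A
import numpy as np
import PIL.Image
import torchvision.transforms as Tfms

from icevision.models.multitask.data.dataset import HybridAugmentationsRecordDataset

img = PIL.Image.fromarray(np.zeros((64, 64, 3), dtype=np.uint8))

albu_out = HybridAugmentationsRecordDataset.dispatch_classification_tfms(
    A.Compose([A.Resize(32, 32)]), img
)
tv_out = HybridAugmentationsRecordDataset.dispatch_classification_tfms(
    Tfms.Compose([Tfms.Resize((32, 32))]), img
)

print(type(albu_out))  # np.ndarray -- albu returns a dict, we keep ["image"]
print(type(tv_out))    # PIL.Image.Image -- no ToTensor in this pipeline
```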
From 4ebc006b4f8ba4dfed7e5b24dceb34c50d875332 Mon Sep 17 00:00:00 2001
From: Rahul Somani
Date: Sat, 31 Jul 2021 14:12:34 +0530
Subject: [PATCH 120/122] cleanup

---
 .../ultralytics/yolov5/dataloaders.py | 31 ++-----------------
 1 file changed, 2 insertions(+), 29 deletions(-)

diff --git a/icevision/models/multitask/ultralytics/yolov5/dataloaders.py b/icevision/models/multitask/ultralytics/yolov5/dataloaders.py
index b679caddc..405576dc8 100644
--- a/icevision/models/multitask/ultralytics/yolov5/dataloaders.py
+++ b/icevision/models/multitask/ultralytics/yolov5/dataloaders.py
@@ -63,20 +63,9 @@ def build_single_aug_batch(
         # See file header for more info on why this is done
         if detection_target.numel() > 0:
             detection_target[:, 0] = i
-        # detection_target[:, 0] = i if detection_target.numel() > 0 else None
-        detection_targets.append(detection_target)
 
-        # Classification
+        detection_targets.append(detection_target)
         assign_classification_targets_from_record(classification_targets, record)
-        # for comp in record.components:
-        #     name = comp.task.name
-        #     if isinstance(comp, ClassificationLabelsRecordComponent):
-        #         if comp.is_multilabel:
-        #             labels = comp.one_hot_encoded()
-        #             classification_targets[name].append(labels)
-        #         else:
-        #             labels = comp.label_ids
-        #             classification_targets[name].extend(labels)
 
     classification_targets = {k: tensor(v) for k, v in classification_targets.items()}
 
@@ -142,36 +131,20 @@ def build_multi_aug_batch(
         # See file header for more info on why this is done
         if detection_target.numel() > 0:
             detection_target[:, 0] = i
-        # detection_target[:, 0] = i if detection_target.numel() > 0 else None
-        detection_targets.append(detection_target)
 
+        detection_targets.append(detection_target)
         for key, group in classification_transform_groups.items():
             task = getattr(record, group["tasks"][0])
             classification_data[key]["tasks"] = group["tasks"]
             classification_data[key]["images"].append(im2tensor(task.img))
 
         assign_classification_targets_from_record(classification_targets, record)
-        # for comp in record.components:
-        #     name = comp.task.name
-        #     if isinstance(comp, ClassificationLabelsRecordComponent):
-        #         if comp.is_multilabel:
-        #             labels = comp.one_hot_encoded()
-        #             classification_targets[name].append(labels)
-        #         else:
-        #             labels = comp.label_ids
-        #             classification_targets[name].extend(labels)
 
         record.unload()  # NOTE: Safety mechanism
 
     # Massage data
     classification_data = massage_multi_aug_classification_data(
        classification_data, classification_targets, "targets"
     )
-    # for group in classification_data.values():
-    #     group["targets"] = {
-    #         task: tensor(classification_targets[task]) for task in group["tasks"]
-    #     }
-    #     group["images"] = torch.stack(group["images"])
-    # classification_data = {k: dict(v) for k, v in classification_data.items()}
 
     detection_data = dict(
         images=torch.stack(detection_images, 0),
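The comment blocks deleted above document exactly what `assign_classification_targets_from_record` now centralizes. A sketch consistent with those comments follows; the real helper lives in this module and may differ in detail, and it assumes icevision's `ClassificationLabelsRecordComponent` is in scope:

```python
# Reconstructed from the deleted inline comments -- not the canonical helper.
from collections import defaultdict

def assign_classification_targets_from_record(classification_targets: dict, record):
    for comp in record.components:
        if isinstance(comp, ClassificationLabelsRecordComponent):
            name = comp.task.name
            if comp.is_multilabel:
                # Multilabel: append one one-hot-encoded row per record
                classification_targets[name].append(comp.one_hot_encoded())
            else:
                # Single label: extend with plain integer label ids
                classification_targets[name].extend(comp.label_ids)

# Typical usage inside the batch builders above:
# classification_targets = defaultdict(list)
# for record in records:
#     assign_classification_targets_from_record(classification_targets, record)
```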
From 8cc54743f79f24d19464289b35584256d586245c Mon Sep 17 00:00:00 2001
From: Rahul Somani
Date: Mon, 9 Aug 2021 20:12:58 +0530
Subject: [PATCH 121/122] remove unused arg

---
 icevision/models/multitask/utils/model.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/icevision/models/multitask/utils/model.py b/icevision/models/multitask/utils/model.py
index a8f46f359..710313bc5 100644
--- a/icevision/models/multitask/utils/model.py
+++ b/icevision/models/multitask/utils/model.py
@@ -15,8 +15,8 @@ class ForwardType(Enum):
     # EXPORT_COREML = 7
 
 
-# Taken from from https://github.com/fastai/fastai/blob/4decc673ba811a41c6e3ab648aab96dd27244ff7/fastai/callback/training.py#L43-L49
-def set_bn_eval(m: nn.Module, use_eval=True) -> None:
+# Modified from https://github.com/fastai/fastai/blob/4decc673ba811a41c6e3ab648aab96dd27244ff7/fastai/callback/training.py#L43-L49
+def set_bn_eval(m: nn.Module) -> None:
     "Set bn layers in eval mode for all recursive, non-trainable children of `m`."
     for l in m.children():
         if isinstance(l, _BatchNorm) and not next(l.parameters()).requires_grad:

From d14a453c8a53a6ad72f5364cba4a65f8878e0091 Mon Sep 17 00:00:00 2001
From: Rahul Somani
Date: Tue, 10 Aug 2021 13:29:15 +0530
Subject: [PATCH 122/122] auto calculate fpn dims

---
 .../models/multitask/ultralytics/yolov5/arch/yolo_hybrid.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/icevision/models/multitask/ultralytics/yolov5/arch/yolo_hybrid.py b/icevision/models/multitask/ultralytics/yolov5/arch/yolo_hybrid.py
index 10f1b983d..c4e0f2c6e 100644
--- a/icevision/models/multitask/ultralytics/yolov5/arch/yolo_hybrid.py
+++ b/icevision/models/multitask/ultralytics/yolov5/arch/yolo_hybrid.py
@@ -199,7 +199,10 @@ def build_classification_modules(self, verbose: bool = True):
         correct for each classifier config, and corrects them if not
         """
         arch = Path(self.yaml_file).stem
-        fpn_dims = np.array(YOLO_FEATURE_MAP_DIMS[arch])
+        # fpn_dims = np.array(YOLO_FEATURE_MAP_DIMS[arch])
+        fpn_dims = [
+            o.shape[1] for o in self.extract_features(torch.rand(1, 3, 640, 640))
+        ]
 
         for task, cfg in self.classifier_configs.items():
             num_fpn_features = (
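The final patch swaps a hard-coded channel lookup for a dummy forward pass, a trick that generalizes to any backbone whose feature extractor returns a list of NCHW maps. A minimal, self-contained sketch of the same idea; the tiny model below is a stand-in for YOLOv5's `extract_features`:

```python
import torch
import torch.nn as nn

class TinyFPN(nn.Module):
    """Stand-in feature extractor returning multi-scale NCHW feature maps."""

    def __init__(self):
        super().__init__()
        self.c1 = nn.Conv2d(3, 64, 3, stride=8, padding=1)
        self.c2 = nn.Conv2d(64, 128, 3, stride=2, padding=1)
        self.c3 = nn.Conv2d(128, 256, 3, stride=2, padding=1)

    def forward(self, x):
        p3 = self.c1(x)
        p4 = self.c2(p3)
        p5 = self.c3(p4)
        return [p3, p4, p5]

# Read channel counts off a dummy batch instead of a hard-coded table
model = TinyFPN()
with torch.no_grad():
    fpn_dims = [o.shape[1] for o in model(torch.rand(1, 3, 640, 640))]
print(fpn_dims)  # [64, 128, 256]
```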