[WIP] Multitask Training With mmdet and yolov5 Models #856

Draft · wants to merge 122 commits into master

Commits (122)
662feb5
add classifier heads
rsomani95 May 31, 2021
a6048cb
add multi augmentation dataset
rsomani95 May 31, 2021
22f7846
add `flatten` utility
rsomani95 May 31, 2021
6275472
hybrid single stage detector, dataloader, prediction
rsomani95 May 31, 2021
ce033d5
lightning adapter
rsomani95 May 31, 2021
0dabe85
add loss weight param
rsomani95 Jun 5, 2021
622e57f
return activated preds in eval mode; import `ClassifierConfig`
rsomani95 Jun 12, 2021
7447244
use `ClassifierConfig`s, doc improvements
rsomani95 Jun 12, 2021
eeee30a
add experimental onnx forward method
rsomani95 Jun 12, 2021
db46a49
rename for consistency with library
rsomani95 Jun 13, 2021
3dcfe4e
placeholders
rsomani95 Jun 13, 2021
9bbe4b3
implement hybrid yolov5
rsomani95 Jun 13, 2021
e56fa0e
add sample usage in docstring
rsomani95 Jun 13, 2021
bcc0256
basic dataloader
rsomani95 Jun 13, 2021
0a76855
fix error
rsomani95 Jun 13, 2021
5deddd8
variable names
rsomani95 Jun 13, 2021
c72acfd
fix
rsomani95 Jun 13, 2021
2bcb094
multi aug dataloader, fix typo
rsomani95 Jun 13, 2021
01ea835
add links to discord where relevant
rsomani95 Jun 13, 2021
6f4da15
keep code consistent
rsomani95 Jun 14, 2021
c7bdfa2
create `utils` module; move common code there
rsomani95 Jun 14, 2021
2e1e2d4
update
rsomani95 Jun 14, 2021
35e0cec
rename with lib naming scheme
rsomani95 Jun 14, 2021
4ce890f
fix imports, docs
rsomani95 Jun 14, 2021
a7b374c
forward api (wip)
rsomani95 Jun 14, 2021
8a8704d
fix return format, add docs for multi-aug dataloader
rsomani95 Jun 14, 2021
516edcc
add icevision style model API
rsomani95 Jun 14, 2021
1a8857f
lightning adapter w/ train step (TODO val)
rsomani95 Jun 14, 2021
73796a5
reorganise module to mimic the rest of the library
rsomani95 Jun 14, 2021
4ad3c08
documentation.
rsomani95 Jun 14, 2021
eac9ed4
move dataloading util to common module
rsomani95 Jun 15, 2021
0817f07
add `addict` safe Dictionary
rsomani95 Jun 15, 2021
7f66f71
remove accidental import
rsomani95 Jun 15, 2021
d20ed45
doc, type anno
rsomani95 Jun 15, 2021
d40edae
higher level dataloading functions
rsomani95 Jun 15, 2021
2acc516
TODO val step
rsomani95 Jun 15, 2021
d73d418
-___-
rsomani95 Jun 15, 2021
7bc3604
add doc for high level `model` creator
rsomani95 Jun 15, 2021
08bc1be
fix bug where I forgot to enter the dict key, making the output a `se…
rsomani95 Jun 15, 2021
83f1768
return same outputs regardless of mode;
rsomani95 Jun 15, 2021
3fb8a4b
correct tuple unpacking
rsomani95 Jun 15, 2021
213de99
add `forward_export`, different `step_type`s for exporting
rsomani95 Jun 15, 2021
af7cc05
WIP notebook - move to GPU and re-run.
rsomani95 Jun 15, 2021
00aec88
create common prediction utils for classification
rsomani95 Jun 15, 2021
26b6b30
add `unroll_dict`
rsomani95 Jun 15, 2021
d092e98
add yolov5 multitask raw predictions converter
rsomani95 Jun 15, 2021
b50309b
rename `forward_export` -> `forward_eval`; minor changes
rsomani95 Jun 15, 2021
63385d8
add higher level pred funs to yolov5... will this work?
rsomani95 Jun 15, 2021
648cfe7
add validation code. lets gooo
rsomani95 Jun 15, 2021
4dafd91
revert to common fwd method for train/eval mode;
rsomani95 Jun 15, 2021
c4204ae
add todos
rsomani95 Jun 15, 2021
6e0e99f
add classification metrics
rsomani95 Jun 15, 2021
71056ae
bugfix
rsomani95 Jun 15, 2021
08895b3
forgot to log metrics....
rsomani95 Jun 15, 2021
7017661
bugfixxxeessss
rsomani95 Jun 15, 2021
6726b85
successful training example with lightning
rsomani95 Jun 15, 2021
3f62a73
minor polishing
rsomani95 Jun 15, 2021
8eb9cff
add tensortuple dtype
rsomani95 Jun 16, 2021
5d12378
* modularise `forward` to skip classif / detection specific parts of …
rsomani95 Jun 16, 2021
a09f981
multi aug forward for yolov5
rsomani95 Jun 16, 2021
d06fa63
minor __repr__ bugfix
rsomani95 Jun 16, 2021
01231ea
properly unpack multi aug data
rsomani95 Jun 16, 2021
472e128
update w/ multi aug example
rsomani95 Jun 16, 2021
52e27b2
bugfix
rsomani95 Jun 17, 2021
32dd165
simplify forward method
rsomani95 Jun 17, 2021
dcbe55c
param freezing scheme
rsomani95 Jun 17, 2021
c9d310d
simplify forward modes
rsomani95 Jun 18, 2021
620ee51
add `extrace_features`; minor cleanup
rsomani95 Jun 18, 2021
8033f51
super awkard test scaffolding
rsomani95 Jun 18, 2021
3195385
rename `build_classifier_heads` -> `build_classification_modules`
rsomani95 Jun 18, 2021
3d820ba
move classifiers init location;
rsomani95 Jun 18, 2021
8993694
modularise `forward_inference`
rsomani95 Jun 18, 2021
0d3e08a
make pooling inputs optional when not using fpn inputs
rsomani95 Jun 19, 2021
054a1cc
move to `arch` folder
rsomani95 Jun 20, 2021
462f85b
add unfreezing; modularise freezing, param groups as pseudo mixins - …
rsomani95 Jun 20, 2021
d5037b5
fix import path
rsomani95 Jun 21, 2021
f271a38
fix import path... again
rsomani95 Jun 21, 2021
ffca2f0
batchnorm freezing
rsomani95 Jun 21, 2021
a63136b
add warning
rsomani95 Jun 21, 2021
8948a0e
store fpn dims as an attribute
rsomani95 Jun 22, 2021
d08d4a6
safety mechanism
rsomani95 Jun 22, 2021
ecd693b
dumb bugfix
rsomani95 Jun 22, 2021
19ea13e
klsvbaolskfbvjklfb WTF
rsomani95 Jun 22, 2021
2a198fb
model unfreezing bugfix
rsomani95 Jun 22, 2021
a65c8be
**hangs head in shame**
rsomani95 Jun 22, 2021
1200f58
freezing interface
rsomani95 Jun 26, 2021
f932b10
rename func
rsomani95 Jun 26, 2021
69e4f82
bugfix
rsomani95 Jun 27, 2021
31dddac
* higher level freeze/unfreeze detector
rsomani95 Jun 27, 2021
6428245
move wts to gpu if available (reqd)
rsomani95 Jun 28, 2021
f3b94f8
fix formatting
rsomani95 Jun 28, 2021
89b4fec
iterate on freezing interface
rsomani95 Jun 28, 2021
a253d58
cast loss func weights to fp32 (double by default)
rsomani95 Jun 28, 2021
b60ee28
try moving away from functional approach to avoid cryptic errors
rsomani95 Jun 28, 2021
18e7737
bugfix
rsomani95 Jun 28, 2021
ee5ef8e
...
rsomani95 Jun 28, 2021
4002c4d
patch
rsomani95 Jun 28, 2021
ca52794
log `total_loss` for easier model checkpointing
rsomani95 Jun 30, 2021
0cdfdcb
mystical bugfix
rsomani95 Jun 30, 2021
a2740a0
flexibility to define custom record loading logic
rsomani95 Jun 30, 2021
1ba47e8
model frezinggg buuugggggfix.. i hope
rsomani95 Jun 30, 2021
f4a59d7
use `load_record` for data validation
rsomani95 Jul 1, 2021
67bf463
typo
rsomani95 Jul 1, 2021
9900a38
extra safe record unloading (experiment)
rsomani95 Jul 3, 2021
8de99b2
generalise to all yolo architectures
rsomani95 Jul 6, 2021
fc52a6f
parametrise `num_bbone_blocks`
rsomani95 Jul 12, 2021
aa4b515
fix subtle bbone block idxs bug
rsomani95 Jul 12, 2021
cfe306b
#$%^&*!!!! really need some tests
rsomani95 Jul 12, 2021
56913dd
add `freeze_neck/fpn`
rsomani95 Jul 24, 2021
fcf2d3d
refactor classification dataloading
rsomani95 Jul 26, 2021
2c34448
some more refactoring. TODO: test
rsomani95 Jul 26, 2021
4561be1
refactor logging methods
rsomani95 Jul 26, 2021
72092cd
add speedup
rsomani95 Jul 27, 2021
718fb52
move stuff around
rsomani95 Jul 27, 2021
c2b958c
remove debug mode
rsomani95 Jul 28, 2021
5b50c4b
temp `im2tensor` patch while we discuss on Discord
rsomani95 Jul 30, 2021
e33f9a1
vastly simplified pipeline
rsomani95 Jul 30, 2021
2539faf
patch `set_img` to include `torch.Tensor`
rsomani95 Jul 30, 2021
6b17cb8
proper tfm dispatching
rsomani95 Jul 30, 2021
4ebc006
cleanup
rsomani95 Jul 31, 2021
8cc5474
remove unused arg
rsomani95 Aug 9, 2021
d14a453
auto calculate fpn dims
rsomani95 Aug 10, 2021
6 changes: 4 additions & 2 deletions icevision/core/record_components.py
@@ -111,14 +111,16 @@ def __init__(self, task=tasks.common):
         super().__init__(task=task)
         self.img = None

-    def set_img(self, img: Union[PIL.Image.Image, np.ndarray]):
-        assert isinstance(img, (PIL.Image.Image, np.ndarray))
+    def set_img(self, img: Union[PIL.Image.Image, np.ndarray, torch.Tensor]):
+        assert isinstance(img, (PIL.Image.Image, np.ndarray, torch.Tensor))
         self.img = img
         if isinstance(img, PIL.Image.Image):
             width, height = img.size  # PIL's `.size` is (width, height)
         elif isinstance(img, np.ndarray):
             height, width, _ = self.img.shape
+        elif isinstance(img, torch.Tensor):
+            _, height, width = self.img.shape
         # this should set on SizeRecordComponent
         self.composite.set_img_size(ImgSize(width=width, height=height), original=True)
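For reference, the three accepted input types expose their dimensions under different conventions, which is what the hunk above dispatches on. A minimal standalone sketch (not part of the diff):

import numpy as np
import PIL.Image
import torch

pil_img = PIL.Image.new("RGB", (224, 160))        # PIL stores (width, height)
np_img = np.zeros((160, 224, 3), dtype=np.uint8)  # numpy images are HWC
tensor_img = torch.zeros(3, 160, 224)             # torch tensors are CHW

print(pil_img.size)      # (224, 160) -> width, height
print(np_img.shape)      # (160, 224, 3) -> height, width, channels
print(tensor_img.shape)  # torch.Size([3, 160, 224]) -> channels, height, width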
13 changes: 12 additions & 1 deletion icevision/imports.py
@@ -51,7 +51,7 @@
     CosineAnnealingWarmRestarts,
 )

-from torchvision.transforms.functional import to_tensor as im2tensor
+from torchvision.transforms.functional import to_tensor

 from loguru import logger

@@ -92,3 +92,14 @@ def __str__(self):

     def __repr__(self):
         return str(self)
+
+
+def im2tensor(pic: Union[np.ndarray, PIL.Image.Image, torch.Tensor]):
+    if isinstance(pic, torch.Tensor):
+        return pic
+    elif isinstance(pic, (np.ndarray, PIL.Image.Image)):
+        return to_tensor(pic)
+    else:
+        raise TypeError(
+            f"Expected {np.ndarray} | {PIL.Image.Image} | {torch.Tensor}, got {type(pic)}"
+        )
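A quick usage sketch of the new dispatching `im2tensor`, assuming the helper above is importable (e.g. via `from icevision.imports import im2tensor`):

import numpy as np
import PIL.Image
import torch

img_np = np.zeros((160, 224, 3), dtype=np.uint8)
img_pil = PIL.Image.fromarray(img_np)
img_t = torch.rand(3, 160, 224)

assert im2tensor(img_np).shape == (3, 160, 224)   # HWC uint8 -> CHW float in [0, 1]
assert im2tensor(img_pil).shape == (3, 160, 224)  # PIL goes through `to_tensor` too
assert im2tensor(img_t) is img_t                  # tensors pass through unchanged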
2 changes: 2 additions & 0 deletions icevision/models/multitask/classification_heads/__init__.py
@@ -0,0 +1,2 @@
from .builder import *
from .head import *
47 changes: 47 additions & 0 deletions icevision/models/multitask/classification_heads/builder.py
@@ -0,0 +1,47 @@
from typing import Dict
from .head import CLASSIFICATION_HEADS, ImageClassificationHead, ClassifierConfig
import torch.nn as nn

__all__ = ["build_classifier_heads", "build_classifier_heads_from_configs"]


def build_classifier_heads(configs: Dict[str, dict]) -> nn.ModuleDict:
    """
    Build classification heads from `configs`, a dict of dicts mapping head
    names to config dicts. A head is created for each key in the input dictionary.

    Expected to be used with `mmdet` models, as it uses the
    `CLASSIFICATION_HEADS` registry internally.

    Returns:
        an `nn.ModuleDict` mapping keys from `configs` to classifier heads
    """
    heads = nn.ModuleDict()
    for name, config in configs.items():
        head = CLASSIFICATION_HEADS.build(config)
        heads.update({name: head})
    return heads


def build_classifier_heads_from_configs(
    configs: Dict[str, ClassifierConfig] = None
) -> nn.ModuleDict:
    """
    Build an `nn.ModuleDict` of `ImageClassificationHead`s from a dict of
    `ClassifierConfig`s keyed by head name.
    """
    if configs is None:
        return nn.ModuleDict()

    assert isinstance(configs, dict), f"Expected a `dict`, got {type(configs)}"
    if not all(isinstance(cfg, ClassifierConfig) for cfg in configs.values()):
        raise ValueError(
            "Expected all values in `configs` to be of type `ClassifierConfig`, "
            "but one or more are not"
        )

    heads = nn.ModuleDict()
    for name, config in configs.items():
        head = ImageClassificationHead.from_config(config)
        heads.update({name: head})
    return heads
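A hedged usage sketch of the builder, assuming the `ClassifierConfig` fields defined in `head.py` below; the task names ("shot_composition", "lighting") are hypothetical:

import torch

configs = {
    "shot_composition": ClassifierConfig(out_classes=5, num_fpn_features=512),
    "lighting": ClassifierConfig(out_classes=3, num_fpn_features=512, multilabel=True),
}
heads = build_classifier_heads_from_configs(configs)

features = torch.rand(2, 512, 7, 7)  # (batch, channels, H, W) from a backbone
for name, head in heads.items():
    print(name, head(features).shape)  # -> (2, out_classes) logits per task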
216 changes: 216 additions & 0 deletions icevision/models/multitask/classification_heads/head.py
@@ -0,0 +1,216 @@
# Hacked together by Rahul & Farid

from mmcv.cnn import MODELS as MMCV_MODELS
from mmcv.utils import Registry

import torch
import torch.nn as nn
import torch.nn.functional as F

from typing import List, Union, Optional, Dict
from torch import Tensor
from dataclasses import dataclass

TensorList = List[Tensor]
TensorDict = Dict[str, Tensor]

MODELS = Registry("models", parent=MMCV_MODELS)
CLASSIFICATION_HEADS = MODELS

__all__ = ["ImageClassificationHead", "ClassifierConfig"]


class Passthrough(nn.Module):
    def forward(self, x):
        return x


"""
`ClassifierConfig` is useful for instantiating `ImageClassificationHead`
in different settings. If using `mmdet`, we don't use this, as the config
is then a regular dictionary.

When using yolov5, we can easily pass this config around to create the model.
Often, it'll be used inside a dictionary of configs.
"""


@dataclass
class ClassifierConfig:
    out_classes: int
    num_fpn_features: int = 512
    fpn_keys: Union[List[str], List[int], None] = None
    dropout: Optional[float] = 0.2
    pool_inputs: bool = True
    # Loss function args
    loss_func: Optional[nn.Module] = None
    activation: Optional[nn.Module] = None
    multilabel: bool = False
    loss_func_wts: Optional[Tensor] = None
    loss_weight: float = 1.0
    # Post activation processing
    thresh: Optional[float] = None
    topk: Optional[int] = None

    def __post_init__(self):
        if isinstance(self.fpn_keys, int):
            self.fpn_keys = [self.fpn_keys]

        if self.loss_func_wts is not None:
            if not self.multilabel:
                self.loss_func_wts = self.loss_func_wts.to(torch.float32)
            if torch.cuda.is_available():
                self.loss_func_wts = self.loss_func_wts.cuda()

        if self.multilabel:
            if self.topk is None and self.thresh is None:
                self.thresh = 0.5
        else:
            if self.topk is None and self.thresh is None:
                self.topk = 1


@CLASSIFICATION_HEADS.register_module(name="ImageClassificationHead")
class ImageClassificationHead(nn.Module):
    """
    Image classification head that optionally takes `fpn_keys` features from
    an FPN, average pools and concatenates them into a single tensor of
    `num_fpn_features` total features, then runs a linear layer to `out_classes`:

        fpn_features: List[Tensor] => AvgPool => Flatten => Linear

    Also includes `compute_loss` to match the design of other
    components of object detection systems.
    To use your own loss function, pass it into `loss_func`.
    If `loss_func` is None (the default), we create one based on the other args:
    if `multilabel` is True, one-hot encoded targets are expected and
    `nn.BCEWithLogitsLoss` is used; else `nn.CrossEntropyLoss` is used
    and targets are expected to be integers.
    NOTE: Not all loss function args are exposed
    """

    def __init__(
        self,
        out_classes: int,
        num_fpn_features: int,
        fpn_keys: Union[List[str], List[int], None] = None,
        dropout: Optional[float] = 0.2,
        pool_inputs: bool = True,  # ONLY for advanced use cases where input feature maps are already pooled
        # Loss function args
        loss_func: Optional[nn.Module] = None,
        activation: Optional[nn.Module] = None,
        multilabel: bool = False,
        loss_func_wts: Optional[Tensor] = None,
        loss_weight: float = 1.0,
        # Final postprocessing args
        thresh: Optional[float] = None,
        topk: Optional[int] = None,
    ):
        super().__init__()

        # Setup loss function & activation
        self.multilabel = multilabel
        self.loss_func, self.loss_func_wts, self.loss_weight = (
            loss_func,
            loss_func_wts,
            loss_weight,
        )
        self.activation = activation
        self.pool_inputs = pool_inputs
        self.thresh, self.topk = thresh, topk

        # Setup head
        self.fpn_keys = fpn_keys

        layers = [
            nn.Dropout(dropout) if dropout else Passthrough(),
            nn.Linear(num_fpn_features, out_classes),
        ]
        if self.pool_inputs:
            layers.insert(0, nn.Flatten(1))
        self.classifier = nn.Sequential(*layers)

        self.setup_loss_function()
        self.setup_postprocessing()

    def setup_postprocessing(self):
        if self.multilabel:
            if self.topk is None and self.thresh is None:
                self.thresh = 0.5
        else:
            if self.topk is None and self.thresh is None:
                self.topk = 1

    def setup_loss_function(self):
        if self.loss_func is None:
            if self.multilabel:
                self.loss_func = nn.BCEWithLogitsLoss(pos_weight=self.loss_func_wts)
                self.activation = nn.Sigmoid()
            else:
                self.loss_func = nn.CrossEntropyLoss(weight=self.loss_func_wts)
                self.activation = nn.Softmax(-1)

    @classmethod
    def from_config(cls, config: ClassifierConfig):
        return cls(**config.__dict__)

    # TODO: Make it run with regular features as well
    def forward(self, features: Union[Tensor, TensorDict, TensorList]):
        """
        Sequence of outputs from an FPN or regular feature extractor
        => Avg. Pool each into 1 dimension
        => Concatenate into a single tensor
        => Linear layer -> output classes

        If `self.fpn_keys` is specified, the specific (int|str) indices are grabbed
        from `features` for the pooling layer; else _all_ of them are used
        """
        if isinstance(features, (list, dict, tuple)):
            # Grab specific features if specified
            if self.fpn_keys is not None:
                pooled_features = [
                    F.adaptive_avg_pool2d(features[k], 1) for k in self.fpn_keys
                ]
            # If no `fpn_keys` exist, pool & concat all the feature maps (could be expensive)
            else:
                # NOTE: iterate `.values()` for dicts; iterating a dict directly yields keys
                feats = features.values() if isinstance(features, dict) else features
                pooled_features = [F.adaptive_avg_pool2d(feat, 1) for feat in feats]
            pooled_features = torch.cat(pooled_features, dim=1)

        # If doing regular (non-FPN) feature extraction, we don't need `fpn_keys` and
        # just avg. pool the last layer's features
        elif isinstance(features, Tensor):
            pooled_features = (
                F.adaptive_avg_pool2d(features, 1) if self.pool_inputs else features
            )
        else:
            raise TypeError(
                f"Expected TensorList|TensorDict|Tensor|tuple, got {type(features)}"
            )

        return self.classifier(pooled_features)

    # TorchVision style API
    def compute_loss(self, predictions, targets):
        return self.loss_weight * self.loss_func(predictions, targets)

    def postprocess(self, predictions):
        return self.activation(predictions)

    # MMDet style API
    def forward_train(self, x, gt_label) -> Tensor:
        preds = self(x)
        return self.loss_weight * self.loss_func(preds, gt_label)

    def forward_activate(self, x):
        "Run forward pass with activation function"
        x = self(x)
        return self.activation(x)
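A minimal end-to-end sketch of the head's TorchVision-style API; the FPN level names and feature shapes are illustrative assumptions:

import torch

head = ImageClassificationHead(
    out_classes=4,
    num_fpn_features=256 + 256,  # channels from the two selected FPN levels
    fpn_keys=["p4", "p5"],
)

fpn_features = {
    "p3": torch.rand(2, 256, 28, 28),
    "p4": torch.rand(2, 256, 14, 14),
    "p5": torch.rand(2, 256, 7, 7),
}
logits = head(fpn_features)                # pools p4 & p5, concats -> (2, 4) logits
targets = torch.tensor([0, 3])             # integer targets (not multilabel)
loss = head.compute_loss(logits, targets)  # CrossEntropyLoss scaled by loss_weight
probs = head.postprocess(logits)           # softmax over the last dimension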
71 changes: 71 additions & 0 deletions icevision/models/multitask/data/dataloading_utils.py
@@ -0,0 +1,71 @@
"""
This may be a temporary file that may eventually be removed,
as it only slightly modifies an existing function.
"""

__all__ = [
    "unload_records",
    "assign_classification_targets_from_record",
    "massage_multi_aug_classification_data",
]


import torch
from icevision.core.record_type import RecordType
from typing import Any, Dict, Optional, Callable, Sequence, Tuple
from icevision.core.record_components import ClassificationLabelsRecordComponent
from torch import tensor


def unload_records(
    build_batch: Callable, build_batch_kwargs: Optional[Dict] = None
) -> Callable[[Sequence[RecordType]], Tuple[Tuple[Any, ...], Sequence[RecordType]]]:
    """
    Wraps `build_batch` so that records are unloaded after batch creation,
    to avoid carrying them around. Also optionally accepts `build_batch_kwargs`
    that are passed into `build_batch`. These aren't accepted as keyword
    arguments, because those are reserved for PyTorch's `DataLoader` class,
    which is used later in this chain of function calls.

    Args:
        build_batch (Callable): A collate function that describes how to mash records
            into a batch of inputs for a model
        build_batch_kwargs (Optional[Dict], optional): Keyword arguments to pass into
            `build_batch`. Defaults to None.

    Returns:
        A collate function that returns the batched inputs along with the
        (now unloaded) records
    """
    build_batch_kwargs = build_batch_kwargs or {}
    assert isinstance(build_batch_kwargs, dict)

    def inner(records):
        tupled_output, records = build_batch(records, **build_batch_kwargs)
        for record in records:
            record.unload()
        return tupled_output, records

    return inner


def assign_classification_targets_from_record(classification_labels: dict, record):
    for comp in record.components:
        name = comp.task.name
        if isinstance(comp, ClassificationLabelsRecordComponent):
            if comp.is_multilabel:
                labels = comp.one_hot_encoded()
                classification_labels[name].append(labels)
            else:
                labels = comp.label_ids
                classification_labels[name].extend(labels)


def massage_multi_aug_classification_data(
    classification_data, classification_targets, target_key: str
):
    for group in classification_data.values():
        group[target_key] = {
            task: tensor(classification_targets[task]) for task in group["tasks"]
        }
        group["images"] = torch.stack(group["images"])

    return {k: dict(v) for k, v in classification_data.items()}
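A sketch of how `unload_records` is meant to wrap a collate function before handing it to a `DataLoader`; the `DummyRecord` and toy `build_batch` are stand-ins for illustration, not the library's own:

import torch

class DummyRecord:
    """Stand-in for an icevision record (hypothetical, for illustration)."""
    def __init__(self):
        self.img = torch.rand(3, 64, 64)
    def unload(self):
        self.img = None  # free the image once the batch is built

def build_batch(records, pad_value=0):
    # Toy collate fn: stack images; real ones also build detection targets
    images = torch.stack([r.img for r in records])
    return (images,), records

collate_fn = unload_records(build_batch, build_batch_kwargs={"pad_value": 0})
(batch,), records = collate_fn([DummyRecord() for _ in range(4)])
assert records[0].img is None  # records were unloaded after batching
# In practice: DataLoader(dataset, batch_size=8, collate_fn=collate_fn)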