
0.0.10 #47

Merged · 61 commits · Feb 26, 2024

Changes from all commits
cad55a3
Add dotenv dep
Eve-ning Feb 14, 2024
f87471c
Add loading of .env via conf
Eve-ning Feb 14, 2024
b5b9e77
Rename polycrop and allow of value setting
Eve-ning Feb 14, 2024
b448cab
Add test for polycrop
Eve-ning Feb 14, 2024
7892492
Add rot as aug
Eve-ning Feb 14, 2024
416ec20
Merge pull request #46 from FR-DC/FRML-114
Eve-ning Feb 14, 2024
75e4588
Merge branch 'main' into 0.0.10
Eve-ning Feb 14, 2024
7dbc965
Fix issue with InceptionV3 only using RGB
Eve-ning Feb 14, 2024
d73c090
Merge pull request #48 from FR-DC/FRML-115
Eve-ning Feb 14, 2024
c5dc148
Fix issue with casuarina dataset incorrect dir
Eve-ning Feb 14, 2024
66df406
Merge pull request #49 from FR-DC/FRML-116
Eve-ning Feb 14, 2024
745c7a7
Fix issue with missing in_channels
Eve-ning Feb 15, 2024
d11452d
Adapt InceptionV3 to use multi-channels
Eve-ning Feb 15, 2024
1fa29e1
Disallow arbitrary channels with in_channel specified
Eve-ning Feb 15, 2024
f205a25
Bump gitpython from 3.1.40 to 3.1.41
dependabot[bot] Feb 15, 2024
d1ff70e
Bump pillow from 10.1.0 to 10.2.0
dependabot[bot] Feb 15, 2024
4fd81fe
Add train accuracy tracking
Eve-ning Feb 15, 2024
f02e4a7
Merge branch '0.0.10' into FRML-117
Eve-ning Feb 15, 2024
1c43769
Merge branch '0.0.10' into FRML-118
Eve-ning Feb 15, 2024
46adb35
Merge pull request #52 from FR-DC/FRML-117
Eve-ning Feb 15, 2024
6ac2841
Merge pull request #53 from FR-DC/FRML-118
Eve-ning Feb 15, 2024
37a5649
Improve explainability of adapter
Eve-ning Feb 15, 2024
74244f7
Merge branch '0.0.10' into FRML-117
Eve-ning Feb 15, 2024
0e080ee
Fix missing kernel size and stride spec
Eve-ning Feb 15, 2024
91a4e34
Dynamically fetch the kernel and stride
Eve-ning Feb 15, 2024
5493815
Merge branch 'FRML-117' of https://github.com/FR-DC/FRDC-ML into FRML…
Eve-ning Feb 15, 2024
48fb402
Fix bad access to conv layer
Eve-ning Feb 15, 2024
d5c4e67
Lint
Eve-ning Feb 15, 2024
ef8daff
Merge pull request #54 from FR-DC/FRML-117
Eve-ning Feb 15, 2024
0db79b9
Remove ReLU from first layer
Eve-ning Feb 15, 2024
eef89bf
Merge branch '0.0.10' into dependabot/pip/pillow-10.2.0
Eve-ning Feb 15, 2024
cde52a5
Revert "Remove ReLU from first layer"
Eve-ning Feb 15, 2024
6b44677
Merge branch '0.0.10' into dependabot/pip/gitpython-3.1.41
Eve-ning Feb 15, 2024
803ff9a
Merge pull request #50 from FR-DC/dependabot/pip/gitpython-3.1.41
Eve-ning Feb 15, 2024
2873d2f
Merge pull request #51 from FR-DC/dependabot/pip/pillow-10.2.0
Eve-ning Feb 15, 2024
942e7d5
Create consistency.ipynb
Eve-ning Feb 20, 2024
b2df562
Update consistency.ipynb
Eve-ning Feb 21, 2024
4a179c0
Make Standard Scaler fit on segments only
Eve-ning Feb 21, 2024
2765a59
Expose ImageNet Scaling option
Eve-ning Feb 21, 2024
b5783da
Format variable names
Eve-ning Feb 21, 2024
594f2a9
Deprecate verbose wandb helper class
Eve-ning Feb 21, 2024
33286c0
Remove unused val_iters arg
Eve-ning Feb 21, 2024
0afe80c
Remove unused val iters args
Eve-ning Feb 21, 2024
a5a008b
Unfreeze inception for better performance
Eve-ning Feb 21, 2024
6d4ba32
Disable wandb during tests
Eve-ning Feb 21, 2024
33b9108
Fix incorrect loss name index
Eve-ning Feb 21, 2024
b74c406
Fix uninitialized unl for supervised learning
Eve-ning Feb 21, 2024
9d95948
Fix imagenet_scaling not used
Eve-ning Feb 21, 2024
66bb6ca
Simplify prediction function
Eve-ning Feb 21, 2024
555d812
Make augmentation size customizable
Eve-ning Feb 21, 2024
b3e805c
Swap to EfficientNet & Update aug params
Eve-ning Feb 21, 2024
b616d42
Create efficientnetb1.py
Eve-ning Feb 21, 2024
b577933
Reduce to 1 Layer
Eve-ning Feb 21, 2024
6cdfb4f
Merge pull request #55 from FR-DC/FRML-119
Eve-ning Feb 21, 2024
c20c67e
Merge branch '0.0.10' into FRRD-60
Eve-ning Feb 21, 2024
f5724d4
Merge pull request #57 from FR-DC/FRML-120
Eve-ning Feb 21, 2024
f47c70d
Merge branch '0.0.10' into FRRD-60
Eve-ning Feb 21, 2024
0755316
Update consistency
Eve-ning Feb 22, 2024
37fe07c
Update consistency.ipynb
Eve-ning Feb 22, 2024
3e8382b
Add section about a const dataset
Eve-ning Feb 22, 2024
b1e5f22
Merge pull request #56 from FR-DC/FRRD-60
Eve-ning Feb 26, 2024
154 changes: 94 additions & 60 deletions poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
@@ -23,6 +23,7 @@ label-studio-sdk = "^0.0.32"
#torchvision = {version="^0.16.0", source="pytorch"}
#torchaudio = {version="^2.1.0", source="pytorch"}
#lightning = "^2.0.9.post0"
python-dotenv = "^1.0.1"

[tool.poetry.group.dev.dependencies]
pytest = "^7.4.2"
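For context, python-dotenv (the dependency added above) reads KEY=VALUE pairs from a .env file at the project root. A minimal sketch of such a file follows; the key names are purely hypothetical and are not taken from this repository:

# .env — hypothetical example, not part of this PR
LABEL_STUDIO_API_KEY=your-api-key-here
GOOGLE_APPLICATION_CREDENTIALS=/path/to/gcs-credentials.json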
15 changes: 9 additions & 6 deletions src/frdc/conf.py
@@ -5,11 +5,14 @@

import label_studio_sdk as label_studio
import requests
from dotenv import load_dotenv
from google.cloud import storage as gcs

logger = logging.getLogger(__name__)

ROOT_DIR = Path(__file__).parents[2]

load_dotenv(ROOT_DIR / ".env")
LOCAL_DATASET_ROOT_DIR = ROOT_DIR / "rsc"
os.environ["GOOGLE_CLOUD_PROJECT"] = "frmodel"
GCS_PROJECT_ID = "frmodel"
@@ -53,7 +56,7 @@
)
GCS_BUCKET = GCS_CLIENT.bucket(GCS_BUCKET_NAME)
logger.info("Connected to GCS.")
except Exception as e:
except Exception:
logger.warning(
"Could not connect to GCS. Will not be able to download files. "
"Check that you've (1) Installed the GCS CLI and (2) Set up the"
@@ -76,11 +79,11 @@
LABEL_STUDIO_CLIENT.get_project(1)
except requests.exceptions.HTTPError:
logger.warning(
f"Could not get main annotation project. "
f"Pulling annotations may not work. "
f"It's possible that your API Key is incorrect, "
f"or somehow your .netrc is preventing you from "
f"accessing the project. "
"Could not get main annotation project. "
"Pulling annotations may not work. "
"It's possible that your API Key is incorrect, "
"or somehow your .netrc is preventing you from "
"accessing the project. "
)
except requests.exceptions.ConnectionError:
logger.warning(
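A minimal sketch of what the new load_dotenv call in conf.py does, per python-dotenv's documented behavior (the key name below is hypothetical):

import os
from pathlib import Path

from dotenv import load_dotenv

ROOT_DIR = Path(__file__).parents[2]
# Read KEY=VALUE pairs from ROOT_DIR/.env into os.environ.
# By default, variables already set in the environment are not overridden.
load_dotenv(ROOT_DIR / ".env")
api_key = os.environ.get("LABEL_STUDIO_API_KEY")  # hypothetical key name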
1 change: 0 additions & 1 deletion src/frdc/evaluate/__init__.py
@@ -1 +0,0 @@

4 changes: 2 additions & 2 deletions src/frdc/load/preset.py
@@ -152,10 +152,10 @@ class FRDCDatasetPreset:
"chestnut_nature_park", "20210510", "90deg60m84.5pct255deg"
)
casuarina_20220418_183deg = FRDCDatasetPartial(
"casuarina_nature_park", "20220418", "183deg"
"casuarina", "20220418", "183deg"
)
casuarina_20220418_93deg = FRDCDatasetPartial(
"casuarina_nature_park", "20220418", "93deg"
"casuarina", "20220418", "93deg"
)
DEBUG = lambda resize=299: FRDCDatasetPartial(
site="DEBUG", date="0", version=None
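This change points both casuarina presets at the "casuarina" directory rather than "casuarina_nature_park", matching the actual dataset location (commit c5dc148). As a usage sketch — assuming the partials are invoked like the other presets, which this diff does not show:

from frdc.load.preset import FRDCDatasetPreset

# Hypothetical usage; the exact FRDCDatasetPartial call signature is not in this diff.
ds = FRDCDatasetPreset.casuarina_20220418_183deg()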
136 changes: 136 additions & 0 deletions src/frdc/models/efficientnetb1.py
@@ -0,0 +1,136 @@
from copy import deepcopy

import torch
from sklearn.preprocessing import OrdinalEncoder, StandardScaler
from torch import nn
from torchvision.models import (
EfficientNet,
efficientnet_b1,
EfficientNet_B1_Weights,
)

from frdc.train.mixmatch_module import MixMatchModule
from frdc.utils.ema import EMA


class EfficientNetB1MixMatchModule(MixMatchModule):
MIN_SIZE = 320
EFF_OUT_DIMS = 1280

def __init__(
self,
*,
in_channels: int,
n_classes: int,
lr: float,
x_scaler: StandardScaler,
y_encoder: OrdinalEncoder,
ema_lr: float = 0.001,
weight_decay: float = 1e-5,
frozen: bool = True,
):
"""Initialize the EfficientNet model.

Args:
in_channels: The number of input channels.
n_classes: The number of classes.
lr: The learning rate.
x_scaler: The X input StandardScaler.
y_encoder: The Y input OrdinalEncoder.
ema_lr: The learning rate for the EMA model.
weight_decay: The weight decay.
frozen: Whether to freeze the base model.

Notes:
- Min input size: 320 x 320
"""
self.lr = lr
self.weight_decay = weight_decay

super().__init__(
n_classes=n_classes,
x_scaler=x_scaler,
y_encoder=y_encoder,
sharpen_temp=0.5,
mix_beta_alpha=0.75,
)

self.eff = efficientnet_b1(
weights=EfficientNet_B1_Weights.IMAGENET1K_V2
)

# Remove the final layer
self.eff.classifier = nn.Identity()

if frozen:
for param in self.eff.parameters():
param.requires_grad = False

# Adapt the first layer to accept the number of channels
self.eff = self.adapt_efficient_multi_channel(self.eff, in_channels)

self.fc = nn.Sequential(
nn.Linear(self.EFF_OUT_DIMS, n_classes),
nn.Softmax(dim=1),
)

# The problem is that the deep copy runs even before the module is
# initialized, which means ema_model is empty.
ema_model = deepcopy(self)
for param in ema_model.parameters():
param.detach_()

self._ema_model = ema_model
self.ema_updater = EMA(model=self, ema_model=self.ema_model)
self.ema_lr = ema_lr

@staticmethod
def adapt_efficient_multi_channel(
eff: EfficientNet,
in_channels: int,
) -> EfficientNet:
"""Adapt the EfficientNet model to accept a different number of
input channels.

Notes:
This operation is in-place; however, it still returns the model.

Args:
eff: The EfficientNet model
in_channels: The number of input channels

Returns:
The adapted EfficientNet model.
"""
old_conv = eff.features[0][0]
new_conv = nn.Conv2d(
in_channels=in_channels,
out_channels=old_conv.out_channels,
kernel_size=old_conv.kernel_size,
stride=old_conv.stride,
padding=old_conv.padding,
bias=old_conv.bias,
)
new_conv.weight.data[:, :3] = old_conv.weight.data
new_conv.weight.data[:, 3:] = old_conv.weight.data[:, 1:2].repeat(
1, 5, 1, 1
)
eff.features[0][0] = new_conv

return eff

@property
def ema_model(self):
return self._ema_model

def update_ema(self):
self.ema_updater.update(self.ema_lr)

def forward(self, x: torch.Tensor):
"""Forward pass."""
return self.fc(self.eff(x))

def configure_optimizers(self):
return torch.optim.Adam(
self.parameters(), lr=self.lr, weight_decay=self.weight_decay
)
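A usage sketch for the new module, based only on the constructor and constants shown above. Note that adapt_efficient_multi_channel copies the pretrained RGB filters into the first three input channels and fills the remainder with repeated green-channel filters; the hardcoded repeat(1, 5, 1, 1) means the adapter as written assumes in_channels == 8 (3 RGB + 5 extra). The scaler and encoder below are illustrative stand-ins, not from this PR:

import torch
from sklearn.preprocessing import OrdinalEncoder, StandardScaler

from frdc.models.efficientnetb1 import EfficientNetB1MixMatchModule

m = EfficientNetB1MixMatchModule(
    in_channels=8,               # 3 RGB + 5 extra; see repeat(1, 5, 1, 1) above
    n_classes=10,                # illustrative value
    lr=1e-3,
    x_scaler=StandardScaler(),   # assumed to be fit elsewhere on the training segments
    y_encoder=OrdinalEncoder(),  # assumed to be fit elsewhere on the labels
)
# MIN_SIZE = 320, so inputs should be at least 320 x 320.
probs = m(torch.rand(2, 8, 320, 320))  # -> (2, n_classes), softmax output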
124 changes: 109 additions & 15 deletions src/frdc/models/inceptionv3.py
@@ -4,6 +4,7 @@
from sklearn.preprocessing import OrdinalEncoder, StandardScaler
from torch import nn
from torchvision.models import Inception_V3_Weights, inception_v3
from torchvision.models.inception import BasicConv2d, Inception3

from frdc.train.mixmatch_module import MixMatchModule
from frdc.utils.ema import EMA
@@ -18,16 +19,24 @@ class InceptionV3MixMatchModule(MixMatchModule):
def __init__(
self,
*,
in_channels: int,
n_classes: int,
lr: float,
x_scaler: StandardScaler,
y_encoder: OrdinalEncoder,
ema_lr: float = 0.001,
imagenet_scaling: bool = False,
):
"""Initialize the InceptionV3 model.

Args:
n_classes: The number of output classes
in_channels: The number of input channels.
n_classes: The number of classes.
lr: The learning rate.
x_scaler: The X input StandardScaler.
y_encoder: The Y input OrdinalEncoder.
ema_lr: The learning rate for the EMA model.
imagenet_scaling: Whether to use the adapted ImageNet scaling.

Notes:
- Min input size: 299 x 299.
@@ -44,32 +53,118 @@ def __init__(
sharpen_temp=0.5,
mix_beta_alpha=0.75,
)
self.imagenet_scaling = imagenet_scaling

self.inception = inception_v3(
weights=Inception_V3_Weights.IMAGENET1K_V1,
transform_input=False,
)

# Remove the final layer
self.inception.fc = nn.Identity()

# Freeze base model
# Freeze inception weights
for param in self.inception.parameters():
param.requires_grad = False

# Adapt the first layer to accept the number of channels
self.inception = self.adapt_inception_multi_channel(
self.inception, in_channels
)

self.fc = nn.Sequential(
nn.BatchNorm1d(self.INCEPTION_OUT_DIMS),
nn.Linear(self.INCEPTION_OUT_DIMS, self.INCEPTION_OUT_DIMS // 2),
nn.BatchNorm1d(self.INCEPTION_OUT_DIMS // 2),
nn.Linear(self.INCEPTION_OUT_DIMS // 2, n_classes),
nn.Softmax(dim=1),
)

# The problem is that the deep copy runs even before the module is
# initialized, which means ema_model is empty.
ema_model = deepcopy(self)
for param in ema_model.parameters():
param.detach_()
# for param in ema_model.parameters():
# param.detach_()

self._ema_model = ema_model
self.ema_updater = EMA(model=self, ema_model=self.ema_model)
self.ema_lr = ema_lr

@staticmethod
def adapt_inception_multi_channel(
inception: Inception3,
in_channels: int,
) -> Inception3:
"""Adapt the 1st layer of the InceptionV3 model to accept n-channels.

Notes:
This operation is in-place; however, it still returns the model.

Args:
inception: The InceptionV3 model
in_channels: The number of input channels

Returns:
The adapted InceptionV3 model.
"""

original_in_channels = inception.Conv2d_1a_3x3.conv.in_channels

# Replicate the first layer, but with a different number of channels
conv2d_1a_3x3 = BasicConv2d(
in_channels=in_channels,
out_channels=inception.Conv2d_1a_3x3.conv.out_channels,
kernel_size=inception.Conv2d_1a_3x3.conv.kernel_size,
stride=inception.Conv2d_1a_3x3.conv.stride,
)

# Copy the BGR weights from the first layer of the original model
conv2d_1a_3x3.conv.weight.data[
:, :original_in_channels
] = inception.Conv2d_1a_3x3.conv.weight.data

# We'll repeat the G weights to the other channels as an initial
# approximation
# We use [1:2] instead of [1] so it doesn't lose the dimension
conv2d_1a_3x3.conv.weight.data[
:, original_in_channels:
] = inception.Conv2d_1a_3x3.conv.weight.data[:, 1:2].tile(
(in_channels - original_in_channels, 1, 1)
)

# Finally, set the new layer back
inception.Conv2d_1a_3x3 = conv2d_1a_3x3

return inception

@staticmethod
def _imagenet_scaling(x: torch.Tensor) -> torch.Tensor:
"""Perform adapted ImageNet normalization on the input tensor.

See Also:
torchvision.models.inception.Inception3._transform_input

Notes:
This is adapted from the original InceptionV3 model, which
uses an RGB transformation. We have adapted it to accept
any number of channels.

Additional channels will use the same mean and std as the
green channel. This is because our task-domain is green-dominant.

"""
x_ch0 = (
torch.unsqueeze(x[:, 0], 1) * (0.229 / 0.5) + (0.485 - 0.5) / 0.5
)
x_ch1 = (
torch.unsqueeze(x[:, 1], 1) * (0.224 / 0.5) + (0.456 - 0.5) / 0.5
)
x_ch2 = (
torch.unsqueeze(x[:, 2], 1) * (0.225 / 0.5) + (0.406 - 0.5) / 0.5
)
x_chk = x[:, 3:] * (0.224 / 0.5) + (0.456 - 0.5) / 0.5
x = torch.cat((x_ch0, x_ch1, x_ch2, x_chk), 1)
return x

@property
def ema_model(self):
return self._ema_model
@@ -81,24 +176,22 @@ def forward(self, x: torch.Tensor):
"""Forward pass.

Notes:
- Min input size: 299 x 299.
- Batch size: >= 2.
Min input size: 299 x 299.

Args:
x: Input tensor of shape (batch_size, channels, height, width).
"""

if (
any(s == 1 for s in x.shape)
or x.shape[2] < self.MIN_SIZE
or x.shape[3] < self.MIN_SIZE
):
if x.shape[2] < self.MIN_SIZE or x.shape[3] < self.MIN_SIZE:
raise RuntimeError(
f"Input shape {x.shape} must adhere to the following:\n"
f" - No singleton dimensions\n"
f" - Size >= {self.MIN_SIZE}\n"
f"Input shape {x.shape} is too small for InceptionV3.\n"
f"Minimum size: {self.MIN_SIZE} x {self.MIN_SIZE}.\n"
f"Got: {x.shape[2]} x {x.shape[3]}."
)

if self.imagenet_scaling:
x = self._imagenet_scaling(x)

# During training, the auxiliary outputs are used for auxiliary loss,
# but during testing, only the main output is used.
if self.training:
Expand All @@ -112,4 +205,5 @@ def configure_optimizers(self):
return torch.optim.Adam(
self.parameters(),
lr=self.lr,
weight_decay=1e-5,
)
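A small sketch verifying what adapt_inception_multi_channel does to the first convolution: it widens Conv2d_1a_3x3 to in_channels inputs, keeps the pretrained RGB filters in channels 0-2, and tiles the pretrained green-channel filters across the remaining channels:

import torch
from torchvision.models import Inception_V3_Weights, inception_v3

from frdc.models.inceptionv3 import InceptionV3MixMatchModule

inc = inception_v3(weights=Inception_V3_Weights.IMAGENET1K_V1, transform_input=False)
inc = InceptionV3MixMatchModule.adapt_inception_multi_channel(inc, in_channels=8)

w = inc.Conv2d_1a_3x3.conv.weight
assert w.shape[1] == 8                 # first conv now accepts 8 channels
assert torch.equal(w[:, 3], w[:, 4])   # extra channels start as copies of the green filters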