From 15be47ac291ee139b96932565730be8c3dd8430f Mon Sep 17 00:00:00 2001 From: Florian Vahl <7vahl@informatik.uni-hamburg.de> Date: Tue, 3 Dec 2024 13:44:31 +0100 Subject: [PATCH 1/4] Add better script for cfg customization --- config/create_custom_model.sh | 794 ---------------------------------- pyproject.toml | 1 + yoeo/models.py | 2 +- yoeo/scripts/customizeCfg.py | 131 ++++++ yoeo/utils/parse_config.py | 14 +- 5 files changed, 146 insertions(+), 796 deletions(-) delete mode 100755 config/create_custom_model.sh create mode 100644 yoeo/scripts/customizeCfg.py diff --git a/config/create_custom_model.sh b/config/create_custom_model.sh deleted file mode 100755 index eba2ebe..0000000 --- a/config/create_custom_model.sh +++ /dev/null @@ -1,794 +0,0 @@ -#!/bin/bash - -NUM_CLASSES=$1 - -echo " -[net] -# Testing -#batch=1 -#subdivisions=1 -# Training -batch=16 -subdivisions=1 -width=416 -height=416 -channels=3 -momentum=0.9 -decay=0.0005 -angle=0 -saturation = 1.5 -exposure = 1.5 -hue=.1 - -learning_rate=0.001 -burn_in=1000 -max_batches = 500200 -policy=steps -steps=400000,450000 -scales=.1,.1 - -[convolutional] -batch_normalize=1 -filters=32 -size=3 -stride=1 -pad=1 -activation=leaky - -# Downsample - -[convolutional] -batch_normalize=1 -filters=64 -size=3 -stride=2 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=32 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=64 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from=-3 -activation=linear - -# Downsample - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=2 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from=-3 -activation=linear - -# Downsample - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=2 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from=-3 -activation=linear - - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=leaky - 
-[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from=-3 -activation=linear - -# Downsample - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=2 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from=-3 -activation=linear - - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from=-3 -activation=linear - - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from=-3 -activation=linear - - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from=-3 -activation=linear - - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from=-3 -activation=linear - - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from=-3 -activation=linear - -# Downsample - -[convolutional] -batch_normalize=1 -filters=1024 -size=3 -stride=2 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=1024 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=1024 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=1024 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=1024 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from=-3 -activation=linear - -###################### - -[convolutional] 
-batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=1024 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=1024 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=1024 -activation=leaky - -[convolutional] -size=1 -stride=1 -pad=1 -filters=$(expr 3 \* $(expr $NUM_CLASSES \+ 5)) -activation=linear - - -[yolo] -mask = 6,7,8 -anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 -classes=$NUM_CLASSES -num=9 -jitter=.3 -ignore_thresh = .7 -truth_thresh = 1 -random=1 - - -[route] -layers = -4 - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=leaky - -[upsample] -stride=2 - -[route] -layers = -1, 61 - - - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 -activation=leaky - -[convolutional] -size=1 -stride=1 -pad=1 -filters=$(expr 3 \* $(expr $NUM_CLASSES \+ 5)) -activation=linear - - -[yolo] -mask = 3,4,5 -anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 -classes=$NUM_CLASSES -num=9 -jitter=.3 -ignore_thresh = .7 -truth_thresh = 1 -random=1 - - - -[route] -layers = -4 - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=leaky - -[upsample] -stride=2 - -[route] -layers = -1, 36 - - - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=leaky - -[convolutional] -size=1 -stride=1 -pad=1 -filters=$(expr 3 \* $(expr $NUM_CLASSES \+ 5)) -activation=linear - - -[yolo] -mask = 0,1,2 -anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 -classes=$NUM_CLASSES -num=9 -jitter=.3 -ignore_thresh = .7 -truth_thresh = 1 -random=1 -" >> yolov3-custom.cfg diff --git a/pyproject.toml b/pyproject.toml index 1cd1093..e3585b6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,3 +36,4 @@ yoeo-test = "yoeo.test:run" yoeo-to-onnx = "yoeo.scripts.convertPyTorchModelToONNX:run" yoeo-onnx-to-openvino = "yoeo.scripts.convertONNXModelToOpenVinoIR:run" yoeo-onnx-to-tvm = "yoeo.scripts.convertONNXModelToTVM:run" +yoeo-custiomize-cfg = "yoeo.scripts.customizeCfg:run" diff --git a/yoeo/models.py b/yoeo/models.py index 1b69cd8..784627c 100644 --- a/yoeo/models.py +++ b/yoeo/models.py @@ -335,7 +335,7 @@ def load_model(model_path, 
weights_path=None): if weights_path: if weights_path.endswith(".pth"): # Load checkpoint weights - model.load_state_dict(torch.load(weights_path, map_location=device)) + model.load_state_dict(torch.load(weights_path, map_location=device, weights_only=True)) else: # Load darknet weights model.load_darknet_weights(weights_path) diff --git a/yoeo/scripts/customizeCfg.py b/yoeo/scripts/customizeCfg.py new file mode 100644 index 0000000..9aa3584 --- /dev/null +++ b/yoeo/scripts/customizeCfg.py @@ -0,0 +1,131 @@ +#!/usr/bin/env python3 + +""" +This file takes a given model architecture configuration file (cfg / toml) +and adapts it to the classes of a given dataset defined in a yaml file. +""" + +import argparse +import yaml +from yoeo.utils.parse_config import ( + parse_model_config, + write_model_config, + parse_data_config, +) + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Customize a model architecture configuration file to a dataset" + ) + parser.add_argument( + "--cfg", + "-c", + type=str, + default="config/yoeo.cfg", + help="Path to the model architecture configuration file", + ) + parser.add_argument( + "--dataset", + "-d", + type=str, + default="config/custom.data", + help="Path to the dataset configuration file", + ) + parser.add_argument( + "--output", + "-o", + type=str, + default="config/yoeo-custom.cfg", + help="Path to the output model architecture configuration file", + ) + return parser.parse_args() + + +def run(): + args = parse_args() + + # Load the dataset configuration + dataset_config = parse_data_config(args.dataset) + + # Load the class names from the dataset configuration + with open(dataset_config["names"], "r") as f: + class_names = yaml.safe_load(f) + + # Validate the dataset configuration + assert ( + "detection" in class_names + ), "Dataset configuration file must contain a 'detection' key listing all the object classes" + assert isinstance( + class_names["detection"], list + ), "The 'detection' key in the dataset configuration file must be a list" + assert ( + len(class_names["detection"]) > 0 + ), "The 'detection' key in the dataset configuration file must contain at least one class" + assert all( + isinstance(c, str) for c in class_names["detection"] + ), "All classes in the 'detection' key must be strings" + assert ( + "segmentation" in class_names + ), "Dataset configuration file must contain a 'segmentation' key listing all the segmentation classes" + assert isinstance( + class_names["segmentation"], list + ), "The 'segmentation' key in the dataset configuration file must be a list" + assert ( + len(class_names["segmentation"]) > 0 + ), "The 'segmentation' key in the dataset configuration file must contain at least one class" + assert all( + isinstance(c, str) for c in class_names["segmentation"] + ), "All classes in the 'segmentation' key must be strings" + + number_of_object_detection_classes = len(class_names["detection"]) + number_of_segmentation_classes = len(class_names["segmentation"]) + + print( + f"Found {number_of_object_detection_classes} object detection classes and {number_of_segmentation_classes} segmentation classes" + ) + + # Load the model configuration + model_architecture = parse_model_config(args.cfg) + + # Search for all yolo layers in the model configuration and + # adapt the number of classes as well as + # the number of filters in the preceding convolutional layer + for i, layer in enumerate(model_architecture): + if layer["type"] == "yolo": + # Adapt the number of classes + 
layer["classes"] = number_of_object_detection_classes + # Adapt the number of filters in the preceding convolutional layer + assert ( + i > 0 + ), "Yolo layer can not be the first layer in the model architecture" + prev_layer = model_architecture[i - 1] + assert prev_layer.get("filters") is not None, ( + "Yolo layer must be preceded by a convolutional layer for this script to work, " + "if you do more complex stuff, you have to adapt the configuration manually" + ) + prev_layer["filters"] = (number_of_object_detection_classes + 5) * len( + layer["mask"].split(",") + ) + if layer["type"] == "seg": + # Adapt the number of classes + layer["classes"] = number_of_segmentation_classes + # Adapt the number of filters in the preceding convolutional layer + assert ( + i > 0 + ), "Seg layer can not be the first layer in the model architecture" + prev_layer = model_architecture[i - 1] + assert prev_layer.get("filters") is not None, ( + "Seg layer must be preceded by a convolutional layer for this script to work, " + "if you do more complex stuff, you have to adapt the configuration manually" + ) + prev_layer["filters"] = number_of_segmentation_classes + + # Write the adapted model configuration to the output file + write_model_config(model_architecture, args.output) + + print(f"Model architecture adapted and saved to {args.output}") + + +if __name__ == "__main__": + run() diff --git a/yoeo/utils/parse_config.py b/yoeo/utils/parse_config.py index 4c9fa7f..df307a6 100644 --- a/yoeo/utils/parse_config.py +++ b/yoeo/utils/parse_config.py @@ -1,6 +1,7 @@ +from typing import Any -def parse_model_config(path): +def parse_model_config(path: str) -> list[dict[str, str]]: """Parses the yolo-v3 layer configuration file and returns module definitions""" file = open(path, 'r') lines = file.read().split('\n') @@ -21,6 +22,17 @@ def parse_model_config(path): return module_defs +def write_model_config(module_defs: list[dict[str, Any]], path: str): + """Writes module definitions to the file""" + with open(path, 'w') as f: + for module_def in module_defs: + f.write(f"[{module_def['type']}]\n") + for key, value in module_def.items(): + if key != 'type' and not (key == 'batch_normalize' and int(value) == 0): + f.write(f"{key}={value}\n") + f.write("\n") + + def parse_data_config(path): """Parses the data configuration file""" options = dict() From ce75608ad437420353e1e7cc221d7849e3920102 Mon Sep 17 00:00:00 2001 From: Florian Vahl <7vahl@informatik.uni-hamburg.de> Date: Tue, 3 Dec 2024 13:45:47 +0100 Subject: [PATCH 2/4] Extend the gitignore --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index 184a610..8f073c2 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,7 @@ checkpoints/ logs/ .python-version + +*.onnx +*.pth +**/tuning_records.json From 65eb404abbd498cbc0a80aeb005019d5b8dd5195 Mon Sep 17 00:00:00 2001 From: Florian Vahl <7vahl@informatik.uni-hamburg.de> Date: Tue, 3 Dec 2024 13:53:38 +0100 Subject: [PATCH 3/4] Update README with instructions for adapting model configuration to new class names --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index b3ee34e..1b6765e 100644 --- a/README.md +++ b/README.md @@ -70,6 +70,14 @@ You can adjust the log directory using `--logdir ` when running `tensorboa #### Classes Add class names to `data/custom/yoeo_names.yaml`. 
+Run the following command to adapt the model file (cfg) to the new number of classes:
+
+```bash
+poetry run yoeo-custiomize-cfg -c config/yoeo.cfg -d config/custom.data -o config/yoeo-custom.cfg
+```
+
+This adjusts the detection and segmentation output layers of the model, and the convolutional layers directly before them, to match the number of classes in your dataset.
+
 #### Image Folder
 Move the images of your dataset to `data/custom/images/`.

From 45010667128591cd708e5820be61c7e1827d53a1 Mon Sep 17 00:00:00 2001
From: Florian Vahl <7vahl@informatik.uni-hamburg.de>
Date: Tue, 3 Dec 2024 13:55:52 +0100
Subject: [PATCH 4/4] Bump version

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index e3585b6..ad15414 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "YOEO"
-version = "1.6.0"
+version = "1.6.1"
 description = "A hybrid CNN for object detection and semantic segmentation"
 authors = ["Florian Vahl ", "Jan Gutsche "]