Update QDTrack zoo (#134)

* Update QDTrack zoo * Finish updating QDTrack zoo * Fix issues * Fix mypy * Fix README
SysCV · Jan 16, 2024 · 93f5c41 · 93f5c41
1 parent ab4fa89
commit 93f5c41
Show file tree

Hide file tree

Showing 14 changed files with 112 additions and 42 deletions.
diff --git a/tests/vis4d-test-data b/tests/vis4d-test-data
diff --git a/tests/zoo/bdd100k/qdtrack_test.py b/tests/zoo/bdd100k/qdtrack_test.py
@@ -21,26 +21,3 @@ def test_frcnn_r50_fpn_1x_bdd100k(self) -> None:
             f"{self.gt_config_path}/qdtrack_frcnn_r50_fpn_1x_bdd100k.yaml",
             self.varying_keys,
         )
-
-    def test_frcnn_r50_fpn_augs_1x_bdd100k(self) -> None:
-        """Test the config for QDTrack Faster-RCNN.
-
-        This instantiates the config and compares it to a ground truth.
-        """
-        compare_configs(
-            f"{self.config_prefix}.qdtrack_frcnn_r50_fpn_augs_1x_bdd100k",
-            f"{self.gt_config_path}/"
-            + "qdtrack_frcnn_r50_fpn_augs_1x_bdd100k.yaml",
-            self.varying_keys,
-        )
-
-    def test_yolox_x_50e_bdd100k(self) -> None:
-        """Test the config for QDTrack YOLOX.
-
-        This instantiates the config and compares it to a ground truth.
-        """
-        compare_configs(
-            f"{self.config_prefix}.qdtrack_yolox_x_50e_bdd100k",
-            f"{self.gt_config_path}/qdtrack_yolox_x_50e_bdd100k.yaml",
-            self.varying_keys,
-        )
diff --git a/tests/zoo/qdtrack_test.py b/tests/zoo/qdtrack_test.py
@@ -0,0 +1,35 @@
+"""QDTrack configs tests."""
+import unittest
+
+from .util import compare_configs
+
+
+class TestQDTrackConfig(unittest.TestCase):
+    """Tests the content of the provided configs for QDTrack."""
+
+    config_prefix = "qdtrack"
+    gt_config_path = "tests/vis4d-test-data/config_test/qdtrack"
+    varying_keys = ["save_prefix", "output_dir", "version", "timestamp"]
+
+    def test_frcnn_r50_fpn_augs_1x_bdd100k(self) -> None:
+        """Test the config for QDTrack Faster-RCNN.
+
+        This instantiates the config and compares it to a ground truth.
+        """
+        compare_configs(
+            f"{self.config_prefix}.qdtrack_frcnn_r50_fpn_augs_1x_bdd100k",
+            f"{self.gt_config_path}/"
+            + "qdtrack_frcnn_r50_fpn_augs_1x_bdd100k.yaml",
+            self.varying_keys,
+        )
+
+    def test_yolox_x_50e_bdd100k(self) -> None:
+        """Test the config for QDTrack YOLOX.
+
+        This instantiates the config and compares it to a ground truth.
+        """
+        compare_configs(
+            f"{self.config_prefix}.qdtrack_yolox_x_50e_bdd100k",
+            f"{self.gt_config_path}/qdtrack_yolox_x_50e_bdd100k.yaml",
+            self.varying_keys,
+        )
diff --git a/vis4d/data/datasets/util.py b/vis4d/data/datasets/util.py
@@ -71,7 +71,7 @@ def im_decode(
                 "Please install opencv-python to use cv2 backend!"
             )
         img_np: NDArrayUI8 = np.frombuffer(im_bytes, np.uint8)
-        img = imdecode(
+        img = imdecode(  # type: ignore
             img_np, IMREAD_GRAYSCALE if mode == "L" else IMREAD_COLOR
         )
         if mode == "RGB":

diff --git a/vis4d/data/transforms/photometric.py b/vis4d/data/transforms/photometric.py
@@ -327,9 +327,9 @@ def __call__(self, images: list[NDArrayF32]) -> list[NDArrayF32]:
         for i, image in enumerate(images):
             image = image[0].astype(np.uint8)
             if self.image_channel_mode == "BGR":
-                image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
+                image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)  # type: ignore
             else:
-                image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
+                image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)  # type: ignore
             image = image.astype(np.int16)
             hsv_gains = np.random.uniform(-1, 1, 3) * [
                 self.hue_delta,

diff --git a/vis4d/zoo/__init__.py b/vis4d/zoo/__init__.py
@@ -9,6 +9,7 @@
 from .faster_rcnn import AVAILABLE_MODELS as FASTER_RCNN_MODELS
 from .fcn_resnet import AVAILABLE_MODELS as FCN_RESNET_MODELS
 from .mask_rcnn import AVAILABLE_MODELS as MASK_RCNN_MODELS
+from .qdtrack import AVAILABLE_MODELS as QDTRACK_MODELS
 from .retinanet import AVAILABLE_MODELS as RETINANET_MODELS
 from .shift import AVAILABLE_MODELS as SHIFT_MODELS
 from .vit import AVAILABLE_MODELS as VIT_MODELS
@@ -21,6 +22,7 @@
     "faster_rcnn": FASTER_RCNN_MODELS,
     "fcn_resnet": FCN_RESNET_MODELS,
     "mask_rcnn": MASK_RCNN_MODELS,
+    "qdtrack": QDTRACK_MODELS,
     "retinanet": RETINANET_MODELS,
     "shift": SHIFT_MODELS,
     "vit": VIT_MODELS,

diff --git a/vis4d/zoo/bdd100k/README.md b/vis4d/zoo/bdd100k/README.md
@@ -88,19 +88,17 @@ The BDD100K dataset contains MOT annotations for 2K videos (1.4K/200/400 for tra
 
 ### QDTrack
 
-[QDTrack: Quasi-Dense Similarity Learning for Appearance-Only Multiple Object Tracking](https://arxiv.org/abs/2210.06984) [TPAMI, CVPR 2021 Oral]
+[Quasi-Dense Similarity Learning for Multiple Object Tracking](https://arxiv.org/abs/2006.06664) [CVPR 2021 Oral]
 
-Authors: [Tobias Fischer*](https://tobiasfshr.github.io/), [Thomas E Huang*](https://www.thomasehuang.com/), [Jiangmiao Pang*](https://scholar.google.com/citations?user=ssSfKpAAAAAJ), [Linlu Qiu](https://linlu-qiu.github.io/), [Haofeng Chen](https://www.haofeng.io/), Qi Li, [Trevor Darrell](https://people.eecs.berkeley.edu/~trevor/), [Fisher Yu](https://www.yf.io/)
+Authors: [Jiangmiao Pang](https://scholar.google.com/citations?user=ssSfKpAAAAAJ), Linlu Qiu, [Xia Li](https://xialipku.github.io/), [Haofeng Chen](https://www.haofeng.io/), Qi Li, [Trevor Darrell](https://people.eecs.berkeley.edu/~trevor/), [Fisher Yu](https://www.yf.io/)
 
 <details>
 <summary>Abstract</summary>
-Similarity learning has been recognized as a crucial step for object tracking. However, existing multiple object tracking methods only use sparse ground truth matching as the training objective, while ignoring the majority of the informative regions in images. In this paper, we present Quasi-Dense Similarity Learning, which densely samples hundreds of object regions on a pair of images for contrastive learning. We combine this similarity learning with multiple existing object detectors to build Quasi-Dense Tracking (QDTrack), which does not require displacement regression or motion priors. We find that the resulting distinctive feature space admits a simple nearest neighbor search at inference time for object association. In addition, we show that our similarity learning scheme is not limited to video data, but can learn effective instance similarity even from static input, enabling a competitive tracking performance without training on videos or using tracking supervision. We conduct extensive experiments on a wide variety of popular MOT benchmarks. We find that, despite its simplicity, QDTrack rivals the performance of state-of-the-art tracking methods on all benchmarks and sets a new state-of-the-art on the large-scale BDD100K MOT benchmark, while introducing negligible computational overhead to the detector.
+Similarity learning has been recognized as a crucial step for object tracking. However, existing multiple object tracking methods only use sparse ground truth matching as the training objective, while ignoring the majority of the informative regions on the images. In this paper, we present Quasi-Dense Similarity Learning, which densely samples hundreds of region proposals on a pair of images for contrastive learning. We can naturally combine this similarity learning with existing detection methods to build Quasi-Dense Tracking (QDTrack) without turning to displacement regression or motion priors. We also find that the resulting distinctive feature space admits a simple nearest neighbor search at the inference time. Despite its simplicity, QDTrack outperforms all existing methods on MOT, BDD100K, Waymo, and TAO tracking benchmarks. It achieves 68.7 MOTA at 20.3 FPS on MOT17 without using external training data. Compared to methods with similar detectors, it boosts almost 10 points of MOTA and significantly decreases the number of ID switches on BDD100K and Waymo datasets.
 </details>
 
 #### Results
 
-| Detector  | Base Network | Strong Augs. | mMOTA-val | mIDF1-val | ID Sw.-val | Scores-val | Config | Weights | Preds | Visuals |
-| :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: |
-| Faster R-CNN | R-50-FPN |  | 36.1 | 51.8 | 6165 | [scores]() | [config](./qdtrack/qdtrack_frcnn_r50_fpn_1x_bdd100k.py) | [model]() | [preds]() | [visuals]() |
-| Faster R-CNN | R-50-FPN | ✓ | 37.7 | 52.7 | 7257 | [scores]() | [config](./qdtrack/qdtrack_frcnn_r50_fpn_augs_1x_bdd100k.py) | [model]() | [preds]() | [visuals]() |
-| YOLOX-x | CSPNet | ✓ | 42.3 | 55.1 | 9164 | [scores]() | [config](./qdtrack/qdtrack_yolox_x_50e_bdd100k.py) | [model]() | [preds]() | [visuals]() |
+| Detector  | Base Network | mMOTA-val | mIDF1-val | ID Sw.-val | Scores-val | Config | Weights | Preds | Visuals |
+| :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: |
+| Faster R-CNN | R-50-FPN | 36.1 | 51.8 | 6165 | [scores]() | [config](./qdtrack/qdtrack_frcnn_r50_fpn_1x_bdd100k.py) | [model]() | [preds]() | [visuals]() |
diff --git a/vis4d/zoo/bdd100k/__init__.py b/vis4d/zoo/bdd100k/__init__.py
@@ -5,10 +5,7 @@
     mask_rcnn_r50_3x_bdd100k,
     mask_rcnn_r50_5x_bdd100k,
 )
-from .qdtrack import (
-    qdtrack_frcnn_r50_fpn_1x_bdd100k,
-    qdtrack_yolox_x_50e_bdd100k,
-)
+from .qdtrack import qdtrack_frcnn_r50_fpn_1x_bdd100k
 from .semantic_fpn import (
     semantic_fpn_r50_40k_bdd100k,
     semantic_fpn_r50_80k_bdd100k,
@@ -26,5 +23,4 @@
     "semantic_fpn_r50_80k_bdd100k": semantic_fpn_r50_80k_bdd100k,
     "semantic_fpn_r101_80k_bdd100k": semantic_fpn_r101_80k_bdd100k,
     "qdtrack_frcnn_r50_fpn_1x_bdd100k": qdtrack_frcnn_r50_fpn_1x_bdd100k,
-    "qdtrack_yolox_x_50e_bdd100k": qdtrack_yolox_x_50e_bdd100k,
 }
diff --git a/vis4d/zoo/qdtrack/README.md b/vis4d/zoo/qdtrack/README.md
@@ -0,0 +1,49 @@
+# [TPAMI 2023] QDTrack: Quasi-Dense Similarity Learning for Appearance-Only Multiple Object Tracking
+This is the official implementation of our paper **"QDTrack: Quasi-Dense Similarity Learning for Appearance-Only Multiple Object Tracking"**.
+
+[Tobias Fischer*](https://tobiasfshr.github.io/), [Thomas E Huang*](https://www.thomasehuang.com/), [Jiangmiao Pang*](https://scholar.google.com/citations?user=ssSfKpAAAAAJ), [Linlu Qiu](https://linlu-qiu.github.io/), [Haofeng Chen](https://www.haofeng.io/), Qi Li, [Trevor Darrell](https://people.eecs.berkeley.edu/~trevor/), [Fisher Yu](https://www.yf.io/)
+
+[[Paper](https://arxiv.org/abs/2210.06984)]
+
+<img src="./src/banner.png" width="830">
+
+## Abstract
+Similarity learning has been recognized as a crucial step for object tracking. However, existing multiple object tracking methods only use sparse ground truth matching as the training objective, while ignoring the majority of the informative regions in images. In this paper, we present Quasi-Dense Similarity Learning, which densely samples hundreds of object regions on a pair of images for contrastive learning. We combine this similarity learning with multiple existing object detectors to build Quasi-Dense Tracking (QDTrack), which does not require displacement regression or motion priors. We find that the resulting distinctive feature space admits a simple nearest neighbor search at inference time for object association. In addition, we show that our similarity learning scheme is not limited to video data, but can learn effective instance similarity even from static input, enabling a competitive tracking performance without training on videos or using tracking supervision. We conduct extensive experiments on a wide variety of popular MOT benchmarks. We find that, despite its simplicity, QDTrack rivals the performance of state-of-the-art tracking methods on all benchmarks and sets a new state-of-the-art on the large-scale BDD100K MOT benchmark, while introducing negligible computational overhead to the detector.
+
+## Model Zoo
+
+| Detector  | Base Network | Strong Augs. | mMOTA-val | mIDF1-val | ID Sw.-val | Config | Weights | Visuals |
+| :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: |
+| Faster R-CNN | R-50-FPN | ✓ | 37.7 | 52.7 | 7257 | [config](./qdtrack_frcnn_r50_fpn_augs_1x_bdd100k.py) | [model](https://dl.cv.ethz.ch/vis4d/qdtrack/qdtrack_frcnn_r50_fpn_augs_1x_bdd100k_456b1e.pt) | [visuals](https://dl.cv.ethz.ch/vis4d/qdtrack/qdtrack_frcnn_r50_fpn_augs_1x_bdd100k_vis.zip) |
+| YOLOX-x | CSPNet | ✓ | 42.3 | 55.1 | 9164 | [config](./qdtrack_yolox_x_50e_bdd100k.py) | [model](https://dl.cv.ethz.ch/vis4d/qdtrack/qdtrack_yolox_x_25e_bdd100k_c14af2.pt) | [visuals](https://dl.cv.ethz.ch/vis4d/qdtrack/qdtrack_yolox_x_25e_bdd100k_vis.zip) |
+
+## Getting Started
+
+### Train
+```bash
+# R50
+python -m vis4d.pl fit --config vis4d/zoo/qdtrack/qdtrack_frcnn_r50_fpn_augs_1x_bdd100k.py --gpus 8
+
+# YOLOX
+python -m vis4d.pl fit --config vis4d/zoo/qdtrack/qdtrack_yolox_x_50e_bdd100k.py --gpus 8
+```
+
+### Inference
+```bash
+# R50
+python -m vis4d.pl test --config vis4d/zoo/qdtrack/qdtrack_frcnn_r50_fpn_augs_1x_bdd100k.py --ckpt ${checkpoint_path} --gpus ${num_gpus}
+
+# YOLOX
+python -m vis4d.pl test --config vis4d/zoo/qdtrack/qdtrack_yolox_x_50e_bdd100k.py --ckpt ${checkpoint_path} --gpus ${num_gpus}
+```
+
+## Citation
+```
+@article{fischer2023qdtrack,
+  title={Qdtrack: Quasi-dense similarity learning for appearance-only multiple object tracking},
+  author={Fischer, Tobias and Huang, Thomas E and Pang, Jiangmiao and Qiu, Linlu and Chen, Haofeng and Darrell, Trevor and Yu, Fisher},
+  journal={IEEE Transactions on Pattern Analysis and Machine Intelligence},
+  year={2023},
+  publisher={IEEE}
+}
+```
diff --git a/vis4d/zoo/qdtrack/__init__.py b/vis4d/zoo/qdtrack/__init__.py
@@ -0,0 +1,13 @@
+"""QDTrack."""
+from . import (
+    qdtrack_frcnn_r50_fpn_augs_1x_bdd100k,
+    qdtrack_yolox_x_50e_bdd100k,
+)
+
+# Available models in the QDTrack model zoo, keyed by config name.
+AVAILABLE_MODELS = {
+    "qdtrack_frcnn_r50_fpn_augs_1x_bdd100k": (
+        qdtrack_frcnn_r50_fpn_augs_1x_bdd100k
+    ),
+    "qdtrack_yolox_x_50e_bdd100k": qdtrack_yolox_x_50e_bdd100k,
+}
diff --git a/vis4d/zoo/bdd100k/qdtrack/data_yolox.py → vis4d/zoo/qdtrack/data_yolox.py b/vis4d/zoo/bdd100k/qdtrack/data_yolox.py → vis4d/zoo/qdtrack/data_yolox.py
diff --git a/.../qdtrack_frcnn_r50_fpn_augs_1x_bdd100k.py → .../qdtrack_frcnn_r50_fpn_augs_1x_bdd100k.py b/.../qdtrack_frcnn_r50_fpn_augs_1x_bdd100k.py → .../qdtrack_frcnn_r50_fpn_augs_1x_bdd100k.py
@@ -32,7 +32,7 @@
 from vis4d.eval.bdd100k import BDD100KTrackEvaluator
 from vis4d.op.base import ResNet
 from vis4d.vis.image import BoundingBoxVisualizer
-from vis4d.zoo.bdd100k.qdtrack.data_yolox import get_bdd100k_track_cfg
+from vis4d.zoo.qdtrack.data_yolox import get_bdd100k_track_cfg
 
 
 def get_config() -> ExperimentConfig:

diff --git a/...0k/qdtrack/qdtrack_yolox_x_50e_bdd100k.py → ...oo/qdtrack/qdtrack_yolox_x_50e_bdd100k.py b/...0k/qdtrack/qdtrack_yolox_x_50e_bdd100k.py → ...oo/qdtrack/qdtrack_yolox_x_50e_bdd100k.py
@@ -29,7 +29,7 @@
 from vis4d.engine.connectors import CallbackConnector, DataConnector
 from vis4d.eval.bdd100k import BDD100KTrackEvaluator
 from vis4d.vis.image import BoundingBoxVisualizer
-from vis4d.zoo.bdd100k.qdtrack.data_yolox import get_bdd100k_track_cfg
+from vis4d.zoo.qdtrack.data_yolox import get_bdd100k_track_cfg
 
 
 def get_config() -> ExperimentConfig:

diff --git a/vis4d/zoo/qdtrack/src/banner.png b/vis4d/zoo/qdtrack/src/banner.png
+0 −0		config_test/qdtrack/qdtrack_frcnn_r50_fpn_augs_1x_bdd100k.yaml
+0 −0		config_test/qdtrack/qdtrack_yolox_x_50e_bdd100k.yaml