Skip to content

Commit

Permalink
Update QDTrack zoo (#134)
Browse files Browse the repository at this point in the history
* Update QDTrack zoo

* Finish updating QDTrack zoo

* Fix issues

* Fix mypy

* Fix README
  • Loading branch information
thomasehuang authored Jan 16, 2024
1 parent ab4fa89 commit 93f5c41
Show file tree
Hide file tree
Showing 14 changed files with 112 additions and 42 deletions.
23 changes: 0 additions & 23 deletions tests/zoo/bdd100k/qdtrack_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,26 +21,3 @@ def test_frcnn_r50_fpn_1x_bdd100k(self) -> None:
f"{self.gt_config_path}/qdtrack_frcnn_r50_fpn_1x_bdd100k.yaml",
self.varying_keys,
)

def test_frcnn_r50_fpn_augs_1x_bdd100k(self) -> None:
"""Test the config for QDTrack Faster-RCNN.
This instantiates the config and compares it to a ground truth.
"""
compare_configs(
f"{self.config_prefix}.qdtrack_frcnn_r50_fpn_augs_1x_bdd100k",
f"{self.gt_config_path}/"
+ "qdtrack_frcnn_r50_fpn_augs_1x_bdd100k.yaml",
self.varying_keys,
)

def test_yolox_x_50e_bdd100k(self) -> None:
"""Test the config for QDTrack YOLOX.
This instantiates the config and compares it to a ground truth.
"""
compare_configs(
f"{self.config_prefix}.qdtrack_yolox_x_50e_bdd100k",
f"{self.gt_config_path}/qdtrack_yolox_x_50e_bdd100k.yaml",
self.varying_keys,
)
35 changes: 35 additions & 0 deletions tests/zoo/qdtrack_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
"""QDTrack configs tests."""
import unittest

from .util import compare_configs


class TestQDTrackConfig(unittest.TestCase):
    """Check the QDTrack zoo configs against their stored references."""

    # Prefix joined with "." in front of each config name handed to
    # compare_configs.
    config_prefix = "qdtrack"
    # Directory holding the reference YAML files the configs are compared to.
    gt_config_path = "tests/vis4d-test-data/config_test/qdtrack"
    # Passed through as the third argument of compare_configs
    # (presumably keys excluded from the comparison -- confirm in .util).
    varying_keys = ["save_prefix", "output_dir", "version", "timestamp"]

    def test_frcnn_r50_fpn_augs_1x_bdd100k(self) -> None:
        """Compare the Faster R-CNN (augs) QDTrack config to its reference.

        Hands the config name and the matching reference YAML path to
        compare_configs.
        """
        config_name = (
            f"{self.config_prefix}.qdtrack_frcnn_r50_fpn_augs_1x_bdd100k"
        )
        reference_yaml = (
            f"{self.gt_config_path}/"
            + "qdtrack_frcnn_r50_fpn_augs_1x_bdd100k.yaml"
        )
        compare_configs(config_name, reference_yaml, self.varying_keys)

    def test_yolox_x_50e_bdd100k(self) -> None:
        """Compare the YOLOX-x QDTrack config to its reference.

        Hands the config name and the matching reference YAML path to
        compare_configs.
        """
        config_name = f"{self.config_prefix}.qdtrack_yolox_x_50e_bdd100k"
        reference_yaml = (
            f"{self.gt_config_path}/qdtrack_yolox_x_50e_bdd100k.yaml"
        )
        compare_configs(config_name, reference_yaml, self.varying_keys)
2 changes: 1 addition & 1 deletion vis4d/data/datasets/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def im_decode(
"Please install opencv-python to use cv2 backend!"
)
img_np: NDArrayUI8 = np.frombuffer(im_bytes, np.uint8)
img = imdecode(
img = imdecode( # type: ignore
img_np, IMREAD_GRAYSCALE if mode == "L" else IMREAD_COLOR
)
if mode == "RGB":
Expand Down
4 changes: 2 additions & 2 deletions vis4d/data/transforms/photometric.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,9 +327,9 @@ def __call__(self, images: list[NDArrayF32]) -> list[NDArrayF32]:
for i, image in enumerate(images):
image = image[0].astype(np.uint8)
if self.image_channel_mode == "BGR":
image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) # type: ignore
else:
image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV) # type: ignore
image = image.astype(np.int16)
hsv_gains = np.random.uniform(-1, 1, 3) * [
self.hue_delta,
Expand Down
2 changes: 2 additions & 0 deletions vis4d/zoo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from .faster_rcnn import AVAILABLE_MODELS as FASTER_RCNN_MODELS
from .fcn_resnet import AVAILABLE_MODELS as FCN_RESNET_MODELS
from .mask_rcnn import AVAILABLE_MODELS as MASK_RCNN_MODELS
from .qdtrack import AVAILABLE_MODELS as QDTRACK_MODELS
from .retinanet import AVAILABLE_MODELS as RETINANET_MODELS
from .shift import AVAILABLE_MODELS as SHIFT_MODELS
from .vit import AVAILABLE_MODELS as VIT_MODELS
Expand All @@ -21,6 +22,7 @@
"faster_rcnn": FASTER_RCNN_MODELS,
"fcn_resnet": FCN_RESNET_MODELS,
"mask_rcnn": MASK_RCNN_MODELS,
"qdtrack": QDTRACK_MODELS,
"retinanet": RETINANET_MODELS,
"shift": SHIFT_MODELS,
"vit": VIT_MODELS,
Expand Down
14 changes: 6 additions & 8 deletions vis4d/zoo/bdd100k/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,19 +88,17 @@ The BDD100K dataset contains MOT annotations for 2K videos (1.4K/200/400 for tra

### QDTrack

[QDTrack: Quasi-Dense Similarity Learning for Appearance-Only Multiple Object Tracking](https://arxiv.org/abs/2210.06984) [TPAMI, CVPR 2021 Oral]
[Quasi-Dense Similarity Learning for Multiple Object Tracking](https://arxiv.org/abs/2006.06664) [CVPR 2021 Oral]

Authors: [Tobias Fischer*](https://tobiasfshr.github.io/), [Thomas E Huang*](https://www.thomasehuang.com/), [Jiangmiao Pang*](https://scholar.google.com/citations?user=ssSfKpAAAAAJ), [Linlu Qiu](https://linlu-qiu.github.io/), [Haofeng Chen](https://www.haofeng.io/), Qi Li, [Trevor Darrell](https://people.eecs.berkeley.edu/~trevor/), [Fisher Yu](https://www.yf.io/)
Authors: [Jiangmiao Pang](https://scholar.google.com/citations?user=ssSfKpAAAAAJ), Linlu Qiu, [Xia Li](https://xialipku.github.io/), [Haofeng Chen](https://www.haofeng.io/), Qi Li, [Trevor Darrell](https://people.eecs.berkeley.edu/~trevor/), [Fisher Yu](https://www.yf.io/)

<details>
<summary>Abstract</summary>
Similarity learning has been recognized as a crucial step for object tracking. However, existing multiple object tracking methods only use sparse ground truth matching as the training objective, while ignoring the majority of the informative regions in images. In this paper, we present Quasi-Dense Similarity Learning, which densely samples hundreds of object regions on a pair of images for contrastive learning. We combine this similarity learning with multiple existing object detectors to build Quasi-Dense Tracking (QDTrack), which does not require displacement regression or motion priors. We find that the resulting distinctive feature space admits a simple nearest neighbor search at inference time for object association. In addition, we show that our similarity learning scheme is not limited to video data, but can learn effective instance similarity even from static input, enabling a competitive tracking performance without training on videos or using tracking supervision. We conduct extensive experiments on a wide variety of popular MOT benchmarks. We find that, despite its simplicity, QDTrack rivals the performance of state-of-the-art tracking methods on all benchmarks and sets a new state-of-the-art on the large-scale BDD100K MOT benchmark, while introducing negligible computational overhead to the detector.
Similarity learning has been recognized as a crucial step for object tracking. However, existing multiple object tracking methods only use sparse ground truth matching as the training objective, while ignoring the majority of the informative regions on the images. In this paper, we present Quasi-Dense Similarity Learning, which densely samples hundreds of region proposals on a pair of images for contrastive learning. We can naturally combine this similarity learning with existing detection methods to build Quasi-Dense Tracking (QDTrack) without turning to displacement regression or motion priors. We also find that the resulting distinctive feature space admits a simple nearest neighbor search at the inference time. Despite its simplicity, QDTrack outperforms all existing methods on MOT, BDD100K, Waymo, and TAO tracking benchmarks. It achieves 68.7 MOTA at 20.3 FPS on MOT17 without using external training data. Compared to methods with similar detectors, it boosts almost 10 points of MOTA and significantly decreases the number of ID switches on BDD100K and Waymo datasets.
</details>

#### Results

| Detector | Base Network | Strong Augs. | mMOTA-val | mIDF1-val | ID Sw.-val | Scores-val | Config | Weights | Preds | Visuals |
| :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: |
| Faster R-CNN | R-50-FPN | | 36.1 | 51.8 | 6165 | [scores]() | [config](./qdtrack/qdtrack_frcnn_r50_fpn_1x_bdd100k.py) | [model]() | [preds]() | [visuals]() |
| Faster R-CNN | R-50-FPN || 37.7 | 52.7 | 7257 | [scores]() | [config](./qdtrack/qdtrack_frcnn_r50_fpn_augs_1x_bdd100k.py) | [model]() | [preds]() | [visuals]() |
| YOLOX-x | CSPNet || 42.3 | 55.1 | 9164 | [scores]() | [config](./qdtrack/qdtrack_yolox_x_50e_bdd100k.py) | [model]() | [preds]() | [visuals]() |
| Detector | Base Network | mMOTA-val | mIDF1-val | ID Sw.-val | Scores-val | Config | Weights | Preds | Visuals |
| :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: |
| Faster R-CNN | R-50-FPN | 36.1 | 51.8 | 6165 | [scores]() | [config](./qdtrack/qdtrack_frcnn_r50_fpn_1x_bdd100k.py) | [model]() | [preds]() | [visuals]() |
6 changes: 1 addition & 5 deletions vis4d/zoo/bdd100k/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,7 @@
mask_rcnn_r50_3x_bdd100k,
mask_rcnn_r50_5x_bdd100k,
)
from .qdtrack import (
qdtrack_frcnn_r50_fpn_1x_bdd100k,
qdtrack_yolox_x_50e_bdd100k,
)
from .qdtrack import qdtrack_frcnn_r50_fpn_1x_bdd100k
from .semantic_fpn import (
semantic_fpn_r50_40k_bdd100k,
semantic_fpn_r50_80k_bdd100k,
Expand All @@ -26,5 +23,4 @@
"semantic_fpn_r50_80k_bdd100k": semantic_fpn_r50_80k_bdd100k,
"semantic_fpn_r101_80k_bdd100k": semantic_fpn_r101_80k_bdd100k,
"qdtrack_frcnn_r50_fpn_1x_bdd100k": qdtrack_frcnn_r50_fpn_1x_bdd100k,
"qdtrack_yolox_x_50e_bdd100k": qdtrack_yolox_x_50e_bdd100k,
}
49 changes: 49 additions & 0 deletions vis4d/zoo/qdtrack/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# [TPAMI 2023] QDTrack: Quasi-Dense Similarity Learning for Appearance-Only Multiple Object Tracking
This is the official implementation of our paper **"QDTrack: Quasi-Dense Similarity Learning for Appearance-Only Multiple Object Tracking"**.

[Tobias Fischer*](https://tobiasfshr.github.io/), [Thomas E Huang*](https://www.thomasehuang.com/), [Jiangmiao Pang*](https://scholar.google.com/citations?user=ssSfKpAAAAAJ), [Linlu Qiu](https://linlu-qiu.github.io/), [Haofeng Chen](https://www.haofeng.io/), Qi Li, [Trevor Darrell](https://people.eecs.berkeley.edu/~trevor/), [Fisher Yu](https://www.yf.io/)

[[Paper](https://arxiv.org/abs/2210.06984)]

<img src="./src/banner.png" width="830">

## Abstract
Similarity learning has been recognized as a crucial step for object tracking. However, existing multiple object tracking methods only use sparse ground truth matching as the training objective, while ignoring the majority of the informative regions in images. In this paper, we present Quasi-Dense Similarity Learning, which densely samples hundreds of object regions on a pair of images for contrastive learning. We combine this similarity learning with multiple existing object detectors to build Quasi-Dense Tracking (QDTrack), which does not require displacement regression or motion priors. We find that the resulting distinctive feature space admits a simple nearest neighbor search at inference time for object association. In addition, we show that our similarity learning scheme is not limited to video data, but can learn effective instance similarity even from static input, enabling a competitive tracking performance without training on videos or using tracking supervision. We conduct extensive experiments on a wide variety of popular MOT benchmarks. We find that, despite its simplicity, QDTrack rivals the performance of state-of-the-art tracking methods on all benchmarks and sets a new state-of-the-art on the large-scale BDD100K MOT benchmark, while introducing negligible computational overhead to the detector.

## Model Zoo

| Detector | Base Network | Strong Augs. | mMOTA-val | mIDF1-val | ID Sw.-val | Config | Weights | Visuals |
| :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: |
| Faster R-CNN | R-50-FPN | ✓ | 37.7 | 52.7 | 7257 | [config](./qdtrack_frcnn_r50_fpn_augs_1x_bdd100k.py) | [model](https://dl.cv.ethz.ch/vis4d/qdtrack/qdtrack_frcnn_r50_fpn_augs_1x_bdd100k_456b1e.pt) | [visuals](https://dl.cv.ethz.ch/vis4d/qdtrack/qdtrack_frcnn_r50_fpn_augs_1x_bdd100k_vis.zip) |
| YOLOX-x | CSPNet | ✓ | 42.3 | 55.1 | 9164 | [config](./qdtrack_yolox_x_50e_bdd100k.py) | [model](https://dl.cv.ethz.ch/vis4d/qdtrack/qdtrack_yolox_x_25e_bdd100k_c14af2.pt) | [visuals](https://dl.cv.ethz.ch/vis4d/qdtrack/qdtrack_yolox_x_25e_bdd100k_vis.zip) |

## Getting Started

### Train
```bash
# R50
python -m vis4d.pl fit --config vis4d/zoo/qdtrack/qdtrack_frcnn_r50_fpn_augs_1x_bdd100k.py --gpus 8

# YOLOX
python -m vis4d.pl fit --config vis4d/zoo/qdtrack/qdtrack_yolox_x_50e_bdd100k.py --gpus 8
```

### Inference
```bash
# R50
python -m vis4d.pl test --config vis4d/zoo/qdtrack/qdtrack_frcnn_r50_fpn_augs_1x_bdd100k.py --ckpt ${checkpoint_path} --gpus ${num_gpus}

# YOLOX
python -m vis4d.pl test --config vis4d/zoo/qdtrack/qdtrack_yolox_x_50e_bdd100k.py --ckpt ${checkpoint_path} --gpus ${num_gpus}
```

## Citation
```
@article{fischer2023qdtrack,
title={Qdtrack: Quasi-dense similarity learning for appearance-only multiple object tracking},
author={Fischer, Tobias and Huang, Thomas E and Pang, Jiangmiao and Qiu, Linlu and Chen, Haofeng and Darrell, Trevor and Yu, Fisher},
journal={IEEE Transactions on Pattern Analysis and Machine Intelligence},
year={2023},
publisher={IEEE}
}
```
13 changes: 13 additions & 0 deletions vis4d/zoo/qdtrack/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
"""QDTrack."""
from . import (
qdtrack_frcnn_r50_fpn_augs_1x_bdd100k,
qdtrack_yolox_x_50e_bdd100k,
)

# Available models in the QDTrack model zoo: maps each config name to the
# config module of the same name imported from this package.
AVAILABLE_MODELS = {
"qdtrack_frcnn_r50_fpn_augs_1x_bdd100k": (
qdtrack_frcnn_r50_fpn_augs_1x_bdd100k
),
"qdtrack_yolox_x_50e_bdd100k": qdtrack_yolox_x_50e_bdd100k,
}
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
from vis4d.eval.bdd100k import BDD100KTrackEvaluator
from vis4d.op.base import ResNet
from vis4d.vis.image import BoundingBoxVisualizer
from vis4d.zoo.bdd100k.qdtrack.data_yolox import get_bdd100k_track_cfg
from vis4d.zoo.qdtrack.data_yolox import get_bdd100k_track_cfg


def get_config() -> ExperimentConfig:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
from vis4d.engine.connectors import CallbackConnector, DataConnector
from vis4d.eval.bdd100k import BDD100KTrackEvaluator
from vis4d.vis.image import BoundingBoxVisualizer
from vis4d.zoo.bdd100k.qdtrack.data_yolox import get_bdd100k_track_cfg
from vis4d.zoo.qdtrack.data_yolox import get_bdd100k_track_cfg


def get_config() -> ExperimentConfig:
Expand Down
Binary file added vis4d/zoo/qdtrack/src/banner.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit 93f5c41

Please sign in to comment.