From a6310990b2da3d166e8c7151af01df4c7fac8def Mon Sep 17 00:00:00 2001 From: "Thomas E. Huang" Date: Wed, 29 Nov 2023 14:05:33 +0100 Subject: [PATCH] Update QDTrack + YOLOX (#131) * Update zoo * Clean up code, update zoo * Updates to qdtrack configs and yolox * Fix lint * Fix qdtrack inference test * Update test --- tests/model/track/qdtrack_test.py | 13 ++- tests/model/track/testcases/qdtrack-yolox.pt | Bin 3083 -> 3083 bytes vis4d/config/common/models/qdtrack.py | 3 +- vis4d/engine/callbacks/yolox_callbacks.py | 8 +- vis4d/engine/optim/scheduler.py | 31 +++--- vis4d/model/track/qdtrack.py | 27 +---- vis4d/zoo/bdd100k/README.md | 13 +-- vis4d/zoo/bdd100k/__init__.py | 2 - vis4d/zoo/bdd100k/qdtrack/data_yolox.py | 77 ++++++++------ .../qdtrack_frcnn_r50_fpn_1x_bdd100k.py | 10 +- ... qdtrack_frcnn_r50_fpn_augs_1x_bdd100k.py} | 99 +++++++++--------- .../qdtrack/qdtrack_yolox_x_50e_bdd100k.py | 29 +++-- 12 files changed, 152 insertions(+), 160 deletions(-) rename vis4d/zoo/bdd100k/qdtrack/{qdtrack_yolox_s_50e_bdd100k.py => qdtrack_frcnn_r50_fpn_augs_1x_bdd100k.py} (67%) diff --git a/tests/model/track/qdtrack_test.py b/tests/model/track/qdtrack_test.py index 7753cfd69..50435d8b9 100644 --- a/tests/model/track/qdtrack_test.py +++ b/tests/model/track/qdtrack_test.py @@ -14,9 +14,9 @@ from vis4d.data.transforms.pad import PadImages from vis4d.data.transforms.resize import GenResizeParameters, ResizeImages from vis4d.data.transforms.to_tensor import ToTensor +from vis4d.model.adapter import ModelExpEMAAdapter from vis4d.model.track.qdtrack import ( REV_KEYS, - YOLOX_REV_KEYS, FasterRCNNQDTrack, TrackOut, YOLOXQDTrack, @@ -87,10 +87,13 @@ def test_inference_yolox(self): """Inference test for YOLOX QDTrack.""" TrackIDCounter.reset() # reset track ID counter model_weights = ( - "https://dl.cv.ethz.ch/vis4d/qdtrack-yolox-ema_bdd100k.ckpt" + "https://dl.cv.ethz.ch/vis4d/bdd100k/qdtrack/" + "qdtrack_yolox_x_25e_bdd100k/qdtrack_yolox_x_25e_bdd100k_c14af2.pt" + ) + qdtrack = ModelExpEMAAdapter(YOLOXQDTrack(num_classes=8)) + load_model_checkpoint( + qdtrack, model_weights, rev_keys=[("^model.", "")] ) - qdtrack = YOLOXQDTrack(num_classes=8) - load_model_checkpoint(qdtrack, model_weights, rev_keys=YOLOX_REV_KEYS) qdtrack.eval() data_root = osp.join(get_test_data("bdd100k_test"), "track/images") @@ -133,4 +136,4 @@ def test_inference_yolox(self): for pred, expected in zip(pred_entry, expected_entry): print("PREDICTION:", pred.shape, pred) print("EXPECTED:", expected.shape, expected) - assert torch.isclose(pred, expected, atol=1e-4).all().item() + assert torch.isclose(pred, expected, atol=1e-2).all().item() diff --git a/tests/model/track/testcases/qdtrack-yolox.pt b/tests/model/track/testcases/qdtrack-yolox.pt index 8a8e64021928a7b63f684c3dbf951197ed143749..cf8b5a29c38fffc8236c7402bf742bac48d295d9 100644 GIT binary patch delta 1031 zcmZWnc}Nsd9G;n-S!dj`m3Cz_@9a=Bk2Kq^x9#t_SsS@_k|KnOnD^omRG@Z%L{!$3 zSfZ0C%I@eQGyS9NSSdR!3(Bl4h_W=xqO#u1kOw_@!<+ez`F$U6i)o9g>V_2Ja(boN z8N0aSZn6~aa*mK+Boe@4Npv|)3Pe`16C}yy3|6t-Y_yDxSFvUlo1|har(jc5EQwQe zGgT~?3*`%(b=hPqS6YsWqu^1%4ftX|0vDF{!qJpSJg@5{B-Cf)_G}hso_zwj&Kihm zn}qw`jl_IX7sP1GA^%Vi_I_{7wnWr{C2th`bVcFb?nCh4@GZF9>j97O3Nrjg zz@7#V1Q^oc;PekLR^JGBjg8PTw-q|wEk59|dx^o@Gjb{zhN&QDwZZHxa$jo@;%wGw z47tz6ke8gUyt|r*bvh5MH0_0zn^>GRCjtw{FTwoq75Hr_kLPV{h2-z2V9~;8T>adD z%eH=kXTK{Us5S_{vt0#h&ZHK$!?7^Cwc}3&zO&j58ixnk0&l_c&i7FGupS=SXr#xlJ)udjuEG7=YEF{>h z@k$@D>FMb)l_excAX&L)=YpCL>Jy?&*Tfrz zM4Q!Lw(y_$wV?zxE-yx>&4p-|IR^#Q%s|cKGEv>cP*hv62#x%>Q1O^v%iJR;v}R$7 
z&FUvh{YZtEWw>l#=AQP$Lhb)0O4Awn!GVZHOc7<`?EKxIX@W3~kx3+VqKS)BVvs&D zT)l?lB#Sg~1BxYcV1KdH-$3`ks?t!g)-IooCMLBWnB)f0Fg{OjFr0_GeztKZRD{~5$hDOND-qY0v0P` zaT1{}LBzI7W=Ttb8uCe{B)|CD&2C8CUk~XGMtrtRg?B#-$J#xoAo+VberKaF#jW-A zx4~{wgIgJdAFho6)v0zsHyfd6^cxV(Ae5^1g5OCU?ka4C9r{KXfcs!GbU;&U4}=&M z(0lGaq;;)!4&&%th!`Jje(-W1>R{n0gN>J~*!^ z!;=+vKzY0pFJCtVEBtSPZ}}tmWPAZ{Wp5&3nRy>3!L6Wo9K>^(ukavkHoi>XgVA0S z-fZp>4AjIphTNP1w_u>dTg^mUEdjiNfEq?iluR&9ktk-0^vvU`5~B2uXJV|DXdX9_ zU(!I1pjC0@!jg<{_yV@Yq&Q~hPVAm&=8Z=a(e`6IdcVqr6dRrBLaYPThvuQzkz3H& ztW>o3XfhgH5|8+Jn=||yNrWt(F1kyv!d;zYCEIq1T}EYua9 ziZa@g(Cq;`Dom3K`SWED+ zspbDrE)_LpkDJly)Rd8OY;-v{WMml&bF*`InhG{K^D}dEtQG^W(on;Oh!=Mbo5=pX zBe`t6&J#G-ed^c*@#XfSIE7QmOI9VOJy)*rIxiIu1LZ26C(#L|X^hBP!?F0x^c;hK z{vQzREwUOomM5ACtcxN`ZMNHJ;g9-^WhKPKYnm_P`>2WDR~hIDWtvI diff --git a/vis4d/config/common/models/qdtrack.py b/vis4d/config/common/models/qdtrack.py index 7b989ae23..6f6c93b34 100644 --- a/vis4d/config/common/models/qdtrack.py +++ b/vis4d/config/common/models/qdtrack.py @@ -37,6 +37,7 @@ CONN_BBOX_2D_TRAIN = { "images": K.images, "images_hw": K.input_hw, + "original_hw": K.original_hw, "frame_ids": K.frame_ids, "boxes2d": K.boxes2d, "boxes2d_classes": K.boxes2d_classes, @@ -67,8 +68,6 @@ "ref_track_ids": pred_key("ref_track_ids"), } -CONN_BBOX_2D_YOLOX_TRAIN = {**CONN_BBOX_2D_TRAIN, "original_hw": K.original_hw} - CONN_YOLOX_LOSS_2D = { "cls_outs": pred_key(f"{PRED_PREFIX}.cls_score"), "reg_outs": pred_key(f"{PRED_PREFIX}.bbox_pred"), diff --git a/vis4d/engine/callbacks/yolox_callbacks.py b/vis4d/engine/callbacks/yolox_callbacks.py index be2096505..2d2ba5ea8 100644 --- a/vis4d/engine/callbacks/yolox_callbacks.py +++ b/vis4d/engine/callbacks/yolox_callbacks.py @@ -18,7 +18,7 @@ get_world_size, synchronize, ) -from vis4d.common.logging import rank_zero_info +from vis4d.common.logging import rank_zero_info, rank_zero_warn from vis4d.data.const import CommonKeys as K from vis4d.data.data_pipe import DataPipe from vis4d.data.typing import DictDataOrList @@ -65,13 +65,15 @@ def on_train_epoch_end( found_loss = True yolox_loss = loss["loss"] break - assert found_loss, "YOLOXHeadLoss should be in LossModule." rank_zero_info( "Switching YOLOX training mode starting next training epoch " "(turning off strong augmentations, adding L1 loss, switching to " "validation every epoch)." ) - yolox_loss.loss_l1 = l1_loss # set L1 loss function + if found_loss: + yolox_loss.loss_l1 = l1_loss # set L1 loss function + else: + rank_zero_warn("YOLOXHeadLoss should be in LossModule.") # Set data pipeline to default DataPipe to skip strong augs. # Switch to checking validation every epoch. 
dataloader = trainer_state["train_dataloader"] diff --git a/vis4d/engine/optim/scheduler.py b/vis4d/engine/optim/scheduler.py index 2a5f03b01..d539cdd76 100644 --- a/vis4d/engine/optim/scheduler.py +++ b/vis4d/engine/optim/scheduler.py @@ -33,12 +33,28 @@ def __init__( self.lr_schedulers_cfg = lr_schedulers_cfg self.lr_schedulers: dict[int, LRSchedulerDict] = {} super().__init__(optimizer) - self.steps_per_epoch = steps_per_epoch + self._convert_epochs_to_steps() + for i, lr_scheduler_cfg in enumerate(self.lr_schedulers_cfg): if lr_scheduler_cfg["begin"] == 0: self._instantiate_lr_scheduler(i, lr_scheduler_cfg) + def _convert_epochs_to_steps(self) -> None: + """Convert epochs to steps.""" + for lr_scheduler_cfg in self.lr_schedulers_cfg: + if ( + lr_scheduler_cfg["convert_epochs_to_steps"] + and not lr_scheduler_cfg["epoch_based"] + ): + lr_scheduler_cfg["begin"] *= self.steps_per_epoch + lr_scheduler_cfg["end"] *= self.steps_per_epoch + if lr_scheduler_cfg["convert_attributes"] is not None: + for attr in lr_scheduler_cfg["convert_attributes"]: + lr_scheduler_cfg["scheduler"]["init_args"][ + attr + ] *= self.steps_per_epoch + def _instantiate_lr_scheduler( self, scheduler_idx: int, lr_scheduler_cfg: LrSchedulerConfig ) -> None: @@ -49,19 +65,6 @@ def _instantiate_lr_scheduler( pg["lr"] for pg in self.optimizer.param_groups ] - # Convert epochs to steps - if ( - lr_scheduler_cfg["convert_epochs_to_steps"] - and not lr_scheduler_cfg["epoch_based"] - ): - lr_scheduler_cfg["begin"] *= self.steps_per_epoch - lr_scheduler_cfg["end"] *= self.steps_per_epoch - if lr_scheduler_cfg["convert_attributes"] is not None: - for attr in lr_scheduler_cfg["convert_attributes"]: - lr_scheduler_cfg["scheduler"]["init_args"][ - attr - ] *= self.steps_per_epoch - self.lr_schedulers[scheduler_idx] = { "scheduler": instantiate_classes( lr_scheduler_cfg["scheduler"], optimizer=self.optimizer diff --git a/vis4d/model/track/qdtrack.py b/vis4d/model/track/qdtrack.py index be57f75c7..285d46a41 100644 --- a/vis4d/model/track/qdtrack.py +++ b/vis4d/model/track/qdtrack.py @@ -7,6 +7,7 @@ from torch import Tensor, nn from vis4d.common.ckpt import load_model_checkpoint +from vis4d.model.detect.yolox import REV_KEYS as YOLOX_REV_KEYS from vis4d.op.base import BaseModel, CSPDarknet, ResNet from vis4d.op.box.box2d import scale_and_clip_boxes from vis4d.op.box.encoder import DeltaXYWHBBoxDecoder @@ -26,37 +27,11 @@ from .util import split_key_ref_indices REV_KEYS = [ - # (r"^detector.rpn_head.mm_dense_head\.", "rpn_head."), - # (r"\.rpn_reg\.", ".rpn_box."), - # (r"^detector.roi_head.mm_roi_head.bbox_head\.", "roi_head."), - # (r"^detector.backbone.mm_backbone\.", "body."), - # ( - # r"^detector.backbone.neck.mm_neck.lateral_convs\.", - # "inner_blocks.", - # ), - # ( - # r"^detector.backbone.neck.mm_neck.fpn_convs\.", - # "layer_blocks.", - # ), - # (r"\.conv.weight", ".weigh2t"), - # (r"\.conv.bias", ".bias"), (r"^faster_rcnn_heads\.", "faster_rcnn_head."), (r"^backbone.body\.", "basemodel."), (r"^qdtrack\.", "qdtrack_head."), ] -# from old Vis4D checkpoint -YOLOX_REV_KEYS = [ - (r"^detector.backbone.mm_backbone\.", "basemodel."), - (r"^bbox_head\.", "yolox_head."), - (r"^detector.backbone.neck.mm_neck\.", "fpn."), - (r"^detector.bbox_head.mm_dense_head\.", "yolox_head."), - (r"^similarity_head\.", "qdtrack_head.similarity_head."), - (r"\.bn\.", ".norm."), - (r"\.conv.weight", ".weight"), - (r"\.conv.bias", ".bias"), -] - class FasterRCNNQDTrackOut(NamedTuple): """Output of QDtrack model.""" diff --git 
a/vis4d/zoo/bdd100k/README.md b/vis4d/zoo/bdd100k/README.md index b4c8dd932..c7f7be0aa 100644 --- a/vis4d/zoo/bdd100k/README.md +++ b/vis4d/zoo/bdd100k/README.md @@ -90,16 +90,17 @@ The BDD100K dataset contains MOT annotations for 2K videos (1.4K/200/400 for tra [QDTrack: Quasi-Dense Similarity Learning for Appearance-Only Multiple Object Tracking](https://arxiv.org/abs/2210.06984) [TPAMI, CVPR 2021 Oral] -Authors: [Tobias Fischer](https://tobiasfshr.github.io/), [Thomas E Huang](https://www.thomasehuang.com/), [Jiangmiao Pang](https://scholar.google.com/citations?user=ssSfKpAAAAAJ), [Linlu Qiu](https://linlu-qiu.github.io/), [Haofeng Chen](https://www.haofeng.io/), Qi Li, [Trevor Darrell](https://people.eecs.berkeley.edu/~trevor/), [Fisher Yu](https://www.yf.io/) +Authors: [Tobias Fischer*](https://tobiasfshr.github.io/), [Thomas E Huang*](https://www.thomasehuang.com/), [Jiangmiao Pang*](https://scholar.google.com/citations?user=ssSfKpAAAAAJ), [Linlu Qiu](https://linlu-qiu.github.io/), [Haofeng Chen](https://www.haofeng.io/), Qi Li, [Trevor Darrell](https://people.eecs.berkeley.edu/~trevor/), [Fisher Yu](https://www.yf.io/)
Abstract -Similarity learning has been recognized as a crucial step for object tracking. However, existing multiple object tracking methods only use sparse ground truth matching as the training objective, while ignoring the majority of the informative regions on the images. In this paper, we present Quasi-Dense Similarity Learning, which densely samples hundreds of region proposals on a pair of images for contrastive learning. We can naturally combine this similarity learning with existing detection methods to build Quasi-Dense Tracking (QDTrack) without turning to displacement regression or motion priors. We also find that the resulting distinctive feature space admits a simple nearest neighbor search at the inference time. Despite its simplicity, QDTrack outperforms all existing methods on MOT, BDD100K, Waymo, and TAO tracking benchmarks. It achieves 68.7 MOTA at 20.3 FPS on MOT17 without using external training data. Compared to methods with similar detectors, it boosts almost 10 points of MOTA and significantly decreases the number of ID switches on BDD100K and Waymo datasets. +Similarity learning has been recognized as a crucial step for object tracking. However, existing multiple object tracking methods only use sparse ground truth matching as the training objective, while ignoring the majority of the informative regions in images. In this paper, we present Quasi-Dense Similarity Learning, which densely samples hundreds of object regions on a pair of images for contrastive learning. We combine this similarity learning with multiple existing object detectors to build Quasi-Dense Tracking (QDTrack), which does not require displacement regression or motion priors. We find that the resulting distinctive feature space admits a simple nearest neighbor search at inference time for object association. In addition, we show that our similarity learning scheme is not limited to video data, but can learn effective instance similarity even from static input, enabling a competitive tracking performance without training on videos or using tracking supervision. We conduct extensive experiments on a wide variety of popular MOT benchmarks. We find that, despite its simplicity, QDTrack rivals the performance of state-of-the-art tracking methods on all benchmarks and sets a new state-of-the-art on the large-scale BDD100K MOT benchmark, while introducing negligible computational overhead to the detector.
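
*Editorial note, not part of this patch:* the association step described in the abstract (nearest neighbor search in the learned embedding space) can be sketched as a cosine-similarity lookup over L2-normalized instance embeddings. The function and tensor names below are hypothetical, and the bi-directional softmax matching that QDTrack actually uses is omitted for brevity.

```python
import torch
import torch.nn.functional as F


def nearest_neighbor_match(
    key_embeds: torch.Tensor, ref_embeds: torch.Tensor
) -> tuple[torch.Tensor, torch.Tensor]:
    """Match N current-frame detections to M tracked instances.

    Args:
        key_embeds: (N, C) embeddings of current-frame detections.
        ref_embeds: (M, C) embeddings of existing tracks (memory bank).

    Returns:
        Index of the most similar track for each detection and the
        corresponding cosine-similarity score.
    """
    key = F.normalize(key_embeds, dim=1)  # L2-normalize per instance
    ref = F.normalize(ref_embeds, dim=1)
    sim = key @ ref.T  # (N, M) cosine similarities
    scores, idx = sim.max(dim=1)  # nearest neighbor per detection
    return idx, scores
```

In the full model, these matches are further filtered by score thresholds and track-management heuristics before track IDs are assigned.
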
#### Results -| Detector | Base Network | mMOTA-val | mIDF1-val | ID Sw.-val | Scores-val | Config | Weights | Preds | Visuals | -| :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | -| Faster R-CNN | R-50-FPN | | | | [scores]() | [config]() | [model]() | [preds]() | [visuals]() | -| YOLOX-x | CSPNet | | | | [scores]() | [config]() | [model]() | [preds]() | [visuals]() | +| Detector | Base Network | Strong Augs. | mMOTA-val | mIDF1-val | ID Sw.-val | Scores-val | Config | Weights | Preds | Visuals | +| :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | +| Faster R-CNN | R-50-FPN | | 36.1 | 51.8 | 6165 | [scores]() | [config](./qdtrack/qdtrack_frcnn_r50_fpn_1x_bdd100k.py) | [model]() | [preds]() | [visuals]() | +| Faster R-CNN | R-50-FPN | ✓ | 37.7 | 52.7 | 7257 | [scores]() | [config](./qdtrack/qdtrack_frcnn_r50_fpn_augs_1x_bdd100k.py) | [model]() | [preds]() | [visuals]() | +| YOLOX-x | CSPNet | ✓ | 42.3 | 55.1 | 9164 | [scores]() | [config](./qdtrack/qdtrack_yolox_x_50e_bdd100k.py) | [model]() | [preds]() | [visuals]() | diff --git a/vis4d/zoo/bdd100k/__init__.py b/vis4d/zoo/bdd100k/__init__.py index c74dd994f..9c0794c32 100644 --- a/vis4d/zoo/bdd100k/__init__.py +++ b/vis4d/zoo/bdd100k/__init__.py @@ -7,7 +7,6 @@ ) from .qdtrack import ( qdtrack_frcnn_r50_fpn_1x_bdd100k, - qdtrack_yolox_s_50e_bdd100k, qdtrack_yolox_x_50e_bdd100k, ) from .semantic_fpn import ( @@ -27,6 +26,5 @@ "semantic_fpn_r50_80k_bdd100k": semantic_fpn_r50_80k_bdd100k, "semantic_fpn_r101_80k_bdd100k": semantic_fpn_r101_80k_bdd100k, "qdtrack_frcnn_r50_fpn_1x_bdd100k": qdtrack_frcnn_r50_fpn_1x_bdd100k, - "qdtrack_yolox_s_50e_bdd100k": qdtrack_yolox_s_50e_bdd100k, "qdtrack_yolox_x_50e_bdd100k": qdtrack_yolox_x_50e_bdd100k, } diff --git a/vis4d/zoo/bdd100k/qdtrack/data_yolox.py b/vis4d/zoo/bdd100k/qdtrack/data_yolox.py index 1d0b33655..265712cf0 100644 --- a/vis4d/zoo/bdd100k/qdtrack/data_yolox.py +++ b/vis4d/zoo/bdd100k/qdtrack/data_yolox.py @@ -34,6 +34,7 @@ MosaicBoxes2D, MosaicImages, ) +from vis4d.data.transforms.normalize import NormalizeImages from vis4d.data.transforms.pad import PadImages from vis4d.data.transforms.photometric import RandomHSV from vis4d.data.transforms.post_process import ( @@ -51,6 +52,7 @@ def get_train_dataloader( data_backend: None | ConfigDict, image_size: tuple[int, int], + normalize_image: bool, samples_per_gpu: int, workers_per_gpu: int, ) -> ConfigDict: @@ -141,36 +143,36 @@ def get_train_dataloader( [class_config(PostProcessBoxes2D, min_area=1.0)] ) + batch_transforms = [ + class_config(RandomHSV, same_on_batch=False), + class_config( + RandomApply, + transforms=[class_config(FlipImages), class_config(FlipBoxes2D)], + probability=0.5, + same_on_batch=False, + ), + class_config( + GenResizeParameters, + shape=image_size, + keep_ratio=True, + scale_range=(0.5, 1.5), + same_on_batch=False, + ), + class_config(ResizeImages), + class_config(ResizeBoxes2D), + class_config(GenCropParameters, shape=image_size, same_on_batch=False), + class_config(CropImages), + class_config(CropBoxes2D), + ] + if normalize_image: + batch_transforms += [ + class_config(NormalizeImages), + class_config(PadImages), + ] + else: + batch_transforms += [class_config(PadImages, value=114.0)] train_batchprocess_cfg = class_config( - compose, - transforms=[ - class_config(RandomHSV, same_on_batch=False), - class_config( - RandomApply, - transforms=[ - class_config(FlipImages), - class_config(FlipBoxes2D), - ], - probability=0.5, - same_on_batch=False, - ), - class_config( - GenResizeParameters, - 
shape=image_size, - keep_ratio=True, - scale_range=(0.5, 1.5), - same_on_batch=False, - ), - class_config(ResizeImages), - class_config(ResizeBoxes2D), - class_config( - GenCropParameters, shape=image_size, same_on_batch=False - ), - class_config(CropImages), - class_config(CropBoxes2D), - class_config(PadImages, value=114.0), - class_config(ToTensor), - ], + compose, transforms=batch_transforms + [class_config(ToTensor)] ) return class_config( @@ -192,6 +194,7 @@ def get_train_dataloader( def get_test_dataloader( data_backend: None | ConfigDict, image_size: tuple[int, int], + normalize_image: bool, samples_per_gpu: int, workers_per_gpu: int, ) -> ConfigDict: @@ -218,12 +221,15 @@ def get_test_dataloader( compose, transforms=preprocess_transforms ) + if normalize_image: + batch_transforms = [ + class_config(NormalizeImages), + class_config(PadImages), + ] + else: + batch_transforms = [class_config(PadImages, value=114.0)] test_batchprocess_cfg = class_config( - compose, - transforms=[ - class_config(PadImages, value=114.0), - class_config(ToTensor), - ], + compose, transforms=batch_transforms + [class_config(ToTensor)] ) test_dataset_cfg = class_config( @@ -242,6 +248,7 @@ def get_test_dataloader( def get_bdd100k_track_cfg( data_backend: None | ConfigDict = None, image_size: tuple[int, int] = (800, 1440), + normalize_image: bool = False, samples_per_gpu: int = 2, workers_per_gpu: int = 2, ) -> DataConfig: @@ -251,6 +258,7 @@ def get_bdd100k_track_cfg( data.train_dataloader = get_train_dataloader( data_backend=data_backend, image_size=image_size, + normalize_image=normalize_image, samples_per_gpu=samples_per_gpu, workers_per_gpu=workers_per_gpu, ) @@ -258,6 +266,7 @@ def get_bdd100k_track_cfg( data.test_dataloader = get_test_dataloader( data_backend=data_backend, image_size=image_size, + normalize_image=normalize_image, samples_per_gpu=1, workers_per_gpu=1, ) diff --git a/vis4d/zoo/bdd100k/qdtrack/qdtrack_frcnn_r50_fpn_1x_bdd100k.py b/vis4d/zoo/bdd100k/qdtrack/qdtrack_frcnn_r50_fpn_1x_bdd100k.py index ccfb8e694..6cf0d5cd1 100644 --- a/vis4d/zoo/bdd100k/qdtrack/qdtrack_frcnn_r50_fpn_1x_bdd100k.py +++ b/vis4d/zoo/bdd100k/qdtrack/qdtrack_frcnn_r50_fpn_1x_bdd100k.py @@ -1,5 +1,5 @@ # pylint: disable=duplicate-code -"""QDTrack-FasterRCNN BDD100K.""" +"""QDTrack with Faster R-CNN on BDD100K.""" from __future__ import annotations import lightning.pytorch as pl @@ -44,8 +44,8 @@ def get_config() -> ExperimentConfig: # High level hyper parameters params = ExperimentParameters() - params.samples_per_gpu = 2 - params.workers_per_gpu = 2 + params.samples_per_gpu = 4 # batch size = 4 GPUs * 4 samples per GPU = 16 + params.workers_per_gpu = 4 params.lr = 0.02 params.num_epochs = 12 config.params = params @@ -70,9 +70,7 @@ def get_config() -> ExperimentConfig: ) config.model, config.loss = get_qdtrack_cfg( - num_classes=num_classes, - basemodel=basemodel, - # weights="https://dl.cv.ethz.ch/vis4d/qdtrack_bdd100k_frcnn_res50_heavy_augs.pt", # pylint: disable=line-too-long + num_classes=num_classes, basemodel=basemodel ) ###################################################### diff --git a/vis4d/zoo/bdd100k/qdtrack/qdtrack_yolox_s_50e_bdd100k.py b/vis4d/zoo/bdd100k/qdtrack/qdtrack_frcnn_r50_fpn_augs_1x_bdd100k.py similarity index 67% rename from vis4d/zoo/bdd100k/qdtrack/qdtrack_yolox_s_50e_bdd100k.py rename to vis4d/zoo/bdd100k/qdtrack/qdtrack_frcnn_r50_fpn_augs_1x_bdd100k.py index bca12a322..98b3997c2 100644 --- a/vis4d/zoo/bdd100k/qdtrack/qdtrack_yolox_s_50e_bdd100k.py +++ 
b/vis4d/zoo/bdd100k/qdtrack/qdtrack_frcnn_r50_fpn_augs_1x_bdd100k.py @@ -1,20 +1,17 @@ # pylint: disable=duplicate-code -"""QDTrack-YOLOX BDD100K.""" +"""QDTrack with Faster R-CNN on BDD100K.""" from __future__ import annotations -import pytorch_lightning as pl from lightning.pytorch.callbacks import ModelCheckpoint +from torch.optim import SGD +from torch.optim.lr_scheduler import LinearLR, MultiStepLR from vis4d.config import class_config from vis4d.config.common.datasets.bdd100k import CONN_BDD100K_TRACK_EVAL from vis4d.config.common.models.qdtrack import ( CONN_BBOX_2D_TEST, - CONN_BBOX_2D_YOLOX_TRAIN, - get_qdtrack_yolox_cfg, -) -from vis4d.config.common.models.yolox import ( - get_yolox_callbacks_cfg, - get_yolox_optimizers_cfg, + CONN_BBOX_2D_TRAIN, + get_qdtrack_cfg, ) from vis4d.config.default import ( get_default_callbacks_cfg, @@ -23,11 +20,17 @@ ) from vis4d.config.default.data_connectors import CONN_BBOX_2D_TRACK_VIS from vis4d.config.typing import ExperimentConfig, ExperimentParameters +from vis4d.config.util import get_lr_scheduler_cfg, get_optimizer_cfg from vis4d.data.datasets.bdd100k import bdd100k_track_map from vis4d.data.io.hdf5 import HDF5Backend -from vis4d.engine.callbacks import EvaluatorCallback, VisualizerCallback +from vis4d.engine.callbacks import ( + EvaluatorCallback, + VisualizerCallback, + YOLOXModeSwitchCallback, +) from vis4d.engine.connectors import CallbackConnector, DataConnector from vis4d.eval.bdd100k import BDD100KTrackEvaluator +from vis4d.op.base import ResNet from vis4d.vis.image import BoundingBoxVisualizer from vis4d.zoo.bdd100k.qdtrack.data_yolox import get_bdd100k_track_cfg @@ -41,20 +44,14 @@ def get_config() -> ExperimentConfig: ###################################################### ## General Config ## ###################################################### - config = get_default_cfg(exp_name="qdtrack_yolox_s_50e_bdd100k") - config.checkpoint_period = 5 - config.check_val_every_n_epoch = 5 - - # ckpt_path = ( - # "vis4d-workspace/QDTrack/pretrained/qdtrack-yolox-ema_bdd100k.ckpt" - # ) + config = get_default_cfg(exp_name="qdtrack_frcnn_r50_fpn_augs_1x_bdd100k") - # Hyper Parameters + # High level hyper parameters params = ExperimentParameters() - params.samples_per_gpu = 2 - params.workers_per_gpu = 4 - params.lr = 0.0005 - params.num_epochs = 50 + params.samples_per_gpu = 4 # batch size = 4 GPUs * 4 samples per GPU = 16 + params.workers_per_gpu = 8 + params.lr = 0.02 + params.num_epochs = 12 config.params = params ###################################################### @@ -64,6 +61,8 @@ def get_config() -> ExperimentConfig: config.data = get_bdd100k_track_cfg( data_backend=data_backend, + image_size=(720, 1280), + normalize_image=True, samples_per_gpu=params.samples_per_gpu, workers_per_gpu=params.workers_per_gpu, ) @@ -72,27 +71,40 @@ def get_config() -> ExperimentConfig: ## MODEL ## ###################################################### num_classes = len(bdd100k_track_map) - weights = ( - "mmdet://yolox/yolox_s_8x8_300e_coco/" - "yolox_s_8x8_300e_coco_20211121_095711-4592a793.pth" + basemodel = class_config( + ResNet, resnet_name="resnet50", pretrained=True, trainable_layers=3 ) - config.model, config.loss = get_qdtrack_yolox_cfg( - num_classes, "small", weights=weights + + config.model, config.loss = get_qdtrack_cfg( + num_classes=num_classes, basemodel=basemodel ) ###################################################### ## OPTIMIZERS ## ###################################################### - num_last_epochs, warmup_epochs = 10, 1 - 
config.optimizers = get_yolox_optimizers_cfg( - params.lr, params.num_epochs, warmup_epochs, num_last_epochs - ) + config.optimizers = [ + get_optimizer_cfg( + optimizer=class_config( + SGD, lr=params.lr, momentum=0.9, weight_decay=0.0001 + ), + lr_schedulers=[ + get_lr_scheduler_cfg( + class_config(LinearLR, start_factor=0.1, total_iters=1000), + end=1000, + epoch_based=False, + ), + get_lr_scheduler_cfg( + class_config(MultiStepLR, milestones=[8, 11], gamma=0.1), + ), + ], + ) + ] ###################################################### ## DATA CONNECTOR ## ###################################################### config.train_data_connector = class_config( - DataConnector, key_mapping=CONN_BBOX_2D_YOLOX_TRAIN + DataConnector, key_mapping=CONN_BBOX_2D_TRAIN ) config.test_data_connector = class_config( @@ -103,20 +115,18 @@ def get_config() -> ExperimentConfig: ## CALLBACKS ## ###################################################### # Logger and Checkpoint - callbacks = get_default_callbacks_cfg( - config.output_dir, refresh_rate=config.log_every_n_steps - ) + callbacks = get_default_callbacks_cfg(config.output_dir) - # YOLOX callbacks - callbacks += get_yolox_callbacks_cfg( - switch_epoch=params.num_epochs - num_last_epochs, num_sizes=0 - ) + # Mode switch for strong augmentations + callbacks += [class_config(YOLOXModeSwitchCallback, switch_epoch=9)] # Visualizer callbacks.append( class_config( VisualizerCallback, - visualizer=class_config(BoundingBoxVisualizer, vis_freq=500), + visualizer=class_config( + BoundingBoxVisualizer, vis_freq=500, image_mode="BGR" + ), save_prefix=config.output_dir, test_connector=class_config( CallbackConnector, key_mapping=CONN_BBOX_2D_TRACK_VIS @@ -146,24 +156,19 @@ def get_config() -> ExperimentConfig: # PL Trainer args pl_trainer = get_default_pl_trainer_cfg(config) pl_trainer.max_epochs = params.num_epochs - pl_trainer.check_val_every_n_epoch = config.check_val_every_n_epoch pl_trainer.checkpoint_callback = class_config( ModelCheckpoint, dirpath=config.get_ref("output_dir") + "/checkpoints", verbose=True, save_last=True, save_on_train_epoch_end=True, - every_n_epochs=config.checkpoint_period, - save_top_k=5, + every_n_epochs=1, + save_top_k=4, mode="max", monitor="step", ) pl_trainer.wandb = True - pl_trainer.precision = "16-mixed" + pl_trainer.gradient_clip_val = 35 config.pl_trainer = pl_trainer - # PL Callbacks - pl_callbacks: list[pl.callbacks.Callback] = [] - config.pl_callbacks = pl_callbacks - return config.value_mode() diff --git a/vis4d/zoo/bdd100k/qdtrack/qdtrack_yolox_x_50e_bdd100k.py b/vis4d/zoo/bdd100k/qdtrack/qdtrack_yolox_x_50e_bdd100k.py index 3b1fe21db..340fb07db 100644 --- a/vis4d/zoo/bdd100k/qdtrack/qdtrack_yolox_x_50e_bdd100k.py +++ b/vis4d/zoo/bdd100k/qdtrack/qdtrack_yolox_x_50e_bdd100k.py @@ -1,5 +1,5 @@ # pylint: disable=duplicate-code -"""QDTrack-YOLOX BDD100K.""" +"""QDTrack with YOLOX-x on BDD100K.""" from __future__ import annotations import pytorch_lightning as pl @@ -9,7 +9,7 @@ from vis4d.config.common.datasets.bdd100k import CONN_BDD100K_TRACK_EVAL from vis4d.config.common.models.qdtrack import ( CONN_BBOX_2D_TEST, - CONN_BBOX_2D_YOLOX_TRAIN, + CONN_BBOX_2D_TRAIN, get_qdtrack_yolox_cfg, ) from vis4d.config.common.models.yolox import ( @@ -45,16 +45,12 @@ def get_config() -> ExperimentConfig: config.checkpoint_period = 5 config.check_val_every_n_epoch = 5 - # ckpt_path = ( - # "vis4d-workspace/QDTrack/pretrained/qdtrack-yolox-ema_bdd100k.ckpt" - # ) - # Hyper Parameters params = ExperimentParameters() - params.samples_per_gpu = 5 - 
params.workers_per_gpu = 4 - params.lr = 0.000625 - params.num_epochs = 50 + params.samples_per_gpu = 8 # batch size = 8 GPUs * 8 samples per GPU = 64 + params.workers_per_gpu = 8 + params.lr = 0.001 + params.num_epochs = 25 config.params = params ###################################################### @@ -83,16 +79,17 @@ def get_config() -> ExperimentConfig: ###################################################### ## OPTIMIZERS ## ###################################################### - num_last_epochs, warmup_epochs = 10, 1 + # we use a schedule with 50 epochs, but only train for 25 epochs + num_total_epochs, num_last_epochs = 50, 10 config.optimizers = get_yolox_optimizers_cfg( - params.lr, params.num_epochs, warmup_epochs, num_last_epochs + params.lr, num_total_epochs, 1, num_last_epochs ) ###################################################### ## DATA CONNECTOR ## ###################################################### config.train_data_connector = class_config( - DataConnector, key_mapping=CONN_BBOX_2D_YOLOX_TRAIN + DataConnector, key_mapping=CONN_BBOX_2D_TRAIN ) config.test_data_connector = class_config( @@ -109,14 +106,16 @@ def get_config() -> ExperimentConfig: # YOLOX callbacks callbacks += get_yolox_callbacks_cfg( - switch_epoch=params.num_epochs - num_last_epochs, num_sizes=0 + switch_epoch=num_total_epochs - num_last_epochs, num_sizes=0 ) # Visualizer callbacks.append( class_config( VisualizerCallback, - visualizer=class_config(BoundingBoxVisualizer, vis_freq=500), + visualizer=class_config( + BoundingBoxVisualizer, vis_freq=500, image_mode="BGR" + ), save_prefix=config.output_dir, test_connector=class_config( CallbackConnector, key_mapping=CONN_BBOX_2D_TRACK_VIS