From 59534e9d014f7517cd13ec3b711292b9995948e3 Mon Sep 17 00:00:00 2001 From: RoyYang0714 Date: Tue, 9 Jul 2024 18:21:15 +0200 Subject: [PATCH 01/19] fix: Fix reduce_mean and nms. --- vis4d/common/distributed.py | 2 +- vis4d/op/box/box2d.py | 2 +- vis4d/zoo/bevformer/bevformer_base.py | 2 +- vis4d/zoo/bevformer/bevformer_tiny.py | 2 +- vis4d/zoo/cc_3dt/cc_3dt_frcnn_r101_fpn_kf3d_24e_nusc.py | 2 +- vis4d/zoo/cc_3dt/cc_3dt_frcnn_r50_fpn_kf3d_12e_nusc.py | 2 +- vis4d/zoo/cc_3dt/velo_lstm_bevformer_base_100e_nusc.py | 2 +- vis4d/zoo/cc_3dt/velo_lstm_frcnn_r101_fpn_100e_nusc.py | 2 +- vis4d/zoo/qdtrack/qdtrack_yolox_x_25e_bdd100k.py | 2 +- vis4d/zoo/vit/vit_small_imagenet.py | 2 +- vis4d/zoo/vit/vit_tiny_imagenet.py | 2 +- 11 files changed, 11 insertions(+), 11 deletions(-) diff --git a/vis4d/common/distributed.py b/vis4d/common/distributed.py index 7241c67c..8ba797b8 100644 --- a/vis4d/common/distributed.py +++ b/vis4d/common/distributed.py @@ -335,7 +335,7 @@ def reduce_mean(tensor: torch.Tensor) -> torch.Tensor: if not (dist.is_available() and dist.is_initialized()): return tensor tensor = tensor.clone() - dist.all_reduce(tensor.div_(dist.get_world_size()), op=dist.ReduceOp.SUM) + dist.all_reduce(tensor.div_(get_world_size()), op=dist.ReduceOp.SUM) return tensor diff --git a/vis4d/op/box/box2d.py b/vis4d/op/box/box2d.py index 3359c21e..906f8764 100644 --- a/vis4d/op/box/box2d.py +++ b/vis4d/op/box/box2d.py @@ -420,7 +420,7 @@ def multiclass_nms( labels = torch.arange(num_classes, dtype=torch.long, device=scores.device) labels = labels.view(1, -1).expand_as(scores) - bboxes = bboxes.view(-1, 4) + bboxes = bboxes.reshape(-1, 4) scores = scores.reshape(-1) labels = labels.reshape(-1) diff --git a/vis4d/zoo/bevformer/bevformer_base.py b/vis4d/zoo/bevformer/bevformer_base.py index c3120202..9b39d412 100644 --- a/vis4d/zoo/bevformer/bevformer_base.py +++ b/vis4d/zoo/bevformer/bevformer_base.py @@ -2,7 +2,7 @@ """BEVFormer base with ResNet-101-DCN backbone.""" from 
__future__ import annotations -import pytorch_lightning as pl +import lightning.pytorch as pl from torch.optim import AdamW from torch.optim.lr_scheduler import CosineAnnealingLR, LinearLR diff --git a/vis4d/zoo/bevformer/bevformer_tiny.py b/vis4d/zoo/bevformer/bevformer_tiny.py index 94245518..cdc984bd 100644 --- a/vis4d/zoo/bevformer/bevformer_tiny.py +++ b/vis4d/zoo/bevformer/bevformer_tiny.py @@ -2,7 +2,7 @@ """BEVFormer tiny with ResNet-50 backbone.""" from __future__ import annotations -import pytorch_lightning as pl +import lightning.pytorch as pl from torch.optim import AdamW from torch.optim.lr_scheduler import CosineAnnealingLR, LinearLR diff --git a/vis4d/zoo/cc_3dt/cc_3dt_frcnn_r101_fpn_kf3d_24e_nusc.py b/vis4d/zoo/cc_3dt/cc_3dt_frcnn_r101_fpn_kf3d_24e_nusc.py index 6f685615..3abc1132 100644 --- a/vis4d/zoo/cc_3dt/cc_3dt_frcnn_r101_fpn_kf3d_24e_nusc.py +++ b/vis4d/zoo/cc_3dt/cc_3dt_frcnn_r101_fpn_kf3d_24e_nusc.py @@ -2,7 +2,7 @@ """CC-3DT with Faster-RCNN ResNet-101 detector using KF3D motion model.""" from __future__ import annotations -import pytorch_lightning as pl +import lightning.pytorch as pl from torch.optim import SGD from torch.optim.lr_scheduler import LinearLR, MultiStepLR diff --git a/vis4d/zoo/cc_3dt/cc_3dt_frcnn_r50_fpn_kf3d_12e_nusc.py b/vis4d/zoo/cc_3dt/cc_3dt_frcnn_r50_fpn_kf3d_12e_nusc.py index 1354b44a..2bf34a63 100644 --- a/vis4d/zoo/cc_3dt/cc_3dt_frcnn_r50_fpn_kf3d_12e_nusc.py +++ b/vis4d/zoo/cc_3dt/cc_3dt_frcnn_r50_fpn_kf3d_12e_nusc.py @@ -2,7 +2,7 @@ """CC-3DT with Faster-RCNN ResNet-50 detector using KF3D motion model.""" from __future__ import annotations -import pytorch_lightning as pl +import lightning.pytorch as pl from torch.optim import SGD from torch.optim.lr_scheduler import LinearLR, MultiStepLR diff --git a/vis4d/zoo/cc_3dt/velo_lstm_bevformer_base_100e_nusc.py b/vis4d/zoo/cc_3dt/velo_lstm_bevformer_base_100e_nusc.py index 55d6eac9..163c3c19 100644 --- a/vis4d/zoo/cc_3dt/velo_lstm_bevformer_base_100e_nusc.py +++ 
b/vis4d/zoo/cc_3dt/velo_lstm_bevformer_base_100e_nusc.py @@ -2,7 +2,7 @@ """CC-3DT VeloLSTM for BEVFormer on nuScenes.""" from __future__ import annotations -import pytorch_lightning as pl +import lightning.pytorch as pl from torch.optim import Adam from torch.optim.lr_scheduler import MultiStepLR diff --git a/vis4d/zoo/cc_3dt/velo_lstm_frcnn_r101_fpn_100e_nusc.py b/vis4d/zoo/cc_3dt/velo_lstm_frcnn_r101_fpn_100e_nusc.py index 01a8dbe0..86568c69 100644 --- a/vis4d/zoo/cc_3dt/velo_lstm_frcnn_r101_fpn_100e_nusc.py +++ b/vis4d/zoo/cc_3dt/velo_lstm_frcnn_r101_fpn_100e_nusc.py @@ -2,7 +2,7 @@ """CC-3DT VeloLSTM on nuScenes.""" from __future__ import annotations -import pytorch_lightning as pl +import lightning.pytorch as pl from torch.optim import Adam from torch.optim.lr_scheduler import MultiStepLR diff --git a/vis4d/zoo/qdtrack/qdtrack_yolox_x_25e_bdd100k.py b/vis4d/zoo/qdtrack/qdtrack_yolox_x_25e_bdd100k.py index 0cfcbabd..01bbfa8d 100644 --- a/vis4d/zoo/qdtrack/qdtrack_yolox_x_25e_bdd100k.py +++ b/vis4d/zoo/qdtrack/qdtrack_yolox_x_25e_bdd100k.py @@ -2,7 +2,7 @@ """QDTrack with YOLOX-x on BDD100K.""" from __future__ import annotations -import pytorch_lightning as pl +import lightning.pytorch as pl from lightning.pytorch.callbacks import ModelCheckpoint from vis4d.config import class_config diff --git a/vis4d/zoo/vit/vit_small_imagenet.py b/vis4d/zoo/vit/vit_small_imagenet.py index 85ae78f0..7d3e1ace 100644 --- a/vis4d/zoo/vit/vit_small_imagenet.py +++ b/vis4d/zoo/vit/vit_small_imagenet.py @@ -2,7 +2,7 @@ """VIT ImageNet-1k training example.""" from __future__ import annotations -import pytorch_lightning as pl +import lightning.pytorch as pl from torch import nn from torch.optim import AdamW from torch.optim.lr_scheduler import CosineAnnealingLR, LinearLR diff --git a/vis4d/zoo/vit/vit_tiny_imagenet.py b/vis4d/zoo/vit/vit_tiny_imagenet.py index 0d7f8c58..1ab99401 100644 --- a/vis4d/zoo/vit/vit_tiny_imagenet.py +++ b/vis4d/zoo/vit/vit_tiny_imagenet.py @@ -2,7 +2,7 @@ 
"""VIT ImageNet-1k training example.""" from __future__ import annotations -import pytorch_lightning as pl +import lightning.pytorch as pl from torch import nn from torch.optim import AdamW from torch.optim.lr_scheduler import CosineAnnealingLR, LinearLR From 91b2fca47f854092322d2a689a1a3e5384adb28c Mon Sep 17 00:00:00 2001 From: RoyYang0714 Date: Wed, 10 Jul 2024 14:48:02 +0200 Subject: [PATCH 02/19] fix: Fix lint. --- requirements/install.txt | 2 +- requirements/viewer.txt | 1 + vis4d/data/datasets/util.py | 5 +++-- vis4d/vis/image/canvas/pillow_backend.py | 7 +++++-- 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/requirements/install.txt b/requirements/install.txt index c1f6cfe5..8f47770e 100644 --- a/requirements/install.txt +++ b/requirements/install.txt @@ -7,7 +7,7 @@ h5py jsonargparse[signatures] lightning ml_collections==0.1.1 # Config interface. Need exact version since we overwrite internal functions -numpy>=1.21.0 +numpy>=1.21.0,<2.0.0 opencv-python pandas pillow diff --git a/requirements/viewer.txt b/requirements/viewer.txt index 6c0ba1c1..2cf81fd6 100644 --- a/requirements/viewer.txt +++ b/requirements/viewer.txt @@ -1 +1,2 @@ open3d +matplotlib>3.9 diff --git a/vis4d/data/datasets/util.py b/vis4d/data/datasets/util.py index b4ffbcb9..aeb377ff 100644 --- a/vis4d/data/datasets/util.py +++ b/vis4d/data/datasets/util.py @@ -54,8 +54,9 @@ def im_decode( "L", }, f"{mode} not supported for image decoding!" if backend == "PIL": - pil_img = Image.open(BytesIO(bytearray(im_bytes))) - pil_img = ImageOps.exif_transpose(pil_img) # type: ignore + pil_img_file = Image.open(BytesIO(bytearray(im_bytes))) + pil_img = ImageOps.exif_transpose(pil_img_file) + assert pil_img is not None, "Image could not be loaded!" 
if pil_img.mode == "L": # pragma: no cover if mode == "L": img: NDArrayUI8 = np.array(pil_img)[..., None] diff --git a/vis4d/vis/image/canvas/pillow_backend.py b/vis4d/vis/image/canvas/pillow_backend.py index 0dbb7c70..05bb0252 100644 --- a/vis4d/vis/image/canvas/pillow_backend.py +++ b/vis4d/vis/image/canvas/pillow_backend.py @@ -45,11 +45,14 @@ def create_canvas( Raises: ValueError: If the canvas is not initialized. """ - if image is None and image_hw is None: - raise ValueError("Image or Image Shapes required to create canvas") if image_hw is not None: white_image = np.ones([*image_hw, 3]) * 255 image = white_image.astype(np.uint8) + else: + assert ( + image is not None + ), "Image or Image Shapes required to create canvas" + self._image = Image.fromarray(image) self._image_draw = ImageDraw.Draw(self._image) From 96871a50dbd2caad9bdcf86d9f2d6487f430d974 Mon Sep 17 00:00:00 2001 From: RoyYang0714 Date: Fri, 26 Jul 2024 15:41:47 +0200 Subject: [PATCH 03/19] feat: Add VIS flag and update transformer related code. 
--- .../vis/image/bounding_box_visualizer_test.py | 4 +- vis4d/common/util.py | 2 +- vis4d/engine/experiment.py | 16 ++- vis4d/engine/flag.py | 6 + vis4d/engine/run.py | 12 +- vis4d/op/layer/attention.py | 7 +- vis4d/op/layer/ms_deform_attn.py | 24 +++- vis4d/op/layer/positional_encoding.py | 120 +++++------------- vis4d/op/layer/transformer.py | 2 + vis4d/pl/run.py | 26 +++- vis4d/vis/image/bounding_box_visualizer.py | 12 +- 11 files changed, 129 insertions(+), 102 deletions(-) diff --git a/tests/vis/image/bounding_box_visualizer_test.py b/tests/vis/image/bounding_box_visualizer_test.py index 7626409e..fda92e23 100644 --- a/tests/vis/image/bounding_box_visualizer_test.py +++ b/tests/vis/image/bounding_box_visualizer_test.py @@ -32,8 +32,10 @@ def setUp(self) -> None: self.scores: list[NDArrayF64] = testcase_gt["scores"] self.tracks = [np.arange(len(b)) for b in self.boxes] + cat_mapping = {v: k for k, v in COCO_COLOR_MAPPING.items()} + self.vis = BoundingBoxVisualizer( - n_colors=20, class_id_mapping=COCO_COLOR_MAPPING, vis_freq=1 + n_colors=20, cat_mapping=cat_mapping, vis_freq=1 ) def tearDown(self) -> None: diff --git a/vis4d/common/util.py b/vis4d/common/util.py index 75d51ce4..38dca5d7 100644 --- a/vis4d/common/util.py +++ b/vis4d/common/util.py @@ -73,7 +73,7 @@ def set_tf32(use_tf32: bool, precision: str) -> None: # pragma: no cover def init_random_seed() -> int: """Initialize random seed for the experiment.""" - return np.random.randint(2**31) + return int(np.random.randint(2**31)) def set_random_seed(seed: int, deterministic: bool = False) -> None: diff --git a/vis4d/engine/experiment.py b/vis4d/engine/experiment.py index a0781ab1..68c05046 100644 --- a/vis4d/engine/experiment.py +++ b/vis4d/engine/experiment.py @@ -33,6 +33,7 @@ from vis4d.common.util import init_random_seed, set_random_seed, set_tf32 from vis4d.config import instantiate_classes from vis4d.config.typing import ExperimentConfig +from vis4d.engine.callbacks import VisualizerCallback from 
.optim import set_up_optimizers from .parser import pprints_config @@ -87,6 +88,7 @@ def run_experiment( use_slurm: bool = False, ckpt_path: str | None = None, resume: bool = False, + vis: bool = False, ) -> None: """Entry point for running a single experiment. @@ -99,6 +101,7 @@ def run_experiment( required environment variables for slurm. ckpt_path (str | None): Path to a checkpoint to load. resume (bool): If set, resume training from the checkpoint. + vis (bool): If set, enable visualizer callback. Raises: ValueError: If `mode` is not `fit` or `test`. @@ -141,7 +144,18 @@ def run_experiment( ) # Callbacks - callbacks = [instantiate_classes(cb) for cb in config.callbacks] + callbacks = [] + for cb in config.callbacks: + callback = instantiate_classes(cb) + + if not vis and isinstance(callback, VisualizerCallback): + rank_zero_info( + "VisualizerCallback is not used. " + "Please set --vis=True to use it." + ) + continue + + callbacks.append(callback) # Setup DDP & seed seed = init_random_seed() if config.seed == -1 else config.seed diff --git a/vis4d/engine/flag.py b/vis4d/engine/flag.py index f127f190..fa4fe376 100644 --- a/vis4d/engine/flag.py +++ b/vis4d/engine/flag.py @@ -21,6 +21,11 @@ _SLURM = flags.DEFINE_bool( "slurm", default=False, help="If set, setup slurm running jobs." 
) +_VIS = flags.DEFINE_bool( + "vis", + default=False, + help="If set, running visualization using visualizer callback.", +) __all__ = [ @@ -31,4 +36,5 @@ "_SHOW_CONFIG", "_SWEEP", "_SLURM", + "_VIS", ] diff --git a/vis4d/engine/run.py b/vis4d/engine/run.py index c0033ab4..2e3a95c3 100644 --- a/vis4d/engine/run.py +++ b/vis4d/engine/run.py @@ -11,7 +11,16 @@ from vis4d.config.typing import ExperimentConfig from .experiment import run_experiment -from .flag import _CKPT, _CONFIG, _GPUS, _RESUME, _SHOW_CONFIG, _SLURM, _SWEEP +from .flag import ( + _CKPT, + _CONFIG, + _GPUS, + _RESUME, + _SHOW_CONFIG, + _SLURM, + _SWEEP, + _VIS, +) def main(argv: ArgsType) -> None: @@ -68,6 +77,7 @@ def main(argv: ArgsType) -> None: _SLURM.value, _CKPT.value, _RESUME.value, + _VIS.value, ) diff --git a/vis4d/op/layer/attention.py b/vis4d/op/layer/attention.py index 4a539ee3..31b09d51 100644 --- a/vis4d/op/layer/attention.py +++ b/vis4d/op/layer/attention.py @@ -120,6 +120,7 @@ def __init__( super().__init__() self.batch_first = batch_first self.embed_dims = embed_dims + self.num_heads = num_heads self.attn = nn.MultiheadAttention( embed_dims, num_heads, dropout=attn_drop, **kwargs @@ -193,8 +194,10 @@ def forward( key_pos = query_pos else: rank_zero_warn( - "position encoding of key is" - + f"missing in {self.__class__.__name__}." + f"Position encoding of key in {self.__class__.__name__}" + + "is missing, and positional encodeing of query has " + + "has different shape and cannot be usde for key. " + + "It it is not desired, please provide key_pos." 
) if query_pos is not None: diff --git a/vis4d/op/layer/ms_deform_attn.py b/vis4d/op/layer/ms_deform_attn.py index 9acb0556..3d3589c2 100644 --- a/vis4d/op/layer/ms_deform_attn.py +++ b/vis4d/op/layer/ms_deform_attn.py @@ -223,12 +223,15 @@ def __init__( is_power_of_2(d_model // n_heads) self.d_model = d_model - self.embed_dims = d_model self.n_levels = n_levels self.n_heads = n_heads self.n_points = n_points self.im2col_step = im2col_step + # Aligned Attributes to MHA + self.embed_dims = d_model + self.num_heads = n_heads + self.sampling_offsets = nn.Linear( d_model, n_heads * n_levels * n_points * 2 ) @@ -359,3 +362,22 @@ def forward( output = self.output_proj(output) return output + + def __call__( + self, + query: Tensor, + reference_points: Tensor, + input_flatten: Tensor, + input_spatial_shapes: Tensor, + input_level_start_index: Tensor, + input_padding_mask: Tensor | None = None, + ) -> Tensor: + """Type definition for call implementation.""" + return self._call_impl( + query, + reference_points, + input_flatten, + input_spatial_shapes, + input_level_start_index, + input_padding_mask, + ) diff --git a/vis4d/op/layer/positional_encoding.py b/vis4d/op/layer/positional_encoding.py index c931c4b5..37ccfa7c 100644 --- a/vis4d/op/layer/positional_encoding.py +++ b/vis4d/op/layer/positional_encoding.py @@ -3,6 +3,8 @@ Modified from mmdetection (https://github.com/open-mmlab/mmdetection). """ +from __future__ import annotations + import math import torch @@ -59,24 +61,45 @@ def __init__( self.eps = eps self.offset = offset - def forward(self, mask: Tensor) -> Tensor: + def forward( + self, mask: Tensor | None, inputs: Tensor | None = None + ) -> Tensor: """Forward function for `SinePositionalEncoding`. Args: - mask (Tensor): ByteTensor mask. Non-zero values representing + mask (Tensor | None): ByteTensor mask. Non-zero values representing ignored positions, while zero values means valid positions - for this image. Shape [bs, h, w]. + for this image. 
Shape [bs, h, w]. If None, it means single + image or batch image with no padding. + inputs (Tensor | None): The input tensor. It mask is None, this + input tensor is required to get the shape of the input image. Returns: pos (Tensor): Returned position embedding with shape [bs, num_feats*2, h, w]. """ - # For convenience of exporting to ONNX, it's required to convert - # `masks` from bool to int. - mask = mask.to(torch.int) - not_mask = 1 - mask # logical_not - y_embed = not_mask.cumsum(1, dtype=torch.float32) - x_embed = not_mask.cumsum(2, dtype=torch.float32) + if mask is not None: + # For convenience of exporting to ONNX, it's required to convert + # `masks` from bool to int. + mask = mask.to(torch.int) + b, h, w = mask.size() + device = mask.device + not_mask = 1 - mask # logical_not + y_embed = not_mask.cumsum(1, dtype=torch.float32) + x_embed = not_mask.cumsum(2, dtype=torch.float32) + else: + # single image or batch image with no padding + assert isinstance(inputs, Tensor) + b, _, h, w = inputs.shape + device = inputs.device + x_embed = torch.arange( + 1, w + 1, dtype=torch.float32, device=device + ) + x_embed = x_embed.view(1, 1, -1).repeat(b, h, 1) + y_embed = torch.arange( + 1, h + 1, dtype=torch.float32, device=device + ) + y_embed = y_embed.view(1, -1, 1).repeat(b, 1, w) if self.normalize: y_embed = ( (y_embed + self.offset) @@ -89,13 +112,13 @@ def forward(self, mask: Tensor) -> Tensor: * self.scale ) dim_t = torch.arange( - self.num_feats, dtype=torch.float32, device=mask.device + self.num_feats, dtype=torch.float32, device=device ) dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_feats) pos_x = x_embed[:, :, :, None] / dim_t pos_y = y_embed[:, :, :, None] / dim_t # use `view` instead of `flatten` for dynamically exporting to ONNX - b, h, w = mask.size() + pos_x = torch.stack( (pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()), dim=4 ).view(b, h, w, -1) @@ -167,78 +190,3 @@ def forward(self, mask: Tensor) -> Tensor: 
.repeat(mask.shape[0], 1, 1, 1) ) return pos - - -class SinePositionalEncoding3D(SinePositionalEncoding): - """3D Position encoding with sine and cosine functions.""" - - def forward(self, mask: Tensor) -> Tensor: - """Forward function for `SinePositionalEncoding3D`. - - Args: - mask (Tensor): ByteTensor mask. Non-zero values representing - ignored positions, while zero values means valid positions - for this image. Shape [bs, t, h, w]. - - Returns: - pos (Tensor): Returned position embedding with shape - [bs, num_feats*2, h, w]. - """ - assert mask.dim() == 4, ( - f"{mask.shape} should be a 4-dimensional Tensor," - f" got {mask.dim()}-dimensional Tensor instead " - ) - # For convenience of exporting to ONNX, it's required to convert - # `masks` from bool to int. - mask = mask.to(torch.int) - not_mask = 1 - mask # logical_not - z_embed = not_mask.cumsum(1, dtype=torch.float32) - y_embed = not_mask.cumsum(2, dtype=torch.float32) - x_embed = not_mask.cumsum(3, dtype=torch.float32) - if self.normalize: - z_embed = ( - (z_embed + self.offset) - / (z_embed[:, -1:, :, :] + self.eps) - * self.scale - ) - y_embed = ( - (y_embed + self.offset) - / (y_embed[:, :, -1:, :] + self.eps) - * self.scale - ) - x_embed = ( - (x_embed + self.offset) - / (x_embed[:, :, :, -1:] + self.eps) - * self.scale - ) - dim_t = torch.arange( - self.num_feats, dtype=torch.float32, device=mask.device - ) - dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_feats) - - dim_t_z = torch.arange( - (self.num_feats * 2), dtype=torch.float32, device=mask.device - ) - dim_t_z = self.temperature ** ( - 2 * (dim_t_z // 2) / (self.num_feats * 2) - ) - - pos_x = x_embed[:, :, :, :, None] / dim_t - pos_y = y_embed[:, :, :, :, None] / dim_t - pos_z = z_embed[:, :, :, :, None] / dim_t_z - # use `view` instead of `flatten` for dynamically exporting to ONNX - b, t, h, w = mask.size() - pos_x = torch.stack( - (pos_x[:, :, :, :, 0::2].sin(), pos_x[:, :, :, :, 1::2].cos()), - dim=5, - ).view(b, t, h, w, -1) - 
pos_y = torch.stack( - (pos_y[:, :, :, :, 0::2].sin(), pos_y[:, :, :, :, 1::2].cos()), - dim=5, - ).view(b, t, h, w, -1) - pos_z = torch.stack( - (pos_z[:, :, :, :, 0::2].sin(), pos_z[:, :, :, :, 1::2].cos()), - dim=5, - ).view(b, t, h, w, -1) - pos = (torch.cat((pos_y, pos_x), dim=4) + pos_z).permute(0, 1, 4, 2, 3) - return pos diff --git a/vis4d/op/layer/transformer.py b/vis4d/op/layer/transformer.py index 08c7950d..2600ade3 100644 --- a/vis4d/op/layer/transformer.py +++ b/vis4d/op/layer/transformer.py @@ -212,6 +212,8 @@ def __init__( LayerScale. Default: 0.0 """ super().__init__() + self.embed_dims = embed_dims + layers: list[nn.Module] = [] in_channels = embed_dims for _ in range(num_fcs - 1): diff --git a/vis4d/pl/run.py b/vis4d/pl/run.py index e2f3f1b3..574f3572 100644 --- a/vis4d/pl/run.py +++ b/vis4d/pl/run.py @@ -16,8 +16,15 @@ from vis4d.common.util import set_tf32 from vis4d.config import instantiate_classes from vis4d.config.typing import ExperimentConfig -from vis4d.engine.callbacks import CheckpointCallback -from vis4d.engine.flag import _CKPT, _CONFIG, _GPUS, _RESUME, _SHOW_CONFIG +from vis4d.engine.callbacks import CheckpointCallback, VisualizerCallback +from vis4d.engine.flag import ( + _CKPT, + _CONFIG, + _GPUS, + _RESUME, + _SHOW_CONFIG, + _VIS, +) from vis4d.engine.parser import pprints_config from vis4d.pl.callbacks import CallbackWrapper, LRSchedulerCallback from vis4d.pl.data_module import DataModule @@ -83,12 +90,23 @@ def main(argv: ArgsType) -> None: test_data_connector = None # Callbacks + vis = _VIS.value + callbacks: list[Callback] = [] for cb in config.callbacks: callback = instantiate_classes(cb) # Skip checkpoint callback to use PL ModelCheckpoint - if not isinstance(callback, CheckpointCallback): - callbacks.append(CallbackWrapper(callback)) + if isinstance(callback, CheckpointCallback): + continue + + if not vis and isinstance(callback, VisualizerCallback): + rank_zero_info( + "VisualizerCallback is not used. 
" + "Please set --vis=True to use it." + ) + continue + + callbacks.append(CallbackWrapper(callback)) if "pl_callbacks" in config: pl_callbacks = [instantiate_classes(cb) for cb in config.pl_callbacks] diff --git a/vis4d/vis/image/bounding_box_visualizer.py b/vis4d/vis/image/bounding_box_visualizer.py index 72ece8d8..f9dc88c4 100644 --- a/vis4d/vis/image/bounding_box_visualizer.py +++ b/vis4d/vis/image/bounding_box_visualizer.py @@ -45,7 +45,7 @@ def __init__( self, *args: ArgsType, n_colors: int = 50, - class_id_mapping: dict[int, str] | None = None, + cat_mapping: dict[str, int] | None = None, file_type: str = "png", canvas: CanvasBackend = PillowCanvasBackend(), viewer: ImageViewerBackend = MatplotlibImageViewer(), @@ -55,9 +55,9 @@ def __init__( Args: n_colors (int): How many colors should be used for the internal - color map - class_id_mapping (dict[int, str]): Mapping from class id to - human readable name + color map + cat_mapping (dict[str, int]): Mapping from class names to class + ids. Defaults to None. 
file_type (str): Desired file type canvas (CanvasBackend): Backend that is used to draw on images viewer (ImageViewerBackend): Backend that is used show images @@ -66,7 +66,9 @@ def __init__( self._samples: list[DataSample] = [] self.color_palette = generate_color_map(n_colors) self.class_id_mapping = ( - class_id_mapping if class_id_mapping is not None else {} + {v: k for k, v in cat_mapping.items()} + if cat_mapping is not None + else {} ) self.file_type = file_type self.canvas = canvas From c834f28dfd2eceb65fa8c9bce1255a90d2ad5b08 Mon Sep 17 00:00:00 2001 From: RoyYang0714 Date: Fri, 26 Jul 2024 17:13:57 +0200 Subject: [PATCH 04/19] fix: Fix mypy for PyTorch 2.4.0 --- docs/source/user_guide/faster_rcnn_example.py | 2 +- tests/engine/callbacks/checkpoint_test.py | 2 +- tests/engine/optim/scheduler_test.py | 4 ++-- tests/model/detect/mask_rcnn_test.py | 4 ++-- tests/model/detect/retinanet_test.py | 4 ++-- tests/model/seg/fcn_resnet_test.py | 4 ++-- tests/model/seg/semantic_fpn_test.py | 4 ++-- tests/pl/trainer_test.py | 4 ++-- vis4d/data/transforms/resize.py | 2 +- vis4d/engine/optim/scheduler.py | 16 ++++++++-------- vis4d/zoo/base/models/yolox.py | 2 +- .../faster_rcnn/faster_rcnn_r50_1x_bdd100k.py | 2 +- .../faster_rcnn/faster_rcnn_r50_3x_bdd100k.py | 2 +- .../mask_rcnn/mask_rcnn_r50_1x_bdd100k.py | 2 +- .../mask_rcnn/mask_rcnn_r50_3x_bdd100k.py | 2 +- .../mask_rcnn/mask_rcnn_r50_5x_bdd100k.py | 2 +- .../qdtrack/qdtrack_frcnn_r50_fpn_1x_bdd100k.py | 2 +- .../semantic_fpn_r101_80k_bdd100k.py | 2 +- .../semantic_fpn/semantic_fpn_r50_40k_bdd100k.py | 2 +- .../semantic_fpn/semantic_fpn_r50_80k_bdd100k.py | 2 +- vis4d/zoo/bevformer/bevformer_base.py | 2 +- vis4d/zoo/bevformer/bevformer_tiny.py | 2 +- .../cc_3dt_frcnn_r101_fpn_kf3d_24e_nusc.py | 2 +- .../cc_3dt/cc_3dt_frcnn_r50_fpn_kf3d_12e_nusc.py | 2 +- .../cc_3dt/velo_lstm_bevformer_base_100e_nusc.py | 2 +- .../cc_3dt/velo_lstm_frcnn_r101_fpn_100e_nusc.py | 2 +- vis4d/zoo/faster_rcnn/faster_rcnn_coco.py | 2 +- 
vis4d/zoo/fcn_resnet/fcn_resnet_coco.py | 2 +- vis4d/zoo/mask_rcnn/mask_rcnn_coco.py | 2 +- .../qdtrack_frcnn_r50_fpn_augs_1x_bdd100k.py | 2 +- vis4d/zoo/retinanet/retinanet_coco.py | 2 +- .../faster_rcnn/faster_rcnn_r50_12e_shift.py | 2 +- .../faster_rcnn/faster_rcnn_r50_36e_shift.py | 2 +- .../faster_rcnn_r50_6e_shift_all_domains.py | 2 +- .../shift/mask_rcnn/mask_rcnn_r50_12e_shift.py | 2 +- .../shift/mask_rcnn/mask_rcnn_r50_36e_shift.py | 2 +- .../mask_rcnn_r50_6e_shift_all_domains.py | 2 +- .../semantic_fpn/semantic_fpn_r50_160k_shift.py | 4 ++-- .../semantic_fpn_r50_160k_shift_all_domains.py | 4 ++-- .../semantic_fpn/semantic_fpn_r50_40k_shift.py | 4 ++-- .../semantic_fpn_r50_40k_shift_all_domains.py | 4 ++-- vis4d/zoo/vit/vit_small_imagenet.py | 2 +- vis4d/zoo/vit/vit_tiny_imagenet.py | 2 +- 43 files changed, 60 insertions(+), 60 deletions(-) diff --git a/docs/source/user_guide/faster_rcnn_example.py b/docs/source/user_guide/faster_rcnn_example.py index b639930d..4d5cab51 100644 --- a/docs/source/user_guide/faster_rcnn_example.py +++ b/docs/source/user_guide/faster_rcnn_example.py @@ -6,7 +6,7 @@ import lightning.pytorch as pl import numpy as np -from torch.optim import SGD +from torch.optim.sgd import SGD from torch.optim.lr_scheduler import LinearLR, MultiStepLR from vis4d.config import class_config diff --git a/tests/engine/callbacks/checkpoint_test.py b/tests/engine/callbacks/checkpoint_test.py index 560a433f..0df0132c 100644 --- a/tests/engine/callbacks/checkpoint_test.py +++ b/tests/engine/callbacks/checkpoint_test.py @@ -4,7 +4,7 @@ import tempfile import unittest -from torch.optim import SGD +from torch.optim.sgd import SGD from tests.util import MOCKLOSS, MockModel from vis4d.config import class_config diff --git a/tests/engine/optim/scheduler_test.py b/tests/engine/optim/scheduler_test.py index 226e8eb4..e94cf7a5 100644 --- a/tests/engine/optim/scheduler_test.py +++ b/tests/engine/optim/scheduler_test.py @@ -5,8 +5,8 @@ import torch import 
torch.nn.functional as F -from torch import optim from torch.optim.lr_scheduler import LRScheduler +from torch.optim.sgd import SGD from torch.testing import assert_close from vis4d.engine.optim.scheduler import ConstantLR, PolyLR, QuadraticLRWarmup @@ -68,7 +68,7 @@ def setUp(self) -> None: model = ToyModel() self.lr = 0.05 self.l2_mult = 10 - self.optimizer = optim.SGD( + self.optimizer = SGD( [ {"params": model.conv1.parameters()}, { diff --git a/tests/model/detect/mask_rcnn_test.py b/tests/model/detect/mask_rcnn_test.py index 4b5ff954..b830bbcf 100644 --- a/tests/model/detect/mask_rcnn_test.py +++ b/tests/model/detect/mask_rcnn_test.py @@ -3,7 +3,7 @@ import unittest import torch -from torch import optim +from torch.optim.sgd import SGD from tests.util import get_test_data, get_test_file from vis4d.common.ckpt import load_model_checkpoint @@ -120,7 +120,7 @@ def test_train(self): ] ) - optimizer = optim.SGD(mask_rcnn.parameters(), lr=0.001, momentum=0.9) + optimizer = SGD(mask_rcnn.parameters(), lr=0.001, momentum=0.9) dataset = COCO(get_test_data("coco_test"), split="train") train_loader = get_train_dataloader(dataset, 2, (256, 256)) diff --git a/tests/model/detect/retinanet_test.py b/tests/model/detect/retinanet_test.py index 58f34645..dc878e7e 100644 --- a/tests/model/detect/retinanet_test.py +++ b/tests/model/detect/retinanet_test.py @@ -3,7 +3,7 @@ import unittest import torch -from torch import optim +from torch.optim.sgd import SGD from tests.util import get_test_data, get_test_file from vis4d.common.ckpt import load_model_checkpoint @@ -75,7 +75,7 @@ def test_train(self) -> None: retina_net.retinanet_head.box_sampler, ) - optimizer = optim.SGD(retina_net.parameters(), lr=0.001, momentum=0.9) + optimizer = SGD(retina_net.parameters(), lr=0.001, momentum=0.9) dataset = COCO(get_test_data("coco_test"), split="train") train_loader = get_train_dataloader(dataset, 2, (256, 256)) diff --git a/tests/model/seg/fcn_resnet_test.py 
b/tests/model/seg/fcn_resnet_test.py index dc1a2f98..4a9f90fc 100644 --- a/tests/model/seg/fcn_resnet_test.py +++ b/tests/model/seg/fcn_resnet_test.py @@ -5,7 +5,7 @@ import unittest import torch -from torch import optim +from torch.optim.sgd import SGD from tests.util import get_test_data, get_test_file from vis4d.common.ckpt import load_model_checkpoint @@ -46,7 +46,7 @@ def test_train(self) -> None: """Test FCNResNet training.""" model = FCNResNet(base_model="resnet50", resize=(64, 64)) loss_fn = MultiLevelSegLoss(feature_idx=(4, 5), weights=[0.5, 1]) - optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9) + optimizer = SGD(model.parameters(), lr=0.01, momentum=0.9) dataset = COCO( get_test_data("coco_test"), split="train", use_pascal_voc_cats=True ) diff --git a/tests/model/seg/semantic_fpn_test.py b/tests/model/seg/semantic_fpn_test.py index 53a1c4de..19932f19 100644 --- a/tests/model/seg/semantic_fpn_test.py +++ b/tests/model/seg/semantic_fpn_test.py @@ -5,7 +5,7 @@ import unittest import torch -from torch import optim +from torch.optim.sgd import SGD from tests.util import get_test_data, get_test_file from vis4d.data.const import CommonKeys as K @@ -59,7 +59,7 @@ def test_train(self) -> None: """Test SemanticFPN training.""" model = SemanticFPN(num_classes=21) loss_fn = SegCrossEntropyLoss() - optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9) + optimizer = SGD(model.parameters(), lr=0.01, momentum=0.9) train_loader = get_train_dataloader(self.dataset, 2) model.train() diff --git a/tests/pl/trainer_test.py b/tests/pl/trainer_test.py index 2608983f..fb963a63 100644 --- a/tests/pl/trainer_test.py +++ b/tests/pl/trainer_test.py @@ -7,7 +7,7 @@ import unittest from ml_collections import ConfigDict -from torch import optim +from torch.optim.sgd import SGD from torch.utils.data import DataLoader, Dataset from tests.util import get_test_data @@ -85,7 +85,7 @@ def get_training_module(model_cfg: ConfigDict): } ) - optimizer_cfg = 
get_optimizer_cfg(class_config(optim.SGD, lr=0.01)) + optimizer_cfg = get_optimizer_cfg(class_config(SGD, lr=0.01)) return TrainingModule( model_cfg=model_cfg, optimizers_cfg=[optimizer_cfg], diff --git a/vis4d/data/transforms/resize.py b/vis4d/data/transforms/resize.py index c613bc0e..f157bd88 100644 --- a/vis4d/data/transforms/resize.py +++ b/vis4d/data/transforms/resize.py @@ -309,7 +309,7 @@ def __call__( optical_flow_[:, :, 0] *= scale_factor[0] optical_flow_[:, :, 1] *= scale_factor[1] optical_flows[i] = optical_flow_.numpy() - return optical_flow_.numpy() + return optical_flows @Transform( diff --git a/vis4d/engine/optim/scheduler.py b/vis4d/engine/optim/scheduler.py index 2e1e6337..496e05f0 100644 --- a/vis4d/engine/optim/scheduler.py +++ b/vis4d/engine/optim/scheduler.py @@ -5,8 +5,8 @@ from typing import TypedDict -from torch.optim import Optimizer from torch.optim.lr_scheduler import LRScheduler +from torch.optim.optimizer import Optimizer from vis4d.common.typing import DictStrAny from vis4d.config import copy_and_resolve_references, instantiate_classes @@ -80,10 +80,10 @@ def _instantiate_lr_scheduler( def get_lr(self) -> list[float]: # type: ignore """Get current learning rate.""" - return [ - lr_scheduler["scheduler"].get_lr() - for lr_scheduler in self.lr_schedulers.values() - ] + lr = [] + for lr_scheduler in self.lr_schedulers.values(): + lr.extend(lr_scheduler["scheduler"].get_lr()) + return lr def state_dict(self) -> dict[int, DictStrAny]: # type: ignore """Get state dict.""" @@ -163,7 +163,7 @@ def __init__( def get_lr(self) -> list[float]: # type: ignore """Compute current learning rate.""" - step_count = self._step_count - 1 # type: ignore + step_count = self._step_count - 1 if step_count == 0: return [ group["lr"] * self.factor @@ -213,7 +213,7 @@ def __init__( def get_lr(self) -> list[float]: # type: ignore """Compute current learning rate.""" - step_count = self._step_count - 1 # type: ignore + step_count = self._step_count - 1 if 
step_count == 0 or step_count > self.max_steps: return [group["lr"] for group in self.optimizer.param_groups] decay_factor = ( @@ -247,7 +247,7 @@ def __init__( def get_lr(self) -> list[float]: # type: ignore """Compute current learning rate.""" - step_count = self._step_count - 1 # type: ignore + step_count = self._step_count - 1 if step_count >= self.max_steps: return self.base_lrs factors = [ diff --git a/vis4d/zoo/base/models/yolox.py b/vis4d/zoo/base/models/yolox.py index 044650de..eadc3bc1 100644 --- a/vis4d/zoo/base/models/yolox.py +++ b/vis4d/zoo/base/models/yolox.py @@ -3,8 +3,8 @@ from __future__ import annotations from ml_collections import ConfigDict, FieldReference -from torch.optim import SGD from torch.optim.lr_scheduler import CosineAnnealingLR +from torch.optim.sgd import SGD from vis4d.config import class_config from vis4d.config.typing import OptimizerConfig diff --git a/vis4d/zoo/bdd100k/faster_rcnn/faster_rcnn_r50_1x_bdd100k.py b/vis4d/zoo/bdd100k/faster_rcnn/faster_rcnn_r50_1x_bdd100k.py index d9219669..ffe7f884 100644 --- a/vis4d/zoo/bdd100k/faster_rcnn/faster_rcnn_r50_1x_bdd100k.py +++ b/vis4d/zoo/bdd100k/faster_rcnn/faster_rcnn_r50_1x_bdd100k.py @@ -3,8 +3,8 @@ from __future__ import annotations import lightning.pytorch as pl -from torch.optim import SGD from torch.optim.lr_scheduler import LinearLR, MultiStepLR +from torch.optim.sgd import SGD from vis4d.config import class_config from vis4d.config.typing import ExperimentConfig, ExperimentParameters diff --git a/vis4d/zoo/bdd100k/faster_rcnn/faster_rcnn_r50_3x_bdd100k.py b/vis4d/zoo/bdd100k/faster_rcnn/faster_rcnn_r50_3x_bdd100k.py index 7abc3a7c..dafee957 100644 --- a/vis4d/zoo/bdd100k/faster_rcnn/faster_rcnn_r50_3x_bdd100k.py +++ b/vis4d/zoo/bdd100k/faster_rcnn/faster_rcnn_r50_3x_bdd100k.py @@ -3,8 +3,8 @@ from __future__ import annotations import lightning.pytorch as pl -from torch.optim import SGD from torch.optim.lr_scheduler import LinearLR, MultiStepLR +from torch.optim.sgd import 
SGD from vis4d.config import class_config from vis4d.config.typing import ExperimentConfig, ExperimentParameters diff --git a/vis4d/zoo/bdd100k/mask_rcnn/mask_rcnn_r50_1x_bdd100k.py b/vis4d/zoo/bdd100k/mask_rcnn/mask_rcnn_r50_1x_bdd100k.py index 8fc5385d..d52e07f2 100644 --- a/vis4d/zoo/bdd100k/mask_rcnn/mask_rcnn_r50_1x_bdd100k.py +++ b/vis4d/zoo/bdd100k/mask_rcnn/mask_rcnn_r50_1x_bdd100k.py @@ -3,8 +3,8 @@ from __future__ import annotations import lightning.pytorch as pl -from torch.optim import SGD from torch.optim.lr_scheduler import LinearLR, MultiStepLR +from torch.optim.sgd import SGD from vis4d.config import class_config from vis4d.config.typing import ExperimentConfig, ExperimentParameters diff --git a/vis4d/zoo/bdd100k/mask_rcnn/mask_rcnn_r50_3x_bdd100k.py b/vis4d/zoo/bdd100k/mask_rcnn/mask_rcnn_r50_3x_bdd100k.py index 9a709d21..719eb1d0 100644 --- a/vis4d/zoo/bdd100k/mask_rcnn/mask_rcnn_r50_3x_bdd100k.py +++ b/vis4d/zoo/bdd100k/mask_rcnn/mask_rcnn_r50_3x_bdd100k.py @@ -3,8 +3,8 @@ from __future__ import annotations import lightning.pytorch as pl -from torch.optim import SGD from torch.optim.lr_scheduler import LinearLR, MultiStepLR +from torch.optim.sgd import SGD from vis4d.config import class_config from vis4d.config.typing import ExperimentConfig, ExperimentParameters diff --git a/vis4d/zoo/bdd100k/mask_rcnn/mask_rcnn_r50_5x_bdd100k.py b/vis4d/zoo/bdd100k/mask_rcnn/mask_rcnn_r50_5x_bdd100k.py index 9c6dec31..8aaf9431 100644 --- a/vis4d/zoo/bdd100k/mask_rcnn/mask_rcnn_r50_5x_bdd100k.py +++ b/vis4d/zoo/bdd100k/mask_rcnn/mask_rcnn_r50_5x_bdd100k.py @@ -3,8 +3,8 @@ from __future__ import annotations import lightning.pytorch as pl -from torch.optim import SGD from torch.optim.lr_scheduler import LinearLR, MultiStepLR +from torch.optim.sgd import SGD from vis4d.config import class_config from vis4d.config.typing import ExperimentConfig, ExperimentParameters diff --git a/vis4d/zoo/bdd100k/qdtrack/qdtrack_frcnn_r50_fpn_1x_bdd100k.py 
b/vis4d/zoo/bdd100k/qdtrack/qdtrack_frcnn_r50_fpn_1x_bdd100k.py index 9b047d2d..d2af3037 100644 --- a/vis4d/zoo/bdd100k/qdtrack/qdtrack_frcnn_r50_fpn_1x_bdd100k.py +++ b/vis4d/zoo/bdd100k/qdtrack/qdtrack_frcnn_r50_fpn_1x_bdd100k.py @@ -3,8 +3,8 @@ from __future__ import annotations import lightning.pytorch as pl -from torch.optim import SGD from torch.optim.lr_scheduler import LinearLR, MultiStepLR +from torch.optim.sgd import SGD from vis4d.config import class_config from vis4d.config.typing import ExperimentConfig, ExperimentParameters diff --git a/vis4d/zoo/bdd100k/semantic_fpn/semantic_fpn_r101_80k_bdd100k.py b/vis4d/zoo/bdd100k/semantic_fpn/semantic_fpn_r101_80k_bdd100k.py index db45092d..12510ae7 100644 --- a/vis4d/zoo/bdd100k/semantic_fpn/semantic_fpn_r101_80k_bdd100k.py +++ b/vis4d/zoo/bdd100k/semantic_fpn/semantic_fpn_r101_80k_bdd100k.py @@ -3,8 +3,8 @@ from __future__ import annotations import lightning.pytorch as pl -from torch.optim import SGD from torch.optim.lr_scheduler import LinearLR +from torch.optim.sgd import SGD from vis4d.config import class_config from vis4d.config.typing import ExperimentConfig, ExperimentParameters diff --git a/vis4d/zoo/bdd100k/semantic_fpn/semantic_fpn_r50_40k_bdd100k.py b/vis4d/zoo/bdd100k/semantic_fpn/semantic_fpn_r50_40k_bdd100k.py index deb8f78f..938c004d 100644 --- a/vis4d/zoo/bdd100k/semantic_fpn/semantic_fpn_r50_40k_bdd100k.py +++ b/vis4d/zoo/bdd100k/semantic_fpn/semantic_fpn_r50_40k_bdd100k.py @@ -3,8 +3,8 @@ from __future__ import annotations import lightning.pytorch as pl -from torch.optim import SGD from torch.optim.lr_scheduler import LinearLR +from torch.optim.sgd import SGD from vis4d.config import class_config from vis4d.config.typing import ExperimentConfig, ExperimentParameters diff --git a/vis4d/zoo/bdd100k/semantic_fpn/semantic_fpn_r50_80k_bdd100k.py b/vis4d/zoo/bdd100k/semantic_fpn/semantic_fpn_r50_80k_bdd100k.py index 06b95cd3..53c29ba3 100644 --- 
a/vis4d/zoo/bdd100k/semantic_fpn/semantic_fpn_r50_80k_bdd100k.py +++ b/vis4d/zoo/bdd100k/semantic_fpn/semantic_fpn_r50_80k_bdd100k.py @@ -3,8 +3,8 @@ from __future__ import annotations import lightning.pytorch as pl -from torch.optim import SGD from torch.optim.lr_scheduler import LinearLR +from torch.optim.sgd import SGD from vis4d.config import class_config from vis4d.config.typing import ExperimentConfig, ExperimentParameters diff --git a/vis4d/zoo/bevformer/bevformer_base.py b/vis4d/zoo/bevformer/bevformer_base.py index 9b39d412..3d4cdd9f 100644 --- a/vis4d/zoo/bevformer/bevformer_base.py +++ b/vis4d/zoo/bevformer/bevformer_base.py @@ -3,7 +3,7 @@ from __future__ import annotations import lightning.pytorch as pl -from torch.optim import AdamW +from torch.optim.adamw import AdamW from torch.optim.lr_scheduler import CosineAnnealingLR, LinearLR from vis4d.config import class_config diff --git a/vis4d/zoo/bevformer/bevformer_tiny.py b/vis4d/zoo/bevformer/bevformer_tiny.py index cdc984bd..1fba154f 100644 --- a/vis4d/zoo/bevformer/bevformer_tiny.py +++ b/vis4d/zoo/bevformer/bevformer_tiny.py @@ -3,7 +3,7 @@ from __future__ import annotations import lightning.pytorch as pl -from torch.optim import AdamW +from torch.optim.adamw import AdamW from torch.optim.lr_scheduler import CosineAnnealingLR, LinearLR from vis4d.config import class_config diff --git a/vis4d/zoo/cc_3dt/cc_3dt_frcnn_r101_fpn_kf3d_24e_nusc.py b/vis4d/zoo/cc_3dt/cc_3dt_frcnn_r101_fpn_kf3d_24e_nusc.py index 3abc1132..8bed245d 100644 --- a/vis4d/zoo/cc_3dt/cc_3dt_frcnn_r101_fpn_kf3d_24e_nusc.py +++ b/vis4d/zoo/cc_3dt/cc_3dt_frcnn_r101_fpn_kf3d_24e_nusc.py @@ -3,8 +3,8 @@ from __future__ import annotations import lightning.pytorch as pl -from torch.optim import SGD from torch.optim.lr_scheduler import LinearLR, MultiStepLR +from torch.optim.sgd import SGD from vis4d.config import class_config from vis4d.config.typing import ExperimentConfig, ExperimentParameters diff --git 
a/vis4d/zoo/cc_3dt/cc_3dt_frcnn_r50_fpn_kf3d_12e_nusc.py b/vis4d/zoo/cc_3dt/cc_3dt_frcnn_r50_fpn_kf3d_12e_nusc.py index 2bf34a63..c850866e 100644 --- a/vis4d/zoo/cc_3dt/cc_3dt_frcnn_r50_fpn_kf3d_12e_nusc.py +++ b/vis4d/zoo/cc_3dt/cc_3dt_frcnn_r50_fpn_kf3d_12e_nusc.py @@ -3,8 +3,8 @@ from __future__ import annotations import lightning.pytorch as pl -from torch.optim import SGD from torch.optim.lr_scheduler import LinearLR, MultiStepLR +from torch.optim.sgd import SGD from vis4d.config import class_config from vis4d.config.typing import ExperimentConfig, ExperimentParameters diff --git a/vis4d/zoo/cc_3dt/velo_lstm_bevformer_base_100e_nusc.py b/vis4d/zoo/cc_3dt/velo_lstm_bevformer_base_100e_nusc.py index 163c3c19..b7d26a38 100644 --- a/vis4d/zoo/cc_3dt/velo_lstm_bevformer_base_100e_nusc.py +++ b/vis4d/zoo/cc_3dt/velo_lstm_bevformer_base_100e_nusc.py @@ -3,7 +3,7 @@ from __future__ import annotations import lightning.pytorch as pl -from torch.optim import Adam +from torch.optim.adam import Adam from torch.optim.lr_scheduler import MultiStepLR from vis4d.config import class_config diff --git a/vis4d/zoo/cc_3dt/velo_lstm_frcnn_r101_fpn_100e_nusc.py b/vis4d/zoo/cc_3dt/velo_lstm_frcnn_r101_fpn_100e_nusc.py index 86568c69..aa51e6ea 100644 --- a/vis4d/zoo/cc_3dt/velo_lstm_frcnn_r101_fpn_100e_nusc.py +++ b/vis4d/zoo/cc_3dt/velo_lstm_frcnn_r101_fpn_100e_nusc.py @@ -3,7 +3,7 @@ from __future__ import annotations import lightning.pytorch as pl -from torch.optim import Adam +from torch.optim.adam import Adam from torch.optim.lr_scheduler import MultiStepLR from vis4d.config import class_config diff --git a/vis4d/zoo/faster_rcnn/faster_rcnn_coco.py b/vis4d/zoo/faster_rcnn/faster_rcnn_coco.py index 005bedb8..e0d6d66c 100644 --- a/vis4d/zoo/faster_rcnn/faster_rcnn_coco.py +++ b/vis4d/zoo/faster_rcnn/faster_rcnn_coco.py @@ -4,8 +4,8 @@ import lightning.pytorch as pl import numpy as np -from torch.optim import SGD from torch.optim.lr_scheduler import LinearLR, MultiStepLR +from 
torch.optim.sgd import SGD from vis4d.config import class_config from vis4d.config.sweep import grid_search diff --git a/vis4d/zoo/fcn_resnet/fcn_resnet_coco.py b/vis4d/zoo/fcn_resnet/fcn_resnet_coco.py index 1f177c3f..1baad29b 100644 --- a/vis4d/zoo/fcn_resnet/fcn_resnet_coco.py +++ b/vis4d/zoo/fcn_resnet/fcn_resnet_coco.py @@ -3,8 +3,8 @@ from __future__ import annotations import lightning.pytorch as pl -from torch.optim import SGD from torch.optim.lr_scheduler import LinearLR +from torch.optim.sgd import SGD from vis4d.config import class_config from vis4d.config.typing import ExperimentConfig, ExperimentParameters diff --git a/vis4d/zoo/mask_rcnn/mask_rcnn_coco.py b/vis4d/zoo/mask_rcnn/mask_rcnn_coco.py index 58816730..cc354626 100644 --- a/vis4d/zoo/mask_rcnn/mask_rcnn_coco.py +++ b/vis4d/zoo/mask_rcnn/mask_rcnn_coco.py @@ -3,8 +3,8 @@ from __future__ import annotations import lightning.pytorch as pl -from torch.optim import SGD from torch.optim.lr_scheduler import LinearLR, MultiStepLR +from torch.optim.sgd import SGD from vis4d.config import class_config from vis4d.config.typing import ExperimentConfig, ExperimentParameters diff --git a/vis4d/zoo/qdtrack/qdtrack_frcnn_r50_fpn_augs_1x_bdd100k.py b/vis4d/zoo/qdtrack/qdtrack_frcnn_r50_fpn_augs_1x_bdd100k.py index a49ca546..0a78a559 100644 --- a/vis4d/zoo/qdtrack/qdtrack_frcnn_r50_fpn_augs_1x_bdd100k.py +++ b/vis4d/zoo/qdtrack/qdtrack_frcnn_r50_fpn_augs_1x_bdd100k.py @@ -3,8 +3,8 @@ from __future__ import annotations from lightning.pytorch.callbacks import ModelCheckpoint -from torch.optim import SGD from torch.optim.lr_scheduler import LinearLR, MultiStepLR +from torch.optim.sgd import SGD from vis4d.config import class_config from vis4d.config.typing import ExperimentConfig, ExperimentParameters diff --git a/vis4d/zoo/retinanet/retinanet_coco.py b/vis4d/zoo/retinanet/retinanet_coco.py index 5ab9d42d..6d26b54c 100644 --- a/vis4d/zoo/retinanet/retinanet_coco.py +++ b/vis4d/zoo/retinanet/retinanet_coco.py @@ -3,8 
+3,8 @@ from __future__ import annotations import lightning.pytorch as pl -from torch.optim import SGD from torch.optim.lr_scheduler import LinearLR, MultiStepLR +from torch.optim.sgd import SGD from vis4d.config import class_config from vis4d.config.typing import ExperimentConfig, ExperimentParameters diff --git a/vis4d/zoo/shift/faster_rcnn/faster_rcnn_r50_12e_shift.py b/vis4d/zoo/shift/faster_rcnn/faster_rcnn_r50_12e_shift.py index 1e452102..785a14b4 100644 --- a/vis4d/zoo/shift/faster_rcnn/faster_rcnn_r50_12e_shift.py +++ b/vis4d/zoo/shift/faster_rcnn/faster_rcnn_r50_12e_shift.py @@ -3,8 +3,8 @@ from __future__ import annotations import lightning.pytorch as pl -from torch.optim import SGD from torch.optim.lr_scheduler import LinearLR, MultiStepLR +from torch.optim.sgd import SGD from vis4d.config import class_config from vis4d.config.typing import ExperimentConfig, ExperimentParameters diff --git a/vis4d/zoo/shift/faster_rcnn/faster_rcnn_r50_36e_shift.py b/vis4d/zoo/shift/faster_rcnn/faster_rcnn_r50_36e_shift.py index 946e1fb6..0dcc7b01 100644 --- a/vis4d/zoo/shift/faster_rcnn/faster_rcnn_r50_36e_shift.py +++ b/vis4d/zoo/shift/faster_rcnn/faster_rcnn_r50_36e_shift.py @@ -3,8 +3,8 @@ from __future__ import annotations import lightning.pytorch as pl -from torch.optim import SGD from torch.optim.lr_scheduler import LinearLR, MultiStepLR +from torch.optim.sgd import SGD from vis4d.config import class_config from vis4d.config.typing import ExperimentConfig, ExperimentParameters diff --git a/vis4d/zoo/shift/faster_rcnn/faster_rcnn_r50_6e_shift_all_domains.py b/vis4d/zoo/shift/faster_rcnn/faster_rcnn_r50_6e_shift_all_domains.py index 7d84cae7..d0d301df 100644 --- a/vis4d/zoo/shift/faster_rcnn/faster_rcnn_r50_6e_shift_all_domains.py +++ b/vis4d/zoo/shift/faster_rcnn/faster_rcnn_r50_6e_shift_all_domains.py @@ -3,8 +3,8 @@ from __future__ import annotations import lightning.pytorch as pl -from torch.optim import SGD from torch.optim.lr_scheduler import LinearLR, 
MultiStepLR +from torch.optim.sgd import SGD from vis4d.config import class_config from vis4d.config.typing import ExperimentConfig, ExperimentParameters diff --git a/vis4d/zoo/shift/mask_rcnn/mask_rcnn_r50_12e_shift.py b/vis4d/zoo/shift/mask_rcnn/mask_rcnn_r50_12e_shift.py index d9da7bb4..51dca6df 100644 --- a/vis4d/zoo/shift/mask_rcnn/mask_rcnn_r50_12e_shift.py +++ b/vis4d/zoo/shift/mask_rcnn/mask_rcnn_r50_12e_shift.py @@ -3,8 +3,8 @@ from __future__ import annotations import lightning.pytorch as pl -from torch.optim import SGD from torch.optim.lr_scheduler import LinearLR, MultiStepLR +from torch.optim.sgd import SGD from vis4d.config import FieldConfigDict, class_config from vis4d.data.io.hdf5 import HDF5Backend diff --git a/vis4d/zoo/shift/mask_rcnn/mask_rcnn_r50_36e_shift.py b/vis4d/zoo/shift/mask_rcnn/mask_rcnn_r50_36e_shift.py index 40963714..939fe5f9 100644 --- a/vis4d/zoo/shift/mask_rcnn/mask_rcnn_r50_36e_shift.py +++ b/vis4d/zoo/shift/mask_rcnn/mask_rcnn_r50_36e_shift.py @@ -3,8 +3,8 @@ from __future__ import annotations import lightning.pytorch as pl -from torch.optim import SGD from torch.optim.lr_scheduler import LinearLR, MultiStepLR +from torch.optim.sgd import SGD from vis4d.config import FieldConfigDict, class_config from vis4d.data.io.hdf5 import HDF5Backend diff --git a/vis4d/zoo/shift/mask_rcnn/mask_rcnn_r50_6e_shift_all_domains.py b/vis4d/zoo/shift/mask_rcnn/mask_rcnn_r50_6e_shift_all_domains.py index 1f915339..3e461e3c 100644 --- a/vis4d/zoo/shift/mask_rcnn/mask_rcnn_r50_6e_shift_all_domains.py +++ b/vis4d/zoo/shift/mask_rcnn/mask_rcnn_r50_6e_shift_all_domains.py @@ -3,8 +3,8 @@ from __future__ import annotations import lightning.pytorch as pl -from torch.optim import SGD from torch.optim.lr_scheduler import LinearLR, MultiStepLR +from torch.optim.sgd import SGD from vis4d.config import FieldConfigDict, class_config from vis4d.data.io.hdf5 import HDF5Backend diff --git a/vis4d/zoo/shift/semantic_fpn/semantic_fpn_r50_160k_shift.py 
b/vis4d/zoo/shift/semantic_fpn/semantic_fpn_r50_160k_shift.py index 983d8fef..312a7672 100644 --- a/vis4d/zoo/shift/semantic_fpn/semantic_fpn_r50_160k_shift.py +++ b/vis4d/zoo/shift/semantic_fpn/semantic_fpn_r50_160k_shift.py @@ -3,8 +3,8 @@ from __future__ import annotations import lightning.pytorch as pl -from torch import optim from torch.optim.lr_scheduler import LinearLR +from torch.optim.sgd import SGD from vis4d.config import class_config from vis4d.config.typing import ExperimentConfig, ExperimentParameters @@ -106,7 +106,7 @@ def get_config() -> ExperimentConfig: config.optimizers = [ get_optimizer_cfg( optimizer=class_config( - optim.SGD, lr=params.lr, momentum=0.9, weight_decay=0.0005 + SGD, lr=params.lr, momentum=0.9, weight_decay=0.0005 ), lr_schedulers=[ get_lr_scheduler_cfg( diff --git a/vis4d/zoo/shift/semantic_fpn/semantic_fpn_r50_160k_shift_all_domains.py b/vis4d/zoo/shift/semantic_fpn/semantic_fpn_r50_160k_shift_all_domains.py index 6fa1b785..aacaf4a9 100644 --- a/vis4d/zoo/shift/semantic_fpn/semantic_fpn_r50_160k_shift_all_domains.py +++ b/vis4d/zoo/shift/semantic_fpn/semantic_fpn_r50_160k_shift_all_domains.py @@ -3,8 +3,8 @@ from __future__ import annotations import lightning.pytorch as pl -from torch import optim from torch.optim.lr_scheduler import LinearLR +from torch.optim.sgd import SGD from vis4d.config import class_config from vis4d.config.typing import ExperimentConfig, ExperimentParameters @@ -108,7 +108,7 @@ def get_config() -> ExperimentConfig: config.optimizers = [ get_optimizer_cfg( optimizer=class_config( - optim.SGD, lr=params.lr, momentum=0.9, weight_decay=0.0005 + SGD, lr=params.lr, momentum=0.9, weight_decay=0.0005 ), lr_schedulers=[ get_lr_scheduler_cfg( diff --git a/vis4d/zoo/shift/semantic_fpn/semantic_fpn_r50_40k_shift.py b/vis4d/zoo/shift/semantic_fpn/semantic_fpn_r50_40k_shift.py index bc86ea10..5185df09 100644 --- a/vis4d/zoo/shift/semantic_fpn/semantic_fpn_r50_40k_shift.py +++ 
b/vis4d/zoo/shift/semantic_fpn/semantic_fpn_r50_40k_shift.py @@ -3,8 +3,8 @@ from __future__ import annotations import lightning.pytorch as pl -from torch import optim from torch.optim.lr_scheduler import LinearLR +from torch.optim.sgd import SGD from vis4d.config import class_config from vis4d.config.typing import ExperimentConfig, ExperimentParameters @@ -106,7 +106,7 @@ def get_config() -> ExperimentConfig: config.optimizers = [ get_optimizer_cfg( optimizer=class_config( - optim.SGD, lr=params.lr, momentum=0.9, weight_decay=0.0005 + SGD, lr=params.lr, momentum=0.9, weight_decay=0.0005 ), lr_schedulers=[ get_lr_scheduler_cfg( diff --git a/vis4d/zoo/shift/semantic_fpn/semantic_fpn_r50_40k_shift_all_domains.py b/vis4d/zoo/shift/semantic_fpn/semantic_fpn_r50_40k_shift_all_domains.py index 8a6f4095..e1cea254 100644 --- a/vis4d/zoo/shift/semantic_fpn/semantic_fpn_r50_40k_shift_all_domains.py +++ b/vis4d/zoo/shift/semantic_fpn/semantic_fpn_r50_40k_shift_all_domains.py @@ -3,8 +3,8 @@ from __future__ import annotations import lightning.pytorch as pl -from torch import optim from torch.optim.lr_scheduler import LinearLR +from torch.optim.sgd import SGD from vis4d.config import class_config from vis4d.config.typing import ExperimentConfig, ExperimentParameters @@ -106,7 +106,7 @@ def get_config() -> ExperimentConfig: config.optimizers = [ get_optimizer_cfg( optimizer=class_config( - optim.SGD, lr=params.lr, momentum=0.9, weight_decay=0.0005 + SGD, lr=params.lr, momentum=0.9, weight_decay=0.0005 ), lr_schedulers=[ get_lr_scheduler_cfg( diff --git a/vis4d/zoo/vit/vit_small_imagenet.py b/vis4d/zoo/vit/vit_small_imagenet.py index 7d3e1ace..92b4ac81 100644 --- a/vis4d/zoo/vit/vit_small_imagenet.py +++ b/vis4d/zoo/vit/vit_small_imagenet.py @@ -4,7 +4,7 @@ import lightning.pytorch as pl from torch import nn -from torch.optim import AdamW +from torch.optim.adamw import AdamW from torch.optim.lr_scheduler import CosineAnnealingLR, LinearLR from vis4d.config import class_config 
diff --git a/vis4d/zoo/vit/vit_tiny_imagenet.py b/vis4d/zoo/vit/vit_tiny_imagenet.py index 1ab99401..d915ca17 100644 --- a/vis4d/zoo/vit/vit_tiny_imagenet.py +++ b/vis4d/zoo/vit/vit_tiny_imagenet.py @@ -4,7 +4,7 @@ import lightning.pytorch as pl from torch import nn -from torch.optim import AdamW +from torch.optim.adamw import AdamW from torch.optim.lr_scheduler import CosineAnnealingLR, LinearLR from vis4d.config import class_config From 456e3daf2ff958d3cb2b983915121363f27b56ad Mon Sep 17 00:00:00 2001 From: RoyYang0714 Date: Wed, 7 Aug 2024 15:56:05 +0200 Subject: [PATCH 05/19] feat: Add compute FLOPs flag. --- vis4d/common/ckpt.py | 4 +++- vis4d/pl/run.py | 1 + vis4d/zoo/base/runtime.py | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/vis4d/common/ckpt.py b/vis4d/common/ckpt.py index fdd5b6e4..c4ad64b4 100644 --- a/vis4d/common/ckpt.py +++ b/vis4d/common/ckpt.py @@ -205,7 +205,9 @@ def load_from_local( filename = osp.expanduser(filename) if not osp.isfile(filename): raise FileNotFoundError(f"{filename} can not be found.") - checkpoint = torch.load(filename, map_location=map_location) + checkpoint = torch.load( + filename, weights_only=True, map_location=map_location + ) return checkpoint diff --git a/vis4d/pl/run.py b/vis4d/pl/run.py index 574f3572..bfa43ab1 100644 --- a/vis4d/pl/run.py +++ b/vis4d/pl/run.py @@ -155,6 +155,7 @@ def main(argv: ArgsType) -> None: hyper_params, config.seed, ckpt_path if not resume else None, + config.compute_flops, ) data_module = DataModule(config.data) diff --git a/vis4d/zoo/base/runtime.py b/vis4d/zoo/base/runtime.py index 65e842f9..c457638c 100644 --- a/vis4d/zoo/base/runtime.py +++ b/vis4d/zoo/base/runtime.py @@ -58,6 +58,7 @@ def get_default_cfg( config.use_tf32 = False config.tf32_matmul_precision = "highest" config.benchmark = False + config.compute_flops = False return config From 70fbfd7e92e6d60c6bfdbde994d1e30131028915 Mon Sep 17 00:00:00 2001 From: RoyYang0714 Date: Wed, 21 Aug 2024 17:15:30 +0200 
Subject: [PATCH 06/19] feat: Add fix sized resize, update vis functional and fix test error. --- docs/source/user_guide/3D_visualization.ipynb | 2 +- docs/source/user_guide/visualization.ipynb | 2 +- tests/vis4d-test-data | 2 +- tests/zoo/cc_3dt_test.py | 12 + vis4d/data/transforms/resize.py | 283 +++++++++--------- vis4d/op/base/vit.py | 2 +- vis4d/vis/functional/__init__.py | 25 -- .../image.py => image/functional.py} | 50 ++-- .../functional.py} | 10 +- 9 files changed, 186 insertions(+), 202 deletions(-) delete mode 100644 vis4d/vis/functional/__init__.py rename vis4d/vis/{functional/image.py => image/functional.py} (92%) rename vis4d/vis/{functional/pointcloud.py => pointcloud/functional.py} (93%) diff --git a/docs/source/user_guide/3D_visualization.ipynb b/docs/source/user_guide/3D_visualization.ipynb index 1d45f813..b9355051 100644 --- a/docs/source/user_guide/3D_visualization.ipynb +++ b/docs/source/user_guide/3D_visualization.ipynb @@ -20,7 +20,7 @@ "os.environ[\"WEBRTC_IP\"] = \"127.0.0.1\"\n", "\n", "import pickle\n", - "from vis4d.vis.functional import show_points\n", + "from vis4d.vis.pointcloud.functional import show_points\n", "import numpy as np" ] }, diff --git a/docs/source/user_guide/visualization.ipynb b/docs/source/user_guide/visualization.ipynb index 30e92e35..fbd5c85e 100644 --- a/docs/source/user_guide/visualization.ipynb +++ b/docs/source/user_guide/visualization.ipynb @@ -30,7 +30,7 @@ "from __future__ import annotations\n", "\n", "from vis4d.common.typing import NDArrayF64, NDArrayI64\n", - "from vis4d.vis.functional import imshow_bboxes, imshow_masks, imshow_topk_bboxes, imshow, draw_bboxes, draw_masks, imshow_track_matches\n", + "from vis4d.vis.image.functional import imshow_bboxes, imshow_masks, imshow_topk_bboxes, imshow, draw_bboxes, draw_masks, imshow_track_matches\n", "\n", "import pickle\n", "import numpy as np" diff --git a/tests/vis4d-test-data b/tests/vis4d-test-data index 1e52c194..79af15a3 160000 --- a/tests/vis4d-test-data 
+++ b/tests/vis4d-test-data @@ -1 +1 @@ -Subproject commit 1e52c194859cfc09ee2f10af595303da3646d7d3 +Subproject commit 79af15a3d98116d8400b7489e6bcaf590c1eaff5 diff --git a/tests/zoo/cc_3dt_test.py b/tests/zoo/cc_3dt_test.py index 93f04e18..67475a8d 100644 --- a/tests/zoo/cc_3dt_test.py +++ b/tests/zoo/cc_3dt_test.py @@ -96,6 +96,18 @@ def test_nusc_vis(self) -> None: ) ) + def test_nusc_test(self) -> None: + """Test the config.""" + cfg_gt = f"{self.gt_config_path}/cc_3dt_nusc_test.yaml" + + self.assertTrue( + compare_configs( + f"{self.config_prefix}.cc_3dt_nusc_test", + cfg_gt, + self.varying_keys, + ) + ) + def test_bevformer_base_velo_lstm_nusc(self) -> None: """Test the config.""" cfg_gt = ( diff --git a/vis4d/data/transforms/resize.py b/vis4d/data/transforms/resize.py index f157bd88..c829f7a0 100644 --- a/vis4d/data/transforms/resize.py +++ b/vis4d/data/transforms/resize.py @@ -50,6 +50,7 @@ def __init__( align_long_edge: bool = False, resize_short_edge: bool = False, allow_overflow: bool = False, + fixed_scale: bool = False, ) -> None: """Creates an instance of the class. @@ -78,14 +79,63 @@ def __init__( to the smallest size such that it is no smaller than shape. Otherwise, we scale the image to the largest size such that it is no larger than shape. Defaults to False. + fixed_scale (bool, optional): If set to True, we scale the image + without offset. Defaults to False. 
""" self.shape = shape self.keep_ratio = keep_ratio + + assert multiscale_mode in {"list", "range"} self.multiscale_mode = multiscale_mode + + assert ( + scale_range[0] <= scale_range[1] + ), f"Invalid scale range: {scale_range[1]} < {scale_range[0]}" self.scale_range = scale_range + self.align_long_edge = align_long_edge self.resize_short_edge = resize_short_edge self.allow_overflow = allow_overflow + self.fixed_scale = fixed_scale + + def _get_target_shape( + self, input_shape: tuple[int, int] + ) -> tuple[int, int]: + """Generate possibly random target shape.""" + if self.multiscale_mode == "range": + assert isinstance( + self.shape, tuple + ), "Specify shape as tuple when using multiscale mode range." + if self.scale_range[0] < self.scale_range[1]: # do multi-scale + w_scale = ( + random.uniform(0, 1) + * (self.scale_range[1] - self.scale_range[0]) + + self.scale_range[0] + ) + h_scale = ( + random.uniform(0, 1) + * (self.scale_range[1] - self.scale_range[0]) + + self.scale_range[0] + ) + else: + h_scale = w_scale = 1.0 + + shape = int(self.shape[0] * h_scale), int(self.shape[1] * w_scale) + else: + assert isinstance( + self.shape, list + ), "Specify shape as list when using multiscale mode list." 
+ shape = random.choice(self.shape) + + return get_resize_shape( + input_shape, + shape, + self.keep_ratio, + self.align_long_edge, + self.resize_short_edge, + self.allow_overflow, + self.fixed_scale, + ) def __call__( self, images: list[NDArrayF32] @@ -94,16 +144,7 @@ def __call__( image = images[0] im_shape = (image.shape[1], image.shape[2]) - target_shape = get_target_shape( - im_shape, - self.shape, - self.keep_ratio, - self.multiscale_mode, - self.scale_range, - self.align_long_edge, - self.resize_short_edge, - self.allow_overflow, - ) + target_shape = self._get_target_shape(im_shape) scale_factor = ( target_shape[1] / im_shape[1], target_shape[0] / im_shape[0], @@ -117,6 +158,66 @@ def __call__( return resize_params, target_shapes +def get_resize_shape( + original_shape: tuple[int, int], + new_shape: tuple[int, int], + keep_ratio: bool = True, + align_long_edge: bool = False, + resize_short_edge: bool = False, + allow_overflow: bool = False, + fixed_scale: bool = False, +) -> tuple[int, int]: + """Get shape for resize, considering keep_ratio and align_long_edge. + + Args: + original_shape (tuple[int, int]): Original shape in [H, W]. + new_shape (tuple[int, int]): New shape in [H, W]. + keep_ratio (bool, optional): Whether to keep the aspect ratio. + Defaults to True. + align_long_edge (bool, optional): Whether to align the long edge of + the original shape with the long edge of the new shape. + Defaults to False. + resize_short_edge (bool, optional): Whether to resize according to the + short edge. Defaults to False. + allow_overflow (bool, optional): Whether to allow overflow. + Defaults to False. + fixed_scale (bool, optional): Whether to use fixed scale. + + Returns: + tuple[int, int]: The new shape in [H, W]. 
+ """ + h, w = original_shape + new_h, new_w = new_shape + + if keep_ratio: + if allow_overflow: + comp_fn = max + else: + comp_fn = min + + if align_long_edge: + long_edge, short_edge = max(new_shape), min(new_shape) + scale_factor = comp_fn( + long_edge / max(h, w), short_edge / min(h, w) + ) + elif resize_short_edge: + short_edge = min(original_shape) + new_short_edge = min(new_shape) + scale_factor = new_short_edge / short_edge + else: + scale_factor = comp_fn(new_w / w, new_h / h) + + if fixed_scale: + offset = 0.0 + else: + offset = 0.5 + + new_h = int(h * scale_factor + offset) + new_w = int(w * scale_factor + offset) + + return new_h, new_w + + @Transform([K.images, "transforms.resize.target_shape"], K.images) class ResizeImages: """Resize Images.""" @@ -166,6 +267,36 @@ def __call__( return images +def resize_image( + inputs: NDArrayF32, + shape: tuple[int, int], + interpolation: str = "bilinear", + antialias: bool = False, + backend: str = "torch", +) -> NDArrayF32: + """Resize image.""" + if backend == "torch": + image = torch.from_numpy(inputs).permute(0, 3, 1, 2) + image = resize_tensor(image, shape, interpolation, antialias) + return image.permute(0, 2, 3, 1).numpy() + + if backend == "cv2": + cv2_interp_codes = { + "nearest": INTER_NEAREST, + "bilinear": INTER_LINEAR, + "bicubic": INTER_CUBIC, + "area": INTER_AREA, + "lanczos": INTER_LANCZOS4, + } + return cv2.resize( # pylint: disable=no-member, unsubscriptable-object + inputs[0].astype(np.uint8), + (shape[1], shape[0]), + interpolation=cv2_interp_codes[interpolation], + )[None, ...].astype(np.float32) + + raise ValueError(f"Invalid imresize backend: {backend}") + + @Transform([K.boxes2d, "transforms.resize.scale_factor"], K.boxes2d) class ResizeBoxes2D: """Resize list of 2D bounding boxes.""" @@ -389,34 +520,6 @@ def __call__( return intrinsics -def resize_image( - inputs: NDArrayF32, - shape: tuple[int, int], - interpolation: str = "bilinear", - antialias: bool = False, - backend: str = "torch", 
-) -> NDArrayF32: - """Resize image.""" - if backend == "torch": - image = torch.from_numpy(inputs).permute(0, 3, 1, 2) - image = resize_tensor(image, shape, interpolation, antialias) - return image.permute(0, 2, 3, 1).numpy() - if backend == "cv2": - cv2_interp_codes = { - "nearest": INTER_NEAREST, - "bilinear": INTER_LINEAR, - "bicubic": INTER_CUBIC, - "area": INTER_AREA, - "lanczos": INTER_LANCZOS4, - } - return cv2.resize( # pylint: disable=no-member, unsubscriptable-object - inputs[0].astype(np.uint8), - (shape[1], shape[0]), - interpolation=cv2_interp_codes[interpolation], - )[None, ...].astype(np.float32) - raise ValueError(f"Invalid imresize backend: {backend}") - - def resize_tensor( inputs: Tensor, shape: tuple[int, int], @@ -434,107 +537,3 @@ def resize_tensor( antialias=antialias, ) return output - - -def get_resize_shape( - original_shape: tuple[int, int], - new_shape: tuple[int, int], - keep_ratio: bool = True, - align_long_edge: bool = False, - resize_short_edge: bool = False, - allow_overflow: bool = False, -) -> tuple[int, int]: - """Get shape for resize, considering keep_ratio and align_long_edge. - - Args: - original_shape (tuple[int, int]): Original shape in [H, W]. - new_shape (tuple[int, int]): New shape in [H, W]. - keep_ratio (bool, optional): Whether to keep the aspect ratio. - Defaults to True. - align_long_edge (bool, optional): Whether to align the long edge of - the original shape with the long edge of the new shape. - Defaults to False. - resize_short_edge (bool, optional): Whether to resize according to the - short edge. Defaults to False. - allow_overflow (bool, optional): Whether to allow overflow. - Defaults to False. - - Returns: - tuple[int, int]: The new shape in [H, W]. 
- """ - h, w = original_shape - new_h, new_w = new_shape - if keep_ratio: - if allow_overflow: - comp_fn = max - else: - comp_fn = min - if align_long_edge: - long_edge, short_edge = max(new_shape), min(new_shape) - scale_factor = comp_fn( - long_edge / max(h, w), short_edge / min(h, w) - ) - elif resize_short_edge: - short_edge = min(original_shape) - new_short_edge = min(new_shape) - scale_factor = new_short_edge / short_edge - else: - scale_factor = comp_fn(new_w / w, new_h / h) - new_h = int(h * scale_factor + 0.5) - new_w = int(w * scale_factor + 0.5) - return new_h, new_w - - -def get_target_shape( - input_shape: tuple[int, int], - shape: tuple[int, int] | list[tuple[int, int]], - keep_ratio: bool = False, - multiscale_mode: str = "range", - scale_range: tuple[float, float] = (1.0, 1.0), - align_long_edge: bool = False, - resize_short_edge: bool = False, - allow_overflow: bool = False, -) -> tuple[int, int]: - """Generate possibly random target shape.""" - assert multiscale_mode in {"list", "range"} - if multiscale_mode == "list": - assert isinstance( - shape, list - ), "Specify shape as list when using multiscale mode list." - assert len(shape) >= 1 - else: - assert isinstance( - shape, tuple - ), "Specify shape as tuple when using multiscale mode range." 
- assert ( - scale_range[0] <= scale_range[1] - ), f"Invalid scale range: {scale_range[1]} < {scale_range[0]}" - - if multiscale_mode == "range": - assert isinstance(shape, tuple) - if scale_range[0] < scale_range[1]: # do multi-scale - w_scale = ( - random.uniform(0, 1) * (scale_range[1] - scale_range[0]) - + scale_range[0] - ) - h_scale = ( - random.uniform(0, 1) * (scale_range[1] - scale_range[0]) - + scale_range[0] - ) - else: - h_scale = w_scale = 1.0 - - shape = int(shape[0] * h_scale), int(shape[1] * w_scale) - else: - assert isinstance(shape, list) - shape = random.choice(shape) - - shape = get_resize_shape( - input_shape, - shape, - keep_ratio, - align_long_edge, - resize_short_edge, - allow_overflow, - ) - return shape diff --git a/vis4d/op/base/vit.py b/vis4d/op/base/vit.py index fdbdbd05..43404075 100644 --- a/vis4d/op/base/vit.py +++ b/vis4d/op/base/vit.py @@ -3,7 +3,7 @@ from __future__ import annotations import torch -from timm.models.helpers import named_apply +from timm.models import named_apply from torch import nn from ..layer import PatchEmbed, TransformerBlock diff --git a/vis4d/vis/functional/__init__.py b/vis4d/vis/functional/__init__.py deleted file mode 100644 index 5a46ff0e..00000000 --- a/vis4d/vis/functional/__init__.py +++ /dev/null @@ -1,25 +0,0 @@ -"""Function interface for visualization functions.""" - -from .image import ( - draw_bboxes, - draw_masks, - imshow, - imshow_bboxes, - imshow_masks, - imshow_topk_bboxes, - imshow_track_matches, -) -from .pointcloud import draw_points, show_3d, show_points - -__all__ = [ - "imshow", - "draw_masks", - "draw_bboxes", - "imshow_bboxes", - "imshow_masks", - "imshow_topk_bboxes", - "imshow_track_matches", - "show_3d", - "draw_points", - "show_points", -] diff --git a/vis4d/vis/functional/image.py b/vis4d/vis/image/functional.py similarity index 92% rename from vis4d/vis/functional/image.py rename to vis4d/vis/image/functional.py index baf51643..4fbb7118 100644 --- 
a/vis4d/vis/functional/image.py +++ b/vis4d/vis/image/functional.py @@ -13,22 +13,24 @@ NDArrayF32, NDArrayUI8, ) -from vis4d.vis.image.canvas import CanvasBackend, PillowCanvasBackend -from vis4d.vis.image.util import ( + +from ..util import generate_color_map +from .canvas import CanvasBackend, PillowCanvasBackend +from .util import ( preprocess_boxes, preprocess_boxes3d, preprocess_image, preprocess_masks, project_point, ) -from vis4d.vis.image.viewer import ImageViewerBackend, MatplotlibImageViewer -from vis4d.vis.util import generate_color_map +from .viewer import ImageViewerBackend, MatplotlibImageViewer def imshow( image: ArrayLike, image_mode: str = "RGB", image_viewer: ImageViewerBackend = MatplotlibImageViewer(), + file_path: str | None = None, ) -> None: """Shows a single image. @@ -37,28 +39,13 @@ def imshow( image_mode (str, optional): Image Mode. Defaults to "RGB". image_viewer (ImageViewerBackend, optional): The Image viewer backend to use. Defaults to MatplotlibImageViewer(). + file_path (str): The path to save the image to. Defaults to None. """ image = preprocess_image(image, image_mode) image_viewer.show_images([image]) - -def imsave( - image: ArrayLike, - file_path: str, - image_mode: str = "RGB", - image_viewer: ImageViewerBackend = MatplotlibImageViewer(), -) -> None: - """Shows a single image. - - Args: - image (NDArrayNumber): The image to show. - file_path (str): The path to save the image to. - image_mode (str, optional): Image Mode. Defaults to "RGB". - image_viewer (ImageViewerBackend, optional): The Image viewer backend - to use. Defaults to MatplotlibImageViewer(). 
- """ - image = preprocess_image(image, image_mode) - image_viewer.save_images([image], [file_path]) + if file_path is not None: + image_viewer.save_images([image], [file_path]) def draw_masks( @@ -158,6 +145,7 @@ def imshow_bboxes( image_mode: str = "RGB", box_width: int = 1, image_viewer: ImageViewerBackend = MatplotlibImageViewer(), + file_path: str | None = None, ) -> None: """Shows the bounding boxes overlayed on the given image. @@ -176,6 +164,7 @@ class id to class name box_width (int, optional): Width of the box border. Defaults to 1. image_viewer (ImageViewerBackend, optional): The Image viewer backend to use. Defaults to MatplotlibImageViewer(). + file_path (str): The path to save the image to. Defaults to None. """ image = preprocess_image(image, mode=image_mode) img = draw_bboxes( @@ -189,7 +178,7 @@ class id to class name image_mode, box_width, ) - imshow(img, image_mode, image_viewer) + imshow(img, image_mode, image_viewer, file_path) def draw_bbox3d( @@ -244,6 +233,7 @@ def imshow_bboxes3d( n_colors: int = 50, image_mode: str = "RGB", image_viewer: ImageViewerBackend = MatplotlibImageViewer(), + file_path: str | None = None, ) -> None: """Show image with bounding boxes.""" image = preprocess_image(image, mode=image_mode) @@ -259,7 +249,7 @@ def imshow_bboxes3d( n_colors=n_colors, image_mode=image_mode, ) - imshow(img, image_mode, image_viewer) + imshow(img, image_mode, image_viewer, file_path) def imshow_masks( @@ -270,6 +260,7 @@ def imshow_masks( image_mode: str = "RGB", canvas: CanvasBackend = PillowCanvasBackend(), image_viewer: ImageViewerBackend = MatplotlibImageViewer(), + file_path: str | None = None, ) -> None: """Shows semantic masks overlayed over the given image. @@ -286,11 +277,13 @@ def imshow_masks( Defaults to PillowCanvasBackend(). image_viewer (ImageViewerBackend, optional): The Image viewer backend to use. Defaults to MatplotlibImageViewer(). + file_path (str): The path to save the image to. Defaults to None. 
""" imshow( draw_masks(image, masks, class_ids, n_colors, image_mode, canvas), image_mode, image_viewer, + file_path, ) @@ -306,6 +299,7 @@ def imshow_topk_bboxes( image_mode: str = "RGB", box_width: int = 1, image_viewer: ImageViewerBackend = MatplotlibImageViewer(), + file_path: str | None = None, ) -> None: """Visualize the 'topk' bounding boxes with highest score. @@ -325,6 +319,7 @@ class id to class name box_width (int, optional): Width of the box border. Defaults to 1. image_viewer (ImageViewerBackend, optional): The Image viewer backend to use. Defaults to MatplotlibImageViewer(). + file_path (str): The path to save the image to. Defaults to None. """ scores = array_to_numpy(scores, n_dims=1, dtype=np.float32) @@ -344,6 +339,7 @@ class id to class name image_mode, box_width, image_viewer, + file_path, ) @@ -356,6 +352,7 @@ def imshow_track_matches( ref_track_ids: list[ArrayLikeInt], image_mode: str = "RGB", image_viewer: ImageViewerBackend = MatplotlibImageViewer(), + file_path: str | None = None, ) -> None: """Visualize paired bounding boxes successively for batched frame pairs. @@ -372,6 +369,7 @@ def imshow_track_matches( image_mode (str, optional): Color mode if the image. Defaults to "RGB". image_viewer (ImageViewerBackend, optional): The Image viewer backend to use. Defaults to MatplotlibImageViewer(). + file_path (str): The path to save the image to. Defaults to None. 
""" key_imgs_np = arrays_to_numpy(*key_imgs, n_dims=3, dtype=np.float32) ref_imgs_np = arrays_to_numpy(*ref_imgs, n_dims=3, dtype=np.float32) @@ -404,12 +402,14 @@ def imshow_track_matches( key_box[key_i], image_mode=image_mode, image_viewer=image_viewer, + file_path=file_path, ) imshow_bboxes( ref_image, ref_box[ref_i], image_mode=image_mode, image_viewer=image_viewer, + file_path=file_path, ) else: # stack imgs horizontal @@ -420,4 +420,4 @@ def imshow_track_matches( ref_image, ref_box[batch_i], image_mode=image_mode ) stacked_img = np.vstack([k_img, r_img]) - imshow(stacked_img, image_mode, image_viewer) + imshow(stacked_img, image_mode, image_viewer, file_path) diff --git a/vis4d/vis/functional/pointcloud.py b/vis4d/vis/pointcloud/functional.py similarity index 93% rename from vis4d/vis/functional/pointcloud.py rename to vis4d/vis/pointcloud/functional.py index ed4335a6..fec7fd83 100644 --- a/vis4d/vis/functional/pointcloud.py +++ b/vis4d/vis/pointcloud/functional.py @@ -3,12 +3,10 @@ from __future__ import annotations from vis4d.common.typing import ArrayLikeFloat, ArrayLikeInt -from vis4d.vis.pointcloud.scene import Scene3D -from vis4d.vis.pointcloud.viewer import ( - Open3DVisualizationBackend, - PointCloudVisualizerBackend, -) -from vis4d.vis.util import DEFAULT_COLOR_MAPPING + +from ..util import DEFAULT_COLOR_MAPPING +from .scene import Scene3D +from .viewer import Open3DVisualizationBackend, PointCloudVisualizerBackend def show_3d( From 4f490d14d4b4949fc0a6ef8ec2c656d70c37133b Mon Sep 17 00:00:00 2001 From: RoyYang0714 Date: Wed, 21 Aug 2024 17:33:07 +0200 Subject: [PATCH 07/19] fix: Fix tests. 
--- docs/source/user_guide/getting_started.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/user_guide/getting_started.ipynb b/docs/source/user_guide/getting_started.ipynb index db99ef46..08824478 100644 --- a/docs/source/user_guide/getting_started.ipynb +++ b/docs/source/user_guide/getting_started.ipynb @@ -251,7 +251,7 @@ "from vis4d.model.detect.faster_rcnn import FasterRCNN\n", "\n", "from vis4d.data.const import CommonKeys as K\n", - "from vis4d.vis.functional.image import imshow_bboxes\n", + "from vis4d.vis.image.functional import imshow_bboxes\n", "\n", "from vis4d.config import instantiate_classes\n", "from vis4d.zoo.base.datasets.coco import get_coco_detection_cfg" From 17c4e02e8c8d2c2fa98470c2e9773c29fba5e854 Mon Sep 17 00:00:00 2001 From: RoyYang0714 Date: Wed, 21 Aug 2024 17:53:57 +0200 Subject: [PATCH 08/19] fix: Update ipynb for test. --- docs/source/user_guide/visualization.ipynb | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/docs/source/user_guide/visualization.ipynb b/docs/source/user_guide/visualization.ipynb index fbd5c85e..6c1c285c 100644 --- a/docs/source/user_guide/visualization.ipynb +++ b/docs/source/user_guide/visualization.ipynb @@ -15,17 +15,7 @@ "cell_type": "code", "execution_count": 1, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Jupyter environment detected. 
Enabling Open3D WebVisualizer.\n", - "[Open3D INFO] WebRTC GUI backend enabled.\n", - "[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.\n" - ] - } - ], + "outputs": [], "source": [ "from __future__ import annotations\n", "\n", @@ -453,7 +443,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.8" + "version": "3.11.9" }, "vscode": { "interpreter": { From d042e2f680dc68a4270c8a1ed38dddcaa7882a3f Mon Sep 17 00:00:00 2001 From: RoyYang0714 Date: Thu, 22 Aug 2024 11:34:55 +0200 Subject: [PATCH 09/19] feat: Update docs. --- docs/source/conf.py | 2 +- docs/source/datasets.rst | 3 +++ docs/source/dev_guide/cli.rst | 12 +++++++----- docs/source/faq/trouble.rst | 4 ---- 4 files changed, 11 insertions(+), 10 deletions(-) create mode 100644 docs/source/datasets.rst diff --git a/docs/source/conf.py b/docs/source/conf.py index 08f9b4c9..68d0f507 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -24,7 +24,7 @@ project = "Vis4D" copyright = "2022, ETH Zurich" -author = "Tobias Fischer" +author = "Vis4D Team" # -- General configuration --------------------------------------------------- diff --git a/docs/source/datasets.rst b/docs/source/datasets.rst new file mode 100644 index 00000000..3acad4ec --- /dev/null +++ b/docs/source/datasets.rst @@ -0,0 +1,3 @@ +******** +Datasets +******** \ No newline at end of file diff --git a/docs/source/dev_guide/cli.rst b/docs/source/dev_guide/cli.rst index fe62471b..538e3ede 100644 --- a/docs/source/dev_guide/cli.rst +++ b/docs/source/dev_guide/cli.rst @@ -1,10 +1,11 @@ ### CLI ### + We provide a command line interface for training and evaluating your models. Assuming you have installed the package using pip, you can use the command `vis4d` to access the CLI. -Alternatively, you can run the CLI using `python -m vis4d.engine.cli` or `python -m vis4d.pl.cli` if you want to use the PyTorch Lightning version. 
+Alternatively, you can run the CLI using `python -m vis4d.engine.run` or `python -m vis4d.pl.run` if you want to use the PyTorch Lightning version. The CLI relies on a configuration file to specify each experiment. We use `ml_collections `_ as underlying framework to define the configuration files. You can read up on our configuration files in the `Config System `_ section. @@ -12,6 +13,7 @@ You can read up on our configuration files in the `Config System `_ section. @@ -20,6 +22,7 @@ We support both, our own training engine as well as `PyTorch Lightning Date: Fri, 23 Aug 2024 21:19:02 +0200 Subject: [PATCH 10/19] fix: Separate to hdf5 script. --- tests/data/io/to_hdf5_test.py | 2 +- vis4d/data/io/hdf5.py | 62 ---------------------------- vis4d/data/io/to_hdf5.py | 76 +++++++++++++++++++++++++++++++++++ 3 files changed, 77 insertions(+), 63 deletions(-) create mode 100644 vis4d/data/io/to_hdf5.py diff --git a/tests/data/io/to_hdf5_test.py b/tests/data/io/to_hdf5_test.py index 5e469746..caf61923 100644 --- a/tests/data/io/to_hdf5_test.py +++ b/tests/data/io/to_hdf5_test.py @@ -4,7 +4,7 @@ import unittest from tests.util import get_test_data -from vis4d.data.io.hdf5 import convert_dataset +from vis4d.data.io.to_hdf5 import convert_dataset class TestHDF5(unittest.TestCase): diff --git a/vis4d/data/io/hdf5.py b/vis4d/data/io/hdf5.py index 15107770..3d6b594e 100644 --- a/vis4d/data/io/hdf5.py +++ b/vis4d/data/io/hdf5.py @@ -6,12 +6,10 @@ from __future__ import annotations -import argparse import os from typing import Literal import numpy as np -from tqdm import tqdm from vis4d.common.imports import H5PY_AVAILABLE @@ -242,63 +240,3 @@ def close(self) -> None: for client, _ in self.db_cache.values(): client.close() self.db_cache.clear() - - -def convert_dataset(source_dir: str) -> None: - """Convert a dataset to HDF5 format. - - This function converts an arbitary dictionary to an HDF5 file. The keys - inside the HDF5 file preserve the directory structure of the original. 
- - As an example, if you convert "/path/to/dataset" to HDF5, the resulting - file will be: "/path/to/dataset.hdf5". The file "relative/path/to/file" - will be stored at "relative/path/to/file" inside /path/to/dataset.hdf5. - - Args: - source_dir (str): The path to the dataset to convert. - """ - if not os.path.exists(source_dir): - raise FileNotFoundError(f"No such file or directory: {source_dir}") - - source_dir = os.path.join(source_dir, "") # must end with trailing slash - hdf5_path = source_dir.rstrip("/") + ".hdf5" - if os.path.exists(hdf5_path): - print(f"File {hdf5_path} already exists! Skipping {source_dir}") - return - - print(f"Converting dataset at: {source_dir}") - hdf5_file = h5py.File(hdf5_path, mode="w") - sub_dirs = list(os.walk(source_dir)) - file_count = sum(len(files) for (_, _, files) in sub_dirs) - - with tqdm(total=file_count) as pbar: - for root, _, files in sub_dirs: - g_name = root.replace(source_dir, "") - g = hdf5_file.create_group(g_name) if g_name else hdf5_file - for f in files: - filepath = os.path.join(root, f) - if os.path.isfile(filepath): - with open(filepath, "rb") as fp: - file_content = fp.read() - g.create_dataset( - f, data=np.frombuffer(file_content, dtype="uint8") - ) - pbar.update() - - hdf5_file.close() - print("done.") - - -if __name__ == "__main__": # pragma: no cover - parser = argparse.ArgumentParser( - description="Converts a dataset at the specified path to hdf5. The " - "local directory structure is preserved in the hdf5 file." 
- ) - parser.add_argument( - "-p", - "--path", - required=True, - help="path to the root folder of a specific dataset to convert", - ) - args = parser.parse_args() - convert_dataset(args.path) diff --git a/vis4d/data/io/to_hdf5.py b/vis4d/data/io/to_hdf5.py new file mode 100644 index 00000000..4a2161a5 --- /dev/null +++ b/vis4d/data/io/to_hdf5.py @@ -0,0 +1,76 @@ +"""Script to convert a dataset to hdf5 format.""" + +from __future__ import annotations + +import argparse +import os + +import numpy as np +from tqdm import tqdm + +from vis4d.common.imports import H5PY_AVAILABLE + +if H5PY_AVAILABLE: + import h5py +else: + raise ImportError("Please install h5py to enable HDF5Backend.") + + +def convert_dataset(source_dir: str) -> None: + """Convert a dataset to HDF5 format. + + This function converts an arbitary dictionary to an HDF5 file. The keys + inside the HDF5 file preserve the directory structure of the original. + + As an example, if you convert "/path/to/dataset" to HDF5, the resulting + file will be: "/path/to/dataset.hdf5". The file "relative/path/to/file" + will be stored at "relative/path/to/file" inside /path/to/dataset.hdf5. + + Args: + source_dir (str): The path to the dataset to convert. + """ + if not os.path.exists(source_dir): + raise FileNotFoundError(f"No such file or directory: {source_dir}") + + source_dir = os.path.join(source_dir, "") # must end with trailing slash + hdf5_path = source_dir.rstrip("/") + ".hdf5" + if os.path.exists(hdf5_path): + print(f"File {hdf5_path} already exists! 
Skipping {source_dir}") + return + + print(f"Converting dataset at: {source_dir}") + hdf5_file = h5py.File(hdf5_path, mode="w") + sub_dirs = list(os.walk(source_dir)) + file_count = sum(len(files) for (_, _, files) in sub_dirs) + + with tqdm(total=file_count) as pbar: + for root, _, files in sub_dirs: + g_name = root.replace(source_dir, "") + g = hdf5_file.create_group(g_name) if g_name else hdf5_file + for f in files: + filepath = os.path.join(root, f) + if os.path.isfile(filepath): + with open(filepath, "rb") as fp: + file_content = fp.read() + g.create_dataset( + f, data=np.frombuffer(file_content, dtype="uint8") + ) + pbar.update() + + hdf5_file.close() + print("done.") + + +if __name__ == "__main__": # pragma: no cover + parser = argparse.ArgumentParser( + description="Converts a dataset at the specified path to hdf5. The " + "local directory structure is preserved in the hdf5 file." + ) + parser.add_argument( + "-p", + "--path", + required=True, + help="path to the root folder of a specific dataset to convert", + ) + args = parser.parse_args() + convert_dataset(args.path) From 539b8a9d3bf6221b82c69b6f5c22b1e4d657e2c6 Mon Sep 17 00:00:00 2001 From: RoyYang0714 Date: Tue, 27 Aug 2024 12:18:23 +0200 Subject: [PATCH 11/19] feat: Give user freedom to select dist tmp dir. 
--- vis4d/common/distributed.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/vis4d/common/distributed.py b/vis4d/common/distributed.py index 8ba797b8..a87d96c8 100644 --- a/vis4d/common/distributed.py +++ b/vis4d/common/distributed.py @@ -264,7 +264,7 @@ def all_gather_object_gpu( # type: ignore def create_tmpdir( - rank: int, tmpdir: None | str = None + rank: int, tmpdir: None | str = None, use_system_tmp: bool = True ) -> str: # pragma: no cover """Create and distribute a temporary directory across all processes.""" if tmpdir is not None: @@ -273,10 +273,10 @@ def create_tmpdir( if rank == 0: # create a temporary directory default_tmpdir = tempfile.gettempdir() - if default_tmpdir is not None: + if default_tmpdir is not None and use_system_tmp: dist_tmpdir = os.path.join(default_tmpdir, ".dist_tmp") else: - dist_tmpdir = ".dist_tmp" + dist_tmpdir = os.path.join("vis4d-workspace", ".dist_tmp") os.makedirs(dist_tmpdir, exist_ok=True) tmpdir = tempfile.mkdtemp(dir=dist_tmpdir) else: @@ -288,6 +288,7 @@ def all_gather_object_cpu( # type: ignore data: Any, tmpdir: None | str = None, rank_zero_return_only: bool = True, + use_system_tmp: bool = True, ) -> list[Any] | None: # pragma: no cover """Share arbitrary picklable data via file system caching. @@ -304,7 +305,7 @@ def all_gather_object_cpu( # type: ignore return [data] # make tmp dir - tmpdir = create_tmpdir(rank, tmpdir) + tmpdir = create_tmpdir(rank, tmpdir, use_system_tmp) # encode & save with open(os.path.join(tmpdir, f"part_{rank}.pkl"), "wb") as f: From 8b0055fc5f3afb5d338fe525d836a46248a70474 Mon Sep 17 00:00:00 2001 From: RoyYang0714 Date: Tue, 27 Aug 2024 17:44:15 +0200 Subject: [PATCH 12/19] fix: Fix pylint. 
--- vis4d/common/distributed.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vis4d/common/distributed.py b/vis4d/common/distributed.py index a87d96c8..36acaa19 100644 --- a/vis4d/common/distributed.py +++ b/vis4d/common/distributed.py @@ -295,7 +295,8 @@ def all_gather_object_cpu( # type: ignore Args: data: any picklable object. tmpdir: Save path for temporary files. If None, safely create tmpdir. - rank_zero_return_only: if results should only be returned on rank 0 + rank_zero_return_only: if results should only be returned on rank 0. + use_system_tmp: if use system tmpdir or not. Returns: list[Any]: list of data gathered from each process. From 68c99ce7d0750dfab8d0b25fdd97e9515b72e00a Mon Sep 17 00:00:00 2001 From: RoyYang0714 Date: Fri, 30 Aug 2024 21:32:00 +0200 Subject: [PATCH 13/19] feat: Add pl trainer ddp timeout and separate evaluator callback metircs. --- vis4d/engine/callbacks/evaluator.py | 57 ++++++++++++++++++----------- vis4d/eval/coco/detect.py | 2 - vis4d/pl/trainer.py | 6 ++- 3 files changed, 40 insertions(+), 25 deletions(-) diff --git a/vis4d/engine/callbacks/evaluator.py b/vis4d/engine/callbacks/evaluator.py index 989b005c..b7b34bc4 100644 --- a/vis4d/engine/callbacks/evaluator.py +++ b/vis4d/engine/callbacks/evaluator.py @@ -90,13 +90,26 @@ def on_test_epoch_end( self.evaluator.gather(all_gather_object_cpu) synchronize() - log_dict = self.evaluate() - log_dict = broadcast(log_dict) + self.process() + + log_dict: MetricLogs = {} + for metric in self.metrics_to_eval: + metric_dict = self.evaluate(metric) + metric_dict = broadcast(metric_dict) + assert isinstance(metric_dict, dict) + log_dict.update(metric_dict) + self.evaluator.reset() + return log_dict @rank_zero_only - def evaluate(self) -> MetricLogs: + def process(self) -> None: + """Process the evaluator.""" + self.evaluator.process() + + @rank_zero_only + def evaluate(self, metric: str) -> MetricLogs: """Evaluate the performance after processing all input/output 
pairs. Returns: @@ -104,26 +117,26 @@ def evaluate(self) -> MetricLogs: keys are formatted as {metric_name}/{key_name}, and the values are the corresponding evaluated values. """ - rank_zero_info("Running evaluator %s...", str(self.evaluator)) - self.evaluator.process() - + rank_zero_info( + f"Running evaluator {str(self.evaluator)} with {metric} metric... ", + ) log_dict = {} - for metric in self.metrics_to_eval: - # Save output predictions. This is done here instead of - # on_test_batch_end because the evaluator may not have processed - # all batches yet. - if self.save_predictions: - output_dir = os.path.join(self.output_dir, metric) - self.evaluator.save(metric, output_dir) - - # Evaluate metric - metric_dict, metric_str = self.evaluator.evaluate(metric) - for k, v in metric_dict.items(): - log_k = metric + "/" + k - rank_zero_info("%s: %.4f", log_k, v) - log_dict[f"{metric}/{k}"] = v - rank_zero_info("Showing results for metric: %s", metric) - rank_zero_info(metric_str) + # Save output predictions. This is done here instead of + # on_test_batch_end because the evaluator may not have processed + # all batches yet. 
+ if self.save_predictions: + output_dir = os.path.join(self.output_dir, metric) + self.evaluator.save(metric, output_dir) + + # Evaluate metric + metric_dict, metric_str = self.evaluator.evaluate(metric) + for k, v in metric_dict.items(): + log_k = metric + "/" + k + rank_zero_info("%s: %.4f", log_k, v) + log_dict[f"{metric}/{k}"] = v + + rank_zero_info("Showing results for metric: %s", metric) + rank_zero_info(metric_str) return log_dict diff --git a/vis4d/eval/coco/detect.py b/vis4d/eval/coco/detect.py index 0acc4265..bf81db2f 100644 --- a/vis4d/eval/coco/detect.py +++ b/vis4d/eval/coco/detect.py @@ -131,7 +131,6 @@ def __init__( coco_gt_cats = self._coco_gt.loadCats(self._coco_gt.getCatIds()) self.cat_map = {c["name"]: c["id"] for c in coco_gt_cats} self._predictions: list[DictStrAny] = [] - self.coco_dt: COCO | None = None @property def metrics(self) -> list[str]: @@ -151,7 +150,6 @@ def gather(self, gather_func: GenericFunc) -> None: def reset(self) -> None: """Reset the saved predictions to start new round of evaluation.""" self._predictions = [] - self.coco_dt = None def process_batch( # type: ignore # pylint: disable=arguments-differ self, diff --git a/vis4d/pl/trainer.py b/vis4d/pl/trainer.py index 53fc8115..7d3db479 100644 --- a/vis4d/pl/trainer.py +++ b/vis4d/pl/trainer.py @@ -2,6 +2,7 @@ from __future__ import annotations +import datetime import os.path as osp from lightning.pytorch import Callback, Trainer @@ -31,6 +32,7 @@ def __init__( checkpoint_callback: ModelCheckpoint | None = None, wandb: bool = False, seed: int = -1, + timeout: int = 3600, **kwargs: ArgsType, ) -> None: """Perform some basic common setups at the beginning of a job. @@ -54,6 +56,7 @@ def __init__( seed (int, optional): The integer value seed for global random state. Defaults to -1. If -1, a random seed will be generated. This will be set by TrainingModule. + timeout: Timeout (seconds) for DDP connection. Default: 3600. 
""" self.work_dir = work_dir self.exp_name = exp_name @@ -126,7 +129,8 @@ def __init__( elif kwargs["devices"] > 1: # pragma: no cover if kwargs["accelerator"] == "gpu": ddp_plugin = DDPStrategy( - find_unused_parameters=find_unused_parameters + find_unused_parameters=find_unused_parameters, + timeout=datetime.timedelta(timeout), ) kwargs["strategy"] = ddp_plugin From 4ca57f69cbd17e5eaaf8b3dbf23ebc6d8fc08966 Mon Sep 17 00:00:00 2001 From: RoyYang0714 Date: Fri, 30 Aug 2024 21:45:55 +0200 Subject: [PATCH 14/19] fix: Fix lint. --- vis4d/engine/callbacks/evaluator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vis4d/engine/callbacks/evaluator.py b/vis4d/engine/callbacks/evaluator.py index b7b34bc4..c536e466 100644 --- a/vis4d/engine/callbacks/evaluator.py +++ b/vis4d/engine/callbacks/evaluator.py @@ -118,7 +118,7 @@ def evaluate(self, metric: str) -> MetricLogs: values are the corresponding evaluated values. """ rank_zero_info( - f"Running evaluator {str(self.evaluator)} with {metric} metric... ", + f"Running evaluator {str(self.evaluator)} with {metric} metric... " ) log_dict = {} From ae4523a00a656dfd705e41b7deb2d6f4c0b4dd40 Mon Sep 17 00:00:00 2001 From: RoyYang0714 Date: Mon, 2 Sep 2024 16:27:47 +0200 Subject: [PATCH 15/19] fix: Fix bbox3d visualizer for corner cases. Update Visualizer callback to support multiple visualizers. --- vis4d/engine/callbacks/visualizer.py | 17 ++++++-- vis4d/vis/image/bbox3d_visualizer.py | 62 +++++++++++++++------------- vis4d/vis/image/util.py | 22 +++++++--- 3 files changed, 63 insertions(+), 38 deletions(-) diff --git a/vis4d/engine/callbacks/visualizer.py b/vis4d/engine/callbacks/visualizer.py index 8b12159a..d8003112 100644 --- a/vis4d/engine/callbacks/visualizer.py +++ b/vis4d/engine/callbacks/visualizer.py @@ -27,6 +27,7 @@ def __init__( show: bool = False, save_to_disk: bool = True, save_prefix: str | None = None, + output_dir: str | None = None, **kwargs: ArgsType, ) -> None: """Init callback. 
@@ -35,11 +36,13 @@ def __init__( visualizer (Visualizer): Visualizer. visualize_train (bool): If the training data should be visualized. Defaults to False. - save_prefix (str): Output directory for saving the visualizations. show (bool): If the visualizations should be shown. Defaults to False. save_to_disk (bool): If the visualizations should be saved to disk. Defaults to True. + save_prefix (str): Output directory prefix for distinguish + different visualizations. + output_dir (str): Output directory for saving the visualizations. """ super().__init__(*args, **kwargs) self.visualizer = visualizer @@ -50,9 +53,15 @@ def __init__( if self.save_to_disk: assert ( - save_prefix is not None - ), "If save_to_disk is True, save_prefix must be provided." - self.output_dir = f"{self.save_prefix}/vis" + output_dir is not None + ), "If save_to_disk is True, output_dir must be provided." + + output_dir = os.path.join(output_dir, "vis") + + if save_prefix is not None: + output_dir = os.path.join(output_dir, save_prefix) + + self.output_dir = output_dir def setup(self) -> None: # pragma: no cover """Setup callback.""" diff --git a/vis4d/vis/image/bbox3d_visualizer.py b/vis4d/vis/image/bbox3d_visualizer.py index 74f4a7f3..8020fdc3 100644 --- a/vis4d/vis/image/bbox3d_visualizer.py +++ b/vis4d/vis/image/bbox3d_visualizer.py @@ -230,30 +230,32 @@ def process_single_image( [], ) - for center, corners, label, color, track_id in zip( - *preprocess_boxes3d( - image_hw, - boxes3d, - intrinsics, - extrinsics, - scores, - class_ids, - track_ids, - self.color_palette, - self.class_id_mapping, - axis_mode=self.axis_mode, - ) - ): - data_sample.boxes.append( - DetectionBox3D( - corners=corners, - label=label, - color=color, - track_id=track_id, + if len(boxes3d) != 0: + for center, corners, label, color, track_id in zip( + *preprocess_boxes3d( + image_hw, + boxes3d, + intrinsics, + extrinsics, + scores, + class_ids, + track_ids, + self.color_palette, + self.class_id_mapping, + 
axis_mode=self.axis_mode, ) - ) - if track_id is not None: - self.trajectories[track_id].append(center) + ): + data_sample.boxes.append( + DetectionBox3D( + corners=corners, + label=label, + color=color, + track_id=track_id, + ) + ) + if track_id is not None: + self.trajectories[track_id].append(center) + self._samples.append(data_sample) def show(self, cur_iter: int, blocking: bool = True) -> None: @@ -279,9 +281,13 @@ def _draw_image(self, sample: DataSample) -> NDArrayUI8: """ self.canvas.create_canvas(sample.image) - global_to_cam = inverse_rigid_transform( - torch.from_numpy(sample.extrinsics) - ).numpy() + if self.plot_trajectory: + assert ( + sample.extrinsics is not None + ), "Extrinsics is needed to plot trajectory." + global_to_cam = inverse_rigid_transform( + torch.from_numpy(sample.extrinsics) + ).numpy() for box in sample.boxes: self.canvas.draw_box_3d( @@ -300,8 +306,8 @@ def _draw_image(self, sample: DataSample) -> NDArrayUI8: if self.plot_trajectory: assert ( - sample.extrinsics is not None and box.track_id is not None - ), "Extrinsics and track id must be set to plot trajectory." + box.track_id is not None + ), "track id must be set to plot trajectory." 
trajectory = self.trajectories[box.track_id] for center in trajectory: diff --git a/vis4d/vis/image/util.py b/vis4d/vis/image/util.py index c2a20afb..e57c5ae0 100644 --- a/vis4d/vis/image/util.py +++ b/vis4d/vis/image/util.py @@ -223,18 +223,28 @@ def preprocess_boxes3d( class_ids_np = array_to_numpy(class_ids, n_dims=1, dtype=np.int32) track_ids_np = array_to_numpy(track_ids, n_dims=1, dtype=np.int32) - boxes3d_np = boxes3d_np[mask] - corners_np = corners_np[mask] - scores_np = scores_np[mask] if scores_np is not None else None - class_ids_np = class_ids_np[mask] if class_ids_np is not None else None - track_ids_np = track_ids_np[mask] if track_ids_np is not None else None - centers_proc: list[tuple[float, float, float]] = [] corners_proc: list[list[tuple[float, float, float]]] = [] colors_proc: list[tuple[int, int, int]] = [] labels_proc: list[str] = [] track_ids_proc: list[int] = [] + if len(mask) == 1: + if not mask[0]: + return ( + centers_proc, + corners_proc, + labels_proc, + colors_proc, + track_ids_proc, + ) + else: + boxes3d_np = boxes3d_np[mask] + corners_np = corners_np[mask] + scores_np = scores_np[mask] if scores_np is not None else None + class_ids_np = class_ids_np[mask] if class_ids_np is not None else None + track_ids_np = track_ids_np[mask] if track_ids_np is not None else None + for idx in range(corners_np.shape[0]): class_id = None if class_ids_np is None else class_ids_np[idx].item() score = None if scores_np is None else scores_np[idx].item() From d95fb728185f8311695335534157db18580ecf89 Mon Sep 17 00:00:00 2001 From: RoyYang0714 Date: Sun, 13 Oct 2024 22:32:18 +0200 Subject: [PATCH 16/19] feat: Add grad checking and set wandb id. 
--- pyproject.toml | 1 + vis4d/pl/trainer.py | 1 + vis4d/pl/training_module.py | 41 ++++++++++++++++++++++++++++++++++++- 3 files changed, 42 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9a06910a..22fd40ff 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,6 +14,7 @@ line_length = 79 [tool.pyright] include = ["vis4d"] +typeCheckingMode = "off" [tool.coverage] [tool.coverage.report] diff --git a/vis4d/pl/trainer.py b/vis4d/pl/trainer.py index 7d3db479..b3e067ec 100644 --- a/vis4d/pl/trainer.py +++ b/vis4d/pl/trainer.py @@ -75,6 +75,7 @@ def __init__( save_dir=work_dir, project=exp_name, name=version, + id=version, ) elif TENSORBOARD_AVAILABLE: exp_logger = TensorBoardLogger( diff --git a/vis4d/pl/training_module.py b/vis4d/pl/training_module.py index 8cc78153..48c6f7b6 100644 --- a/vis4d/pl/training_module.py +++ b/vis4d/pl/training_module.py @@ -6,14 +6,16 @@ import lightning.pytorch as pl from lightning.pytorch import seed_everything +from lightning.pytorch.core.optimizer import LightningOptimizer from ml_collections import ConfigDict from torch import nn +from torch.optim.optimizer import Optimizer from vis4d.common.ckpt import load_model_checkpoint from vis4d.common.distributed import broadcast from vis4d.common.imports import FVCORE_AVAILABLE from vis4d.common.logging import rank_zero_info -from vis4d.common.typing import DictStrAny +from vis4d.common.typing import DictStrAny, GenericFunc from vis4d.common.util import init_random_seed from vis4d.config import instantiate_classes from vis4d.config.typing import OptimizerConfig @@ -45,6 +47,7 @@ def __init__( seed: int = -1, ckpt_path: None | str = None, compute_flops: bool = False, + check_the_unused_parameters: bool = False, ) -> None: """Initialize the TrainingModule. @@ -63,6 +66,8 @@ def __init__( Defaults to None. compute_flops (bool, optional): If to compute the FLOPs of the model. Defaults to False. 
+ check_the_unused_parameters (bool, optional): If to check the + unused parameters. Defaults to False. """ super().__init__() self.model_cfg = model_cfg @@ -74,6 +79,7 @@ def __init__( self.seed = seed self.ckpt_path = ckpt_path self.compute_flops = compute_flops + self.check_unused_parameters = check_the_unused_parameters # Create model placeholder self.model: nn.Module @@ -187,3 +193,36 @@ def lr_scheduler_step( # type: ignore # pylint: disable=arguments-differ,line-t """Perform a step on the lr scheduler.""" # TODO: Support metric if needed scheduler.step(self.current_epoch) + + def optimizer_step( + self, + epoch: int, + batch_idx: int, + optimizer: Optimizer | LightningOptimizer, + optimizer_closure: GenericFunc | None = None, + ) -> None: + """Optimizer step. + + Args: + epoch (int): Current epoch. + batch_idx (int): Index of current batch. + optimizer: A PyTorch optimizer + optimizer_closure: The optimizer closure. This closure must be executed as it includes the + calls to ``training_step()``, ``optimizer.zero_grad()``, and ``backward()``. + + Examples:: + + def optimizer_step(self, epoch, batch_idx, optimizer, optimizer_closure): + # Add your custom logic to run directly before `optimizer.step()` + + optimizer.step(closure=optimizer_closure) + + # Add your custom logic to run directly after `optimizer.step()` + + """ + if self.check_unused_parameters: + for name, param in self.model.named_parameters(): + if param.grad is None: + rank_zero_info(name) + + optimizer.step(closure=optimizer_closure) From 53f2242570da12a7a9aacac99db690b62882987a Mon Sep 17 00:00:00 2001 From: RoyYang0714 Date: Tue, 15 Oct 2024 13:55:52 +0200 Subject: [PATCH 17/19] fix: Fix function args. 
--- vis4d/eval/common/depth.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vis4d/eval/common/depth.py b/vis4d/eval/common/depth.py index 02d7db4b..cee434ef 100644 --- a/vis4d/eval/common/depth.py +++ b/vis4d/eval/common/depth.py @@ -100,8 +100,8 @@ def process_batch( # type: ignore # pylint: disable=arguments-differ """Process a batch of data. Args: - prediction (np.array): Prediction optical flow, in shape (H, W, 2). - groundtruth (np.array): Target optical flow, in shape (H, W, 2). + prediction (np.array): Predicted depth map, in shape (B, H, W). + groundtruth (np.array): Target depth map, in shape (B, H, W). """ preds = ( array_to_numpy(prediction, n_dims=None, dtype=np.float32) From efd205c5f9bf8b8f7ec934b064d82608bb908680 Mon Sep 17 00:00:00 2001 From: RoyYang0714 Date: Thu, 28 Nov 2024 14:58:35 +0100 Subject: [PATCH 18/19] feat: Update CBGS, bbox3D visualization. --- vis4d/data/cbgs.py | 4 +- vis4d/vis/image/bbox3d_visualizer.py | 5 + vis4d/vis/image/canvas/pillow_backend.py | 173 +++-------------------- 3 files changed, 28 insertions(+), 154 deletions(-) diff --git a/vis4d/data/cbgs.py b/vis4d/data/cbgs.py index 226b25d0..d087ad54 100644 --- a/vis4d/data/cbgs.py +++ b/vis4d/data/cbgs.py @@ -113,7 +113,9 @@ def _get_sample_indices(self) -> list[int]: sample_indices = [] frac = 1.0 / len(self.cat2id) - ratios = [frac / v for v in class_distribution.values()] + ratios = [ + frac / v if v > 0 else 1 for v in class_distribution.values() + ] for cls_inds, ratio in zip( list(class_sample_indices.values()), ratios ): diff --git a/vis4d/vis/image/bbox3d_visualizer.py b/vis4d/vis/image/bbox3d_visualizer.py index 8020fdc3..89225294 100644 --- a/vis4d/vis/image/bbox3d_visualizer.py +++ b/vis4d/vis/image/bbox3d_visualizer.py @@ -64,6 +64,7 @@ def __init__( image_mode: str = "RGB", width: int = 2, camera_near_clip: float = 0.15, + plot_heading: bool = True, axis_mode: AxisMode = AxisMode.ROS, trajectory_length: int = 10, plot_trajectory:
bool = True, @@ -84,6 +85,8 @@ def __init__( width (int): Width of the drawn bounding boxes. Defaults to 2. camera_near_clip (float): Near clipping plane of the camera. Defaults to 0.15. + plot_heading (bool): If the heading should be plotted. Defaults to + True. axis_mode (AxisMode): Axis mode for the input bboxes. Defaults to AxisMode.ROS (i.e. global coordinate). trajectory_length (int): How many past frames should be used to @@ -117,6 +120,7 @@ def __init__( self.width = width self.camera_near_clip = camera_near_clip + self.plot_heading = plot_heading self.canvas = canvas if canvas is not None else PillowCanvasBackend() self.viewer = viewer if viewer is not None else MatplotlibImageViewer() @@ -296,6 +300,7 @@ def _draw_image(self, sample: DataSample) -> NDArrayUI8: sample.intrinsics, self.width, self.camera_near_clip, + self.plot_heading, ) selected_corner = project_point(box.corners[0], sample.intrinsics) diff --git a/vis4d/vis/image/canvas/pillow_backend.py b/vis4d/vis/image/canvas/pillow_backend.py index 05bb0252..8b4da8a2 100644 --- a/vis4d/vis/image/canvas/pillow_backend.py +++ b/vis4d/vis/image/canvas/pillow_backend.py @@ -2,12 +2,9 @@ from __future__ import annotations -import base64 -from io import BytesIO - import numpy as np from PIL import Image, ImageDraw -from PIL.ImageFont import ImageFont +from PIL.ImageFont import ImageFont, load_default from vis4d.common.typing import NDArrayBool, NDArrayF32, NDArrayF64, NDArrayUI8 @@ -18,14 +15,17 @@ class PillowCanvasBackend(CanvasBackend): """Canvas backend using Pillow.""" - def __init__(self, font: ImageFont | None = None) -> None: + def __init__( + self, font: ImageFont | None = None, font_size: int | None = None + ) -> None: """Creates a new canvas backend. Args: font (ImageFont): Pillow font to use for the label. + font_size (int): Font size to use for the label. 
""" self._image_draw: ImageDraw.ImageDraw | None = None - self._font = font if font is not None else load_default_font() + self._font = font if font is not None else load_default(font_size) self._image: Image.Image | None = None def create_canvas( @@ -282,6 +282,7 @@ def draw_box_3d( intrinsics: NDArrayF32, width: int = 0, camera_near_clip: float = 0.15, + plot_heading: bool = True, ) -> None: """Draws a 3D box onto the given canvas.""" # Draw Front @@ -327,16 +328,19 @@ def draw_box_3d( ) # Draw line indicating the front - center_bottom_forward = np.mean(corners[:2], axis=0, dtype=np.float32) - center_bottom = np.mean(corners[:4], axis=0, dtype=np.float32) - self._draw_box_3d_line( - tuple(center_bottom.tolist()), - tuple(center_bottom_forward.tolist()), - color, - intrinsics, - width, - camera_near_clip, - ) + if plot_heading: + center_bottom_forward = np.mean( + corners[:2], axis=0, dtype=np.float32 + ) + center_bottom = np.mean(corners[:4], axis=0, dtype=np.float32) + self._draw_box_3d_line( + tuple(center_bottom.tolist()), + tuple(center_bottom_forward.tolist()), + color, + intrinsics, + width, + camera_near_clip, + ) def as_numpy_image(self) -> NDArrayUI8: """Returns the current canvas as numpy image. @@ -364,140 +368,3 @@ def save_to_disk(self, image_path: str) -> None: "No Image initialized! Did you call 'create_canvas'?" 
) self._image.save(image_path) - - -def load_default_font() -> ImageFont: - """Load a "better than nothing" default font.""" - f = ImageFont() - f._load_pilfont_data( # pylint: disable=protected-access - # courB08 - BytesIO( - base64.b64decode( - b""" -UElMZm9udAo7Ozs7OzsxMDsKREFUQQoAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA -AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA -AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA -AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA -AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA -AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA -AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA -AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA -AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA -AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA -AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA -AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAYAAAAA//8AAQAAAAAAAAABAAEA -BgAAAAH/+gADAAAAAQAAAAMABgAGAAAAAf/6AAT//QADAAAABgADAAYAAAAA//kABQABAAYAAAAL -AAgABgAAAAD/+AAFAAEACwAAABAACQAGAAAAAP/5AAUAAAAQAAAAFQAHAAYAAP////oABQAAABUA -AAAbAAYABgAAAAH/+QAE//wAGwAAAB4AAwAGAAAAAf/5AAQAAQAeAAAAIQAIAAYAAAAB//kABAAB -ACEAAAAkAAgABgAAAAD/+QAE//0AJAAAACgABAAGAAAAAP/6AAX//wAoAAAALQAFAAYAAAAB//8A -BAACAC0AAAAwAAMABgAAAAD//AAF//0AMAAAADUAAQAGAAAAAf//AAMAAAA1AAAANwABAAYAAAAB -//kABQABADcAAAA7AAgABgAAAAD/+QAFAAAAOwAAAEAABwAGAAAAAP/5AAYAAABAAAAARgAHAAYA -AAAA//kABQAAAEYAAABLAAcABgAAAAD/+QAFAAAASwAAAFAABwAGAAAAAP/5AAYAAABQAAAAVgAH -AAYAAAAA//kABQAAAFYAAABbAAcABgAAAAD/+QAFAAAAWwAAAGAABwAGAAAAAP/5AAUAAABgAAAA -ZQAHAAYAAAAA//kABQAAAGUAAABqAAcABgAAAAD/+QAFAAAAagAAAG8ABwAGAAAAAf/8AAMAAABv -AAAAcQAEAAYAAAAA//wAAwACAHEAAAB0AAYABgAAAAD/+gAE//8AdAAAAHgABQAGAAAAAP/7AAT/ 
-/gB4AAAAfAADAAYAAAAB//oABf//AHwAAACAAAUABgAAAAD/+gAFAAAAgAAAAIUABgAGAAAAAP/5 -AAYAAQCFAAAAiwAIAAYAAP////oABgAAAIsAAACSAAYABgAA////+gAFAAAAkgAAAJgABgAGAAAA -AP/6AAUAAACYAAAAnQAGAAYAAP////oABQAAAJ0AAACjAAYABgAA////+gAFAAAAowAAAKkABgAG -AAD////6AAUAAACpAAAArwAGAAYAAAAA//oABQAAAK8AAAC0AAYABgAA////+gAGAAAAtAAAALsA -BgAGAAAAAP/6AAQAAAC7AAAAvwAGAAYAAP////oABQAAAL8AAADFAAYABgAA////+gAGAAAAxQAA -AMwABgAGAAD////6AAUAAADMAAAA0gAGAAYAAP////oABQAAANIAAADYAAYABgAA////+gAGAAAA -2AAAAN8ABgAGAAAAAP/6AAUAAADfAAAA5AAGAAYAAP////oABQAAAOQAAADqAAYABgAAAAD/+gAF -AAEA6gAAAO8ABwAGAAD////6AAYAAADvAAAA9gAGAAYAAAAA//oABQAAAPYAAAD7AAYABgAA//// -+gAFAAAA+wAAAQEABgAGAAD////6AAYAAAEBAAABCAAGAAYAAP////oABgAAAQgAAAEPAAYABgAA -////+gAGAAABDwAAARYABgAGAAAAAP/6AAYAAAEWAAABHAAGAAYAAP////oABgAAARwAAAEjAAYA -BgAAAAD/+gAFAAABIwAAASgABgAGAAAAAf/5AAQAAQEoAAABKwAIAAYAAAAA//kABAABASsAAAEv -AAgABgAAAAH/+QAEAAEBLwAAATIACAAGAAAAAP/5AAX//AEyAAABNwADAAYAAAAAAAEABgACATcA -AAE9AAEABgAAAAH/+QAE//wBPQAAAUAAAwAGAAAAAP/7AAYAAAFAAAABRgAFAAYAAP////kABQAA -AUYAAAFMAAcABgAAAAD/+wAFAAABTAAAAVEABQAGAAAAAP/5AAYAAAFRAAABVwAHAAYAAAAA//sA -BQAAAVcAAAFcAAUABgAAAAD/+QAFAAABXAAAAWEABwAGAAAAAP/7AAYAAgFhAAABZwAHAAYAAP// -//kABQAAAWcAAAFtAAcABgAAAAD/+QAGAAABbQAAAXMABwAGAAAAAP/5AAQAAgFzAAABdwAJAAYA -AP////kABgAAAXcAAAF+AAcABgAAAAD/+QAGAAABfgAAAYQABwAGAAD////7AAUAAAGEAAABigAF -AAYAAP////sABQAAAYoAAAGQAAUABgAAAAD/+wAFAAABkAAAAZUABQAGAAD////7AAUAAgGVAAAB -mwAHAAYAAAAA//sABgACAZsAAAGhAAcABgAAAAD/+wAGAAABoQAAAacABQAGAAAAAP/7AAYAAAGn -AAABrQAFAAYAAAAA//kABgAAAa0AAAGzAAcABgAA////+wAGAAABswAAAboABQAGAAD////7AAUA -AAG6AAABwAAFAAYAAP////sABgAAAcAAAAHHAAUABgAAAAD/+wAGAAABxwAAAc0ABQAGAAD////7 -AAYAAgHNAAAB1AAHAAYAAAAA//sABQAAAdQAAAHZAAUABgAAAAH/+QAFAAEB2QAAAd0ACAAGAAAA -Av/6AAMAAQHdAAAB3gAHAAYAAAAA//kABAABAd4AAAHiAAgABgAAAAD/+wAF//0B4gAAAecAAgAA -AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA -AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 
-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA -AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA -AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA -AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA -AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA -AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA -AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA -AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA -AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA -AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAYAAAAB -//sAAwACAecAAAHpAAcABgAAAAD/+QAFAAEB6QAAAe4ACAAGAAAAAP/5AAYAAAHuAAAB9AAHAAYA -AAAA//oABf//AfQAAAH5AAUABgAAAAD/+QAGAAAB+QAAAf8ABwAGAAAAAv/5AAMAAgH/AAACAAAJ -AAYAAAAA//kABQABAgAAAAIFAAgABgAAAAH/+gAE//sCBQAAAggAAQAGAAAAAP/5AAYAAAIIAAAC -DgAHAAYAAAAB//kABf/+Ag4AAAISAAUABgAA////+wAGAAACEgAAAhkABQAGAAAAAP/7AAX//gIZ -AAACHgADAAYAAAAA//wABf/9Ah4AAAIjAAEABgAAAAD/+QAHAAACIwAAAioABwAGAAAAAP/6AAT/ -+wIqAAACLgABAAYAAAAA//kABP/8Ai4AAAIyAAMABgAAAAD/+gAFAAACMgAAAjcABgAGAAAAAf/5 -AAT//QI3AAACOgAEAAYAAAAB//kABP/9AjoAAAI9AAQABgAAAAL/+QAE//sCPQAAAj8AAgAGAAD/ -///7AAYAAgI/AAACRgAHAAYAAAAA//kABgABAkYAAAJMAAgABgAAAAH//AAD//0CTAAAAk4AAQAG -AAAAAf//AAQAAgJOAAACUQADAAYAAAAB//kABP/9AlEAAAJUAAQABgAAAAH/+QAF//4CVAAAAlgA -BQAGAAD////7AAYAAAJYAAACXwAFAAYAAP////kABgAAAl8AAAJmAAcABgAA////+QAGAAACZgAA -Am0ABwAGAAD////5AAYAAAJtAAACdAAHAAYAAAAA//sABQACAnQAAAJ5AAcABgAA////9wAGAAAC -eQAAAoAACQAGAAD////3AAYAAAKAAAAChwAJAAYAAP////cABgAAAocAAAKOAAkABgAA////9wAG -AAACjgAAApUACQAGAAD////4AAYAAAKVAAACnAAIAAYAAP////cABgAAApwAAAKjAAkABgAA//// -+gAGAAACowAAAqoABgAGAAAAAP/6AAUAAgKqAAACrwAIAAYAAP////cABQAAAq8AAAK1AAkABgAA -////9wAFAAACtQAAArsACQAGAAD////3AAUAAAK7AAACwQAJAAYAAP////gABQAAAsEAAALHAAgA 
-BgAAAAD/9wAEAAACxwAAAssACQAGAAAAAP/3AAQAAALLAAACzwAJAAYAAAAA//cABAAAAs8AAALT -AAkABgAAAAD/+AAEAAAC0wAAAtcACAAGAAD////6AAUAAALXAAAC3QAGAAYAAP////cABgAAAt0A -AALkAAkABgAAAAD/9wAFAAAC5AAAAukACQAGAAAAAP/3AAUAAALpAAAC7gAJAAYAAAAA//cABQAA -Au4AAALzAAkABgAAAAD/9wAFAAAC8wAAAvgACQAGAAAAAP/4AAUAAAL4AAAC/QAIAAYAAAAA//oA -Bf//Av0AAAMCAAUABgAA////+gAGAAADAgAAAwkABgAGAAD////3AAYAAAMJAAADEAAJAAYAAP// -//cABgAAAxAAAAMXAAkABgAA////9wAGAAADFwAAAx4ACQAGAAD////4AAYAAAAAAAoABwASAAYA -AP////cABgAAAAcACgAOABMABgAA////+gAFAAAADgAKABQAEAAGAAD////6AAYAAAAUAAoAGwAQ -AAYAAAAA//gABgAAABsACgAhABIABgAAAAD/+AAGAAAAIQAKACcAEgAGAAAAAP/4AAYAAAAnAAoA -LQASAAYAAAAA//gABgAAAC0ACgAzABIABgAAAAD/+QAGAAAAMwAKADkAEQAGAAAAAP/3AAYAAAA5 -AAoAPwATAAYAAP////sABQAAAD8ACgBFAA8ABgAAAAD/+wAFAAIARQAKAEoAEQAGAAAAAP/4AAUA -AABKAAoATwASAAYAAAAA//gABQAAAE8ACgBUABIABgAAAAD/+AAFAAAAVAAKAFkAEgAGAAAAAP/5 -AAUAAABZAAoAXgARAAYAAAAA//gABgAAAF4ACgBkABIABgAAAAD/+AAGAAAAZAAKAGoAEgAGAAAA -AP/4AAYAAABqAAoAcAASAAYAAAAA//kABgAAAHAACgB2ABEABgAAAAD/+AAFAAAAdgAKAHsAEgAG -AAD////4AAYAAAB7AAoAggASAAYAAAAA//gABQAAAIIACgCHABIABgAAAAD/+AAFAAAAhwAKAIwA -EgAGAAAAAP/4AAUAAACMAAoAkQASAAYAAAAA//gABQAAAJEACgCWABIABgAAAAD/+QAFAAAAlgAK -AJsAEQAGAAAAAP/6AAX//wCbAAoAoAAPAAYAAAAA//oABQABAKAACgClABEABgAA////+AAGAAAA -pQAKAKwAEgAGAAD////4AAYAAACsAAoAswASAAYAAP////gABgAAALMACgC6ABIABgAA////+QAG -AAAAugAKAMEAEQAGAAD////4AAYAAgDBAAoAyAAUAAYAAP////kABQACAMgACgDOABMABgAA//// -+QAGAAIAzgAKANUAEw== -""" - ) - ), - Image.open( - BytesIO( - base64.b64decode( - b""" -iVBORw0KGgoAAAANSUhEUgAAAx4AAAAUAQAAAAArMtZoAAAEwElEQVR4nABlAJr/AHVE4czCI/4u -Mc4b7vuds/xzjz5/3/7u/n9vMe7vnfH/9++vPn/xyf5zhxzjt8GHw8+2d83u8x27199/nxuQ6Od9 -M43/5z2I+9n9ZtmDBwMQECDRQw/eQIQohJXxpBCNVE6QCCAAAAD//wBlAJr/AgALyj1t/wINwq0g -LeNZUworuN1cjTPIzrTX6ofHWeo3v336qPzfEwRmBnHTtf95/fglZK5N0PDgfRTslpGBvz7LFc4F -IUXBWQGjQ5MGCx34EDFPwXiY4YbYxavpnhHFrk14CDAAAAD//wBlAJr/AgKqRooH2gAgPeggvUAA -Bu2WfgPoAwzRAABAAAAAAACQgLz/3Uv4Gv+gX7BJgDeeGP6AAAD1NMDzKHD7ANWr3loYbxsAD791 
-NAADfcoIDyP44K/jv4Y63/Z+t98Ovt+ub4T48LAAAAD//wBlAJr/AuplMlADJAAAAGuAphWpqhMx -in0A/fRvAYBABPgBwBUgABBQ/sYAyv9g0bCHgOLoGAAAAAAAREAAwI7nr0ArYpow7aX8//9LaP/9 -SjdavWA8ePHeBIKB//81/83ndznOaXx379wAAAD//wBlAJr/AqDxW+D3AABAAbUh/QMnbQag/gAY -AYDAAACgtgD/gOqAAAB5IA/8AAAk+n9w0AAA8AAAmFRJuPo27ciC0cD5oeW4E7KA/wD3ECMAn2tt -y8PgwH8AfAxFzC0JzeAMtratAsC/ffwAAAD//wBlAJr/BGKAyCAA4AAAAvgeYTAwHd1kmQF5chkG -ABoMIHcL5xVpTfQbUqzlAAAErwAQBgAAEOClA5D9il08AEh/tUzdCBsXkbgACED+woQg8Si9VeqY -lODCn7lmF6NhnAEYgAAA/NMIAAAAAAD//2JgjLZgVGBg5Pv/Tvpc8hwGBjYGJADjHDrAwPzAjv/H -/Wf3PzCwtzcwHmBgYGcwbZz8wHaCAQMDOwMDQ8MCBgYOC3W7mp+f0w+wHOYxO3OG+e376hsMZjk3 -AAAAAP//YmCMY2A4wMAIN5e5gQETPD6AZisDAwMDgzSDAAPjByiHcQMDAwMDg1nOze1lByRu5/47 -c4859311AYNZzg0AAAAA//9iYGDBYihOIIMuwIjGL39/fwffA8b//xv/P2BPtzzHwCBjUQAAAAD/ -/yLFBrIBAAAA//9i1HhcwdhizX7u8NZNzyLbvT97bfrMf/QHI8evOwcSqGUJAAAA//9iYBB81iSw -pEE170Qrg5MIYydHqwdDQRMrAwcVrQAAAAD//2J4x7j9AAMDn8Q/BgYLBoaiAwwMjPdvMDBYM1Tv -oJodAAAAAP//Yqo/83+dxePWlxl3npsel9lvLfPcqlE9725C+acfVLMEAAAA//9i+s9gwCoaaGMR -evta/58PTEWzr21hufPjA8N+qlnBwAAAAAD//2JiWLci5v1+HmFXDqcnULE/MxgYGBj+f6CaJQAA -AAD//2Ji2FrkY3iYpYC5qDeGgeEMAwPDvwQBBoYvcTwOVLMEAAAA//9isDBgkP///0EOg9z35v// -Gc/eeW7BwPj5+QGZhANUswMAAAD//2JgqGBgYGBgqEMXlvhMPUsAAAAA//8iYDd1AAAAAP//AwDR -w7IkEbzhVQAAAABJRU5ErkJggg== -""" - ) - ) - ), - ) - return f From d8751fe8ae4f805a83a6a02175523db282177aaa Mon Sep 17 00:00:00 2001 From: RoyYang0714 Date: Mon, 2 Dec 2024 18:22:08 +0100 Subject: [PATCH 19/19] feat: Add AspectRationBatchSampler. 
--- vis4d/data/loader.py | 14 ++++++-- vis4d/data/samplers.py | 68 ++++++++++++++++++++++++++++++++++++ vis4d/zoo/base/dataloader.py | 2 ++ 3 files changed, 82 insertions(+), 2 deletions(-) diff --git a/vis4d/data/loader.py b/vis4d/data/loader.py index 482697e5..dc3c4380 100644 --- a/vis4d/data/loader.py +++ b/vis4d/data/loader.py @@ -16,7 +16,7 @@ from .const import CommonKeys as K from .data_pipe import DataPipe from .datasets import VideoDataset -from .samplers import VideoInferenceSampler +from .samplers import AspectRatioBatchSampler, VideoInferenceSampler from .transforms import compose from .transforms.to_tensor import ToTensor from .typing import DictData, DictDataOrList @@ -123,6 +123,7 @@ def build_train_dataloader( pin_memory: bool = True, shuffle: bool = True, seed: int | None = None, + aspect_ratio_grouping: bool = False, disable_subprocess_warning: bool = False, ) -> DataLoader[DictDataOrList]: """Build training dataloader.""" @@ -169,6 +170,14 @@ def _worker_init_fn(worker_id: int) -> None: sampler = DistributedSampler(dataset, shuffle=shuffle) shuffle = False + batch_sampler = None + if aspect_ratio_grouping: + batch_sampler = AspectRatioBatchSampler( + sampler, batch_size=samples_per_gpu + ) + samples_per_gpu = 1 + shuffle = None + dataloader = DataLoader( dataset, batch_size=samples_per_gpu, @@ -176,7 +185,8 @@ def _worker_init_fn(worker_id: int) -> None: collate_fn=( _collate_fn_multi if dataset.has_reference else _collate_fn_single ), - sampler=sampler, + sampler=sampler if not aspect_ratio_grouping else None, + batch_sampler=batch_sampler, worker_init_fn=_worker_init_fn, persistent_workers=workers_per_gpu > 0, pin_memory=pin_memory, diff --git a/vis4d/data/samplers.py b/vis4d/data/samplers.py index ae3d00a1..3821b9e9 100644 --- a/vis4d/data/samplers.py +++ b/vis4d/data/samplers.py @@ -7,6 +7,9 @@ import numpy as np from torch.utils.data import Dataset from torch.utils.data.distributed import DistributedSampler +from torch.utils.data.sampler 
import BatchSampler, Sampler + +from vis4d.data.const import CommonKeys as K from .datasets.base import VideoDataset from .typing import DictDataOrList @@ -76,3 +79,68 @@ def __iter__(self) -> Iterator[list[int]]: def __len__(self) -> int: """Return length of sampler instance.""" return len(self._local_idcs) + + +class AspectRatioBatchSampler(BatchSampler): + """A sampler wrapper for grouping images with similar aspect ratio. + + Modified from: + https://github.com/open-mmlab/mmdetection/blob/main/mmdet/datasets/samplers/batch_sampler.py + + Args: + sampler (Sampler): Base sampler. + batch_size (int): Size of mini-batch. + drop_last (bool): If ``True``, the sampler will drop the last batch if + its size would be less than ``batch_size``. + """ + + def __init__( + self, sampler: Sampler, batch_size: int, drop_last: bool = False + ) -> None: + if not isinstance(sampler, Sampler): + raise TypeError( + "sampler should be an instance of ``Sampler``, " + f"but got {sampler}" + ) + if not isinstance(batch_size, int) or batch_size <= 0: + raise ValueError( + "batch_size should be a positive integer value, " + f"but got batch_size={batch_size}" + ) + self.sampler = sampler + self.batch_size = batch_size + self.drop_last = drop_last + # two groups for w < h and w >= h + self._aspect_ratio_buckets = [[] for _ in range(2)] + + def __iter__(self): + for idx in self.sampler: + data_dict = self.sampler.dataset[idx] + height, width = data_dict[K.input_hw] + bucket_id = 0 if width < height else 1 + bucket = self._aspect_ratio_buckets[bucket_id] + bucket.append(idx) + # yield a batch of indices in the same aspect ratio group + if len(bucket) == self.batch_size: + yield bucket[:] + del bucket[:] + + # yield the rest data and reset the bucket + left_data = ( + self._aspect_ratio_buckets[0] + self._aspect_ratio_buckets[1] + ) + self._aspect_ratio_buckets = [[] for _ in range(2)] + while len(left_data) > 0: + if len(left_data) <= self.batch_size: + if not self.drop_last: + yield
left_data[:] + left_data = [] + else: + yield left_data[: self.batch_size] + left_data = left_data[self.batch_size :] + + def __len__(self) -> int: + if self.drop_last: + return len(self.sampler) // self.batch_size + else: + return (len(self.sampler) + self.batch_size - 1) // self.batch_size diff --git a/vis4d/zoo/base/dataloader.py b/vis4d/zoo/base/dataloader.py index 6dcb70bd..0998852d 100644 --- a/vis4d/zoo/base/dataloader.py +++ b/vis4d/zoo/base/dataloader.py @@ -32,6 +32,7 @@ def get_train_dataloader_cfg( sensors: Sequence[str] | None = None, pin_memory: bool | FieldReference = True, shuffle: bool | FieldReference = True, + aspect_ratio_grouping: bool | FieldReference = False, ) -> ConfigDict: """Creates dataloader configuration given dataset and preprocessing. @@ -84,6 +85,7 @@ def get_train_dataloader_cfg( sensors=sensors, pin_memory=pin_memory, shuffle=shuffle, + aspect_ratio_grouping=aspect_ratio_grouping, )