From 4d1ea646cb1e8da633d4e5933ebdc1ec536af01a Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 7 Sep 2023 09:29:09 +0000
Subject: [PATCH] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 .vscode/settings.json | 2 +-
 configs/code_snapshot/dsgen_deps.yaml | 1 -
 configs/code_snapshot/happypose.yaml | 4 +-
 configs/dsgen/default.yaml | 2 +-
 configs/dsgen/fastrun.yaml | 2 +-
 configs/dsgen/gso_1M.yaml | 2 +-
 configs/job_env/happypose.yaml | 2 +-
 configs/job_env/jz_yann.yaml | 2 +-
 configs/job_env/lda.yaml | 2 +-
 configs/local_job/single_gpu.yaml | 2 +-
 configs/local_node/lda.yaml | 2 +-
 configs/run_ds_postproc/default.yaml | 4 +-
 configs/run_dsgen/default.yaml | 4 +-
 configs/run_dsgen/fastrun.yaml | 4 +-
 configs/run_dsgen/gso_1M.yaml | 2 +-
 configs/run_dsgen/shapenet_1M.yaml | 2 +-
 configs/runner/yann_sgpu.yaml | 4 +-
 configs/slurm_job/jz.yaml | 2 +-
 configs/slurm_job/single_gpu.yaml | 2 +-
 configs/slurm_job/yann.yaml | 2 +-
 configs/slurm_queue/gpu_p2.yaml | 2 +-
 configs/slurm_queue/v100.yaml | 2 +-
 configs/snapshot_ignore.txt | 2 +-
 environment.yml | 24 +-
 experiments/generate_dataset.py | 21 +-
 experiments/job-runner/job_runner/configs.py | 1 +
 experiments/job-runner/job_runner/utils.py | 13 +-
 experiments/job-runner/setup.py | 5 +-
 experiments/make_gso_ids.py | 5 +-
 experiments/make_shapenet_ids.py | 26 +-
 experiments/postprocess_dataset.py | 27 +-
 happypose/pose_estimators/megapose/CLA | 2 +-
 happypose/pose_estimators/megapose/LICENSE | 2 +-
 happypose/pose_estimators/megapose/README.md | 18 +-
 .../megapose/conda/environment.yaml | 2 +-
 .../megapose/conda/environment_full.yaml | 22 +-
 .../megapose/docker/Dockerfile.megapose | 6 +-
 .../license_files/python_license_header.txt | 1 -
 .../pose_estimators/megapose/pyproject.toml | 37 +-
 .../pose_estimators/megapose/rclone.conf | 3 +-
 .../megapose/src/megapose/__init__.py | 15 +-
 .../megapose/src/megapose/bop_config.py | 345 +++++++++---------
 .../megapose/src/megapose/config.py | 21 +-
 .../src/megapose/datasets/__init__.py | 4 +-
 .../src/megapose/evaluation/__init__.py | 4 +-
 .../megapose/src/megapose/evaluation/bop.py | 63 ++--
 .../src/megapose/evaluation/data_utils.py | 23 +-
 .../src/megapose/evaluation/eval_config.py | 11 +-
 .../src/megapose/evaluation/evaluation.py | 51 ++-
 .../megapose/evaluation/evaluation_runner.py | 39 +-
 .../megapose/evaluation/meters/__init__.py | 4 +-
 .../src/megapose/evaluation/meters/base.py | 4 +-
 .../megapose/evaluation/meters/lf_utils.py | 19 +-
 .../evaluation/meters/modelnet_meters.py | 19 +-
 .../src/megapose/evaluation/meters/utils.py | 34 +-
 .../megapose/evaluation/prediction_runner.py | 48 ++-
 .../src/megapose/evaluation/runner_utils.py | 28 +-
 .../megapose/src/megapose/evaluation/utils.py | 71 ++--
 .../src/megapose/inference/__init__.py | 4 +-
 .../src/megapose/inference/depth_refiner.py | 4 +-
 .../src/megapose/inference/detector.py | 38 +-
 .../src/megapose/inference/icp_refiner.py | 74 ++--
 .../src/megapose/inference/pose_estimator.py | 118 +++---
 .../src/megapose/inference/refiner_utils.py | 12 +-
 .../megapose/inference/teaserpp_refiner.py | 41 ++-
 .../megapose/src/megapose/inference/types.py | 36 +-
 .../megapose/src/megapose/lib3d/__init__.py | 4 +-
 .../megapose/src/megapose/models/mask_rcnn.py | 4 +-
 .../src/megapose/models/pose_rigid.py | 118 +++---
 .../megapose/src/megapose/models/resnet.py | 41 +--
 .../src/megapose/models/torchvision_resnet.py | 94 +++--
 .../src/megapose/models/wide_resnet.py | 30 +-
 .../megapose/src/megapose/scripts/__init__.py | 4 +-
 .../src/megapose/scripts/bop_calc_gt_info.py | 138 ++++---
 .../src/megapose/scripts/bop_calc_masks.py | 125 ++++---
 .../src/megapose/scripts/distributed.py | 7 +-
 .../megapose/src/megapose/scripts/download.py | 62 ++--
 .../megapose/scripts/generate_shapenet_pbr.py | 159 ++++----
 .../src/megapose/scripts/make_gso_meshes.py | 29 +-
 .../src/megapose/scripts/make_gso_subsets.py | 151 ++++----
 .../megapose/scripts/make_shapenet_panda3d.py | 29 +-
 .../scripts/make_shapenet_ply_scaled.py | 16 +-
 .../scripts/make_shapenet_pointclouds.py | 16 +-
 .../scripts/make_shapenet_statistics.py | 33 +-
 .../megapose/scripts/make_shapenet_subsets.py | 192 +++++-----
 .../scripts/run_full_megapose_eval.py | 51 ++-
 .../scripts/run_inference_on_example.py | 44 ++-
 .../megapose/scripts/run_megapose_training.py | 42 ++-
 .../src/megapose/scripts/test_distributed.py | 4 +-
 .../megapose/src/megapose/tests/__init__.py | 4 +-
 .../src/megapose/training/__init__.py | 4 +-
 .../megapose/training/detector_models_cfg.py | 11 +-
 .../training/megapose_forward_loss.py | 92 +++--
 .../src/megapose/training/pose_models_cfg.py | 30 +-
 .../src/megapose/training/train_megapose.py | 134 ++++---
 .../src/megapose/training/training_config.py | 5 +-
 .../megapose/src/megapose/training/utils.py | 51 ++-
 .../megapose/src/megapose/utils/__init__.py | 4 +-
 happypose/toolbox/datasets/augmentations.py | 81 ++--
 .../toolbox/datasets/bop_object_datasets.py | 10 +-
 .../toolbox/datasets/bop_scene_dataset.py | 25 +-
 happypose/toolbox/datasets/datasets_cfg.py | 190 ++++----
 happypose/toolbox/datasets/deepim_modelnet.py | 42 ++-
 happypose/toolbox/datasets/gso_dataset.py | 7 +-
 .../datasets/modelnet_object_dataset.py | 14 +-
 happypose/toolbox/datasets/object_dataset.py | 30 +-
 happypose/toolbox/datasets/pickle_dataset.py | 12 +-
 happypose/toolbox/datasets/pose_dataset.py | 80 ++--
 happypose/toolbox/datasets/samplers.py | 16 +-
 happypose/toolbox/datasets/scene_dataset.py | 130 ++++---
 .../datasets/scene_dataset_wrappers.py | 3 +-
 .../datasets/shapenet_object_dataset.py | 13 +-
 happypose/toolbox/datasets/urdf_dataset.py | 10 +-
 happypose/toolbox/datasets/utils.py | 18 +-
 .../toolbox/datasets/web_scene_dataset.py | 16 +-
 happypose/toolbox/inference/detector.py | 6 +-
 happypose/toolbox/inference/pose_estimator.py | 13 +-
 happypose/toolbox/inference/types.py | 36 +-
 happypose/toolbox/inference/utils.py | 80 ++--
 happypose/toolbox/lib3d/camera_geometry.py | 32 +-
 happypose/toolbox/lib3d/cropping.py | 59 ++-
 happypose/toolbox/lib3d/distances.py | 4 +-
 happypose/toolbox/lib3d/mesh_losses.py | 8 +-
 happypose/toolbox/lib3d/mesh_ops.py | 4 +-
 happypose/toolbox/lib3d/multiview.py | 27 +-
 .../toolbox/lib3d/rigid_mesh_database.py | 27 +-
 happypose/toolbox/lib3d/rotations.py | 42 ++-
 happypose/toolbox/lib3d/symmetries.py | 11 +-
 happypose/toolbox/lib3d/transform.py | 12 +-
 happypose/toolbox/lib3d/transform_ops.py | 24 +-
 happypose/toolbox/renderer/geometry.py | 33 +-
 .../renderer/panda3d_batch_renderer.py | 50 ++-
 .../renderer/panda3d_scene_renderer.py | 57 +--
 happypose/toolbox/renderer/types.py | 30 +-
 happypose/toolbox/renderer/utils.py | 20 +-
 happypose/toolbox/utils/conversion.py | 8 +-
 happypose/toolbox/utils/distributed.py | 17 +-
 happypose/toolbox/utils/download.py | 239 +++++++-----
 happypose/toolbox/utils/load_model.py | 9 +-
 happypose/toolbox/utils/logging.py | 9 +-
 happypose/toolbox/utils/logs_bokeh.py | 27 +-
 happypose/toolbox/utils/models_compat.py | 8 +-
happypose/toolbox/utils/omegaconf.py | 4 +- happypose/toolbox/utils/random.py | 15 +- happypose/toolbox/utils/resources.py | 9 +- happypose/toolbox/utils/tensor_collection.py | 27 +- happypose/toolbox/utils/timer.py | 4 +- happypose/toolbox/utils/transform_utils.py | 21 +- happypose/toolbox/utils/types.py | 3 +- happypose/toolbox/utils/webdataset.py | 9 +- happypose/toolbox/utils/xarray.py | 4 +- happypose/toolbox/visualization/__init__.py | 4 +- .../toolbox/visualization/bokeh_plotter.py | 27 +- .../toolbox/visualization/bokeh_utils.py | 7 +- .../toolbox/visualization/meshcat_utils.py | 63 ++-- .../visualization/meshcat_visualizer.py | 20 +- happypose/toolbox/visualization/utils.py | 11 +- pyproject.toml | 8 +- rclone.conf | 1 - 159 files changed, 2736 insertions(+), 2218 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 19055a87..3b4a49c6 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -9,4 +9,4 @@ "python.linting.enabled": true, "python.linting.flake8Enabled": true, "python.linting.mypyEnabled": true, -} \ No newline at end of file +} diff --git a/configs/code_snapshot/dsgen_deps.yaml b/configs/code_snapshot/dsgen_deps.yaml index 3afefd27..0133309b 100644 --- a/configs/code_snapshot/dsgen_deps.yaml +++ b/configs/code_snapshot/dsgen_deps.yaml @@ -5,4 +5,3 @@ python_packages_dir: - ${oc.env:PROJECTS_DIR}/happypose - ${oc.env:PROJECTS_DIR}/blenderproc - ${oc.env:PROJECTS_DIR}/bop_toolkit_lib - diff --git a/configs/code_snapshot/happypose.yaml b/configs/code_snapshot/happypose.yaml index fdcf7cfc..c2645962 100644 --- a/configs/code_snapshot/happypose.yaml +++ b/configs/code_snapshot/happypose.yaml @@ -5,5 +5,5 @@ python_packages_dir: - ${oc.env:PROJECTS_DIR}/happypose snapshot_dir: ${oc.env:HP_DATA_DIR}/code_snapshots/${hydra:job.id} - -exclude_path: ${oc.env:HP_ROOT_DIR}/configs/snapshot_ignore.txt \ No newline at end of file + +exclude_path: ${oc.env:HP_ROOT_DIR}/configs/snapshot_ignore.txt diff --git a/configs/dsgen/default.yaml b/configs/dsgen/default.yaml index 67ae4486..568ba619 100644 --- a/configs/dsgen/default.yaml +++ b/configs/dsgen/default.yaml @@ -1,2 +1,2 @@ defaults: - - base_dsgen \ No newline at end of file + - base_dsgen diff --git a/configs/dsgen/fastrun.yaml b/configs/dsgen/fastrun.yaml index ac7c7367..67430e19 100644 --- a/configs/dsgen/fastrun.yaml +++ b/configs/dsgen/fastrun.yaml @@ -2,4 +2,4 @@ defaults: - default few: True verbose: True -debug: True \ No newline at end of file +debug: True diff --git a/configs/dsgen/gso_1M.yaml b/configs/dsgen/gso_1M.yaml index a34ffc20..3afef1cd 100644 --- a/configs/dsgen/gso_1M.yaml +++ b/configs/dsgen/gso_1M.yaml @@ -1,3 +1,3 @@ defaults: - default -dataset_id: gso_1M \ No newline at end of file +dataset_id: gso_1M diff --git a/configs/job_env/happypose.yaml b/configs/job_env/happypose.yaml index a7f2630b..eac801c6 100644 --- a/configs/job_env/happypose.yaml +++ b/configs/job_env/happypose.yaml @@ -1,3 +1,3 @@ defaults: - base_job_env -conda_env: 'happypose' \ No newline at end of file +conda_env: 'happypose' diff --git a/configs/job_env/jz_yann.yaml b/configs/job_env/jz_yann.yaml index ca799a1c..7a328d08 100644 --- a/configs/job_env/jz_yann.yaml +++ b/configs/job_env/jz_yann.yaml @@ -5,4 +5,4 @@ env: HP_DATA_DIR: $WORK/data/happypose MEGAPOSE_DATA_DIR: $WORK/data/megapose BLENDER_INSTALL_DIR: $WORK/blender/blender-2.93.0-linux-x64 - BLENDERPROC_DIR: $WORK/projects/blenderproc \ No newline at end of file + BLENDERPROC_DIR: $WORK/projects/blenderproc diff --git 
a/configs/job_env/lda.yaml b/configs/job_env/lda.yaml index 2c39a227..66706f34 100644 --- a/configs/job_env/lda.yaml +++ b/configs/job_env/lda.yaml @@ -5,4 +5,4 @@ env: HP_DATA_DIR: /home/ylabbe/data/happypose MEGAPOSE_DATA_DIR: /home/ylabbe/data/megapose-private BLENDER_INSTALL_DIR: $HOME/blenderproc/blender-2.93.88-linux-x64 - BLENDER_PROC_DIR: /home/ylabbe/projects/blenderproc \ No newline at end of file + BLENDER_PROC_DIR: /home/ylabbe/projects/blenderproc diff --git a/configs/local_job/single_gpu.yaml b/configs/local_job/single_gpu.yaml index 6386e9e0..b67a42f1 100644 --- a/configs/local_job/single_gpu.yaml +++ b/configs/local_job/single_gpu.yaml @@ -2,4 +2,4 @@ defaults: - base_local_job nodes: 1 tasks_per_node: 1 -gpus_per_node: 1 \ No newline at end of file +gpus_per_node: 1 diff --git a/configs/local_node/lda.yaml b/configs/local_node/lda.yaml index 34d0eaf6..0b04d3a2 100644 --- a/configs/local_node/lda.yaml +++ b/configs/local_node/lda.yaml @@ -3,4 +3,4 @@ defaults: gpus_per_node: 1 mem_per_gpu: '8GB' cpus_per_gpu: 12 -mem_per_cpu: '2.7GB' \ No newline at end of file +mem_per_cpu: '2.7GB' diff --git a/configs/run_ds_postproc/default.yaml b/configs/run_ds_postproc/default.yaml index 3b6910b3..aa419664 100644 --- a/configs/run_ds_postproc/default.yaml +++ b/configs/run_ds_postproc/default.yaml @@ -16,8 +16,8 @@ runner: cpus_per_task: 1 hydra: - run: + run: dir: ${oc.env:HP_DATA_DIR}/hydra_outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} n_jobs: 128 -ds_dir: ${oc.env:HP_DATA_DIR}/blender_pbr_datasets/gso_1M \ No newline at end of file +ds_dir: ${oc.env:HP_DATA_DIR}/blender_pbr_datasets/gso_1M diff --git a/configs/run_dsgen/default.yaml b/configs/run_dsgen/default.yaml index ce8ee19d..19d65cdc 100644 --- a/configs/run_dsgen/default.yaml +++ b/configs/run_dsgen/default.yaml @@ -20,5 +20,5 @@ runner: cpus_per_task: 1 hydra: - run: - dir: ${oc.env:HP_DATA_DIR}/hydra_outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} \ No newline at end of file + run: + dir: ${oc.env:HP_DATA_DIR}/hydra_outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} diff --git a/configs/run_dsgen/fastrun.yaml b/configs/run_dsgen/fastrun.yaml index 3e0d22ce..1aa887d2 100644 --- a/configs/run_dsgen/fastrun.yaml +++ b/configs/run_dsgen/fastrun.yaml @@ -2,8 +2,8 @@ start_chunk: 0 n_jobs: 4 n_chunks: 4 -ds: +ds: debug: True verbose: True overwrite: True - few: True \ No newline at end of file + few: True diff --git a/configs/run_dsgen/gso_1M.yaml b/configs/run_dsgen/gso_1M.yaml index 3a5fe8b7..4ff2e19e 100644 --- a/configs/run_dsgen/gso_1M.yaml +++ b/configs/run_dsgen/gso_1M.yaml @@ -3,4 +3,4 @@ defaults: - override /dsgen@ds: gso_1M n_jobs: 128 -n_chunks: 25000 \ No newline at end of file +n_chunks: 25000 diff --git a/configs/run_dsgen/shapenet_1M.yaml b/configs/run_dsgen/shapenet_1M.yaml index 435351da..546e1872 100644 --- a/configs/run_dsgen/shapenet_1M.yaml +++ b/configs/run_dsgen/shapenet_1M.yaml @@ -3,4 +3,4 @@ defaults: - override /dsgen@ds: shapenet_1M n_jobs: 350 -n_chunks: 50000 \ No newline at end of file +n_chunks: 50000 diff --git a/configs/runner/yann_sgpu.yaml b/configs/runner/yann_sgpu.yaml index 26786b8f..a92b0adf 100644 --- a/configs/runner/yann_sgpu.yaml +++ b/configs/runner/yann_sgpu.yaml @@ -7,9 +7,9 @@ defaults: - yann - single_gpu - jz - - /job_env: + - /job_env: - happypose - lda - _self_ -log_dir: ${oc.env:HP_DATA_DIR}/submitit_logs \ No newline at end of file +log_dir: ${oc.env:HP_DATA_DIR}/submitit_logs diff --git a/configs/slurm_job/jz.yaml b/configs/slurm_job/jz.yaml index 83cebb06..697f9ff8 100644 --- 
a/configs/slurm_job/jz.yaml +++ b/configs/slurm_job/jz.yaml @@ -4,4 +4,4 @@ defaults: qos: 'qos_gpu-t3' time: '20:00:00' additional_parameters: - hint: nomultithread \ No newline at end of file + hint: nomultithread diff --git a/configs/slurm_job/single_gpu.yaml b/configs/slurm_job/single_gpu.yaml index 007c9d06..8e82d1a2 100644 --- a/configs/slurm_job/single_gpu.yaml +++ b/configs/slurm_job/single_gpu.yaml @@ -3,4 +3,4 @@ defaults: nodes: 1 tasks_per_node: 1 -gpus_per_node: 1 \ No newline at end of file +gpus_per_node: 1 diff --git a/configs/slurm_job/yann.yaml b/configs/slurm_job/yann.yaml index 79cac88e..68387641 100644 --- a/configs/slurm_job/yann.yaml +++ b/configs/slurm_job/yann.yaml @@ -1,4 +1,4 @@ defaults: - base_slurm_job -account: 'vuw@v100' \ No newline at end of file +account: 'vuw@v100' diff --git a/configs/slurm_queue/gpu_p2.yaml b/configs/slurm_queue/gpu_p2.yaml index 47796384..b9fa99c6 100644 --- a/configs/slurm_queue/gpu_p2.yaml +++ b/configs/slurm_queue/gpu_p2.yaml @@ -4,4 +4,4 @@ partition: "gpu_p2" gpus_per_node: 8 cpus_per_gpu: 3 mem_per_gpu: '32GB' -mem_per_cpu: ??? \ No newline at end of file +mem_per_cpu: ??? diff --git a/configs/slurm_queue/v100.yaml b/configs/slurm_queue/v100.yaml index 2993db9c..06d0d8f9 100644 --- a/configs/slurm_queue/v100.yaml +++ b/configs/slurm_queue/v100.yaml @@ -4,4 +4,4 @@ partition: "gpu_p13" gpus_per_node: 4 cpus_per_gpu: 10 mem_per_gpu: '32GB' -mem_per_cpu: '10GB' \ No newline at end of file +mem_per_cpu: '10GB' diff --git a/configs/snapshot_ignore.txt b/configs/snapshot_ignore.txt index 834b22c3..b3131c8b 100644 --- a/configs/snapshot_ignore.txt +++ b/configs/snapshot_ignore.txt @@ -23,4 +23,4 @@ blenderproc/scripts/ blenderproc/images/ my-notebooks/*.json colors.json -*.ttf \ No newline at end of file +*.ttf diff --git a/environment.yml b/environment.yml index dd8fec7f..7f509a86 100644 --- a/environment.yml +++ b/environment.yml @@ -55,7 +55,7 @@ dependencies: - selenium - omegaconf - simplejson - - line_profiler + - line_profiler - opencv-python - torchnet - tqdm @@ -63,24 +63,24 @@ dependencies: - transforms3d - panda3d - joblib - - xarray + - xarray - pandas - matplotlib - bokeh - - plyfile - - trimesh - - ipdb + - plyfile + - trimesh + - ipdb - panda3d-gltf - colorama - pyyaml - ipykernel - scipy - pypng - - h5py + - h5py - seaborn - - kornia - - pyarrow - - dt_apriltags + - kornia + - pyarrow + - dt_apriltags - open3d - structlog - imageio @@ -88,6 +88,6 @@ dependencies: - pyyaml - psutil - webdataset - - opencv-contrib-python - - roma - - torchgeometry \ No newline at end of file + - opencv-contrib-python + - roma + - torchgeometry diff --git a/experiments/generate_dataset.py b/experiments/generate_dataset.py index a568d989..a8e3d1d1 100644 --- a/experiments/generate_dataset.py +++ b/experiments/generate_dataset.py @@ -1,18 +1,14 @@ +import copy +import time import typing as tp +from dataclasses import dataclass + import hydra -import omegaconf -import tqdm -import time -import copy import numpy as np +import omegaconf import submitit -from dataclasses import dataclass from hydra.core.config_store import ConfigStore - - -from job_runner.configs import ( - RunnerConfig, -) +from job_runner.configs import RunnerConfig from job_runner.utils import make_setup @@ -52,10 +48,9 @@ def generate_chunks(ds_cfg: DatasetGenerationConfig): @hydra.main( - version_base=None, config_path="../configs", config_name="run_dsgen/default" + version_base=None, config_path="../configs", config_name="run_dsgen/default", ) def main(cfg: Config): - if 
cfg.runner.use_slurm: executor = submitit.AutoExecutor(folder=cfg.runner.log_dir) executor.update_parameters( @@ -84,7 +79,7 @@ def main(cfg: Config): jobs = [] with executor.batch(): - for n, chunk_split_ in enumerate(chunk_splits): + for _n, chunk_split_ in enumerate(chunk_splits): ds_cfg = copy.deepcopy(cfg.ds) ds_cfg.chunk_ids = chunk_split_.tolist() if cfg.dry_run: diff --git a/experiments/job-runner/job_runner/configs.py b/experiments/job-runner/job_runner/configs.py index db314421..43cf695f 100644 --- a/experiments/job-runner/job_runner/configs.py +++ b/experiments/job-runner/job_runner/configs.py @@ -1,5 +1,6 @@ import typing as tp from dataclasses import dataclass + from hydra.core.config_store import ConfigStore diff --git a/experiments/job-runner/job_runner/utils.py b/experiments/job-runner/job_runner/utils.py index 427b598d..35239d6e 100644 --- a/experiments/job-runner/job_runner/utils.py +++ b/experiments/job-runner/job_runner/utils.py @@ -1,11 +1,8 @@ -import typing as tp import pathlib -import submitit +import typing as tp -from job_runner.configs import ( - JobEnvironmentConfig, - RunnerConfig -) +import submitit +from job_runner.configs import JobEnvironmentConfig, RunnerConfig def make_setup(cfg: JobEnvironmentConfig) -> tp.List[str]: @@ -23,7 +20,7 @@ def make_snapshots( ): for code_dir in code_directories: snapshot = submitit.helpers.RsyncSnapshot( - snapshot_dir=output_dir / code_dir.name, root_dir=code_dir, exclude=exclude + snapshot_dir=output_dir / code_dir.name, root_dir=code_dir, exclude=exclude, ) with snapshot: pass @@ -59,4 +56,4 @@ def make_submitit_executor( cpus_per_task=cfg.local_job.cpus_per_task, ) - return executor \ No newline at end of file + return executor diff --git a/experiments/job-runner/setup.py b/experiments/job-runner/setup.py index d7776d8f..64299cbe 100644 --- a/experiments/job-runner/setup.py +++ b/experiments/job-runner/setup.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -31,6 +30,6 @@ entry_points={ "console_scripts": [ "runjob=job_runner.runjob:main", - ] + ], }, ) diff --git a/experiments/make_gso_ids.py b/experiments/make_gso_ids.py index e4abf944..cc62a2e4 100644 --- a/experiments/make_gso_ids.py +++ b/experiments/make_gso_ids.py @@ -1,13 +1,12 @@ -import os import json +import os import pathlib as p - if __name__ == "__main__": hp_data_dir = os.environ["HP_DATA_DIR"] gso_dir = p.Path(hp_data_dir) / "google_scanned_objects" / "models_orig" models = [] for n, model_path in enumerate(gso_dir.glob("**/meshes/model.obj")): - models.append(dict(obj_id=n, gso_id=model_path.parent.parent.name)) + models.append({"obj_id": n, "gso_id": model_path.parent.parent.name}) infos_path = p.Path(hp_data_dir) / "dataset-infos" / "gso_models.json" infos_path.write_text(json.dumps(models, indent=2)) diff --git a/experiments/make_shapenet_ids.py b/experiments/make_shapenet_ids.py index 0c324bc3..cf68d1b7 100644 --- a/experiments/make_shapenet_ids.py +++ b/experiments/make_shapenet_ids.py @@ -1,12 +1,12 @@ -import os -import pandas as pd -from collections import deque import json -import typing as tp +import os import pathlib as p +import typing as tp +from collections import deque from dataclasses import dataclass + @dataclass class ShapeNetSynset: id: str @@ -25,14 +25,14 @@ class ModelInfo: def read_models(shapenet_dir): # TODO: This probably has issues / is poorly implemented and very slow - taxonomy = json.load(open(shapenet_dir / "taxonomy.json", "r")) + taxonomy = json.load(open(shapenet_dir / "taxonomy.json")) - id_to_synset: tp.Dict[int, ShapeNetSynset] = dict() + id_to_synset: tp.Dict[int, ShapeNetSynset] = {} for synset in taxonomy: synset_id = synset["synsetId"] id_to_synset[synset_id] = ShapeNetSynset( - id=synset_id, name=synset["name"], children=synset["children"], parents=[] + id=synset_id, name=synset["name"], children=synset["children"], parents=[], ) for synset in taxonomy: @@ -60,12 +60,12 @@ def get_names(synset_id, id_to_synset): names = get_names(synset_id, id_to_synset) names = ",".join(names) models.append( - dict( - obj_id=n, - shapenet_synset_id=synset_id, - shapenet_source_id=source_id, - shapenet_name=names, - ) + { + "obj_id": n, + "shapenet_synset_id": synset_id, + "shapenet_source_id": source_id, + "shapenet_name": names, + }, ) return models diff --git a/experiments/postprocess_dataset.py b/experiments/postprocess_dataset.py index a78b12df..e1de9b5d 100644 --- a/experiments/postprocess_dataset.py +++ b/experiments/postprocess_dataset.py @@ -1,16 +1,15 @@ -import os -import hydra -from dataclasses import dataclass -import submitit -import numpy as np import json import pathlib as p -from bop_toolkit_lib.dataset.bop_imagewise import io_load_gt -from bop_toolkit_lib import inout +from dataclasses import dataclass -from job_runner.utils import make_submitit_executor -from job_runner.configs import RunnerConfig +import hydra +import numpy as np +import submitit +from bop_toolkit_lib import inout +from bop_toolkit_lib.dataset.bop_imagewise import io_load_gt from hydra.core.config_store import ConfigStore +from job_runner.configs import RunnerConfig +from job_runner.utils import make_submitit_executor def process_key(key, ds_dir, stoi_obj, out_dir): @@ -37,7 +36,7 @@ def process_key(key, ds_dir, stoi_obj, out_dir): if is_valid: out_dir.mkdir(exist_ok=True) - with open(ds_dir / f"{key}.gt.json", "r") as f: + with open(ds_dir / f"{key}.gt.json") as f: gt = io_load_gt(f) for gt_n in gt: gt_n["obj_id"] = stoi_obj[gt_n["obj_id"]] @@ -56,8 +55,8 @@ 
def load_stoi(ds_dir): if not p.exists(): p = ds_dir / "shapenet_models.json" assert p.exists() - infos = json.load(open(p, "r")) - stoi = dict() + infos = json.load(open(p)) + stoi = {} for info in infos: if "gso_id" in info: stoi[f"gso_{info['gso_id']}"] = info["obj_id"] @@ -83,7 +82,7 @@ class Config: @hydra.main( - version_base=None, config_path="../configs", config_name="run_ds_postproc/default" + version_base=None, config_path="../configs", config_name="run_ds_postproc/default", ) def main(cfg: Config): executor = make_submitit_executor(cfg.runner) @@ -92,7 +91,7 @@ def main(cfg: Config): stoi = load_stoi(ds_dir) paths = (ds_dir / "train_pbr_v2format").glob("*") - keys = list(set([str(p.name).split(".")[0] for p in paths])) + keys = list({str(p.name).split(".")[0] for p in paths}) keys_splits = np.array_split(keys, cfg.n_jobs) jobs = [] diff --git a/happypose/pose_estimators/megapose/CLA b/happypose/pose_estimators/megapose/CLA index c93b751c..f47f66b9 100644 --- a/happypose/pose_estimators/megapose/CLA +++ b/happypose/pose_estimators/megapose/CLA @@ -21,4 +21,4 @@ Disclaimer. To the fullest extent permitted under applicable law, your Contributions are provided on an "as is" basis, without any warranties or conditions, express or implied, including, without limitation, any implied warranties or conditions of non-infringement, merchantability or fitness for a particular purpose. You are not required to provide support for your Contributions, except to the extent you desire to provide support. No Obligation. -You acknowledge that the maintainers of this project are under no obligation to use or incorporate your contributions into the project. The decision to use or incorporate your contributions into the project will be made at the sole discretion of the maintainers or their authorized delegates. \ No newline at end of file +You acknowledge that the maintainers of this project are under no obligation to use or incorporate your contributions into the project. The decision to use or incorporate your contributions into the project will be made at the sole discretion of the maintainers or their authorized delegates. diff --git a/happypose/pose_estimators/megapose/LICENSE b/happypose/pose_estimators/megapose/LICENSE index ed3b434f..6a3914c9 100644 --- a/happypose/pose_estimators/megapose/LICENSE +++ b/happypose/pose_estimators/megapose/LICENSE @@ -10,4 +10,4 @@ Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and -limitations under the License. \ No newline at end of file +limitations under the License. diff --git a/happypose/pose_estimators/megapose/README.md b/happypose/pose_estimators/megapose/README.md index 5f046003..fd349cab 100644 --- a/happypose/pose_estimators/megapose/README.md +++ b/happypose/pose_estimators/megapose/README.md @@ -1,5 +1,5 @@ # MegaPose -This repository contains code, models and dataset for our MegaPose paper. +This repository contains code, models and dataset for our MegaPose paper. Yann Labbé, Lucas Manuelli, Arsalan Mousavian, Stephen Tyree, Stan Birchfield, Jonathan Tremblay, Justin Carpentier, Mathieu Aubry, Dieter Fox, Josef Sivic. “MegaPose: 6D Pose Estimation of Novel Objects via Render & Compare.” In: CoRL 2022. 
@@ -33,15 +33,15 @@ This repository contains pre-trained models for pose estimation of novel objects ## Pose estimation of novel objects -We provide pre-trained models for 6D pose estimation of novel objects. +We provide pre-trained models for 6D pose estimation of novel objects. -Given as inputs: +Given as inputs: - an RGB image (depth can also be used but is optional), - the intrinsic parameters of the camera, - a mesh of the object, - a bounding box of that object in the image, -our approach estimates the 6D pose of the object (3D rotation + 3D translation) with respect to the camera. +our approach estimates the 6D pose of the object (3D rotation + 3D translation) with respect to the camera. We provide a script and an example for inference on novel objects. After installation, please see the [Inference tutorial](#inference-tutorial). @@ -52,7 +52,7 @@ We provide the synthetic dataset we used to train MegaPose. The dataset contains # Installation -Once you are done with the installation, we recommend you head to the [Inference tutorial](#inference-tutorial). +Once you are done with the installation, we recommend you head to the [Inference tutorial](#inference-tutorial). The first step is to clone the repo and submodules: ``` @@ -84,7 +84,7 @@ pip install -e . Click for details... ### Create a conda environment -Creat a conda environment with `python==3.9`. We will use this conda environment to manage a small number of dependencies needed for +Creat a conda environment with `python==3.9`. We will use this conda environment to manage a small number of dependencies needed for ``` conda env create -f conda/environment.yaml @@ -257,7 +257,7 @@ python -m megapose.scripts.run_inference_on_example barbecue-sauce --vis-detecti ## 3. Run pose estimation and visualize results Run inference with the following command: ``` -python -m megapose.scripts.run_inference_on_example barbecue-sauce --run-inference +python -m megapose.scripts.run_inference_on_example barbecue-sauce --run-inference ``` by default, the model only uses the RGB input. You can use of our RGB-D megapose models using the `--model` argument. Please see our [Model Zoo](#model-zoo) for all models available. @@ -272,7 +272,7 @@ This file contains a list of objects with their estimated poses . For each objec [{"label": "barbecue-sauce", "TWO": [[0.5453961536730983, 0.6226545207599095, -0.43295293693197473, 0.35692612413663855], [0.10723329335451126, 0.07313819974660873, 0.45735278725624084]]}] -Finally, you can visualize the results using: +Finally, you can visualize the results using: ``` python -m megapose.scripts.run_inference_on_example barbecue-sauce --vis-outputs @@ -312,7 +312,7 @@ For optimal performance, we recommend using `megapose-1.0-RGB-multi-hypothesis` ## Dataset information The dataset is available at this [url](https://drive.google.com/drive/folders/1CXc_GG11jNVMeGr-Mb4o4iiNjYeKDkKd?usp=sharing). It is split into two datasets: `gso_1M` (Google Scanned Objects) and `shapenet_1M` (ShapeNet objects). Each dataset has 1 million images which were generated using [BlenderProc](https://github.com/DLR-RM/BlenderProc). -Datasets are released in the [webdataset](https://github.com/webdataset/webdataset) format for high reading performance. Each dataset is split into chunks of size ~600MB containing 1000 images each. +Datasets are released in the [webdataset](https://github.com/webdataset/webdataset) format for high reading performance. Each dataset is split into chunks of size ~600MB containing 1000 images each. 
We provide the pre-processed meshes ready to be used for rendering and training in this [directory](https://drive.google.com/drive/folders/1AYxkv7jpDniOnTcMAxiWbdhPo8WBJaZG): - `google_scanned_objects.zip` diff --git a/happypose/pose_estimators/megapose/conda/environment.yaml b/happypose/pose_estimators/megapose/conda/environment.yaml index c780d9ce..4a6ec2e3 100644 --- a/happypose/pose_estimators/megapose/conda/environment.yaml +++ b/happypose/pose_estimators/megapose/conda/environment.yaml @@ -11,4 +11,4 @@ dependencies: - python-wget - joblib - pip: - - meshcat \ No newline at end of file + - meshcat diff --git a/happypose/pose_estimators/megapose/conda/environment_full.yaml b/happypose/pose_estimators/megapose/conda/environment_full.yaml index caa60071..6f13a11b 100644 --- a/happypose/pose_estimators/megapose/conda/environment_full.yaml +++ b/happypose/pose_estimators/megapose/conda/environment_full.yaml @@ -31,7 +31,7 @@ dependencies: - selenium - omegaconf - simplejson - - line_profiler + - line_profiler - opencv-python - torchnet - tqdm @@ -39,24 +39,24 @@ dependencies: - transforms3d - panda3d - joblib - - xarray + - xarray - pandas - matplotlib - bokeh - - plyfile - - trimesh - - ipdb + - plyfile + - trimesh + - ipdb - panda3d-gltf - colorama - pyyaml - ipykernel - scipy - pypng - - h5py + - h5py - seaborn - - kornia - - pyarrow - - dt_apriltags + - kornia + - pyarrow + - dt_apriltags - open3d - structlog - imageio @@ -64,6 +64,6 @@ dependencies: - pyyaml - psutil - webdataset - - opencv-contrib-python - - roma + - opencv-contrib-python + - roma - torchgeometry diff --git a/happypose/pose_estimators/megapose/docker/Dockerfile.megapose b/happypose/pose_estimators/megapose/docker/Dockerfile.megapose index c5236c60..f5d5d38e 100644 --- a/happypose/pose_estimators/megapose/docker/Dockerfile.megapose +++ b/happypose/pose_estimators/megapose/docker/Dockerfile.megapose @@ -10,7 +10,7 @@ SHELL ["/bin/bash", "-c"] ENV DEBIAN_FRONTEND noninteractive -# Needed to deal with this issue +# Needed to deal with this issue # https://forums.developer.nvidia.com/t/the-repository-https-developer-download-nvidia-com-compute-cuda-repos-ubuntu1804-x86-64-release-does-not-have-a-release-file/175263 RUN apt-get install -y --no-install-recommends ca-certificates \ && rm -rf /var/lib/apt/lists/* \ @@ -26,7 +26,7 @@ RUN rm -f /etc/apt/sources.list.d/nvidia-ml.list RUN apt update && apt upgrade -y && \ - apt install -y vim tar wget htop xorg openbox bzip2 \ + apt install -y vim tar wget htop xorg openbox bzip2 \ tar apt-utils # Install Anaconda @@ -59,7 +59,7 @@ RUN source $CONDA_DIR/bin/activate && \ scipy pypng h5py seaborn kornia meshcat pyarrow dt_apriltags open3d structlog \ imageio -# Blender +# Blender RUN cd $HOME && \ wget https://mirrors.dotsrc.org/blender/release/Blender2.93/blender-2.93.8-linux-x64.tar.xz && \ tar -xvf blender-2.93.8-linux-x64.tar.xz && rm blender-2.93.8-linux-x64.tar.xz diff --git a/happypose/pose_estimators/megapose/license_files/python_license_header.txt b/happypose/pose_estimators/megapose/license_files/python_license_header.txt index 73a7b275..1d77caef 100644 --- a/happypose/pose_estimators/megapose/license_files/python_license_header.txt +++ b/happypose/pose_estimators/megapose/license_files/python_license_header.txt @@ -13,4 +13,3 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
""" - diff --git a/happypose/pose_estimators/megapose/pyproject.toml b/happypose/pose_estimators/megapose/pyproject.toml index a0278d2e..6c7aba42 100644 --- a/happypose/pose_estimators/megapose/pyproject.toml +++ b/happypose/pose_estimators/megapose/pyproject.toml @@ -27,12 +27,12 @@ requires = ["setuptools>=45", "wheel"] extend-exclude = "deprecated|.ipynb|deps|local_data" line-length = 100 -# See the `setuptools_scm` documentation for the description of the schemes used below. -# https://pypi.org/project/setuptools-scm/ -# NOTE: If these values are updated, they need to be also updated in `srl/__init__.py`. -# [tool.setuptools_scm] -# local_scheme = "dirty-tag" -# version_scheme = "no-guess-dev" +[tool.coverage.report] +# NOTE: Single-quoted strings are required in TOML for regular expressions +exclude_lines = [ + "pragma: no cover", # Need to re-enable the standard no cover match + '^\s*pass\s*$' # Skip any pass lines +] [tool.isort] # Multi line output mode 3 is used to conform with Black. See @@ -44,27 +44,16 @@ import_heading_thirdparty = "Third Party" known_first_party = ['megapose'] multi_line_output = 3 profile = "black" -skip = ['local_data', 'build', 'deps'] - -[tool.pytest.ini_options] -norecursedirs = [".git", ".venv", "deprecated", "dist"] -python_files = ["*_test.py"] - -[tool.coverage.report] -# NOTE: Single-quoted strings are required in TOML for regular expressions -exclude_lines = [ - "pragma: no cover", # Need to re-enable the standard no cover match - '^\s*pass\s*$', # Skip any pass lines -] +skip = ['build', 'deps', 'local_data'] [tool.mypy] disallow_untyped_defs = true exclude = [ - "venv", - "local_data", + "deprecated/", "deps/", + "local_data", "slurm/", - "deprecated/", + "venv" ] no_implicit_optional = true show_error_codes = true @@ -75,5 +64,9 @@ ignore_missing_imports = true module = [ "importlib.metadata", "setuptools", - "setuptools_scm", + "setuptools_scm" ] + +[tool.pytest.ini_options] +norecursedirs = [".git", ".venv", "deprecated", "dist"] +python_files = ["*_test.py"] diff --git a/happypose/pose_estimators/megapose/rclone.conf b/happypose/pose_estimators/megapose/rclone.conf index bda3fff5..46f0701a 100755 --- a/happypose/pose_estimators/megapose/rclone.conf +++ b/happypose/pose_estimators/megapose/rclone.conf @@ -3,5 +3,4 @@ type = drive scope = drive.readonly root_folder_id = 1uvBWZLqt9JchS5OzU32Wa5Qb1oDNmLwq token = {"access_token":"ya29.a0AX9GBdXgsyjAGO5_zQFX46WgiSW4v-fsqKmh_YhppJ70wzzNgvKqOaknvU74r9cNl2Z5nZ2rGEM3exJTTN3q3NyXZH4nFCxam-y3dAxq2wMvZD57xLaFnZDY6UuWauvnkDYW45YXjbOwGJ0cENWuyxrzNvWbjtqraCgYKAfESAQASFQHUCsbCQmDqw9mtfiIyGcyP6ForhQ0167","token_type":"Bearer","refresh_token":"1//0352qz_FkNHlQCgYIARAAGAMSNwF-L9IrsS8UdbcNPWPLsULKLfJCGmz4bEhQNBKq0g-ZliuBOH6kDqcmhIA8hncg0ZY2AqhdiDo","expiry":"2023-01-09T10:35:18.4941513+01:00"} -team_drive = - +team_drive = diff --git a/happypose/pose_estimators/megapose/src/megapose/__init__.py b/happypose/pose_estimators/megapose/src/megapose/__init__.py index a5524b94..1f208050 100644 --- a/happypose/pose_estimators/megapose/src/megapose/__init__.py +++ b/happypose/pose_estimators/megapose/src/megapose/__init__.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -15,10 +14,9 @@ """ - # Standard Library import os -import cv2 + def assign_gpu() -> None: @@ -40,10 +38,13 @@ def assign_gpu() -> None: os.environ["OMP_NUM_THREADS"] = "1" if "EGL_VISIBLE_DEVICES" not in os.environ: - os.environ['EGL_VISIBLE_DEVICES'] = '0' + os.environ["EGL_VISIBLE_DEVICES"] = "0" for k in ( - "MKL_NUM_THREADS", "OMP_NUM_THREADS", - "CUDA_VISIBLE_DEVICES", "EGL_VISIBLE_DEVICES"): + "MKL_NUM_THREADS", + "OMP_NUM_THREADS", + "CUDA_VISIBLE_DEVICES", + "EGL_VISIBLE_DEVICES", +): if k in os.environ: print(f"{k}: {os.environ[k]}") diff --git a/happypose/pose_estimators/megapose/src/megapose/bop_config.py b/happypose/pose_estimators/megapose/src/megapose/bop_config.py index 6c6e752a..fb0d21fa 100644 --- a/happypose/pose_estimators/megapose/src/megapose/bop_config.py +++ b/happypose/pose_estimators/megapose/src/megapose/bop_config.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -17,183 +16,187 @@ # Cosypose models -BOP_CONFIG = dict() -BOP_CONFIG["hb"] = dict( - input_resize=(640, 480), - urdf_ds_name="hb", - obj_ds_name="hb", - train_pbr_ds_name=["hb.pbr"], - inference_ds_name=["hb.bop19"], - test_ds_name=[], -) - -BOP_CONFIG["icbin"] = dict( - input_resize=(640, 480), - urdf_ds_name="icbin", - obj_ds_name="icbin", - train_pbr_ds_name=["icbin.pbr"], - inference_ds_name=["icbin.bop19"], - test_ds_name=["icbin.bop19"], -) - - -BOP_CONFIG["itodd"] = dict( - input_resize=(1280, 960), - urdf_ds_name="itodd", - obj_ds_name="itodd", - train_pbr_ds_name=["itodd.pbr"], - inference_ds_name=["itodd.bop19"], - test_ds_name=[], - val_ds_name=["itodd.val"], -) - - -BOP_CONFIG["lmo"] = dict( - input_resize=(640, 480), - urdf_ds_name="lm", - obj_ds_name="lm", - train_pbr_ds_name=["lm.pbr"], - train_synt_real_ds_names=[ +BOP_CONFIG = {} +BOP_CONFIG["hb"] = { + "input_resize": (640, 480), + "urdf_ds_name": "hb", + "obj_ds_name": "hb", + "train_pbr_ds_name": ["hb.pbr"], + "inference_ds_name": ["hb.bop19"], + "test_ds_name": [], +} + +BOP_CONFIG["icbin"] = { + "input_resize": (640, 480), + "urdf_ds_name": "icbin", + "obj_ds_name": "icbin", + "train_pbr_ds_name": ["icbin.pbr"], + "inference_ds_name": ["icbin.bop19"], + "test_ds_name": ["icbin.bop19"], +} + + +BOP_CONFIG["itodd"] = { + "input_resize": (1280, 960), + "urdf_ds_name": "itodd", + "obj_ds_name": "itodd", + "train_pbr_ds_name": ["itodd.pbr"], + "inference_ds_name": ["itodd.bop19"], + "test_ds_name": [], + "val_ds_name": ["itodd.val"], +} + + +BOP_CONFIG["lmo"] = { + "input_resize": (640, 480), + "urdf_ds_name": "lm", + "obj_ds_name": "lm", + "train_pbr_ds_name": ["lm.pbr"], + "train_synt_real_ds_names": [ ("lm.pbr", 1), ], - inference_ds_name=["lmo.bop19"], - test_ds_name=["lmo.bop19"], -) - -BOP_CONFIG["lm"] = dict( - input_resize=(640, 480), - urdf_ds_name="lm", - obj_ds_name="lm", - train_pbr_ds_name=["lm.pbr"], - train_synt_real_ds_names=[ + "inference_ds_name": ["lmo.bop19"], + "test_ds_name": ["lmo.bop19"], +} + +BOP_CONFIG["lm"] = { + "input_resize": (640, 480), + "urdf_ds_name": "lm", + "obj_ds_name": "lm", + "train_pbr_ds_name": ["lm.pbr"], + "train_synt_real_ds_names": [ ("lm.pbr", 1), ], -) - - -BOP_CONFIG["tless"] = dict( - input_resize=(720, 540), - urdf_ds_name="tless.cad", - obj_ds_name="tless.cad", - train_pbr_ds_name=["tless.pbr"], - 
inference_ds_name=["tless.bop19"], - test_ds_name=["tless.bop19"], - train_synt_real_ds_names=[("tless.pbr", 4), ("tless.primesense.train", 1)], - train_opengl_ds_names=[("tless.opengl", 1)], - train_mysynt_ds_names=[("synthetic.tless-1M.train", 1)], -) - -BOP_CONFIG["tudl"] = dict( - input_resize=(640, 480), - urdf_ds_name="tudl", - obj_ds_name="tudl", - train_pbr_ds_name=["tudl.pbr"], - inference_ds_name=["tudl.bop19"], - test_ds_name=["tudl.bop19"], - train_synt_real_ds_names=[("tudl.pbr", 10), ("tudl.train.real", 1)], - train_opengl_ds_names=[("tudl.opengl", 1)], - train_mysynt_ds_names=[("synthetic.tudl-1M.train", 1)], -) - - -BOP_CONFIG["ycbv"] = dict( - input_resize=(640, 480), - urdf_ds_name="ycbv", - obj_ds_name="ycbv", - train_pbr_ds_name=["ycbv.pbr"], - train_pbr_real_ds_names=[("ycbv.pbr", 1), ()], - inference_ds_name=["ycbv.bop19"], - test_ds_name=["ycbv.bop19"], - train_synt_real_ds_names=[("ycbv.pbr", 20), ("ycbv.train.synt", 1), ("ycbv.train.real", 3)], - train_opengl_ds_names=[("ycbv.opengl", 1)], - train_mysynt_ds_names=[("synthetic.ycbv-1M.train", 1)], -) - -BOP_CONFIG["ruapc"] = dict( +} + + +BOP_CONFIG["tless"] = { + "input_resize": (720, 540), + "urdf_ds_name": "tless.cad", + "obj_ds_name": "tless.cad", + "train_pbr_ds_name": ["tless.pbr"], + "inference_ds_name": ["tless.bop19"], + "test_ds_name": ["tless.bop19"], + "train_synt_real_ds_names": [("tless.pbr", 4), ("tless.primesense.train", 1)], + "train_opengl_ds_names": [("tless.opengl", 1)], + "train_mysynt_ds_names": [("synthetic.tless-1M.train", 1)], +} + +BOP_CONFIG["tudl"] = { + "input_resize": (640, 480), + "urdf_ds_name": "tudl", + "obj_ds_name": "tudl", + "train_pbr_ds_name": ["tudl.pbr"], + "inference_ds_name": ["tudl.bop19"], + "test_ds_name": ["tudl.bop19"], + "train_synt_real_ds_names": [("tudl.pbr", 10), ("tudl.train.real", 1)], + "train_opengl_ds_names": [("tudl.opengl", 1)], + "train_mysynt_ds_names": [("synthetic.tudl-1M.train", 1)], +} + + +BOP_CONFIG["ycbv"] = { + "input_resize": (640, 480), + "urdf_ds_name": "ycbv", + "obj_ds_name": "ycbv", + "train_pbr_ds_name": ["ycbv.pbr"], + "train_pbr_real_ds_names": [("ycbv.pbr", 1), ()], + "inference_ds_name": ["ycbv.bop19"], + "test_ds_name": ["ycbv.bop19"], + "train_synt_real_ds_names": [ + ("ycbv.pbr", 20), + ("ycbv.train.synt", 1), + ("ycbv.train.real", 3), + ], + "train_opengl_ds_names": [("ycbv.opengl", 1)], + "train_mysynt_ds_names": [("synthetic.ycbv-1M.train", 1)], +} + +BOP_CONFIG["ruapc"] = { # TODO: input resize - input_resize=(640, 480), - urdf_ds_name="ruapc", - obj_ds_name="ruapc", - train_pbr_ds_name=[], - train_pbr_real_ds_names=[], - inference_ds_name=["ruapc.bop19"], - test_ds_name=["ruapc.bop19"], -) - -BOP_CONFIG["tyol"] = dict( + "input_resize": (640, 480), + "urdf_ds_name": "ruapc", + "obj_ds_name": "ruapc", + "train_pbr_ds_name": [], + "train_pbr_real_ds_names": [], + "inference_ds_name": ["ruapc.bop19"], + "test_ds_name": ["ruapc.bop19"], +} + +BOP_CONFIG["tyol"] = { # TODO: input resize - input_resize=(640, 480), - urdf_ds_name="tyol", - obj_ds_name="tyol", - train_pbr_ds_name=[], - train_pbr_real_ds_names=[], - inference_ds_name=["tyol.bop19"], - test_ds_name=["tyol.bop19"], -) - -BOP_CONFIG["moped"] = dict( - input_resize=(640, 480), - urdf_ds_name="moped", - obj_ds_name="moped", - train_pbr_ds_name=[], - train_pbr_real_ds_names=[], - inference_ds_name=["moped"], - test_ds_name=["moped"], -) - -for k, v in BOP_CONFIG.items(): + "input_resize": (640, 480), + "urdf_ds_name": "tyol", + "obj_ds_name": "tyol", + "train_pbr_ds_name": [], 
+ "train_pbr_real_ds_names": [], + "inference_ds_name": ["tyol.bop19"], + "test_ds_name": ["tyol.bop19"], +} + +BOP_CONFIG["moped"] = { + "input_resize": (640, 480), + "urdf_ds_name": "moped", + "obj_ds_name": "moped", + "train_pbr_ds_name": [], + "train_pbr_real_ds_names": [], + "inference_ds_name": ["moped"], + "test_ds_name": ["moped"], +} + +for _k, v in BOP_CONFIG.items(): v["panda3d_obj_ds_name"] = v["obj_ds_name"] + ".panda3d" -PBR_DETECTORS = dict( - hb="detector-bop-hb-pbr--497808", - icbin="detector-bop-icbin-pbr--947409", - itodd="detector-bop-itodd-pbr--509908", - lmo="detector-bop-lmo-pbr--517542", - tless="detector-bop-tless-pbr--873074", - tudl="detector-bop-tudl-pbr--728047", - ycbv="detector-bop-ycbv-pbr--970850", - hope="detector-bop-hope-pbr--15246", -) - -PBR_COARSE = dict( - hb="coarse-bop-hb-pbr--70752", - icbin="coarse-bop-icbin-pbr--915044", - itodd="coarse-bop-itodd-pbr--681884", - lmo="coarse-bop-lmo-pbr--707448", - tless="coarse-bop-tless-pbr--506801", - tudl="coarse-bop-tudl-pbr--373484", - ycbv="coarse-bop-ycbv-pbr--724183", - hope="bop-hope-pbr-coarse-transnoise-zxyavg-225203", -) - -PBR_REFINER = dict( - hb="refiner-bop-hb-pbr--247731", - icbin="refiner-bop-icbin-pbr--841882", - itodd="refiner-bop-itodd-pbr--834427", - lmo="refiner-bop-lmo-pbr--325214", - tless="refiner-bop-tless-pbr--233420", - tudl="refiner-bop-tudl-pbr--487212", - ycbv="refiner-bop-ycbv-pbr--604090", - hope="bop-hope-pbr-refiner--955392", -) - -SYNT_REAL_DETECTORS = dict( - tudl="detector-bop-tudl-synt+real--298779", - tless="detector-bop-tless-synt+real--452847", - ycbv="detector-bop-ycbv-synt+real--292971", -) - -SYNT_REAL_COARSE = dict( - tudl="coarse-bop-tudl-synt+real--610074", - tless="coarse-bop-tless-synt+real--160982", - ycbv="coarse-bop-ycbv-synt+real--822463", -) - -SYNT_REAL_REFINER = dict( - tudl="refiner-bop-tudl-synt+real--423239", - tless="refiner-bop-tless-synt+real--881314", - ycbv="refiner-bop-ycbv-synt+real--631598", -) +PBR_DETECTORS = { + "hb": "detector-bop-hb-pbr--497808", + "icbin": "detector-bop-icbin-pbr--947409", + "itodd": "detector-bop-itodd-pbr--509908", + "lmo": "detector-bop-lmo-pbr--517542", + "tless": "detector-bop-tless-pbr--873074", + "tudl": "detector-bop-tudl-pbr--728047", + "ycbv": "detector-bop-ycbv-pbr--970850", + "hope": "detector-bop-hope-pbr--15246", +} + +PBR_COARSE = { + "hb": "coarse-bop-hb-pbr--70752", + "icbin": "coarse-bop-icbin-pbr--915044", + "itodd": "coarse-bop-itodd-pbr--681884", + "lmo": "coarse-bop-lmo-pbr--707448", + "tless": "coarse-bop-tless-pbr--506801", + "tudl": "coarse-bop-tudl-pbr--373484", + "ycbv": "coarse-bop-ycbv-pbr--724183", + "hope": "bop-hope-pbr-coarse-transnoise-zxyavg-225203", +} + +PBR_REFINER = { + "hb": "refiner-bop-hb-pbr--247731", + "icbin": "refiner-bop-icbin-pbr--841882", + "itodd": "refiner-bop-itodd-pbr--834427", + "lmo": "refiner-bop-lmo-pbr--325214", + "tless": "refiner-bop-tless-pbr--233420", + "tudl": "refiner-bop-tudl-pbr--487212", + "ycbv": "refiner-bop-ycbv-pbr--604090", + "hope": "bop-hope-pbr-refiner--955392", +} + +SYNT_REAL_DETECTORS = { + "tudl": "detector-bop-tudl-synt+real--298779", + "tless": "detector-bop-tless-synt+real--452847", + "ycbv": "detector-bop-ycbv-synt+real--292971", +} + +SYNT_REAL_COARSE = { + "tudl": "coarse-bop-tudl-synt+real--610074", + "tless": "coarse-bop-tless-synt+real--160982", + "ycbv": "coarse-bop-ycbv-synt+real--822463", +} + +SYNT_REAL_REFINER = { + "tudl": "refiner-bop-tudl-synt+real--423239", + "tless": "refiner-bop-tless-synt+real--881314", + "ycbv": 
"refiner-bop-ycbv-synt+real--631598", +} for k, v in PBR_COARSE.items(): if k not in SYNT_REAL_COARSE: diff --git a/happypose/pose_estimators/megapose/src/megapose/config.py b/happypose/pose_estimators/megapose/src/megapose/config.py index 4e56d25f..4275a319 100644 --- a/happypose/pose_estimators/megapose/src/megapose/config.py +++ b/happypose/pose_estimators/megapose/src/megapose/config.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -29,7 +28,9 @@ PROJECT_ROOT = Path(megapose.__file__).parent.parent.parent PROJECT_DIR = PROJECT_ROOT -LOCAL_DATA_DIR = Path(os.environ.get("MEGAPOSE_DATA_DIR", Path(PROJECT_DIR) / "local_data")) +LOCAL_DATA_DIR = Path( + os.environ.get("MEGAPOSE_DATA_DIR", Path(PROJECT_DIR) / "local_data"), +) BOP_DS_DIR = LOCAL_DATA_DIR / "bop_datasets" NB_DATA_DIR = LOCAL_DATA_DIR / "notebook_data" SHAPENET_DIR = LOCAL_DATA_DIR / "shapenetcorev2" @@ -53,10 +54,10 @@ BLENDER_INSTALL_DIR = Path(os.environ["HOME"]) / BLENDER_VERSION PYTHON_BIN_PATH = ( - Path(os.environ["CONDA_PREFIX"]) / "bin/python" - if "CONDA_PREFIX" in os.environ - else Path(sys.executable) - ) + Path(os.environ["CONDA_PREFIX"]) / "bin/python" + if "CONDA_PREFIX" in os.environ + else Path(sys.executable) +) BOP_PANDA3D_DS_DIR = LOCAL_DATA_DIR / "bop_models_panda3d" @@ -106,14 +107,12 @@ "tv_stand", ] -SHAPENET_MODELNET_CATEGORIES = set( - [ +SHAPENET_MODELNET_CATEGORIES = { "guitar", "bathtub,bathing tub,bath,tub", "bookshelf", "sofa,couch,lounge", - ] -) + } YCBV_OBJECT_NAMES = [ ["obj_000001", "01_master_chef_can"], diff --git a/happypose/pose_estimators/megapose/src/megapose/datasets/__init__.py b/happypose/pose_estimators/megapose/src/megapose/datasets/__init__.py index 73a7b275..09aba5e2 100644 --- a/happypose/pose_estimators/megapose/src/megapose/datasets/__init__.py +++ b/happypose/pose_estimators/megapose/src/megapose/datasets/__init__.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -13,4 +12,3 @@ See the License for the specific language governing permissions and limitations under the License. """ - diff --git a/happypose/pose_estimators/megapose/src/megapose/evaluation/__init__.py b/happypose/pose_estimators/megapose/src/megapose/evaluation/__init__.py index 73a7b275..09aba5e2 100644 --- a/happypose/pose_estimators/megapose/src/megapose/evaluation/__init__.py +++ b/happypose/pose_estimators/megapose/src/megapose/evaluation/__init__.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -13,4 +12,3 @@ See the License for the specific language governing permissions and limitations under the License. 
""" - diff --git a/happypose/pose_estimators/megapose/src/megapose/evaluation/bop.py b/happypose/pose_estimators/megapose/src/megapose/evaluation/bop.py index 4193c00d..8427e348 100644 --- a/happypose/pose_estimators/megapose/src/megapose/evaluation/bop.py +++ b/happypose/pose_estimators/megapose/src/megapose/evaluation/bop.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,7 +14,6 @@ """ - # Standard Library import argparse import importlib @@ -31,8 +29,14 @@ from tqdm import tqdm # MegaPose -from happypose.pose_estimators.megapose.src.megapose.config import BOP_TOOLKIT_DIR, LOCAL_DATA_DIR, PROJECT_DIR -from happypose.pose_estimators.megapose.src.megapose.evaluation.eval_config import BOPEvalConfig +from happypose.pose_estimators.megapose.src.megapose.config import ( + BOP_TOOLKIT_DIR, + LOCAL_DATA_DIR, + PROJECT_DIR, +) +from happypose.pose_estimators.megapose.src.megapose.evaluation.eval_config import ( + BOPEvalConfig, +) # Note we are actually using the bop_toolkit_lib that is directly conda installed # inside the docker image. This is just to access the scripts. @@ -42,7 +46,6 @@ # Third Party -import bop_toolkit_lib from bop_toolkit_lib import inout # noqa @@ -84,22 +87,21 @@ def convert_results_to_coco(results_path, out_json_path, detection_method): category_id = int(row.label.split("_")[-1]) mask = predictions.masks[n].numpy().astype(np.uint8) rle = binary_mask_to_polygon(mask) - info = dict( - scene_id=int(row.scene_id), - view_id=int(row.view_id), - category_id=category_id, - bbox=[x, y, w, h], - score=score, - segmentation=rle, - ) + info = { + "scene_id": int(row.scene_id), + "view_id": int(row.view_id), + "category_id": category_id, + "bbox": [x, y, w, h], + "score": score, + "segmentation": rle, + } infos.append(info) Path(out_json_path).write_text(json.dumps(infos)) return def convert_results_to_bop( - results_path: Path, out_csv_path: Path, method: str, - use_pose_score: bool = True + results_path: Path, out_csv_path: Path, method: str, use_pose_score: bool = True, ): predictions = torch.load(results_path)["predictions"] predictions = predictions[method] @@ -122,27 +124,28 @@ def convert_results_to_bop( time = row.time else: time = -1 - pred = dict( - scene_id=row.scene_id, - im_id=row.view_id, - obj_id=obj_id, - score=score, - t=t, - R=R, - time=time, - ) + pred = { + "scene_id": row.scene_id, + "im_id": row.view_id, + "obj_id": obj_id, + "score": score, + "t": t, + "R": R, + "time": time, + } preds.append(pred) print("Wrote:", out_csv_path) Path(out_csv_path).parent.mkdir(exist_ok=True) inout.save_bop_results(out_csv_path, preds) return out_csv_path + def _run_bop_evaluation(filename, eval_dir, eval_detection=False, dummy=False): myenv = os.environ.copy() myenv["PYTHONPATH"] = BOP_TOOLKIT_DIR.as_posix() - ld_library_path = os.environ['LD_LIBRARY_PATH'] - conda_prefix = os.environ['CONDA_PREFIX'] - myenv["LD_LIBRARY_PATH"] = f'{conda_prefix}/lib:{ld_library_path}' + ld_library_path = os.environ["LD_LIBRARY_PATH"] + conda_prefix = os.environ["CONDA_PREFIX"] + myenv["LD_LIBRARY_PATH"] = f"{conda_prefix}/lib:{ld_library_path}" myenv["BOP_DATASETS_PATH"] = str(LOCAL_DATA_DIR / "bop_datasets") myenv["BOP_RESULTS_PATH"] = str(eval_dir) myenv["BOP_EVAL_PATH"] = str(eval_dir) @@ -199,7 +202,9 @@ def 
run_evaluation(cfg: BOPEvalConfig) -> None: if not cfg.convert_only: _run_bop_evaluation(csv_path, cfg.eval_dir, eval_detection=False) - scores_pose_path = eval_dir / csv_path.with_suffix("").name / "scores_bop19.json" + scores_pose_path = ( + eval_dir / csv_path.with_suffix("").name / "scores_bop19.json" + ) scores_detection_path = None if cfg.detection_method is not None: diff --git a/happypose/pose_estimators/megapose/src/megapose/evaluation/data_utils.py b/happypose/pose_estimators/megapose/src/megapose/evaluation/data_utils.py index 08dbdbca..259df05a 100644 --- a/happypose/pose_estimators/megapose/src/megapose/evaluation/data_utils.py +++ b/happypose/pose_estimators/megapose/src/megapose/evaluation/data_utils.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -36,11 +35,13 @@ def parse_obs_data( """Parses object data into PandasTensorCollection. Args: + ---- obs: The scene observation. object_labels: If specified will only parse information for these object labels. Returns: + ------- PandasTensorCollection infos: pd.DataFrame with fields ['label', 'scene_id', 'view_id', 'visib_fract'] @@ -52,23 +53,23 @@ def parse_obs_data( masks: (optional) """ - - raise ValueError("This function is deprecated.") + msg = "This function is deprecated." + raise ValueError(msg) infos = [] TWO = [] bboxes = [] masks = [] TWC = torch.as_tensor(obs.camera_data.TWC.matrix).float() - for n, obj_data in enumerate(obs.object_datas): + for _n, obj_data in enumerate(obs.object_datas): if object_labels is not None and obj_data.label not in object_labels: continue - info = dict( - label=obj_data.label, - scene_id=obs.infos.scene_id, - view_id=obs.infos.view_id, - visib_fract=getattr(obj_data, "visib_fract", 1), - ) + info = { + "label": obj_data.label, + "scene_id": obs.infos.scene_id, + "view_id": obs.infos.view_id, + "visib_fract": getattr(obj_data, "visib_fract", 1), + } infos.append(info) TWO.append(torch.tensor(obj_data.TWO.matrix).float()) bboxes.append(torch.tensor(obj_data.bbox_modal).float()) diff --git a/happypose/pose_estimators/megapose/src/megapose/evaluation/eval_config.py b/happypose/pose_estimators/megapose/src/megapose/evaluation/eval_config.py index bffa46ee..60606acd 100644 --- a/happypose/pose_estimators/megapose/src/megapose/evaluation/eval_config.py +++ b/happypose/pose_estimators/megapose/src/megapose/evaluation/eval_config.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -20,7 +19,9 @@ from typing import List, Optional # MegaPose -from happypose.pose_estimators.megapose.src.megapose.inference.types import InferenceConfig +from happypose.pose_estimators.megapose.src.megapose.inference.types import ( + InferenceConfig, +) BOP_TEST_DATASETS = [ "lmo.bop19", @@ -41,7 +42,7 @@ class HardwareConfig: @dataclass class EvalConfig: - """Eval Config + """Eval Config. Two options for creating an eval configuration: 1. Create it manually, and set `run_id`. 
@@ -84,7 +85,6 @@ class EvalConfig: @dataclass class FullEvalConfig(EvalConfig): - # Full eval detection_coarse_types: Optional[List] = None ds_names: Optional[List[str]] = None @@ -94,7 +94,6 @@ class FullEvalConfig(EvalConfig): @dataclass class BOPEvalConfig: - results_path: str dataset: str split: str diff --git a/happypose/pose_estimators/megapose/src/megapose/evaluation/evaluation.py b/happypose/pose_estimators/megapose/src/megapose/evaluation/evaluation.py index 2aa2b2d8..cf9ca50b 100644 --- a/happypose/pose_estimators/megapose/src/megapose/evaluation/evaluation.py +++ b/happypose/pose_estimators/megapose/src/megapose/evaluation/evaluation.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -25,7 +24,6 @@ # MegaPose import happypose -import happypose.pose_estimators.megapose.src.megapose as megapose import happypose.pose_estimators.megapose.src.megapose.evaluation.evaluation_runner import happypose.toolbox.datasets.datasets_cfg import happypose.toolbox.inference.utils @@ -70,15 +68,17 @@ def generate_save_key(detection_type: str, coarse_estimation_type: str) -> str: def get_save_dir(cfg: EvalConfig) -> Path: """Returns a save dir. - Example - + Example: + ------- .../ycbv.bop19/gt+SO3_grid You must remove the '.bop19' from the name in order for the bop_toolkit_lib to process it correctly. """ - save_key = generate_save_key(cfg.inference.detection_type, cfg.inference.coarse_estimation_type) + save_key = generate_save_key( + cfg.inference.detection_type, cfg.inference.coarse_estimation_type, + ) assert cfg.save_dir is not None assert cfg.ds_name is not None @@ -99,12 +99,14 @@ def run_eval( cfg.save_dir / ds_name / eval_key / results.pth.tar - Returns: + Returns + ------- dict: If you are rank_0 process, otherwise returns None """ - - save_key = generate_save_key(cfg.inference.detection_type, cfg.inference.coarse_estimation_type) + save_key = generate_save_key( + cfg.inference.detection_type, cfg.inference.coarse_estimation_type, + ) if save_dir is None: save_dir = get_save_dir(cfg) @@ -113,22 +115,31 @@ def run_eval( logger.info(f"Running eval on ds_name={cfg.ds_name} with setting={save_key}") # Load the dataset - ds_kwargs = dict(load_depth=True) - scene_ds = happypose.toolbox.datasets.datasets_cfg.make_scene_dataset(cfg.ds_name, **ds_kwargs) - urdf_ds_name, obj_ds_name = happypose.toolbox.datasets.datasets_cfg.get_obj_ds_info(cfg.ds_name) + ds_kwargs = {"load_depth": True} + scene_ds = happypose.toolbox.datasets.datasets_cfg.make_scene_dataset( + cfg.ds_name, **ds_kwargs, + ) + urdf_ds_name, obj_ds_name = happypose.toolbox.datasets.datasets_cfg.get_obj_ds_info( + cfg.ds_name, + ) # drop frames if this was specified if cfg.n_frames is not None: - scene_ds.frame_index = scene_ds.frame_index[: cfg.n_frames].reset_index(drop=True) + scene_ds.frame_index = scene_ds.frame_index[: cfg.n_frames].reset_index( + drop=True, + ) # Load detector model if cfg.inference.detection_type == "detector": assert cfg.detector_run_id is not None - detector_model = happypose.toolbox.inference.utils.load_detector(cfg.detector_run_id) + detector_model = happypose.toolbox.inference.utils.load_detector( + cfg.detector_run_id, + ) elif cfg.inference.detection_type == "gt": detector_model = None else: - raise ValueError(f"Unknown 
detection_type={cfg.inference.detection_type}") + msg = f"Unknown detection_type={cfg.inference.detection_type}" + raise ValueError(msg) # Load the coarse and mrefiner models # Needed to deal with the fact that str and Optional[str] are incompatible types. @@ -148,15 +159,17 @@ def run_eval( """ object_ds = make_object_dataset(obj_ds_name) - - coarse_model, refiner_model, mesh_db = happypose.toolbox.inference.utils.load_pose_models( + ( + coarse_model, + refiner_model, + mesh_db, + ) = happypose.toolbox.inference.utils.load_pose_models( coarse_run_id=cfg.coarse_run_id, refiner_run_id=cfg.refiner_run_id, object_dataset=object_ds, force_panda3d_renderer=True, ) - renderer = refiner_model.renderer if cfg.inference.run_depth_refiner: @@ -201,7 +214,7 @@ def run_eval( # Compute eval metrics # TODO (lmanuelli): Fix this up. # TODO (ylabbe): Clean this. - eval_metrics, eval_dfs = dict(), dict() + eval_metrics, eval_dfs = {}, {} if not cfg.skip_evaluation: assert "modelnet" in cfg.ds_name object_ds = make_object_dataset(obj_ds_name) diff --git a/happypose/pose_estimators/megapose/src/megapose/evaluation/evaluation_runner.py b/happypose/pose_estimators/megapose/src/megapose/evaluation/evaluation_runner.py index fe9f9ff2..182ca092 100644 --- a/happypose/pose_estimators/megapose/src/megapose/evaluation/evaluation_runner.py +++ b/happypose/pose_estimators/megapose/src/megapose/evaluation/evaluation_runner.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -27,17 +26,21 @@ # MegaPose import happypose.toolbox.utils.tensor_collection as tc -from happypose.pose_estimators.megapose.src.megapose.evaluation.data_utils import ( - parse_obs_data, -) from happypose.toolbox.datasets.samplers import DistributedSceneSampler -from happypose.toolbox.datasets.scene_dataset import SceneDataset, SceneObservation +from happypose.toolbox.datasets.scene_dataset import SceneObservation from happypose.toolbox.utils.distributed import get_rank, get_tmp_dir, get_world_size class EvaluationRunner: - def __init__(self, scene_ds, meters, batch_size=64, cache_data=True, n_workers=4, sampler=None): - + def __init__( + self, + scene_ds, + meters, + batch_size=64, + cache_data=True, + n_workers=4, + sampler=None, + ): self.rank = get_rank() self.world_size = get_world_size() self.tmp_dir = get_tmp_dir() @@ -45,7 +48,7 @@ def __init__(self, scene_ds, meters, batch_size=64, cache_data=True, n_workers=4 self.scene_ds = scene_ds if sampler is None: sampler = DistributedSceneSampler( - scene_ds, num_replicas=self.world_size, rank=self.rank, shuffle=True + scene_ds, num_replicas=self.world_size, rank=self.rank, shuffle=True, ) dataloader = DataLoader( scene_ds, @@ -62,17 +65,17 @@ def __init__(self, scene_ds, meters, batch_size=64, cache_data=True, n_workers=4 self.meters = meters self.meters = OrderedDict( - {k: v for k, v in sorted(self.meters.items(), key=lambda item: item[0])} + dict(sorted(self.meters.items(), key=lambda item: item[0])), ) @staticmethod def make_empty_predictions(): - infos = dict( - view_id=np.empty(0, dtype=np.int), - scene_id=np.empty(0, dtype=np.int), - label=np.empty(0, dtype=np.object), - score=np.empty(0, dtype=np.float), - ) + infos = { + "view_id": np.empty(0, dtype=np.int), + "scene_id": np.empty(0, dtype=np.int), + "label": np.empty(0, 
dtype=np.object), + "score": np.empty(0, dtype=np.float), + } poses = torch.empty(0, 4, 4, dtype=torch.float) return tc.PandasTensorCollection(infos=pd.DataFrame(infos), poses=poses) @@ -83,12 +86,12 @@ def evaluate(self, obj_predictions, device="cuda"): meter.reset() obj_predictions = obj_predictions.to(device) for data in tqdm(self.dataloader): - for k, meter in self.meters.items(): + for _k, meter in self.meters.items(): meter.add(obj_predictions, data["gt_data"].to(device)) return self.summary() def summary(self): - summary, dfs = dict(), dict() + summary, dfs = {}, {} for meter_k, meter in self.meters.items(): if len(meter.datas) > 0: meter.gather_distributed(tmp_dir=self.tmp_dir) diff --git a/happypose/pose_estimators/megapose/src/megapose/evaluation/meters/__init__.py b/happypose/pose_estimators/megapose/src/megapose/evaluation/meters/__init__.py index 73a7b275..09aba5e2 100644 --- a/happypose/pose_estimators/megapose/src/megapose/evaluation/meters/__init__.py +++ b/happypose/pose_estimators/megapose/src/megapose/evaluation/meters/__init__.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -13,4 +12,3 @@ See the License for the specific language governing permissions and limitations under the License. """ - diff --git a/happypose/pose_estimators/megapose/src/megapose/evaluation/meters/base.py b/happypose/pose_estimators/megapose/src/megapose/evaluation/meters/base.py index d1998982..2e8f16df 100644 --- a/happypose/pose_estimators/megapose/src/megapose/evaluation/meters/base.py +++ b/happypose/pose_estimators/megapose/src/megapose/evaluation/meters/base.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,7 +14,6 @@ """ - # Standard Library from collections import defaultdict from pathlib import Path diff --git a/happypose/pose_estimators/megapose/src/megapose/evaluation/meters/lf_utils.py b/happypose/pose_estimators/megapose/src/megapose/evaluation/meters/lf_utils.py index c170da84..96f445ed 100644 --- a/happypose/pose_estimators/megapose/src/megapose/evaluation/meters/lf_utils.py +++ b/happypose/pose_estimators/megapose/src/megapose/evaluation/meters/lf_utils.py @@ -1,27 +1,36 @@ import torch from torch.nn import functional as F + def normalize(quaternion: torch.Tensor, eps: float = 1e-12) -> torch.Tensor: r"""Normalizes a quaternion. The quaternion should be in (x, y, z, w) format. + Args: + ---- quaternion (torch.Tensor): a tensor containing a quaternion to be normalized. The tensor can be of shape :math:`(*, 4)`. eps (Optional[bool]): small value to avoid division by zero. Default: 1e-12. + Return: + ------ torch.Tensor: the normalized quaternion of shape :math:`(*, 4)`. """ if not isinstance(quaternion, torch.Tensor): - raise TypeError("Input type is not a torch.Tensor. Got {}".format( - type(quaternion))) + msg = f"Input type is not a torch.Tensor. Got {type(quaternion)}" + raise TypeError( + msg, + ) if not quaternion.shape[-1] == 4: + msg = f"Input must be a tensor of shape (*, 4). 
Got {quaternion.shape}" raise ValueError( - "Input must be a tensor of shape (*, 4). Got {}".format( - quaternion.shape)) + msg, + ) return F.normalize(quaternion, p=2.0, dim=-1, eps=eps) + def angular_distance(q1, q2, eps: float = 1e-7): q1 = normalize(q1) q2 = normalize(q2) @@ -32,4 +41,4 @@ def angular_distance(q1, q2, eps: float = 1e-7): @torch.jit.script def acos_safe(t, eps: float = 1e-7): - return torch.acos(torch.clamp(t, min=-1.0 + eps, max=1.0 - eps)) \ No newline at end of file + return torch.acos(torch.clamp(t, min=-1.0 + eps, max=1.0 - eps)) diff --git a/happypose/pose_estimators/megapose/src/megapose/evaluation/meters/modelnet_meters.py b/happypose/pose_estimators/megapose/src/megapose/evaluation/meters/modelnet_meters.py index 8aa66f1e..0f694ac1 100644 --- a/happypose/pose_estimators/megapose/src/megapose/evaluation/meters/modelnet_meters.py +++ b/happypose/pose_estimators/megapose/src/megapose/evaluation/meters/modelnet_meters.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -33,15 +32,16 @@ from happypose.toolbox.lib3d.camera_geometry import project_points from happypose.toolbox.lib3d.distances import dists_add from happypose.toolbox.lib3d.transform import Transform -from happypose.toolbox.lib3d.transform_ops import transform_pts -device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") class ModelNetErrorMeter(Meter): def __init__(self, mesh_db, sample_n_points=None): self.reset() - self.mesh_db = mesh_db.batched(resample_n_points=sample_n_points).to(device).float() + self.mesh_db = ( + mesh_db.batched(resample_n_points=sample_n_points).to(device).float() + ) def is_data_valid(self, data): valid = False @@ -52,9 +52,12 @@ def is_data_valid(self, data): def add(self, pred_data, gt_data): pred_data = pred_data.float() gt_data = gt_data.float() - + matches = one_to_one_matching( - pred_data.infos, gt_data.infos, keys=("scene_id", "view_id"), allow_pred_missing=False + pred_data.infos, + gt_data.infos, + keys=("scene_id", "view_id"), + allow_pred_missing=False, ) pred_data = pred_data[matches.pred_id] @@ -84,7 +87,7 @@ def add(self, pred_data, gt_data): uv_dists = torch.norm(uv_pred - uv_gt, dim=-1) uv_avg = uv_dists.mean() - df = xr.Dataset(matches).rename(dict(dim_0="match_id")) + df = xr.Dataset(matches).rename({"dim_0": "match_id"}) df["add"] = "match_id", np.array([dist_add.item()]) df["diameter"] = "match_id", np.array([diameter_1]) df["proj_error"] = "match_id", np.array([uv_avg.item()]) diff --git a/happypose/pose_estimators/megapose/src/megapose/evaluation/meters/utils.py b/happypose/pose_estimators/megapose/src/megapose/evaluation/meters/utils.py index f586b97e..107ade95 100644 --- a/happypose/pose_estimators/megapose/src/megapose/evaluation/meters/utils.py +++ b/happypose/pose_estimators/megapose/src/megapose/evaluation/meters/utils.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -15,9 +14,7 @@ """ - # Standard Library -from collections import OrderedDict # Third Party import numpy as np @@ -25,7 +22,7 @@ def one_to_one_matching( - pred_infos, gt_infos, keys=("scene_id", "view_id"), allow_pred_missing=False + pred_infos, gt_infos, keys=("scene_id", "view_id"), allow_pred_missing=False, ): keys = list(keys) pred_infos["pred_id"] = np.arange(len(pred_infos)) @@ -36,30 +33,34 @@ def one_to_one_matching( print("matches_gb =", matches_gb) for v in matches_gb.values(): print("v matched = ", v) - assert all([len(v) == 1 for v in matches_gb.values()]) + assert all(len(v) == 1 for v in matches_gb.values()) if not allow_pred_missing: assert len(matches) == len(gt_infos) return matches -def add_inst_num(infos, group_keys=["scene_id", "view_id", "label"], key="pred_inst_num"): - +def add_inst_num( + infos, group_keys=["scene_id", "view_id", "label"], key="pred_inst_num", +): inst_num = np.empty(len(infos), dtype=int) - for group_name, group_ids in infos.groupby(group_keys).groups.items(): + for _group_name, group_ids in infos.groupby(group_keys).groups.items(): inst_num[group_ids.values] = np.arange(len(group_ids)) infos[key] = inst_num return infos def get_top_n_ids( - infos, group_keys=("scene_id", "view_id", "label"), top_key="score", n_top=-1, targets=None + infos, + group_keys=("scene_id", "view_id", "label"), + top_key="score", + n_top=-1, + targets=None, ): - infos["id_before_top_n"] = np.arange(len(infos)) group_keys = list(group_keys) if targets is not None: - targets_inst_count = dict() + targets_inst_count = {} for k, ids in targets.groupby(group_keys).groups.items(): targets_inst_count[k] = targets.loc[ids[0], "inst_count"] @@ -87,18 +88,17 @@ def get_top_n(group_k): def add_valid_gt( - gt_infos, group_keys=("scene_id", "view_id", "label"), visib_gt_min=-1, targets=None + gt_infos, group_keys=("scene_id", "view_id", "label"), visib_gt_min=-1, targets=None, ): - if visib_gt_min > 0: gt_infos["valid"] = gt_infos["visib_fract"] >= visib_gt_min if targets is not None: gt_infos["valid"] = np.logical_and( - gt_infos["valid"], np.isin(gt_infos["label"], targets["label"]) + gt_infos["valid"], np.isin(gt_infos["label"], targets["label"]), ) elif targets is not None: valid_ids = get_top_n_ids( - gt_infos, group_keys=group_keys, top_key="visib_fract", targets=targets + gt_infos, group_keys=group_keys, top_key="visib_fract", targets=targets, ) gt_infos["valid"] = False gt_infos.loc[valid_ids, "valid"] = True @@ -108,7 +108,7 @@ def add_valid_gt( def get_candidate_matches( - pred_infos, gt_infos, group_keys=["scene_id", "view_id", "label"], only_valids=True + pred_infos, gt_infos, group_keys=["scene_id", "view_id", "label"], only_valids=True, ): pred_infos["pred_id"] = np.arange(len(pred_infos)) gt_infos["gt_id"] = np.arange(len(gt_infos)) diff --git a/happypose/pose_estimators/megapose/src/megapose/evaluation/prediction_runner.py b/happypose/pose_estimators/megapose/src/megapose/evaluation/prediction_runner.py index ede37c42..91b1a6b4 100644 --- a/happypose/pose_estimators/megapose/src/megapose/evaluation/prediction_runner.py +++ b/happypose/pose_estimators/megapose/src/megapose/evaluation/prediction_runner.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -55,13 +54,14 @@ def __init__( batch_size: int = 1, n_workers: int = 4, ) -> None: - self.inference_cfg = inference_cfg self.rank = get_rank() self.world_size = get_world_size() self.tmp_dir = get_tmp_dir() - sampler = DistributedSceneSampler(scene_ds, num_replicas=self.world_size, rank=self.rank) + sampler = DistributedSceneSampler( + scene_ds, num_replicas=self.world_size, rank=self.rank, + ) self.sampler = sampler self.scene_ds = scene_ds dataloader = DataLoader( @@ -92,7 +92,6 @@ def run_inference_pipeline( """ - if self.inference_cfg.detection_type == "gt": detections = gt_detections run_detector = False @@ -100,13 +99,18 @@ def run_inference_pipeline( detections = None run_detector = True else: - raise ValueError(f"Unknown detection type {self.inference_cfg.detection_type}") + msg = f"Unknown detection type {self.inference_cfg.detection_type}" + raise ValueError( + msg, + ) coarse_estimates = None if self.inference_cfg.coarse_estimation_type == "external": # TODO (ylabbe): This is hacky, clean this for modelnet eval. coarse_estimates = initial_estimates - coarse_estimates = happypose.toolbox.inference.utils.add_instance_id(coarse_estimates) + coarse_estimates = happypose.toolbox.inference.utils.add_instance_id( + coarse_estimates, + ) coarse_estimates.infos["instance_id"] = 0 run_detector = False @@ -122,7 +126,7 @@ def run_inference_pipeline( bsz_images=self.inference_cfg.bsz_images, bsz_objects=self.inference_cfg.bsz_objects, ) - elapsed = time.time() - t + time.time() - t # TODO (lmanuelli): Process this into a dict with keys like # - 'refiner/iteration=1` @@ -131,7 +135,7 @@ def run_inference_pipeline( # Note: Since we support multi-hypotheses we need to potentially # go back and extract out the 'refiner/iteration=1`, `refiner/iteration=5` things for the ones that were actually the highest scoring at the end. - all_preds = dict() + all_preds = {} data_TCO_refiner = extra_data["refiner"]["preds"] all_preds = { @@ -142,10 +146,10 @@ def run_inference_pipeline( } if self.inference_cfg.run_depth_refiner: - all_preds[f"depth_refiner"] = extra_data["depth_refiner"]["preds"] + all_preds["depth_refiner"] = extra_data["depth_refiner"]["preds"] # Remove any mask tensors - for k, v in all_preds.items(): + for _k, v in all_preds.items(): v.infos["scene_id"] = np.unique(gt_detections.infos["scene_id"]).item() v.infos["view_id"] = np.unique(gt_detections.infos["view_id"]).item() if "mask" in v.tensors: @@ -153,8 +157,10 @@ def run_inference_pipeline( return all_preds - def get_predictions(self, pose_estimator: PoseEstimator) -> Dict[str, PoseEstimatesType]: - """Runs predictions + def get_predictions( + self, pose_estimator: PoseEstimator, + ) -> Dict[str, PoseEstimatesType]: + """Runs predictions. 
Returns: A dict with keys - 'refiner/iteration=1` @@ -165,10 +171,8 @@ def get_predictions(self, pose_estimator: PoseEstimator) -> Dict[str, PoseEstima """ - predictions_list = defaultdict(list) for n, data in enumerate(tqdm(self.dataloader)): - # data is a dict rgb = data["rgb"] depth = data["depth"] @@ -186,23 +190,29 @@ def get_predictions(self, pose_estimator: PoseEstimator) -> Dict[str, PoseEstima if n == 0: with torch.no_grad(): self.run_inference_pipeline( - pose_estimator, obs_tensor, gt_detections, initial_estimates=initial_data + pose_estimator, + obs_tensor, + gt_detections, + initial_estimates=initial_data, ) cuda_timer = CudaTimer() cuda_timer.start() with torch.no_grad(): all_preds = self.run_inference_pipeline( - pose_estimator, obs_tensor, gt_detections, initial_estimates=initial_data + pose_estimator, + obs_tensor, + gt_detections, + initial_estimates=initial_data, ) cuda_timer.end() - duration = cuda_timer.elapsed() + cuda_timer.elapsed() for k, v in all_preds.items(): predictions_list[k].append(v) # Concatenate the lists of PandasTensorCollections - predictions = dict() + predictions = {} for k, v in predictions_list.items(): predictions[k] = tc.concatenate(v) diff --git a/happypose/pose_estimators/megapose/src/megapose/evaluation/runner_utils.py b/happypose/pose_estimators/megapose/src/megapose/evaluation/runner_utils.py index 903b1258..2e90079d 100644 --- a/happypose/pose_estimators/megapose/src/megapose/evaluation/runner_utils.py +++ b/happypose/pose_estimators/megapose/src/megapose/evaluation/runner_utils.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -15,7 +14,6 @@ """ - # Standard Library from collections import OrderedDict, defaultdict @@ -30,7 +28,7 @@ def run_pred_eval(pred_runner, pred_kwargs, eval_runner, eval_preds=None): - all_predictions = dict() + all_predictions = {} for pred_prefix, pred_kwargs_n in pred_kwargs.items(): print("Prediction :", pred_prefix) preds = pred_runner.get_predictions(**pred_kwargs_n) @@ -38,9 +36,9 @@ def run_pred_eval(pred_runner, pred_kwargs, eval_runner, eval_preds=None): all_predictions[f"{pred_prefix}/{preds_name}"] = preds_n all_predictions = OrderedDict( - {k: v for k, v in sorted(all_predictions.items(), key=lambda item: item[0])} + dict(sorted(all_predictions.items(), key=lambda item: item[0])), ) - eval_metrics, eval_dfs = dict(), dict() + eval_metrics, eval_dfs = {}, {} for preds_k, preds in all_predictions.items(): print("Evaluation :", preds_k) @@ -63,7 +61,7 @@ def gather_predictions(all_predictions): def format_results(predictions, eval_metrics, eval_dfs, print_metrics=True): - summary = dict() + summary = {} df = defaultdict(list) summary_txt = "" for k, v in eval_metrics.items(): @@ -79,12 +77,12 @@ def format_results(predictions, eval_metrics, eval_dfs, print_metrics=True): logger.info(summary_txt) df = pd.DataFrame(df) - results = dict( - summary=summary, - summary_txt=summary_txt, - predictions=predictions, - metrics=eval_metrics, - summary_df=df, - dfs=eval_dfs, - ) + results = { + "summary": summary, + "summary_txt": summary_txt, + "predictions": predictions, + "metrics": eval_metrics, + "summary_df": df, + "dfs": eval_dfs, + } return results diff --git a/happypose/pose_estimators/megapose/src/megapose/evaluation/utils.py b/happypose/pose_estimators/megapose/src/megapose/evaluation/utils.py index d5d8c533..e11b279d 100644 --- a/happypose/pose_estimators/megapose/src/megapose/evaluation/utils.py +++ b/happypose/pose_estimators/megapose/src/megapose/evaluation/utils.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -48,13 +47,12 @@ def get_symmetry_transformations_torch(trans_list): def compute_pose_error(T1, T2): - """ - Args: + """Args: + ---- Two sets of poses in world frame T1: [B,4,4] - T2: [B,4,4] + T2: [B,4,4]. """ - trans_err = torch.linalg.norm(T1[..., :3, 3] - T2[..., :3, 3], dim=-1) R1 = T1[..., :3, :3] R2 = T2[..., :3, :3] @@ -67,27 +65,25 @@ def compute_pose_error(T1, T2): def compute_errors(preds, method, obj_dataset, max_sym_rot_step_deg=1): - """ - Compute the errors between gt_pose and predicted pose. + """Compute the errors between gt_pose and predicted pose. Args: - + ---- preds: This is results['predictions'] where results is from results.pth.tar method: The type of method we should use for evaluation methods: str, e.g. 
'gt_detections+coarse_init' """ - preds_gt = preds[f"{method}/ground_truth"] TCO_gt = preds_gt.poses.cuda() # [B,4,4] device = TCO_gt.device - TOC_gt = torch.linalg.inv(TCO_gt) + torch.linalg.inv(TCO_gt) for key, p in preds.items(): if not key.startswith(method): continue - if re.search("refiner/iteration=\d*$", key) or re.search("refiner/init$", key): + if re.search("refiner/iteration=\\d*$", key) or re.search("refiner/init$", key): pass else: continue @@ -97,7 +93,7 @@ def compute_errors(preds, method, obj_dataset, max_sym_rot_step_deg=1): object_labels = p.infos.label.unique() object_labels.sort() - obj_info_dict = dict() + obj_info_dict = {} for val in obj_dataset.objects: obj_info_dict[val["label"]] = val @@ -109,7 +105,7 @@ def compute_errors(preds, method, obj_dataset, max_sym_rot_step_deg=1): bop_info = obj_info["bop_info"] max_sym_rot_step = np.deg2rad(max_sym_rot_step_deg) trans_list = get_symmetry_transformations( - bop_info, max_sym_disc_step=max_sym_rot_step + bop_info, max_sym_disc_step=max_sym_rot_step, ) syms = get_symmetry_transformations_torch(trans_list) else: @@ -123,7 +119,6 @@ def compute_errors(preds, method, obj_dataset, max_sym_rot_step_deg=1): TCO_pred_obj = p.poses[idx_list].cuda() TCO_gt_obj = TCO_gt[idx_list] - # Assumes symmetries don't have any offsets pts = create_default_object_pts().to(device) mssd_out = mssd_torch(TCO_pred_obj, TCO_gt_obj, pts, syms) @@ -137,13 +132,11 @@ def compute_errors(preds, method, obj_dataset, max_sym_rot_step_deg=1): p.infos.loc[idx_list, "trans_err"] = trans_err.tolist() p.infos.loc[idx_list, "rot_err_deg"] = roterr_deg.tolist() - - p_init = preds[f"{method}/refiner/init"] for key, p in preds.items(): if not key.startswith(method): continue - if re.search("refiner/iteration=\d*$", key): + if re.search("refiner/iteration=\\d*$", key): pass else: continue @@ -155,7 +148,7 @@ def compute_errors(preds, method, obj_dataset, max_sym_rot_step_deg=1): def create_plots(result_name): - """Make the png figures from the""" + """Make the png figures from the.""" pass @@ -178,6 +171,7 @@ def mssd_torch(T_est, T_gt, pts, syms): Based on https://github.com/thodan/bop_toolkit/blob/master/bop_toolkit_lib/pose_error.py#L96 Args: + ---- T_est: [B,4,4] tensor, estimated pose T_gt: [B,4,4] tensor, ground-truth pose pts: [N,3] tensor, 3D model points @@ -185,6 +179,7 @@ def mssd_torch(T_est, T_gt, pts, syms): Returns: + ------- err: [B,] mssd T_gt_sym: [B,4,4] the closest symmetry aware transform sym: [B,4,4] symmetry transform that led to T_gt_sym @@ -239,19 +234,20 @@ def mssd_torch(T_est, T_gt, pts, syms): def load_zephyr_hypotheses(ds_name, device="cuda", debug=False, hypotheses_type="all"): - """Load Zephyr ppf hypotheses (and SIFT) + """Load Zephyr ppf hypotheses (and SIFT). 
Args: + ---- ds_name: str ['ycbv.bop19', 'lmo.bop19'] hypotheses_type: ['all', 'ppf', 'sift'] Returns: + ------- PandasTensorCollection: poses: [N,4,4] infos: has columns ['pose_hypothesis_id'] """ - assert hypotheses_type in ["ppf", "sift", "all"] zephyr_dir = LOCAL_DATA_DIR / "external_detections/zephyr" if ds_name == "ycbv.bop19": @@ -259,7 +255,8 @@ def load_zephyr_hypotheses(ds_name, device="cuda", debug=False, hypotheses_type= elif ds_name == "lmo.bop19": fname = zephyr_dir / f"lmo_test_pose_hypotheses_{hypotheses_type}.pth" else: - raise ValueError(f"Unknown dataset {ds_name}") + msg = f"Unknown dataset {ds_name}" + raise ValueError(msg) p = torch.load(fname) p.infos = p.infos.rename(columns={"object_label": "label"}) @@ -268,7 +265,7 @@ def load_zephyr_hypotheses(ds_name, device="cuda", debug=False, hypotheses_type= def load_ppf_hypotheses(ds_name, device="cuda", debug=False): - """Load Zephyr ppf hypotheses + """Load Zephyr ppf hypotheses. The columns of the dataframe are @@ -282,7 +279,8 @@ def load_ppf_hypotheses(ds_name, device="cuda", debug=False): elif ds_name == "lmo.bop19": fname = zephyr_dir / "lmo_list_bop_test_v1.txt" else: - raise ValueError(f"Unknown dataset {ds_name}") + msg = f"Unknown dataset {ds_name}" + raise ValueError(msg) df = pd.read_csv(fname, delim_whitespace=True) @@ -341,14 +339,15 @@ def load_dtoid_detections(ds_name): elif ds_name == "lm.bop19": fname = dtoid_dir / "lm_preds.csv" else: - raise ValueError(f"Unknown dataset {ds_name}") + msg = f"Unknown dataset {ds_name}" + raise ValueError(msg) df = pd.read_csv(fname) def parse_image_fn(image_fn): ds, split, scene_id, modality, ext = image_fn.split("/") scene_id = int(scene_id) view_id = int(ext.split(".")[0]) - return dict(scene_id=scene_id, view_id=view_id) + return {"scene_id": scene_id, "view_id": view_id} x1 = df.loc[:, "x"].values y1 = df.loc[:, "y"].values @@ -358,7 +357,9 @@ def parse_image_fn(image_fn): infos = pd.DataFrame([parse_image_fn(image_fn) for image_fn in df["image_fn"]]) infos.loc[:, "label"] = [f"obj_{object_id:06d}" for object_id in df["object_id"]] infos.loc[:, "score"] = -1 - bboxes = np.concatenate([x1[:, None], y1[:, None], x2[:, None], y2[:, None]], axis=1) + bboxes = np.concatenate( + [x1[:, None], y1[:, None], x2[:, None], y2[:, None]], axis=1, + ) bboxes = torch.tensor(bboxes).float() ids_valids = (bboxes >= 0).all(dim=1).nonzero().flatten().tolist() bboxes = bboxes[ids_valids] @@ -368,21 +369,21 @@ def parse_image_fn(image_fn): return detections -def compute_errors_single_object(TCO_gt, TCO_pred, obj_label, obj_dataset, max_sym_rot_step_deg=1): - """ - Compute the errors between gt_pose and predicted pose. +def compute_errors_single_object( + TCO_gt, TCO_pred, obj_label, obj_dataset, max_sym_rot_step_deg=1, +): + """Compute the errors between gt_pose and predicted pose. 
Args: - + ---- TCO_gt: [4,4] The pose you want to compute error relative to poses: [B,4,4] obj_dataset: """ - device = TCO_pred.device B = TCO_pred.shape[0] - obj_info_dict = dict() + obj_info_dict = {} for val in obj_dataset.objects: obj_info_dict[val["label"]] = val @@ -391,7 +392,9 @@ def compute_errors_single_object(TCO_gt, TCO_pred, obj_label, obj_dataset, max_s if obj_info["is_symmetric"]: bop_info = obj_info["bop_info"] max_sym_rot_step = np.deg2rad(max_sym_rot_step_deg) - trans_list = get_symmetry_transformations(bop_info, max_sym_disc_step=max_sym_rot_step) + trans_list = get_symmetry_transformations( + bop_info, max_sym_disc_step=max_sym_rot_step, + ) syms = get_symmetry_transformations_torch(trans_list) else: syms = torch.eye(4, device=device).unsqueeze(0) diff --git a/happypose/pose_estimators/megapose/src/megapose/inference/__init__.py b/happypose/pose_estimators/megapose/src/megapose/inference/__init__.py index 73a7b275..09aba5e2 100644 --- a/happypose/pose_estimators/megapose/src/megapose/inference/__init__.py +++ b/happypose/pose_estimators/megapose/src/megapose/inference/__init__.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -13,4 +12,3 @@ See the License for the specific language governing permissions and limitations under the License. """ - diff --git a/happypose/pose_estimators/megapose/src/megapose/inference/depth_refiner.py b/happypose/pose_estimators/megapose/src/megapose/inference/depth_refiner.py index a552aa49..49034ec3 100644 --- a/happypose/pose_estimators/megapose/src/megapose/inference/depth_refiner.py +++ b/happypose/pose_estimators/megapose/src/megapose/inference/depth_refiner.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -38,6 +37,7 @@ def refine_poses( """Run the depth refinement. Args: + ---- predictions: len(predictions) = N, index into depth, masks, K using the batch_im_id field. depth: [B, H, W] diff --git a/happypose/pose_estimators/megapose/src/megapose/inference/detector.py b/happypose/pose_estimators/megapose/src/megapose/inference/detector.py index 8a5cd8c1..9bf97a0b 100644 --- a/happypose/pose_estimators/megapose/src/megapose/inference/detector.py +++ b/happypose/pose_estimators/megapose/src/megapose/inference/detector.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -15,7 +14,6 @@ """ - # Standard Library from typing import Any, Optional @@ -28,9 +26,9 @@ # MegaPose import happypose.pose_estimators.megapose.src.megapose import happypose.toolbox.utils.tensor_collection as tc +from happypose.toolbox.inference.detector import DetectorModule from happypose.toolbox.inference.types import DetectionsType, ObservationTensor -from happypose.toolbox.inference.detector import DetectorModule class Detector(DetectorModule): def __init__(self, model: torch.nn.Module) -> None: @@ -38,7 +36,9 @@ def __init__(self, model: torch.nn.Module) -> None: self.model = model self.model.eval() self.config = model.config - self.category_id_to_label = {v: k for k, v in self.config.label_to_category_id.items()} + self.category_id_to_label = { + v: k for k, v in self.config.label_to_category_id.items() + } def image_tensor_from_numpy( self, @@ -46,11 +46,12 @@ def image_tensor_from_numpy( ) -> torch.tensor: """Convert numpy image to torch tensor. - Args: + ---- rgb: [H,W,3] Returns: + ------- rgb_tensor: [3,H,W] torch.tensor with dtype torch.float """ assert rgb.dtype == np.uint8 @@ -74,6 +75,7 @@ def get_detections( """Runs the detector on the given images. Args: + ---- detection_th: If specified only keep detections above this threshold. mask_th: Threshold to use when computing masks @@ -82,28 +84,28 @@ def get_detections( """ - # [B,3,H,W] RGB_DIMS = [0, 1, 2] images = observation.images[:, RGB_DIMS] # TODO (lmanuelli): Why are we splitting this up into a list of tensors? - outputs_ = self.model([image_n for image_n in images]) + outputs_ = self.model(list(images)) infos = [] bboxes = [] masks = [] for n, outputs_n in enumerate(outputs_): outputs_n["labels"] = [ - self.category_id_to_label[category_id.item()] for category_id in outputs_n["labels"] + self.category_id_to_label[category_id.item()] + for category_id in outputs_n["labels"] ] for obj_id in range(len(outputs_n["boxes"])): bbox = outputs_n["boxes"][obj_id] - info = dict( - batch_im_id=n, - label=outputs_n["labels"][obj_id], - score=outputs_n["scores"][obj_id].item(), - ) + info = { + "batch_im_id": n, + "label": outputs_n["labels"][obj_id], + "score": outputs_n["scores"][obj_id].item(), + } mask = outputs_n["masks"][obj_id, 0] > mask_th bboxes.append(torch.as_tensor(bbox)) masks.append(torch.as_tensor(mask)) @@ -113,9 +115,11 @@ def get_detections( bboxes = torch.stack(bboxes).cuda().float() masks = torch.stack(masks).cuda() else: - infos = dict(score=[], label=[], batch_im_id=[]) + infos = {"score": [], "label": [], "batch_im_id": []} bboxes = torch.empty(0, 4).cuda().float() - masks = torch.empty(0, images.shape[1], images.shape[2], dtype=torch.bool).cuda() + masks = torch.empty( + 0, images.shape[1], images.shape[2], dtype=torch.bool, + ).cuda() outputs = tc.PandasTensorCollection( infos=pd.DataFrame(infos), @@ -130,7 +134,7 @@ def get_detections( # Keep only the top-detection for each class label if one_instance_per_class: outputs = happypose.toolbox.inference.utils.filter_detections( - outputs, one_instance_per_class=True + outputs, one_instance_per_class=True, ) # Add instance_id column to dataframe diff --git a/happypose/pose_estimators/megapose/src/megapose/inference/icp_refiner.py b/happypose/pose_estimators/megapose/src/megapose/inference/icp_refiner.py index 14d6738d..9202e290 100644 --- a/happypose/pose_estimators/megapose/src/megapose/inference/icp_refiner.py +++ b/happypose/pose_estimators/megapose/src/megapose/inference/icp_refiner.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION 
& AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -16,7 +15,7 @@ # Standard Library -from typing import List, Optional, Tuple +from typing import Optional, Tuple # Third Party import cv2 @@ -25,20 +24,23 @@ from scipy import ndimage # MegaPose -from happypose.pose_estimators.megapose.src.megapose.config import DEBUG_DATA_DIR -from happypose.pose_estimators.megapose.src.megapose.inference.depth_refiner import DepthRefiner -from happypose.pose_estimators.megapose.src.megapose.inference.refiner_utils import compute_masks +from happypose.pose_estimators.megapose.src.megapose.inference.depth_refiner import ( + DepthRefiner, +) +from happypose.pose_estimators.megapose.src.megapose.inference.refiner_utils import ( + compute_masks, +) from happypose.toolbox.inference.types import PoseEstimatesType from happypose.toolbox.lib3d.rigid_mesh_database import BatchedMeshes from happypose.toolbox.renderer.panda3d_batch_renderer import Panda3dBatchRenderer from happypose.toolbox.renderer.types import Panda3dLightData -def get_normal(depth_refine, fx=-1, fy=-1, cx=-1, cy=-1, bbox=np.array([0]), refine=True): +def get_normal( + depth_refine, fx=-1, fy=-1, cx=-1, cy=-1, bbox=np.array([0]), refine=True, +): # Copied from https://github.com/kirumang/Pix2Pose/blob/master/pix2pose_util/common_util.py - """ - fast normal computation - """ + """Fast normal computation.""" res_y = depth_refine.shape[0] res_x = depth_refine.shape[1] centerX = cx @@ -64,22 +66,26 @@ def get_normal(depth_refine, fx=-1, fy=-1, cx=-1, cy=-1, bbox=np.array([0]), ref uv_table = uv_table[bbox[0] : bbox[2], bbox[1] : bbox[3]] v_x = np.zeros((bbox[2] - bbox[0], bbox[3] - bbox[1], 3)) v_y = np.zeros((bbox[2] - bbox[0], bbox[3] - bbox[1], 3)) - normals = np.zeros((bbox[2] - bbox[0], bbox[3] - bbox[1], 3)) + np.zeros((bbox[2] - bbox[0], bbox[3] - bbox[1], 3)) depth_refine = depth_refine[bbox[0] : bbox[2], bbox[1] : bbox[3]] else: v_x = np.zeros((res_y, res_x, 3)) v_y = np.zeros((res_y, res_x, 3)) - normals = np.zeros((res_y, res_x, 3)) + np.zeros((res_y, res_x, 3)) uv_table_sign = np.copy(uv_table) uv_table = np.abs(np.copy(uv_table)) dig = np.gradient(depth_refine, 2, edge_order=2) v_y[:, :, 0] = uv_table_sign[:, :, 1] * constant_x * dig[0] - v_y[:, :, 1] = depth_refine * constant_y + (uv_table_sign[:, :, 0] * constant_y) * dig[0] + v_y[:, :, 1] = ( + depth_refine * constant_y + (uv_table_sign[:, :, 0] * constant_y) * dig[0] + ) v_y[:, :, 2] = dig[0] - v_x[:, :, 0] = depth_refine * constant_x + uv_table_sign[:, :, 1] * constant_x * dig[1] + v_x[:, :, 0] = ( + depth_refine * constant_x + uv_table_sign[:, :, 1] * constant_x * dig[1] + ) v_x[:, :, 1] = uv_table_sign[:, :, 0] * constant_y * dig[1] v_x[:, :, 2] = dig[1] @@ -126,27 +132,46 @@ def getXYZ(depth, fx, fy, cx, cy, bbox=np.array([0])): def icp_refinement( - depth_measured, depth_rendered, object_mask_measured, cam_K, TCO_pred, n_min_points=1000 + depth_measured, + depth_rendered, + object_mask_measured, + cam_K, + TCO_pred, + n_min_points=1000, ): # Inspired from https://github.com/kirumang/Pix2Pose/blob/843effe0097e9982f4b07dd90b04ede2b9ee9294/tools/5_evaluation_bop_icp3d.py#L57 - points_tgt = np.zeros((depth_measured.shape[0], depth_measured.shape[1], 6), np.float32) + points_tgt = np.zeros( + (depth_measured.shape[0], depth_measured.shape[1], 6), np.float32, + ) points_tgt[:, :, :3] = 
getXYZ( - depth_measured, fx=cam_K[0, 0], fy=cam_K[1, 1], cx=cam_K[0, 2], cy=cam_K[1, 2] + depth_measured, fx=cam_K[0, 0], fy=cam_K[1, 1], cx=cam_K[0, 2], cy=cam_K[1, 2], ) points_tgt[:, :, 3:] = get_normal( - depth_measured, fx=cam_K[0, 0], fy=cam_K[1, 1], cx=cam_K[0, 2], cy=cam_K[1, 2], refine=True + depth_measured, + fx=cam_K[0, 0], + fy=cam_K[1, 1], + cx=cam_K[0, 2], + cy=cam_K[1, 2], + refine=True, ) depth_valid = np.logical_and(depth_measured > 0.2, depth_measured < 5) depth_valid = np.logical_and(depth_valid, object_mask_measured) points_tgt = points_tgt[depth_valid] - points_src = np.zeros((depth_measured.shape[0], depth_measured.shape[1], 6), np.float32) + points_src = np.zeros( + (depth_measured.shape[0], depth_measured.shape[1], 6), np.float32, + ) points_src[:, :, :3] = getXYZ( - depth_rendered, cam_K[0, 0], cam_K[1, 1], cam_K[0, 2], cam_K[1, 2] + depth_rendered, cam_K[0, 0], cam_K[1, 1], cam_K[0, 2], cam_K[1, 2], ) points_src[:, :, 3:] = get_normal( - depth_rendered, fx=cam_K[0, 0], fy=cam_K[1, 1], cx=cam_K[0, 2], cy=cam_K[1, 2], refine=True + depth_rendered, + fx=cam_K[0, 0], + fy=cam_K[1, 1], + cx=cam_K[0, 2], + cy=cam_K[1, 2], + refine=True, ) points_src = points_src[np.logical_and(depth_valid, depth_rendered > 0)] @@ -165,7 +190,7 @@ def icp_refinement( tolerence = 0.05 icp_fnc = cv2.ppf_match_3d_ICP(100, tolerence=tolerence, numLevels=4) retval, residual, pose = icp_fnc.registerModelToScene( - points_src.reshape(-1, 6), points_tgt.reshape(-1, 6) + points_src.reshape(-1, 6), points_tgt.reshape(-1, 6), ) TCO_pred_refined = pose @ TCO_pred_refined TCO_pred_refined = torch.tensor(TCO_pred_refined, dtype=torch.float32).cuda() @@ -200,7 +225,6 @@ def refine_poses( K: Optional[torch.tensor] = None, ) -> Tuple[PoseEstimatesType, dict]: """Runs icp refinement. See superclass DepthRefiner for full documentation.""" - assert depth is not None assert K is not None @@ -250,7 +274,7 @@ def refine_poses( mask = masks[view_id].squeeze().cpu().numpy() TCO_refined, retval = icp_refinement( - depth_measured, depth_rendered, mask, cam_K, TCO_pred, n_min_points=1000 + depth_measured, depth_rendered, mask, cam_K, TCO_pred, n_min_points=1000, ) # Assign poses to predictions refined @@ -258,5 +282,5 @@ def refine_poses( if retval != -1: predictions_refined.poses[n] = TCO_refined - extra_data = dict() + extra_data = {} return (predictions_refined, extra_data) diff --git a/happypose/pose_estimators/megapose/src/megapose/inference/pose_estimator.py b/happypose/pose_estimators/megapose/src/megapose/inference/pose_estimator.py index 19ecbf59..8b93ca26 100644 --- a/happypose/pose_estimators/megapose/src/megapose/inference/pose_estimator.py +++ b/happypose/pose_estimators/megapose/src/megapose/inference/pose_estimator.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -20,8 +19,7 @@ # Standard Library import time from collections import defaultdict -from dataclasses import dataclass -from typing import Any, Optional, Tuple +from typing import Any # Third Party import numpy as np @@ -30,7 +28,6 @@ from torch.utils.data import DataLoader, TensorDataset # MegaPose -import happypose.pose_estimators.megapose.src.megapose as megapose import happypose.toolbox.inference.utils import happypose.toolbox.utils.tensor_collection as tc from happypose.pose_estimators.megapose.src.megapose.inference.depth_refiner import ( @@ -54,22 +51,22 @@ logger = get_logger(__name__) -device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + class PoseEstimator(PoseEstimationModule): """Performs inference for pose estimation.""" def __init__( self, - refiner_model: Optional[torch.nn.Module] = None, - coarse_model: Optional[torch.nn.Module] = None, - detector_model: Optional[torch.nn.Module] = None, - depth_refiner: Optional[DepthRefiner] = None, + refiner_model: torch.nn.Module | None = None, + coarse_model: torch.nn.Module | None = None, + detector_model: torch.nn.Module | None = None, + depth_refiner: DepthRefiner | None = None, bsz_objects: int = 8, bsz_images: int = 256, SO3_grid_size: int = 576, ) -> None: - super().__init__() self.coarse_model = coarse_model self.refiner_model = refiner_model @@ -90,7 +87,10 @@ def __init__( self.cfg = self.coarse_model.cfg self.mesh_db = self.coarse_model.mesh_db else: - raise ValueError("At least one of refiner_model or " " coarse_model must be specified.") + msg = "At least one of refiner_model or coarse_model must be specified." + raise ValueError( + msg, + ) self.eval() @@ -98,7 +98,7 @@ def __init__( self.keep_all_coarse_outputs = False self.refiner_outputs = None self.coarse_outputs = None - self.debug_dict: dict = dict() + self.debug_dict: dict = {} def load_SO3_grid(self, grid_size: int) -> None: """Loads the SO(3) grid.""" @@ -114,14 +114,14 @@ def forward_refiner( keep_all_outputs: bool = False, cuda_timer: bool = False, **refiner_kwargs, - ) -> Tuple[dict, dict]: + ) -> tuple[dict, dict]: """Runs the refiner model for the specified number of iterations. - Will actually use the batched_model_predictions to stay within batch size limit. - Returns: + Returns + ------- (preds, extra_data) preds: @@ -134,7 +134,6 @@ def forward_refiner( A dict containing additional information such as timing """ - timer = Timer() timer.start() @@ -153,7 +152,7 @@ def forward_refiner( model_time = 0.0 - for (batch_idx, (batch_ids,)) in enumerate(dl): + for batch_idx, (batch_ids,) in enumerate(dl): data_TCO_input_ = data_TCO_input[batch_ids] df_ = data_TCO_input_.infos TCO_input_ = data_TCO_input_.poses @@ -218,7 +217,8 @@ def forward_refiner( } logger.debug( - f"Pose prediction on {B} poses (n_iterations={n_iterations}):" f" {timer.stop()}" + f"Pose prediction on {B} poses (n_iterations={n_iterations}):" + f" {timer.stop()}", ) return preds, extra_data @@ -230,16 +230,13 @@ def forward_scoring_model( data_TCO: PoseEstimatesType, cuda_timer: bool = False, return_debug_data: bool = False, - ) -> Tuple[PoseEstimatesType, dict]: - + ) -> tuple[PoseEstimatesType, dict]: """Score the estimates using the coarse model. - Adds the 'pose_score' field to data_TCO.infos Modifies PandasTensorCollection in-place. 
""" - start_time = time.time() assert self.coarse_model is not None @@ -290,7 +287,7 @@ def forward_scoring_model( images_crop_list.append(out_["images_crop"]) renders_list.append(out_["renders"]) - debug_data = dict() + debug_data = {} # Combine together the data from the different batches logits = torch.cat(logits_list) @@ -299,8 +296,8 @@ def forward_scoring_model( images_crop: torch.tensor = torch.cat(images_crop_list) renders: torch.tensor = torch.cat(renders_list) - H = images_crop.shape[2] - W = images_crop.shape[3] + images_crop.shape[2] + images_crop.shape[3] debug_data = { "images_crop": images_crop, @@ -312,9 +309,7 @@ def forward_scoring_model( elapsed = time.time() - start_time - timing_str = ( - f"time: {elapsed:.2f}, model_time: {model_time:.2f}, render_time: {render_time:.2f}" - ) + timing_str = f"time: {elapsed:.2f}, model_time: {model_time:.2f}, render_time: {render_time:.2f}" extra_data = { "render_time": render_time, @@ -337,13 +332,12 @@ def forward_coarse_model( detections: DetectionsType, cuda_timer: bool = False, return_debug_data: bool = False, - ) -> Tuple[PoseEstimatesType, dict]: + ) -> tuple[PoseEstimatesType, dict]: """Generates pose hypotheses and scores them with the coarse model. - Generates coarse hypotheses using the SO(3) grid. - Scores them using the coarse model. """ - start_time = time.time() happypose.toolbox.inference.types.assert_detections_valid(detections) @@ -384,7 +378,6 @@ def forward_coarse_model( TCO_init = [] for (batch_ids,) in dl: - # b = bsz_images df_ = df_hypotheses.iloc[batch_ids.cpu().numpy()] @@ -453,7 +446,7 @@ def forward_coarse_model( TCO = torch.cat(TCO_init) TCO_reshape = TCO.reshape([B, M, 4, 4]) - debug_data = dict() + debug_data = {} if return_debug_data: images_crop = torch.cat(images_crop_list) @@ -472,9 +465,7 @@ def forward_coarse_model( elapsed = time.time() - start_time - timing_str = ( - f"time: {elapsed:.2f}, model_time: {model_time:.2f}, render_time: {render_time:.2f}" - ) + timing_str = f"time: {elapsed:.2f}, model_time: {model_time:.2f}, render_time: {render_time:.2f}" extra_data = { "render_time": render_time, @@ -499,20 +490,21 @@ def forward_detection_model( **kwargs: Any, ) -> DetectionsType: """Runs the detector.""" - return self.detector_model.get_detections(observation, *args, **kwargs) def run_depth_refiner( self, observation: ObservationTensor, predictions: PoseEstimatesType, - ) -> Tuple[PoseEstimatesType, dict]: + ) -> tuple[PoseEstimatesType, dict]: """Runs the depth refiner.""" assert self.depth_refiner is not None, "You must specify a depth refiner" depth = observation.depth K = observation.K - refined_preds, extra_data = self.depth_refiner.refine_poses(predictions, depth=depth, K=K) + refined_preds, extra_data = self.depth_refiner.refine_poses( + predictions, depth=depth, K=K, + ) return refined_preds, extra_data @@ -520,18 +512,18 @@ def run_depth_refiner( def run_inference_pipeline( self, observation: ObservationTensor, - detections: Optional[DetectionsType] = None, - run_detector: Optional[bool] = None, + detections: DetectionsType | None = None, + run_detector: bool | None = None, n_refiner_iterations: int = 5, n_pose_hypotheses: int = 1, keep_all_refiner_outputs: bool = False, - detection_filter_kwargs: Optional[dict] = None, + detection_filter_kwargs: dict | None = None, run_depth_refiner: bool = False, - bsz_images: Optional[int] = None, - bsz_objects: Optional[int] = None, + bsz_images: int | None = None, + bsz_objects: int | None = None, cuda_timer: bool = False, - coarse_estimates: 
Optional[PoseEstimatesType] = None, - ) -> Tuple[PoseEstimatesType, dict]: + coarse_estimates: PoseEstimatesType | None = None, + ) -> tuple[PoseEstimatesType, dict]: """Runs the entire pose estimation pipeline. Performs the following steps @@ -543,13 +535,13 @@ def run_inference_pipeline( 5. Score refined hypotheses 6. Select highest scoring refined hypotheses. - Returns: + Returns + ------- data_TCO_final: final predictions data: Dict containing additional data about the different steps in the pipeline. """ - timing_str = "" timer = SimpleTimer() timer.start() @@ -578,7 +570,7 @@ def run_inference_pipeline( # Filter detections if detection_filter_kwargs is not None: detections = happypose.toolbox.inference.utils.filter_detections( - detections, **detection_filter_kwargs + detections, **detection_filter_kwargs, ) # Run the coarse estimator using gt_detections @@ -591,7 +583,7 @@ def run_inference_pipeline( # Extract top-K coarse hypotheses data_TCO_filtered = self.filter_pose_estimates( - data_TCO_coarse, top_K=n_pose_hypotheses, filter_field="coarse_logit" + data_TCO_coarse, top_K=n_pose_hypotheses, filter_field="coarse_logit", ) else: data_TCO_coarse = coarse_estimates @@ -618,13 +610,15 @@ def run_inference_pipeline( # Extract the highest scoring pose estimate for each instance_id data_TCO_final_scored = self.filter_pose_estimates( - data_TCO_scored, top_K=1, filter_field="pose_logit" + data_TCO_scored, top_K=1, filter_field="pose_logit", ) # Optionally run ICP or TEASER++ if run_depth_refiner: depth_refiner_start = time.time() - data_TCO_depth_refiner, _ = self.run_depth_refiner(observation, data_TCO_final_scored) + data_TCO_depth_refiner, _ = self.run_depth_refiner( + observation, data_TCO_final_scored, + ) data_TCO_final = data_TCO_depth_refiner depth_refiner_time = time.time() - depth_refiner_start timing_str += f"depth refiner={depth_refiner_time:.2f}" @@ -635,12 +629,18 @@ def run_inference_pipeline( timer.stop() timing_str = f"total={timer.elapsed():.2f}, {timing_str}" - extra_data: dict = dict() + extra_data: dict = {} extra_data["coarse"] = {"preds": data_TCO_coarse, "data": coarse_extra_data} extra_data["coarse_filter"] = {"preds": data_TCO_filtered} - extra_data["refiner_all_hypotheses"] = {"preds": preds, "data": refiner_extra_data} + extra_data["refiner_all_hypotheses"] = { + "preds": preds, + "data": refiner_extra_data, + } extra_data["scoring"] = {"preds": data_TCO_scored, "data": scoring_extra_data} - extra_data["refiner"] = {"preds": data_TCO_final_scored, "data": refiner_extra_data} + extra_data["refiner"] = { + "preds": data_TCO_final_scored, + "data": refiner_extra_data, + } extra_data["timing_str"] = timing_str extra_data["time"] = timer.elapsed() @@ -661,15 +661,19 @@ def filter_pose_estimates( Retain only the top_K estimates corresponding to each hypothesis_id Args: + ---- top_K: how many estimates to retain filter_field: The field to filter estimates by """ - df = data_TCO.infos group_cols = ["batch_im_id", "label", "instance_id"] # Logic from https://stackoverflow.com/a/40629420 - df = df.sort_values(filter_field, ascending=ascending).groupby(group_cols).head(top_K) + df = ( + df.sort_values(filter_field, ascending=ascending) + .groupby(group_cols) + .head(top_K) + ) data_TCO_filtered = data_TCO[df.index.tolist()] diff --git a/happypose/pose_estimators/megapose/src/megapose/inference/refiner_utils.py b/happypose/pose_estimators/megapose/src/megapose/inference/refiner_utils.py index 10608f1a..7905172f 100644 --- 
a/happypose/pose_estimators/megapose/src/megapose/inference/refiner_utils.py +++ b/happypose/pose_estimators/megapose/src/megapose/inference/refiner_utils.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -18,7 +17,6 @@ # Third Party import numpy as np import open3d as o3d -import transforms3d as t3d def numpy_to_open3d(xyz): @@ -28,17 +26,16 @@ def numpy_to_open3d(xyz): def compute_masks(mask_type, depth_rendered, depth_measured, depth_delta_thresh=0.1): - """ - Function for computing masks + """Function for computing masks. Args: + ---- mask_type: str depth_rendered: [H,W] depth_measured: [H,W] depth_delta_thresh: 0.1 """ - mask_rendered = depth_rendered > 0 mask_measured = np.logical_and(depth_measured > 0, depth_rendered > 0) @@ -48,7 +45,8 @@ def compute_masks(mask_type, depth_rendered, depth_measured, depth_delta_thresh= depth_delta = np.abs(depth_measured - depth_rendered) mask_measured[depth_delta > depth_delta_thresh] = 0 else: - raise ValueError(f"Unknown mask type {mask_type}") + msg = f"Unknown mask type {mask_type}" + raise ValueError(msg) # Most conservative mask_rendered = mask_measured diff --git a/happypose/pose_estimators/megapose/src/megapose/inference/teaserpp_refiner.py b/happypose/pose_estimators/megapose/src/megapose/inference/teaserpp_refiner.py index 3515a0e6..e3523bb0 100644 --- a/happypose/pose_estimators/megapose/src/megapose/inference/teaserpp_refiner.py +++ b/happypose/pose_estimators/megapose/src/megapose/inference/teaserpp_refiner.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -26,12 +25,23 @@ import torch # MegaPose -from happypose.pose_estimators.megapose.src.megapose.inference.depth_refiner import DepthRefiner -from happypose.pose_estimators.megapose.src.megapose.inference.refiner_utils import compute_masks, numpy_to_open3d -from happypose.pose_estimators.megapose.src.megapose.inference.types import PoseEstimatesType +from happypose.pose_estimators.megapose.src.megapose.inference.depth_refiner import ( + DepthRefiner, +) +from happypose.pose_estimators.megapose.src.megapose.inference.refiner_utils import ( + compute_masks, + numpy_to_open3d, +) +from happypose.pose_estimators.megapose.src.megapose.inference.types import ( + PoseEstimatesType, +) +from happypose.pose_estimators.megapose.src.megapose.panda3d_renderer.panda3d_batch_renderer import ( + Panda3dBatchRenderer, +) +from happypose.pose_estimators.megapose.src.megapose.panda3d_renderer.types import ( + Panda3dLightData, +) from happypose.toolbox.lib3d.rigid_mesh_database import BatchedMeshes -from happypose.pose_estimators.megapose.src.megapose.panda3d_renderer.panda3d_batch_renderer import Panda3dBatchRenderer -from happypose.pose_estimators.megapose.src.megapose.panda3d_renderer.types import Panda3dLightData from happypose.toolbox.visualization.meshcat_utils import get_pointcloud @@ -59,13 +69,14 @@ def compute_teaserpp_refinement( max_num_points=None, normals_src=None, use_farthest_point_sampling: bool = True, - **solver_params_kwargs + **solver_params_kwargs, ) -> dict: - """Compute registration using Teaser++ + """Compute registration using Teaser++. Follows the example of https://github.com/MIT-SPARK/TEASER-plusplus#minimal-python-3-example Args: + ---- depth_src: [H,W,3] depth_tgt: [H,W, 3] cam_K: [3,3] intrinsics matrix @@ -74,12 +85,12 @@ def compute_teaserpp_refinement( normals_src: (optional) normals for the src pointcloud Returns: + ------- A dict. - 'T_tgt_src': The rigid transform that aligns src to tgt. """ - if solver_params is None: solver_params = get_solver_params(**solver_params_kwargs) @@ -127,11 +138,11 @@ def compute_teaserpp_refinement( pc_tgt = pc_src_mask solver = teaserpp_python.RobustRegistrationSolver(solver_params) - start = time.time() + time.time() # teaserpp wants [3,N] pointclouds solver.solve(pc_src.transpose(), pc_tgt.transpose()) - end = time.time() + time.time() solution = solver.getSolution() @@ -207,6 +218,7 @@ def refine_poses( 3. Estimate a mask to filter out some outliers in our generated correspondences. Args: + ---- predictions: PandasTensorCollection Index into depth, K with batch_im_id depth: [B, H, W] @@ -214,7 +226,6 @@ def refine_poses( K: [B,3,3] """ - assert depth is not None assert K is not None @@ -281,7 +292,9 @@ def refine_poses( TCO_refined = T_tgt_src @ TCO_pred device = predictions_refined.poses_input[n].device predictions_refined.poses_input[n] = predictions.poses[n].clone() - predictions_refined.poses[n] = torch.tensor(TCO_refined, device=device) + predictions_refined.poses[n] = torch.tensor( + TCO_refined, device=device, + ) self.debug = out diff --git a/happypose/pose_estimators/megapose/src/megapose/inference/types.py b/happypose/pose_estimators/megapose/src/megapose/inference/types.py index 969cdb88..31af1d27 100644 --- a/happypose/pose_estimators/megapose/src/megapose/inference/types.py +++ b/happypose/pose_estimators/megapose/src/megapose/inference/types.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -19,7 +18,6 @@ # Standard Library from dataclasses import dataclass -from typing import Optional, Tuple # Third Party import numpy as np @@ -95,23 +93,21 @@ class InferenceConfig: n_refiner_iterations: int = 5 n_pose_hypotheses: int = 5 run_depth_refiner: bool = False - depth_refiner: Optional[str] = None # ['icp', 'teaserpp'] + depth_refiner: str | None = None # ['icp', 'teaserpp'] bsz_objects: int = 16 # How many parallel refiners to run bsz_images: int = 288 # How many images to push through coarse model @dataclass class ObservationTensor: - """ - - images: [B,C,H,W] with C=3 (rgb) or C=4 (rgbd). RGB dimensions should already - be normalized to be in [0,1] by diving the uint8 values by 255 + """images: [B,C,H,W] with C=3 (rgb) or C=4 (rgbd). RGB dimensions should already + be normalized to be in [0,1] by diving the uint8 values by 255. K: [B,3,3] camera intrinsics """ images: torch.Tensor # [B,C,H,W] - K: Optional[torch.Tensor] = None # [B,3,3] + K: torch.Tensor | None = None # [B,3,3] def cuda(self) -> ObservationTensor: self.images = self.images.cuda() @@ -122,14 +118,14 @@ def cuda(self) -> ObservationTensor: @property def batch_size(self) -> int: """Returns the batch size.""" - return self.images.shape[0] @property def depth(self) -> torch.tensor: """Returns depth tensor. - Returns: + Returns + ------- torch.tensor with shape [B,H,W] """ assert self.channel_dim == 4 @@ -141,7 +137,6 @@ def channel_dim(self) -> int: return self.images.shape[1] def is_valid(self) -> bool: - if not self.images.ndim == 4: return False @@ -169,18 +164,18 @@ def is_valid(self) -> bool: @staticmethod def from_numpy( rgb: np.ndarray, - depth: Optional[np.ndarray] = None, - K: Optional[np.ndarray] = None, + depth: np.ndarray | None = None, + K: np.ndarray | None = None, ) -> ObservationTensor: """Create an ObservationData type from numpy data. Args: + ---- rgb: [H,W,3] np.uint8 depth: [H,W] np.float K: [3,3] np.float """ - assert rgb.dtype == np.uint8 rgb_tensor = torch.as_tensor(rgb).float() / 255 @@ -201,17 +196,15 @@ def from_numpy( @staticmethod def from_torch_batched( - rgb: torch.Tensor, depth: torch.Tensor, K: torch.Tensor + rgb: torch.Tensor, depth: torch.Tensor, K: torch.Tensor, ) -> ObservationTensor: - """ - - Args: + """Args: + ---- rgb: [B,3,H,W] torch.uint8 depth: [B,1,H,W] torch.float - K: [B,3,3] torch.float + K: [B,3,3] torch.float. """ - assert rgb.dtype == torch.uint8 # [B,3,H,W] @@ -221,7 +214,6 @@ def from_torch_batched( # [C,H,W] if depth is not None: - if depth.ndim == 3: depth.unsqueeze(1) diff --git a/happypose/pose_estimators/megapose/src/megapose/lib3d/__init__.py b/happypose/pose_estimators/megapose/src/megapose/lib3d/__init__.py index 33599f44..b9c7cf25 100644 --- a/happypose/pose_estimators/megapose/src/megapose/lib3d/__init__.py +++ b/happypose/pose_estimators/megapose/src/megapose/lib3d/__init__.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
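For context on the ObservationTensor changes above, here is a minimal usage sketch of the updated from_numpy signature, assuming only what its docstring states (rgb is an [H,W,3] uint8 image, depth is an [H,W] float depth map, K is a [3,3] intrinsics matrix); the array values below are placeholders, not project data.

import numpy as np

from happypose.pose_estimators.megapose.src.megapose.inference.types import (
    ObservationTensor,
)

# Placeholder inputs matching the shapes and dtypes documented in the diff above.
rgb = np.zeros((480, 640, 3), dtype=np.uint8)       # [H,W,3] uint8 image
depth = np.ones((480, 640), dtype=np.float32)       # [H,W] float depth map
K = np.array([[600.0, 0.0, 320.0],
              [0.0, 600.0, 240.0],
              [0.0, 0.0, 1.0]], dtype=np.float32)   # [3,3] camera intrinsics

obs = ObservationTensor.from_numpy(rgb, depth=depth, K=K)
print(obs.is_valid())  # expected to be True when inputs match the documented shapes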
@@ -16,4 +15,3 @@ # Local Folder -from .transform import Transform diff --git a/happypose/pose_estimators/megapose/src/megapose/models/mask_rcnn.py b/happypose/pose_estimators/megapose/src/megapose/models/mask_rcnn.py index 26622cc3..ef2ff3e6 100644 --- a/happypose/pose_estimators/megapose/src/megapose/models/mask_rcnn.py +++ b/happypose/pose_estimators/megapose/src/megapose/models/mask_rcnn.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -29,7 +28,6 @@ def __init__( backbone_str="resnet50-fpn", anchor_sizes=((32,), (64,), (128,), (256,), (512,)), ): - assert backbone_str == "resnet50-fpn" backbone = resnet_fpn_backbone("resnet50", pretrained=False) diff --git a/happypose/pose_estimators/megapose/src/megapose/models/pose_rigid.py b/happypose/pose_estimators/megapose/src/megapose/models/pose_rigid.py index 42f4dfbb..ba1ff908 100644 --- a/happypose/pose_estimators/megapose/src/megapose/models/pose_rigid.py +++ b/happypose/pose_estimators/megapose/src/megapose/models/pose_rigid.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -52,6 +51,7 @@ logger = get_logger(__name__) + @dataclass class PosePredictorOutputCosypose: TCO_output: torch.Tensor @@ -64,6 +64,7 @@ class PosePredictorOutputCosypose: boxes_rend: torch.Tensor boxes_crop: torch.Tensor + @dataclass class PosePredictorOutput: TCO_output: torch.Tensor @@ -134,7 +135,7 @@ def __init__( assert isinstance(n_features, int) # TODO: Change to torch ModuleDict - self.heads: Dict[str, Union[torch.nn.Linear, Callable]] = dict() + self.heads: Dict[str, Union[torch.nn.Linear, Callable]] = {} self.predict_pose_update = predict_pose_update if self.predict_pose_update: self._pose_dim = 9 @@ -143,7 +144,9 @@ def __init__( self.predict_rendered_views_logits = predict_rendered_views_logits if self.predict_rendered_views_logits: - self.views_logits_head = nn.Linear(n_features, self.n_rendered_views, bias=True) + self.views_logits_head = nn.Linear( + n_features, self.n_rendered_views, bias=True, + ) self.heads["renderings_logits"] = self.views_logits_head # Dimensions for indexing into input and rendered images @@ -210,6 +213,7 @@ def crop_inputs( box for cropping. Args: + ---- images (torch.Tensor): (bsz, ndims, h, w) where ndims is 3 or 4. K (torch.Tensor): (bsz, 3, 3), intrinsics of input images TCO (torch.Tensor): (bsz, 4, 4) @@ -217,13 +221,13 @@ def crop_inputs( labels (List[str]): Object labels Returns: + ------- images_cropped: Images cropped and resized to self.render_size K_crop: Intrinsics of the fictive cropped camera. boxes_rend: smallest bounding box defined by the reprojection of object points in pose TCO. boxes_crop: bounding box used to crop the input image. 
""" - bsz = images.shape[0] assert K.shape == (bsz, 3, 3) assert tCR.shape == (bsz, 3) @@ -246,20 +250,23 @@ def crop_inputs( ) K_crop = get_K_crop_resize( - K=K.clone(), boxes=boxes_crop, orig_size=images.shape[-2:], crop_resize=self.render_size + K=K.clone(), + boxes=boxes_crop, + orig_size=images.shape[-2:], + crop_resize=self.render_size, ).detach() if self.debug: TCR = TCO.clone() TCR[:, :3, -1] = tCR self.debug_data.ref_point_uv = project_points_robust( - torch.zeros(bsz, 1, 3).to(K.device), K, TCR + torch.zeros(bsz, 1, 3).to(K.device), K, TCR, ) self.debug_data.origin_uv = project_points_robust( - torch.zeros(bsz, 1, 3).to(K.device), K, TCO + torch.zeros(bsz, 1, 3).to(K.device), K, TCO, ) self.debug_data.origin_uv_crop = project_points_robust( - torch.zeros(bsz, 1, 3).to(K.device), K_crop, TCO + torch.zeros(bsz, 1, 3).to(K.device), K_crop, TCO, ) return images_cropped, K_crop, boxes_rend, boxes_crop @@ -275,6 +282,7 @@ def compute_crops_multiview( render the additional viewpoints. Args: + ---- images (torch.Tensor): _description_ K (torch.Tensor): _description_ TCV_O (torch.Tensor): _description_ @@ -282,9 +290,9 @@ def compute_crops_multiview( labels (List[str]): _description_ Returns: + ------- K_crop """ - labels_mv = [] bsz = len(labels) n_views = TCV_O.shape[1] @@ -314,13 +322,20 @@ def compute_crops_multiview( return_crops=False, ) K_crop = get_K_crop_resize( - K=K.clone(), boxes=boxes_crop, orig_size=images.shape[-2:], crop_resize=self.render_size + K=K.clone(), + boxes=boxes_crop, + orig_size=images.shape[-2:], + crop_resize=self.render_size, ) K_crop = K_crop.view(bsz, n_views, 3, 3) return K_crop def update_pose( - self, TCO: torch.Tensor, K_crop: torch.Tensor, pose_outputs: torch.Tensor, tCR: torch.Tensor + self, + TCO: torch.Tensor, + K_crop: torch.Tensor, + pose_outputs: torch.Tensor, + tCR: torch.Tensor, ) -> torch.Tensor: assert pose_outputs.shape[-1] == 9 dR = compute_rotation_matrix_from_ortho6d(pose_outputs[:, 0:6]) @@ -332,9 +347,11 @@ def net_forward(self, x: torch.Tensor) -> Dict[str, torch.Tensor]: """Forward pass of the neural network. Args: + ---- x (torch.Tensor): input tensor (images + renderings) Returns: + ------- Dict[str, torch.Tensor]: Output of each network head. """ x = self.backbone(x) @@ -345,7 +362,7 @@ def net_forward(self, x: torch.Tensor) -> Dict[str, torch.Tensor]: x = x.flatten(2).mean(dim=-1) else: raise ValueError - outputs = dict() + outputs = {} for k, head in self.heads.items(): outputs[k] = head(x) return outputs @@ -360,15 +377,16 @@ def render_images_multiview( """Render multiple images. Args: + ---- labels: list[str] with length bsz TCV_O: [bsz, n_views, 4, 4] pose of the cameras defining each view KV: [bsz, n_views, 4, 4] intrinsics of the associated cameras random_ambient_light: Whether to use randomize ambient light parameter. 
- Returns + Returns: + ------- renders: [bsz, n_views*n_channels, H, W] """ - labels_mv = [] bsz = len(labels) n_views = TCV_O.shape[1] @@ -384,12 +402,14 @@ def render_images_multiview( Panda3dLightData( light_type="ambient", color=(intensity, intensity, intensity, 1.0), - ) + ), ] light_datas.append(lights) else: if self.render_normals: - ambient_light = Panda3dLightData(light_type="ambient", color=(1.0, 1.0, 1.0, 1.0)) + ambient_light = Panda3dLightData( + light_type="ambient", color=(1.0, 1.0, 1.0, 1.0), + ) light_datas = [[ambient_light] for _ in range(len(labels_mv))] else: light_datas = [make_scene_lights() for _ in range(len(labels_mv))] @@ -421,7 +441,9 @@ def render_images_multiview( renders = torch.cat(cat_list, dim=1) n_channels = renders.shape[1] - renders = renders.view(bsz, n_views, n_channels, *renders.shape[-2:]).flatten(1, 2) + renders = renders.view(bsz, n_views, n_channels, *renders.shape[-2:]).flatten( + 1, 2, + ) return renders # [bsz, n_views*n_channels, H, W] def normalize_images( @@ -432,17 +454,17 @@ def normalize_images( images_inplace: bool = False, renders_inplace: bool = False, ) -> Tuple[torch.Tensor, torch.Tensor]: - """Normalize the depth images by the distance from the camera + """Normalize the depth images by the distance from the camera. If we are using depth then this involves inplace ops so to be safe we will make copies of the tensors Args: + ---- images: [bsz, C, H, W] renders: [bsz, n_view*n_render_channels, H, W] tCR: [bsz, 3] anchor point for rendering """ - # NOTE (lmanuelli): Avoid errors with inplace ops as the same # input might be used in multiple iterations. Since we re-crop # on each iteration this might not be a problem but err'ing on @@ -454,7 +476,6 @@ def normalize_images( renders = renders.clone() if self.input_depth: - if not images_inplace: images = images.clone() C = images.shape[1] @@ -467,13 +488,12 @@ def normalize_images( images[:, self._input_depth_dims] = depth_norm if self.render_depth: - # Need to index into the right channels, assuming no normals # 1-view --> depth_dims = [3] # 2-view --> depth_dims = [3,7] - depth_dims = self._render_depth_dims[0] + self._n_single_render_channels * torch.arange( - 0, self.n_rendered_views - ) + depth_dims = self._render_depth_dims[ + 0 + ] + self._n_single_render_channels * torch.arange(0, self.n_rendered_views) depth = renders[:, depth_dims] renders[:, depth_dims] = self.normalize_depth(depth, tCR) @@ -481,15 +501,15 @@ def normalize_images( return images, renders def normalize_depth(self, depth: torch.Tensor, tCR: torch.Tensor) -> torch.Tensor: - """ - Args: + """Args: + ---- depth: [B,-1,1,H,W] - tCR: [B,3] + tCR: [B,3]. 
- Returns: + Returns + ------- depth_norm: same shape as depth """ - # [B,] z_norm = tCR[:, 2] @@ -504,11 +524,15 @@ def normalize_depth(self, depth: torch.Tensor, tCR: torch.Tensor) -> torch.Tenso elif self.depth_normalization_type == "tCR_center_clamp": depth_norm = torch.clamp(depth - z_norm_unsqz, -2, 2) elif self.depth_normalization_type == "tCR_center_obj_diam": - raise NotImplementedError("Not yet implemented") + msg = "Not yet implemented" + raise NotImplementedError(msg) elif self.depth_normalization_type == "none": depth_norm = depth else: - raise ValueError(f"Unknown depth_normalization_type = {self.depth_normalization_type}") + msg = f"Unknown depth_normalization_type = {self.depth_normalization_type}" + raise ValueError( + msg, + ) return depth_norm @@ -521,7 +545,6 @@ def forward( n_iterations: int = 1, random_ambient_light: bool = False, ) -> Dict[str, PosePredictorOutput]: - timing_dict: Dict[str, float] = defaultdict(float) if not self.input_depth: @@ -535,7 +558,7 @@ def forward( dtype = TCO.dtype device = TCO.device - outputs = dict() + outputs = {} TCO_input = TCO for n in range(n_iterations): TCO_input = normalize_T(TCO_input).detach() @@ -556,21 +579,23 @@ def forward( n_views = TCV_O_input.shape[1] tCV_R = TCV_O_input_flatten[..., :3, [-1]] + TCV_O_input_flatten[ - ..., :3, :3 + ..., :3, :3, ] @ tOR.unsqueeze(1).repeat(1, n_views, 1).flatten(0, 1).unsqueeze(-1) tCV_R = tCV_R.squeeze(-1).view(bsz, TCV_O_input.shape[1], 3) images_crop, K_crop, boxes_rend, boxes_crop = self.crop_inputs( - images, K, TCO_input, tCR, labels + images, K, TCO_input, tCR, labels, ) - KV_crop = self.compute_crops_multiview(images, K, TCV_O_input, tCV_R, labels) + KV_crop = self.compute_crops_multiview( + images, K, TCV_O_input, tCV_R, labels, + ) if not self.remove_TCO_rendering: KV_crop[:, 0] = K_crop t = time.time() renders = self.render_images_multiview( - labels, TCV_O_input, KV_crop, random_ambient_light=random_ambient_light + labels, TCV_O_input, KV_crop, random_ambient_light=random_ambient_light, ) render_time = time.time() - t timing_dict["render"] = render_time @@ -586,7 +611,9 @@ def forward( # would expect this to error out network_outputs = self.net_forward(x) if self.predict_pose_update: - TCO_output = self.update_pose(TCO_input, K_crop, network_outputs["pose"], tCR) + TCO_output = self.update_pose( + TCO_input, K_crop, network_outputs["pose"], tCR, + ) else: TCO_output = TCO_input.detach().clone() @@ -595,7 +622,7 @@ def forward( assert not self.predict_pose_update else: renderings_logits = torch.empty( - bsz, self.n_rendered_views, dtype=dtype, device=device + bsz, self.n_rendered_views, dtype=dtype, device=device, ) outputs[f"iteration={n+1}"] = PosePredictorOutput( @@ -621,15 +648,15 @@ def forward( return outputs def forward_coarse_tensor( - self, x: torch.Tensor, cuda_timer: bool = False + self, x: torch.Tensor, cuda_timer: bool = False, ) -> Dict[str, Union[torch.Tensor, float]]: - """Forward pass on coarse model given an input tensor. The input already contains the concatenated input + rendered images and has been appropriately normalized. Args: + ---- x: [B,C,H,W] where C=9 typically. This is the concatenated input + rendered image @@ -660,12 +687,13 @@ def forward_coarse( return_debug_data: bool = False, ) -> Dict[str, Any]: # TODO: Is this still necessary ? - """Run the coarse model given images + poses + """Run the coarse model given images + poses. Only valid if we are using the coarse model. 
Args: + ---- images: [B,C,H,W] torch tensor, should already be normalized to [0,255] --> [0,1] K: [B,3,3] camera intrinsics @@ -674,13 +702,13 @@ def forward_coarse( Returns: + ------- dict: - logits: tensor [B,] - scores tensor [B,] """ - assert ( self.predict_rendered_views_logits ), "Method only valid if coarse classification model" @@ -697,7 +725,7 @@ def forward_coarse( TCO_input = normalize_T(TCO_input).detach() tCR = TCO_input[..., :3, -1] images_crop, K_crop, boxes_rend, boxes_crop = self.crop_inputs( - images, K, TCO_input, tCR, labels + images, K, TCO_input, tCR, labels, ) # [B,1,4,4], hack to use the multi-view function diff --git a/happypose/pose_estimators/megapose/src/megapose/models/resnet.py b/happypose/pose_estimators/megapose/src/megapose/models/resnet.py index 2a3159e3..d624c2ad 100644 --- a/happypose/pose_estimators/megapose/src/megapose/models/resnet.py +++ b/happypose/pose_estimators/megapose/src/megapose/models/resnet.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -25,7 +24,6 @@ from typing import Any, Callable, List, Optional, Type, Union # Third Party -import torch import torch.nn as nn from torch import Tensor @@ -44,9 +42,9 @@ def conv3x3( - in_planes: int, out_planes: int, stride: int = 1, groups: int = 1, dilation: int = 1 + in_planes: int, out_planes: int, stride: int = 1, groups: int = 1, dilation: int = 1, ) -> nn.Conv2d: - """3x3 convolution with padding""" + """3x3 convolution with padding.""" return nn.Conv2d( in_planes, out_planes, @@ -60,7 +58,7 @@ def conv3x3( def conv1x1(in_planes: int, out_planes: int, stride: int = 1) -> nn.Conv2d: - """1x1 convolution""" + """1x1 convolution.""" return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) @@ -78,13 +76,15 @@ def __init__( dilation: int = 1, norm_layer: Optional[Callable[..., nn.Module]] = None, ) -> None: - super(BasicBlock, self).__init__() + super().__init__() if norm_layer is None: norm_layer = nn.BatchNorm2d if groups != 1 or base_width != 64: - raise ValueError("BasicBlock only supports groups=1 and base_width=64") + msg = "BasicBlock only supports groups=1 and base_width=64" + raise ValueError(msg) if dilation > 1: - raise NotImplementedError("Dilation > 1 not supported in BasicBlock") + msg = "Dilation > 1 not supported in BasicBlock" + raise NotImplementedError(msg) # Both self.conv1 and self.downsample layers downsample the input when stride != 1 self.conv1 = conv3x3(inplanes, planes, stride) self.bn1 = norm_layer(planes) @@ -133,7 +133,7 @@ def __init__( dilation: int = 1, norm_layer: Optional[Callable[..., nn.Module]] = None, ) -> None: - super(Bottleneck, self).__init__() + super().__init__() if norm_layer is None: norm_layer = nn.BatchNorm2d width = int(planes * (base_width / 64.0)) * groups @@ -184,7 +184,7 @@ def __init__( replace_stride_with_dilation: Optional[List[bool]] = None, norm_layer: Optional[Callable[..., nn.Module]] = None, ) -> None: - super(ResNet, self).__init__() + super().__init__() if norm_layer is None: norm_layer = nn.BatchNorm2d self._norm_layer = norm_layer @@ -196,28 +196,27 @@ def __init__( # the 2x2 stride with a dilated convolution instead replace_stride_with_dilation = [False, False, False] if len(replace_stride_with_dilation) != 3: + msg = 
f"replace_stride_with_dilation should be None or a 3-element tuple, got {replace_stride_with_dilation}" raise ValueError( - "replace_stride_with_dilation should be None or a 3-element tuple, got {}".format( - replace_stride_with_dilation - ) + msg, ) self.groups = groups self.base_width = width_per_group self.conv1 = nn.Conv2d( - n_inputs, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False + n_inputs, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False, ) self.bn1 = norm_layer(self.inplanes) self.relu = nn.ReLU(inplace=True) self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) self.layer1 = self._make_layer(block, 64, layers[0]) self.layer2 = self._make_layer( - block, 128, layers[1], stride=2, dilate=replace_stride_with_dilation[0] + block, 128, layers[1], stride=2, dilate=replace_stride_with_dilation[0], ) self.layer3 = self._make_layer( - block, 256, layers[2], stride=2, dilate=replace_stride_with_dilation[1] + block, 256, layers[2], stride=2, dilate=replace_stride_with_dilation[1], ) self.layer4 = self._make_layer( - block, 512, layers[3], stride=2, dilate=replace_stride_with_dilation[2] + block, 512, layers[3], stride=2, dilate=replace_stride_with_dilation[2], ) for m in self.modules(): @@ -268,7 +267,7 @@ def _make_layer( self.base_width, previous_dilation, norm_layer, - ) + ), ) self.inplanes = planes * block.expansion for _ in range(1, blocks): @@ -280,7 +279,7 @@ def _make_layer( base_width=self.base_width, dilation=self.dilation, norm_layer=norm_layer, - ) + ), ) return nn.Sequential(*layers) @@ -308,7 +307,7 @@ def _resnet( layers: List[int], pretrained: bool, progress: bool, - **kwargs: Any + **kwargs: Any, ) -> ResNet: model = ResNet(block, layers, **kwargs) if pretrained: diff --git a/happypose/pose_estimators/megapose/src/megapose/models/torchvision_resnet.py b/happypose/pose_estimators/megapose/src/megapose/models/torchvision_resnet.py index ac4ee5b9..9bff36d0 100644 --- a/happypose/pose_estimators/megapose/src/megapose/models/torchvision_resnet.py +++ b/happypose/pose_estimators/megapose/src/megapose/models/torchvision_resnet.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -51,9 +50,9 @@ def conv3x3( - in_planes: int, out_planes: int, stride: int = 1, groups: int = 1, dilation: int = 1 + in_planes: int, out_planes: int, stride: int = 1, groups: int = 1, dilation: int = 1, ) -> nn.Conv2d: - """3x3 convolution with padding""" + """3x3 convolution with padding.""" return nn.Conv2d( in_planes, out_planes, @@ -67,7 +66,7 @@ def conv3x3( def conv1x1(in_planes: int, out_planes: int, stride: int = 1) -> nn.Conv2d: - """1x1 convolution""" + """1x1 convolution.""" return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) @@ -85,13 +84,15 @@ def __init__( dilation: int = 1, norm_layer: Optional[Callable[..., nn.Module]] = None, ) -> None: - super(BasicBlock, self).__init__() + super().__init__() if norm_layer is None: norm_layer = nn.BatchNorm2d if groups != 1 or base_width != 64: - raise ValueError("BasicBlock only supports groups=1 and base_width=64") + msg = "BasicBlock only supports groups=1 and base_width=64" + raise ValueError(msg) if dilation > 1: - raise NotImplementedError("Dilation > 1 not supported in BasicBlock") + msg = "Dilation > 1 not supported in BasicBlock" + raise NotImplementedError(msg) # Both self.conv1 and self.downsample layers downsample the input when stride != 1 self.conv1 = conv3x3(inplanes, planes, stride) self.bn1 = norm_layer(planes) @@ -140,7 +141,7 @@ def __init__( dilation: int = 1, norm_layer: Optional[Callable[..., nn.Module]] = None, ) -> None: - super(Bottleneck, self).__init__() + super().__init__() if norm_layer is None: norm_layer = nn.BatchNorm2d width = int(planes * (base_width / 64.0)) * groups @@ -191,7 +192,7 @@ def __init__( norm_layer: Optional[Callable[..., nn.Module]] = None, n_input_channels: int = 3, ) -> None: - super(ResNet, self).__init__() + super().__init__() if norm_layer is None: norm_layer = nn.BatchNorm2d self._norm_layer = norm_layer @@ -203,28 +204,32 @@ def __init__( # the 2x2 stride with a dilated convolution instead replace_stride_with_dilation = [False, False, False] if len(replace_stride_with_dilation) != 3: + msg = f"replace_stride_with_dilation should be None or a 3-element tuple, got {replace_stride_with_dilation}" raise ValueError( - "replace_stride_with_dilation should be None or a 3-element tuple, got {}".format( - replace_stride_with_dilation - ) + msg, ) self.groups = groups self.base_width = width_per_group self.conv1 = nn.Conv2d( - n_input_channels, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False + n_input_channels, + self.inplanes, + kernel_size=7, + stride=2, + padding=3, + bias=False, ) self.bn1 = norm_layer(self.inplanes) self.relu = nn.ReLU(inplace=True) self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) self.layer1 = self._make_layer(block, 64, layers[0]) self.layer2 = self._make_layer( - block, 128, layers[1], stride=2, dilate=replace_stride_with_dilation[0] + block, 128, layers[1], stride=2, dilate=replace_stride_with_dilation[0], ) self.layer3 = self._make_layer( - block, 256, layers[2], stride=2, dilate=replace_stride_with_dilation[1] + block, 256, layers[2], stride=2, dilate=replace_stride_with_dilation[1], ) self.layer4 = self._make_layer( - block, 512, layers[3], stride=2, dilate=replace_stride_with_dilation[2] + block, 512, layers[3], stride=2, dilate=replace_stride_with_dilation[2], ) self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) self.fc = nn.Linear(512 * block.expansion, num_classes) @@ -277,7 +282,7 @@ def _make_layer( self.base_width, previous_dilation, norm_layer, - ) + ), ) self.inplanes = planes * block.expansion for _ 
in range(1, blocks): @@ -289,7 +294,7 @@ def _make_layer( base_width=self.base_width, dilation=self.dilation, norm_layer=norm_layer, - ) + ), ) return nn.Sequential(*layers) @@ -322,7 +327,7 @@ def _resnet( layers: List[int], pretrained: bool, progress: bool, - **kwargs: Any + **kwargs: Any, ) -> ResNet: model = ResNet(block, layers, **kwargs) if pretrained: @@ -336,6 +341,7 @@ def resnet18(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> `"Deep Residual Learning for Image Recognition" `_. Args: + ---- pretrained (bool): If True, returns a model pre-trained on ImageNet progress (bool): If True, displays a progress bar of the download to stderr """ @@ -347,6 +353,7 @@ def resnet34(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> `"Deep Residual Learning for Image Recognition" `_. Args: + ---- pretrained (bool): If True, returns a model pre-trained on ImageNet progress (bool): If True, displays a progress bar of the download to stderr """ @@ -358,6 +365,7 @@ def resnet50(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> `"Deep Residual Learning for Image Recognition" `_. Args: + ---- pretrained (bool): If True, returns a model pre-trained on ImageNet progress (bool): If True, displays a progress bar of the download to stderr """ @@ -369,10 +377,13 @@ def resnet101(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> `"Deep Residual Learning for Image Recognition" `_. Args: + ---- pretrained (bool): If True, returns a model pre-trained on ImageNet progress (bool): If True, displays a progress bar of the download to stderr """ - return _resnet("resnet101", Bottleneck, [3, 4, 23, 3], pretrained, progress, **kwargs) + return _resnet( + "resnet101", Bottleneck, [3, 4, 23, 3], pretrained, progress, **kwargs, + ) def resnet152(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet: @@ -380,39 +391,54 @@ def resnet152(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> `"Deep Residual Learning for Image Recognition" `_. Args: + ---- pretrained (bool): If True, returns a model pre-trained on ImageNet progress (bool): If True, displays a progress bar of the download to stderr """ - return _resnet("resnet152", Bottleneck, [3, 8, 36, 3], pretrained, progress, **kwargs) + return _resnet( + "resnet152", Bottleneck, [3, 8, 36, 3], pretrained, progress, **kwargs, + ) -def resnext50_32x4d(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet: +def resnext50_32x4d( + pretrained: bool = False, progress: bool = True, **kwargs: Any, +) -> ResNet: r"""ResNeXt-50 32x4d model from `"Aggregated Residual Transformation for Deep Neural Networks" `_. Args: + ---- pretrained (bool): If True, returns a model pre-trained on ImageNet progress (bool): If True, displays a progress bar of the download to stderr """ kwargs["groups"] = 32 kwargs["width_per_group"] = 4 - return _resnet("resnext50_32x4d", Bottleneck, [3, 4, 6, 3], pretrained, progress, **kwargs) + return _resnet( + "resnext50_32x4d", Bottleneck, [3, 4, 6, 3], pretrained, progress, **kwargs, + ) -def resnext101_32x8d(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet: +def resnext101_32x8d( + pretrained: bool = False, progress: bool = True, **kwargs: Any, +) -> ResNet: r"""ResNeXt-101 32x8d model from `"Aggregated Residual Transformation for Deep Neural Networks" `_. 
Args: + ---- pretrained (bool): If True, returns a model pre-trained on ImageNet progress (bool): If True, displays a progress bar of the download to stderr """ kwargs["groups"] = 32 kwargs["width_per_group"] = 8 - return _resnet("resnext101_32x8d", Bottleneck, [3, 4, 23, 3], pretrained, progress, **kwargs) + return _resnet( + "resnext101_32x8d", Bottleneck, [3, 4, 23, 3], pretrained, progress, **kwargs, + ) -def wide_resnet50_2(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet: +def wide_resnet50_2( + pretrained: bool = False, progress: bool = True, **kwargs: Any, +) -> ResNet: r"""Wide ResNet-50-2 model from `"Wide Residual Networks" `_. @@ -422,14 +448,19 @@ def wide_resnet50_2(pretrained: bool = False, progress: bool = True, **kwargs: A channels, and in Wide ResNet-50-2 has 2048-1024-2048. Args: + ---- pretrained (bool): If True, returns a model pre-trained on ImageNet progress (bool): If True, displays a progress bar of the download to stderr """ kwargs["width_per_group"] = 64 * 2 - return _resnet("wide_resnet50_2", Bottleneck, [3, 4, 6, 3], pretrained, progress, **kwargs) + return _resnet( + "wide_resnet50_2", Bottleneck, [3, 4, 6, 3], pretrained, progress, **kwargs, + ) -def wide_resnet101_2(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet: +def wide_resnet101_2( + pretrained: bool = False, progress: bool = True, **kwargs: Any, +) -> ResNet: r"""Wide ResNet-101-2 model from `"Wide Residual Networks" `_. @@ -439,8 +470,11 @@ def wide_resnet101_2(pretrained: bool = False, progress: bool = True, **kwargs: channels, and in Wide ResNet-50-2 has 2048-1024-2048. Args: + ---- pretrained (bool): If True, returns a model pre-trained on ImageNet progress (bool): If True, displays a progress bar of the download to stderr """ kwargs["width_per_group"] = 64 * 2 - return _resnet("wide_resnet101_2", Bottleneck, [3, 4, 23, 3], pretrained, progress, **kwargs) + return _resnet( + "wide_resnet101_2", Bottleneck, [3, 4, 23, 3], pretrained, progress, **kwargs, + ) diff --git a/happypose/pose_estimators/megapose/src/megapose/models/wide_resnet.py b/happypose/pose_estimators/megapose/src/megapose/models/wide_resnet.py index 55b0f3d2..251d9886 100644 --- a/happypose/pose_estimators/megapose/src/megapose/models/wide_resnet.py +++ b/happypose/pose_estimators/megapose/src/megapose/models/wide_resnet.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -22,20 +21,25 @@ def conv3x3(in_planes, out_planes, stride=1): - """3x3 convolution with padding""" - return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False) + """3x3 convolution with padding.""" + return nn.Conv2d( + in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False, + ) class BasicBlockV2(nn.Module): r"""BasicBlock V2 from `"Identity Mappings in Deep Residual Networks"`_ paper. This is used for ResNet V2 for 18, 34 layers. + Args: + ---- inplanes (int): number of input channels. planes (int): number of output channels. stride (int): stride size. downsample (Module) optional downsample module to downsample the input. 
""" + expansion = 1 def __init__(self, inplanes, planes, stride=1, downsample=None): @@ -63,7 +67,7 @@ def __init__(self, block, layers, width, num_inputs=3, maxpool=True): config = [int(v * width) for v in (64, 128, 256, 512)] self.inplanes = config[0] self.conv1 = nn.Conv2d( - num_inputs, self.inplanes, kernel_size=5, stride=2, padding=2, bias=False + num_inputs, self.inplanes, kernel_size=5, stride=2, padding=2, bias=False, ) self.bn1 = nn.BatchNorm2d(self.inplanes) self.relu = nn.ReLU(inplace=True) @@ -87,14 +91,18 @@ def _make_layer(self, block, planes, blocks, stride=1): downsample = None if stride != 1 or self.inplanes != planes * block.expansion: downsample = nn.Conv2d( - self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False + self.inplanes, + planes * block.expansion, + kernel_size=1, + stride=stride, + bias=False, ) layers = [ block(self.inplanes, planes, stride, downsample), ] self.inplanes = planes * block.expansion - for i in range(1, blocks): + for _i in range(1, blocks): layers.append(block(self.inplanes, planes)) return nn.Sequential(*layers) @@ -116,13 +124,17 @@ def forward(self, x): class WideResNet18(WideResNet): def __init__(self, n_inputs=3, width=1.0): - super().__init__(block=BasicBlockV2, layers=CONFIG[18], width=width, num_inputs=n_inputs) + super().__init__( + block=BasicBlockV2, layers=CONFIG[18], width=width, num_inputs=n_inputs, + ) self.n_features = int(512 * width) class WideResNet34(WideResNet): def __init__(self, n_inputs=3, width=1.0): - super().__init__(block=BasicBlockV2, layers=CONFIG[34], width=width, num_inputs=n_inputs) + super().__init__( + block=BasicBlockV2, layers=CONFIG[34], width=width, num_inputs=n_inputs, + ) self.n_features = int(512 * width) diff --git a/happypose/pose_estimators/megapose/src/megapose/scripts/__init__.py b/happypose/pose_estimators/megapose/src/megapose/scripts/__init__.py index 73a7b275..09aba5e2 100644 --- a/happypose/pose_estimators/megapose/src/megapose/scripts/__init__.py +++ b/happypose/pose_estimators/megapose/src/megapose/scripts/__init__.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -13,4 +12,3 @@ See the License for the specific language governing permissions and limitations under the License. 
""" - diff --git a/happypose/pose_estimators/megapose/src/megapose/scripts/bop_calc_gt_info.py b/happypose/pose_estimators/megapose/src/megapose/scripts/bop_calc_gt_info.py index 64ea52e9..b55519bd 100644 --- a/happypose/pose_estimators/megapose/src/megapose/scripts/bop_calc_gt_info.py +++ b/happypose/pose_estimators/megapose/src/megapose/scripts/bop_calc_gt_info.py @@ -1,13 +1,9 @@ import argparse -import glob import json import os -import sys -from copy import deepcopy from pathlib import Path import numpy as np -import trimesh # from bop_toolkit_lib import config # from bop_toolkit_lib import dataset_params @@ -16,22 +12,27 @@ ################################################################################ ################################################################################ parser = argparse.ArgumentParser() -parser.add_argument('--chunk-dir', type=str) -parser.add_argument('--shapenet-dir', type=str) -parser.add_argument('--gso-dir', type=str) -parser.add_argument('--renderer-type', type=str, default='cpp') -parser.add_argument('--overwrite-models', action='store_true') +parser.add_argument("--chunk-dir", type=str) +parser.add_argument("--shapenet-dir", type=str) +parser.add_argument("--gso-dir", type=str) +parser.add_argument("--renderer-type", type=str, default="cpp") +parser.add_argument("--overwrite-models", action="store_true") args = parser.parse_args() chunk_dir = Path(args.chunk_dir) -chunk_infos = json.loads((chunk_dir / 'chunk_infos.json').read_text()) -cam_infos_path = (chunk_dir / 'bop_data/camera.json') +chunk_infos = json.loads((chunk_dir / "chunk_infos.json").read_text()) +cam_infos_path = chunk_dir / "bop_data/camera.json" cam_infos = json.loads(cam_infos_path.read_text()) -scene_gt_tpath = (chunk_dir / 'bop_data/train_pbr/{scene_id:06d}/scene_gt.json') -scene_gt_info_tpath = (chunk_dir / 'bop_data/train_pbr/{scene_id:06d}/scene_gt_info.json') -depth_gt_info_tpath = (chunk_dir / 'bop_data/train_pbr/{scene_id:06d}/depth/{im_id:06d}.png') -vis_mask_visib_tpath = (chunk_dir / 'bop_data/train_pbr/{scene_id:06d}/mask_visib/{im_id:06d}_{inst_id:06d}.png') +scene_gt_tpath = chunk_dir / "bop_data/train_pbr/{scene_id:06d}/scene_gt.json" +scene_gt_info_tpath = chunk_dir / "bop_data/train_pbr/{scene_id:06d}/scene_gt_info.json" +depth_gt_info_tpath = ( + chunk_dir / "bop_data/train_pbr/{scene_id:06d}/depth/{im_id:06d}.png" +) +vis_mask_visib_tpath = ( + chunk_dir + / "bop_data/train_pbr/{scene_id:06d}/mask_visib/{im_id:06d}_{inst_id:06d}.png" +) if args.shapenet_dir: shapenet_dir = Path(args.shapenet_dir) @@ -39,81 +40,97 @@ else: is_shapenet = False gso_dir = Path(args.gso_dir) -scale = chunk_infos['scale'] +scale = chunk_infos["scale"] -p = dict( - dataset=chunk_dir, - dataset_split='train_pbr', - dataset_split_type='train_pbr', +p = { + "dataset": chunk_dir, + "dataset_split": "train_pbr", + "dataset_split_type": "train_pbr", # renderer_type='python', - delta=15, -) -p['renderer_type'] = args.renderer_type + "delta": 15, +} +p["renderer_type"] = args.renderer_type # Initialize a renderer. 
-im_width, im_height = cam_infos['width'], cam_infos['height'] +im_width, im_height = cam_infos["width"], cam_infos["height"] ren_width, ren_height = 3 * im_width, 3 * im_height ren_cx_offset, ren_cy_offset = im_width, im_height large_ren = renderer.create_renderer( - ren_width, ren_height, p['renderer_type'], - mode='depth') + ren_width, ren_height, p["renderer_type"], mode="depth", +) -misc.log('Initializing renderer...') -obj_name_to_id = dict() -for obj_id, obj in enumerate(chunk_infos['scene_infos']['objects']): +misc.log("Initializing renderer...") +obj_name_to_id = {} +for obj_id, obj in enumerate(chunk_infos["scene_infos"]["objects"]): if is_shapenet: - synset_id, source_id = obj['synset_id'], obj['source_id'] - obj_name = obj['category_id'] - ply_path = Path(shapenet_dir) / f'{synset_id}/{source_id}' / 'models/model_normalized_scaled.ply' + synset_id, source_id = obj["synset_id"], obj["source_id"] + obj_name = obj["category_id"] + ply_path = ( + Path(shapenet_dir) + / f"{synset_id}/{source_id}" + / "models/model_normalized_scaled.ply" + ) else: - obj_name = obj['category_id'] - gso_id = obj_name.split('gso_')[1] - ply_path = Path(gso_dir) / f'{gso_id}' / 'meshes/model.ply' + obj_name = obj["category_id"] + gso_id = obj_name.split("gso_")[1] + ply_path = Path(gso_dir) / f"{gso_id}" / "meshes/model.ply" obj_name_to_id[obj_name] = obj_id large_ren.add_object(obj_id, str(ply_path)) scene_ids = [0] -misc.log(f'Processing scene ids: {scene_ids}') +misc.log(f"Processing scene ids: {scene_ids}") for scene_id in scene_ids: # Load scene info and ground-truth poses. - scene_dir = chunk_dir / f'bop_data/train_pbr/{scene_id:06d}' - scene_camera = inout.load_scene_camera(scene_dir / 'scene_camera.json') + scene_dir = chunk_dir / f"bop_data/train_pbr/{scene_id:06d}" + scene_camera = inout.load_scene_camera(scene_dir / "scene_camera.json") scene_gt = inout.load_scene_gt(str(scene_gt_tpath).format(scene_id=scene_id)) scene_gt_info = {} im_ids = sorted(scene_gt.keys()) for im_counter, im_id in enumerate(im_ids): - depth_path = str(scene_dir / f'depth/{im_id:06d}.png') - K = scene_camera[im_id]['cam_K'] + depth_path = str(scene_dir / f"depth/{im_id:06d}.png") + K = scene_camera[im_id]["cam_K"] fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2] # Load depth image. depth_im = inout.load_depth(depth_path) - depth_im *= scene_camera[im_id]['depth_scale'] # to [mm] + depth_im *= scene_camera[im_id]["depth_scale"] # to [mm] dist_im = misc.depth_im_to_dist_im_fast(depth_im, K) im_size = (depth_im.shape[1], depth_im.shape[0]) - # Calc gt info if im_counter % 5 == 0: misc.log( - 'Calculating GT info - dataset: {} ({}, {}), scene: {}, im: {}'.format( - p['dataset'], p['dataset_split'], p['dataset_split_type'], scene_id, im_id)) + "Calculating GT info - dataset: {} ({}, {}), scene: {}, im: {}".format( + p["dataset"], + p["dataset_split"], + p["dataset_split_type"], + scene_id, + im_id, + ), + ) scene_gt_info[im_id] = [] - for gt_id, gt in enumerate(scene_gt[im_id]): - if gt['obj_id'] not in obj_name_to_id: + for _gt_id, gt in enumerate(scene_gt[im_id]): + if gt["obj_id"] not in obj_name_to_id: continue # Render depth image of the object model in the ground-truth pose. 
depth_gt_large = large_ren.render_object( - obj_name_to_id[gt['obj_id']], gt['cam_R_m2c'], gt['cam_t_m2c'], - fx, fy, cx + ren_cx_offset, cy + ren_cy_offset)['depth'] + obj_name_to_id[gt["obj_id"]], + gt["cam_R_m2c"], + gt["cam_t_m2c"], + fx, + fy, + cx + ren_cx_offset, + cy + ren_cy_offset, + )["depth"] depth_gt = depth_gt_large[ - ren_cy_offset:(ren_cy_offset + im_height), - ren_cx_offset:(ren_cx_offset + im_width)] + ren_cy_offset : (ren_cy_offset + im_height), + ren_cx_offset : (ren_cx_offset + im_width), + ] # Convert depth images to distance images. # dist_gt = misc.depth_im_to_dist_im(depth_gt, K) @@ -124,7 +141,8 @@ # Estimation of the visibility mask. visib_gt = visibility.estimate_visib_mask_gt( - dist_im, dist_gt, p['delta'], visib_mode='bop19') + dist_im, dist_gt, p["delta"], visib_mode="bop19", + ) # Mask of the object in the GT pose. obj_mask_gt_large = depth_gt_large > 0 @@ -163,14 +181,16 @@ bbox_visib = misc.calc_2d_bbox(xs, ys, im_size) # Store the calculated info. - scene_gt_info[im_id].append({ - 'px_count_all': int(px_count_all), - 'px_count_valid': int(px_count_valid), - 'px_count_visib': int(px_count_visib), - 'visib_fract': float(visib_fract), - 'bbox_obj': [int(e) for e in bbox], - 'bbox_visib': [int(e) for e in bbox_visib] - }) + scene_gt_info[im_id].append( + { + "px_count_all": int(px_count_all), + "px_count_valid": int(px_count_valid), + "px_count_visib": int(px_count_visib), + "visib_fract": float(visib_fract), + "bbox_obj": [int(e) for e in bbox], + "bbox_visib": [int(e) for e in bbox_visib], + }, + ) # Save the info for the current scene. scene_gt_info_path = str(scene_gt_info_tpath).format(scene_id=scene_id) misc.ensure_dir(os.path.dirname(scene_gt_info_path)) diff --git a/happypose/pose_estimators/megapose/src/megapose/scripts/bop_calc_masks.py b/happypose/pose_estimators/megapose/src/megapose/scripts/bop_calc_masks.py index 73410480..2acb1b25 100644 --- a/happypose/pose_estimators/megapose/src/megapose/scripts/bop_calc_masks.py +++ b/happypose/pose_estimators/megapose/src/megapose/scripts/bop_calc_masks.py @@ -1,32 +1,32 @@ import argparse -import glob import json -import os -import sys -from copy import deepcopy from pathlib import Path import numpy as np -import trimesh from bop_toolkit_lib import inout, misc, renderer, visibility parser = argparse.ArgumentParser() -parser.add_argument('--chunk-dir', type=str) -parser.add_argument('--shapenet-dir', type=str) -parser.add_argument('--gso-dir', type=str) -parser.add_argument('--renderer-type', type=str, default='cpp') -parser.add_argument('--overwrite-models', action='store_true') +parser.add_argument("--chunk-dir", type=str) +parser.add_argument("--shapenet-dir", type=str) +parser.add_argument("--gso-dir", type=str) +parser.add_argument("--renderer-type", type=str, default="cpp") +parser.add_argument("--overwrite-models", action="store_true") args = parser.parse_args() chunk_dir = Path(args.chunk_dir) -chunk_infos = json.loads((chunk_dir / 'chunk_infos.json').read_text()) -cam_infos_path = (chunk_dir / 'bop_data/camera.json') +chunk_infos = json.loads((chunk_dir / "chunk_infos.json").read_text()) +cam_infos_path = chunk_dir / "bop_data/camera.json" cam_infos = json.loads(cam_infos_path.read_text()) -scene_gt_tpath = (chunk_dir / 'bop_data/train_pbr/{scene_id:06d}/scene_gt.json') -scene_gt_info_tpath = (chunk_dir / 'bop_data/train_pbr/{scene_id:06d}/scene_gt_info.json') -depth_gt_info_tpath = (chunk_dir / 'bop_data/train_pbr/{scene_id:06d}/depth/{im_id:06d}.png') -vis_mask_visib_tpath = (chunk_dir / 
'bop_data/train_pbr/{scene_id:06d}/mask_visib/{im_id:06d}_{inst_id:06d}.png') +scene_gt_tpath = chunk_dir / "bop_data/train_pbr/{scene_id:06d}/scene_gt.json" +scene_gt_info_tpath = chunk_dir / "bop_data/train_pbr/{scene_id:06d}/scene_gt_info.json" +depth_gt_info_tpath = ( + chunk_dir / "bop_data/train_pbr/{scene_id:06d}/depth/{im_id:06d}.png" +) +vis_mask_visib_tpath = ( + chunk_dir + / "bop_data/train_pbr/{scene_id:06d}/mask_visib/{im_id:06d}_{inst_id:06d}.png" +) if args.shapenet_dir: shapenet_dir = Path(args.shapenet_dir) @@ -34,82 +34,96 @@ else: is_shapenet = False gso_dir = Path(args.gso_dir) -scale = chunk_infos['scale'] +scale = chunk_infos["scale"] -p = dict( - dataset=chunk_dir, - dataset_split='train_pbr', - dataset_split_type='train_pbr', +p = { + "dataset": chunk_dir, + "dataset_split": "train_pbr", + "dataset_split_type": "train_pbr", # renderer_type='python', - delta=15, -) -p['renderer_type'] = args.renderer_type + "delta": 15, +} +p["renderer_type"] = args.renderer_type # Initialize a renderer. -im_width, im_height = cam_infos['width'], cam_infos['height'] +im_width, im_height = cam_infos["width"], cam_infos["height"] ren_width, ren_height = 3 * im_width, 3 * im_height ren_cx_offset, ren_cy_offset = im_width, im_height -ren = renderer.create_renderer( - im_width, im_height, p['renderer_type'], - mode='depth') +ren = renderer.create_renderer(im_width, im_height, p["renderer_type"], mode="depth") -misc.log('Initializing renderer...') -obj_name_to_id = dict() -for obj_id, obj in enumerate(chunk_infos['scene_infos']['objects']): +misc.log("Initializing renderer...") +obj_name_to_id = {} +for obj_id, obj in enumerate(chunk_infos["scene_infos"]["objects"]): if is_shapenet: - synset_id, source_id = obj['synset_id'], obj['source_id'] - obj_name = obj['category_id'] - ply_path = Path(shapenet_dir) / f'{synset_id}/{source_id}' / 'models/model_normalized_scaled.ply' + synset_id, source_id = obj["synset_id"], obj["source_id"] + obj_name = obj["category_id"] + ply_path = ( + Path(shapenet_dir) + / f"{synset_id}/{source_id}" + / "models/model_normalized_scaled.ply" + ) else: - obj_name = obj['category_id'] - gso_id = obj_name.split('gso_')[1] - ply_path = Path(gso_dir) / f'{gso_id}' / 'meshes/model.ply' + obj_name = obj["category_id"] + gso_id = obj_name.split("gso_")[1] + ply_path = Path(gso_dir) / f"{gso_id}" / "meshes/model.ply" obj_name_to_id[obj_name] = obj_id ren.add_object(obj_id, str(ply_path)) scene_ids = [0] -misc.log(f'Processing scene ids: {scene_ids}') +misc.log(f"Processing scene ids: {scene_ids}") for scene_id in scene_ids: # Load scene info and ground-truth poses. - scene_dir = chunk_dir / f'bop_data/train_pbr/{scene_id:06d}' - scene_camera = inout.load_scene_camera(scene_dir / 'scene_camera.json') + scene_dir = chunk_dir / f"bop_data/train_pbr/{scene_id:06d}" + scene_camera = inout.load_scene_camera(scene_dir / "scene_camera.json") scene_gt = inout.load_scene_gt(str(scene_gt_tpath).format(scene_id=scene_id)) - mask_dir_path = str(scene_dir / 'mask') + mask_dir_path = str(scene_dir / "mask") misc.ensure_dir(mask_dir_path) - mask_visib_dir_path = str(scene_dir / 'mask_visib') + mask_visib_dir_path = str(scene_dir / "mask_visib") misc.ensure_dir(mask_visib_dir_path) scene_gt_info = {} im_ids = sorted(scene_gt.keys()) for im_counter, im_id in enumerate(im_ids): - depth_path = str(scene_dir / f'depth/{im_id:06d}.png') + depth_path = str(scene_dir / f"depth/{im_id:06d}.png") # 1. 
Calc masks if im_counter % 5 == 0: misc.log( - 'Calculating masks - dataset: {} ({}, {}), scene: {}, im: {}'.format( - p['dataset'], p['dataset_split'], p['dataset_split_type'], scene_id, im_id)) - - K = scene_camera[im_id]['cam_K'] + "Calculating masks - dataset: {} ({}, {}), scene: {}, im: {}".format( + p["dataset"], + p["dataset_split"], + p["dataset_split_type"], + scene_id, + im_id, + ), + ) + + K = scene_camera[im_id]["cam_K"] fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2] # Load depth image. depth_im = inout.load_depth(depth_path) - depth_im *= scene_camera[im_id]['depth_scale'] # to [mm] + depth_im *= scene_camera[im_id]["depth_scale"] # to [mm] dist_im = misc.depth_im_to_dist_im_fast(depth_im, K) im_size = (depth_im.shape[1], depth_im.shape[0]) for gt_id, gt in enumerate(scene_gt[im_id]): - if gt['obj_id'] not in obj_name_to_id: + if gt["obj_id"] not in obj_name_to_id: continue # Render the depth image depth_gt = ren.render_object( - obj_name_to_id[gt['obj_id']], gt['cam_R_m2c'], gt['cam_t_m2c'], - fx, fy, cx, cy)['depth'] + obj_name_to_id[gt["obj_id"]], + gt["cam_R_m2c"], + gt["cam_t_m2c"], + fx, + fy, + cx, + cy, + )["depth"] # Convert depth image to distance image. dist_gt = misc.depth_im_to_dist_im_fast(depth_gt, K) @@ -119,11 +133,14 @@ # Mask of the visible part of the object silhouette. mask_visib = visibility.estimate_visib_mask_gt( - dist_im, dist_gt, p['delta'], visib_mode='bop19') + dist_im, dist_gt, p["delta"], visib_mode="bop19", + ) # Save the calculated masks. - mask_path = str(Path(mask_dir_path) / f'{im_id:06d}_{gt_id:06d}.png') + mask_path = str(Path(mask_dir_path) / f"{im_id:06d}_{gt_id:06d}.png") inout.save_im(mask_path, 255 * mask.astype(np.uint8)) - mask_visib_path = str(Path(mask_visib_dir_path) / f'{im_id:06d}_{gt_id:06d}.png') + mask_visib_path = str( + Path(mask_visib_dir_path) / f"{im_id:06d}_{gt_id:06d}.png", + ) inout.save_im(mask_visib_path, 255 * mask_visib.astype(np.uint8)) diff --git a/happypose/pose_estimators/megapose/src/megapose/scripts/distributed.py b/happypose/pose_estimators/megapose/src/megapose/scripts/distributed.py index e7a4aa4d..70841924 100644 --- a/happypose/pose_estimators/megapose/src/megapose/scripts/distributed.py +++ b/happypose/pose_estimators/megapose/src/megapose/scripts/distributed.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,7 +14,6 @@ """ - # Standard Library import datetime import os @@ -126,8 +124,7 @@ def init_distributed_mode(): def reduce_dict(input_dict, average=True): - """ - https://github.com/pytorch/vision/blob/master/references/detection/utils.py + """https://github.com/pytorch/vision/blob/master/references/detection/utils.py Args: input_dict (dict): all the values will be reduced average (bool): whether to do average or sum diff --git a/happypose/pose_estimators/megapose/src/megapose/scripts/download.py b/happypose/pose_estimators/megapose/src/megapose/scripts/download.py index a5a19f5a..2dce595c 100644 --- a/happypose/pose_estimators/megapose/src/megapose/scripts/download.py +++ b/happypose/pose_estimators/megapose/src/megapose/scripts/download.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,7 +14,6 @@ """ - # Standard Library import argparse import logging @@ -35,7 +33,11 @@ SYNT_REAL_DETECTORS, SYNT_REAL_REFINER, ) -from happypose.pose_estimators.megapose.src.megapose.config import BOP_DS_DIR, LOCAL_DATA_DIR, PROJECT_DIR +from happypose.pose_estimators.megapose.src.megapose.config import ( + BOP_DS_DIR, + LOCAL_DATA_DIR, + PROJECT_DIR, +) from happypose.toolbox.utils.logging import get_logger logger = get_logger(__name__) @@ -87,12 +89,12 @@ "ruapc": { "test_splits": [ "test_all", - ] + ], }, "tyol": { "test_splits": [ "test_all", - ] + ], }, } @@ -103,7 +105,9 @@ def main(): parser = argparse.ArgumentParser("Megapose download utility") parser.add_argument("--bop_dataset", default="", type=str, choices=BOP_DS_NAMES) parser.add_argument("--bop_src", default="bop", type=str, choices=["bop", "gdrive"]) - parser.add_argument("--bop_extra_files", default="", type=str, choices=["ycbv", "tless"]) + parser.add_argument( + "--bop_extra_files", default="", type=str, choices=["ycbv", "tless"], + ) parser.add_argument("--model", default="", type=str) parser.add_argument("--urdf_models", default="", type=str) parser.add_argument("--ycbv_compat_models", action="store_true") @@ -126,7 +130,8 @@ def main(): if args.bop_src == "bop": download_bop_original( args.bop_dataset, - args.pbr_training_images and BOP_DATASETS[args.bop_dataset].get("has_pbr", True), + args.pbr_training_images + and BOP_DATASETS[args.bop_dataset].get("has_pbr", True), args.train_splits, ) elif args.bop_src == "gdrive": @@ -135,30 +140,37 @@ def main(): if args.bop_extra_files: if args.bop_extra_files == "tless": # https://github.com/kirumang/Pix2Pose#download-pre-trained-weights - gdrive_download(f"bop_datasets/tless/all_target_tless.json", BOP_DS_DIR / "tless") + gdrive_download( + "bop_datasets/tless/all_target_tless.json", BOP_DS_DIR / "tless", + ) elif args.bop_extra_files == "ycbv": # Friendly names used with YCB-Video - gdrive_download(f"bop_datasets/ycbv/ycbv_friendly_names.txt", BOP_DS_DIR / "ycbv") + gdrive_download( + "bop_datasets/ycbv/ycbv_friendly_names.txt", BOP_DS_DIR / "ycbv", + ) # Offsets between YCB-Video and BOP (extracted from BOP readme) - gdrive_download(f"bop_datasets/ycbv/offsets.txt", BOP_DS_DIR / "ycbv") + gdrive_download("bop_datasets/ycbv/offsets.txt", BOP_DS_DIR / "ycbv") # Evaluation models for YCB-Video (used by other works) - gdrive_download(f"bop_datasets/ycbv/models_original", BOP_DS_DIR / "ycbv") + gdrive_download("bop_datasets/ycbv/models_original", BOP_DS_DIR / "ycbv") # Keyframe definition - gdrive_download(f"bop_datasets/ycbv/keyframe.txt", BOP_DS_DIR / "ycbv") + gdrive_download("bop_datasets/ycbv/keyframe.txt", BOP_DS_DIR / "ycbv") if args.urdf_models: gdrive_download(f"urdfs/{args.urdf_models}", LOCAL_DATA_DIR / "urdfs") if args.ycbv_compat_models: - gdrive_download(f"bop_datasets/ycbv/models_bop-compat", BOP_DS_DIR / "ycbv") - gdrive_download(f"bop_datasets/ycbv/models_bop-compat_eval", BOP_DS_DIR / "ycbv") + gdrive_download("bop_datasets/ycbv/models_bop-compat", BOP_DS_DIR / "ycbv") + gdrive_download( + "bop_datasets/ycbv/models_bop-compat_eval", BOP_DS_DIR / "ycbv", + ) if args.model: gdrive_download(f"experiments/{args.model}", LOCAL_DATA_DIR / "experiments") if args.detections: gdrive_download( - f"saved_detections/{args.detections}.pkl", LOCAL_DATA_DIR / "saved_detections" + f"saved_detections/{args.detections}.pkl", 
+ LOCAL_DATA_DIR / "saved_detections", ) if args.result_id: @@ -166,23 +178,27 @@ def main(): if args.bop_result_id: csv_name = args.bop_result_id + ".csv" - gdrive_download(f"bop_predictions/{csv_name}", LOCAL_DATA_DIR / "bop_predictions") gdrive_download( - f"bop_eval_outputs/{args.bop_result_id}", LOCAL_DATA_DIR / "bop_predictions" + f"bop_predictions/{csv_name}", LOCAL_DATA_DIR / "bop_predictions", + ) + gdrive_download( + f"bop_eval_outputs/{args.bop_result_id}", LOCAL_DATA_DIR / "bop_predictions", ) if args.texture_dataset: gdrive_download("zip_files/textures.zip", DOWNLOAD_DIR) logger.info("Extracting textures ...") zipfile.ZipFile(DOWNLOAD_DIR / "textures.zip").extractall( - LOCAL_DATA_DIR / "texture_datasets" + LOCAL_DATA_DIR / "texture_datasets", ) if args.synt_dataset: zip_name = f"{args.synt_dataset}.zip" gdrive_download(f"zip_files/{zip_name}", DOWNLOAD_DIR) logger.info("Extracting textures ...") - zipfile.ZipFile(DOWNLOAD_DIR / zip_name).extractall(LOCAL_DATA_DIR / "synt_datasets") + zipfile.ZipFile(DOWNLOAD_DIR / zip_name).extractall( + LOCAL_DATA_DIR / "synt_datasets", + ) if args.all_bop20_models: for model_dict in ( @@ -218,7 +234,7 @@ def main(): def run_rclone(cmd, args, flags): - rclone_cmd = ["rclone", cmd] + args + flags + ["--config", str(RCLONE_CFG_PATH)] + rclone_cmd = ["rclone", cmd, *args, *flags] + ["--config", str(RCLONE_CFG_PATH)] logger.debug(" ".join(rclone_cmd)) subprocess.run(rclone_cmd) @@ -243,7 +259,9 @@ def download_bop_original(ds_name, download_pbr, download_train): if download_train: suffixes += BOP_DATASETS[ds_name].get("train_splits", []) for suffix in suffixes: - wget_download_and_extract(BOP_SRC + f"{ds_name}_{suffix}.zip", BOP_DS_DIR / ds_name) + wget_download_and_extract( + BOP_SRC + f"{ds_name}_{suffix}.zip", BOP_DS_DIR / ds_name, + ) def download_bop_gdrive(ds_name): diff --git a/happypose/pose_estimators/megapose/src/megapose/scripts/generate_shapenet_pbr.py b/happypose/pose_estimators/megapose/src/megapose/scripts/generate_shapenet_pbr.py index a4da96f6..37cfae4d 100644 --- a/happypose/pose_estimators/megapose/src/megapose/scripts/generate_shapenet_pbr.py +++ b/happypose/pose_estimators/megapose/src/megapose/scripts/generate_shapenet_pbr.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -20,14 +19,14 @@ import os import shutil import subprocess -from copy import deepcopy from pathlib import Path -from re import I # Third Party import numpy as np import torch.distributed as dist -import yaml +from bop_toolkit_lib.dataset.convert_scenewise_to_imagewise import ( + convert_scene_to_imagewise, +) from colorama import Fore, Style from omegaconf import OmegaConf from tqdm import tqdm @@ -36,7 +35,6 @@ from happypose.pose_estimators.megapose.src.megapose.config import ( BLENDER_INSTALL_DIR, BLENDERPROC_DIR, - BOP_TOOLKIT_DIR, GSO_DIR, GSO_NORMALIZED_DIR, GSO_ORIG_DIR, @@ -49,7 +47,6 @@ # from happypose.toolbox.datasets.bop import BOPDataset from happypose.toolbox.datasets.gso_dataset import ( - GoogleScannedObjectDataset, make_gso_infos, ) @@ -58,21 +55,18 @@ ShapeNetObjectDataset, make_shapenet_infos, ) -from happypose.toolbox.datasets.web_scene_dataset import write_scene_ds_as_wds from happypose.toolbox.utils.distributed import ( get_rank, - get_tmp_dir, init_distributed_mode, ) from happypose.toolbox.utils.logging import get_logger -from bop_toolkit_lib.dataset.convert_scenewise_to_imagewise import convert_scene_to_imagewise logger = get_logger(__name__) CC_TEXTURE_FOLDER = str(LOCAL_DATA_DIR / "cctextures") VERBOSE_KWARGS = { - True: dict(stdout=None, stderr=None), - False: dict(stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL), + True: {"stdout": None, "stderr": None}, + False: {"stdout": subprocess.DEVNULL, "stderr": subprocess.DEVNULL}, } SHAPENET_ORIG_DIR = SHAPENET_DIR / "models_orig" SHAPENET_SCALED_DIR = SHAPENET_DIR / "models_bop-renderer_scale=0.1" @@ -105,7 +99,7 @@ def make_initializer(output_dir): "config": { "global": { "output_dir": str(output_dir), - } + }, }, } @@ -155,7 +149,7 @@ def make_box_scene(used_assets=[]): "location": [0, 0, 10], "scale": [3, 3, 1], }, - ] + ], }, }, { @@ -171,7 +165,12 @@ def make_box_scene(used_assets=[]): "min": [0.5, 0.5, 0.5, 1.0], "max": [1.0, 1.0, 1.0, 1.0], }, - "strength": {"provider": "sampler.Value", "type": "float", "min": 3, "max": 6}, + "strength": { + "provider": "sampler.Value", + "type": "float", + "min": 3, + "max": 6, + }, }, }, }, @@ -185,7 +184,10 @@ def make_box_scene(used_assets=[]): { "module": "manipulators.EntityManipulator", "config": { - "selector": {"provider": "getter.Entity", "conditions": {"name": "ground_plane.*"}}, + "selector": { + "provider": "getter.Entity", + "conditions": {"name": "ground_plane.*"}, + }, "mode": "once_for_all", "cf_randomize_materials": { "randomization_level": 1, @@ -200,7 +202,10 @@ def make_box_scene(used_assets=[]): { "module": "manipulators.EntityManipulator", "config": { - "selector": {"provider": "getter.Entity", "conditions": {"name": ".*plane.*"}}, + "selector": { + "provider": "getter.Entity", + "conditions": {"name": ".*plane.*"}, + }, "cp_physics": False, "cp_physics_collision_shape": "BOX", "cp_category_id": 333, @@ -309,7 +314,10 @@ def make_object_pose_sampler(): object_pose_sampler = { "module": "object.ObjectPoseSampler", "config": { - "objects_to_sample": {"provider": "getter.Entity", "conditions": {"cp_physics": True}}, + "objects_to_sample": { + "provider": "getter.Entity", + "conditions": {"cp_physics": True}, + }, "pos_sampler": { "provider": "sampler.Uniform3d", "min": { @@ -351,8 +359,8 @@ def make_light_sampler(radius_min=1, radius_max=1.5, energy=100): }, "type": "POINT", "energy": 100, - } - ] + }, + ], }, } return light_sampler @@ -430,7 +438,7 @@ def make_camera_sampler(cam_intrinsics, num_samples=25, radius_min=0.4, radius_m "max": 3.14159, 
}, }, - } + }, ], }, } @@ -452,7 +460,9 @@ def make_writer(depth_scale=0.1, ignore_dist_thresh=5.0): "append_to_existing_output": False, "depth_scale": depth_scale, "ignore_dist_thres": ignore_dist_thresh, - "postprocessing_modules": {"distance": [{"module": "postprocessing.Dist2Depth"}]}, + "postprocessing_modules": { + "distance": [{"module": "postprocessing.Dist2Depth"}], + }, }, } @@ -470,9 +480,9 @@ def make_script(output_dir, objects, textures, cfg, seed): [fx, 0, cx], [0, fy, cy], [0, 0, 1], - ] + ], ).tolist() - intrinsics = dict(cam_K=K, resolution_x=w, resolution_y=h) + intrinsics = {"cam_K": K, "resolution_x": w, "resolution_y": h} modules = [ make_initializer(output_dir), @@ -489,7 +499,7 @@ def make_script(output_dir, objects, textures, cfg, seed): ) elif obj["category_id"].startswith("gso"): modules += make_gso_loader( - obj_id=obj["obj_id"], scale=obj["scale"], category_id=obj["category_id"] + obj_id=obj["obj_id"], scale=obj["scale"], category_id=obj["category_id"], ) else: raise ValueError(obj) @@ -498,7 +508,9 @@ def make_script(output_dir, objects, textures, cfg, seed): make_material_randomization(), make_object_pose_sampler(), make_physics_positioning(), - make_light_sampler(radius_min=cfg.light_radius_min, radius_max=cfg.light_radius_max), + make_light_sampler( + radius_min=cfg.light_radius_min, radius_max=cfg.light_radius_max, + ), make_camera_sampler( cam_intrinsics=intrinsics, num_samples=cfg.camera_num_samples_per_chunk, @@ -523,7 +535,9 @@ def run_script(script, script_path, verbose=True): env["BLENDER_PROC_RANDOM_SEED"] = str(seed) run_path = BLENDERPROC_DIR / "run.py" subprocess.run( - [str(PYTHON_BIN_PATH), str(run_path), str(script_path)], env=env, **VERBOSE_KWARGS[verbose] + [str(PYTHON_BIN_PATH), str(run_path), str(script_path)], + env=env, + **VERBOSE_KWARGS[verbose], ) return @@ -531,7 +545,9 @@ def run_script(script, script_path, verbose=True): @MEMORY.cache def load_textures_names(): texture_names = [ - p.name for p in Path(CC_TEXTURE_FOLDER).iterdir() if len(list(p.glob("*2K_Color.jpg"))) > 0 + p.name + for p in Path(CC_TEXTURE_FOLDER).iterdir() + if len(list(p.glob("*2K_Color.jpg"))) > 0 ] return texture_names @@ -546,26 +562,26 @@ def make_one_scene_script(cfg, output_dir, seed): if len(synset.parents) == 0 and len(synset.models_descendants) > 0 ] objects = [] - for n in range(cfg.n_objects): + for _n in range(cfg.n_objects): synset = np_random.choice(main_synsets) source_id = np_random.choice(synset.models_descendants) - obj = dict( - synset_id=synset.synset_id, - source_id=source_id, - category_id=f"shapenet_{synset.synset_id}_{source_id}", - scale=[cfg.scale, cfg.scale, cfg.scale], - ) + obj = { + "synset_id": synset.synset_id, + "source_id": source_id, + "category_id": f"shapenet_{synset.synset_id}_{source_id}", + "scale": [cfg.scale, cfg.scale, cfg.scale], + } objects.append(obj) elif cfg.model_type == "gso": object_ids = make_gso_infos(GSO_NORMALIZED_DIR) objects = [] - for n in range(cfg.n_objects): + for _n in range(cfg.n_objects): obj_id = np_random.choice(object_ids) - obj = dict( - obj_id=obj_id, - category_id=f"gso_{obj_id}", - scale=[cfg.scale, cfg.scale, cfg.scale], - ) + obj = { + "obj_id": obj_id, + "category_id": f"gso_{obj_id}", + "scale": [cfg.scale, cfg.scale, cfg.scale], + } objects.append(obj) else: raise ValueError(cfg.model_type) @@ -574,12 +590,13 @@ def make_one_scene_script(cfg, output_dir, seed): this_scene_floor_textures = [np_random.choice(textures)] script = make_script(output_dir, objects, this_scene_floor_textures, cfg, 
seed) script["seed"] = seed - scene_infos = dict(objects=objects, floor_textures=this_scene_floor_textures, seed=seed) + scene_infos = { + "objects": objects, "floor_textures": this_scene_floor_textures, "seed": seed, + } return scene_infos, script def make_masks_and_gt_infos(chunk_dir, is_shapenet=True, verbose=True): - bop_toolkit_dir = BOP_TOOLKIT_DIR env = os.environ.copy() # env["PYTHONPATH"] = env.get("PYTHONPATH", "") + ":" + str(bop_toolkit_dir) # env["COSYPOSE_DIR"] = str(PROJECT_DIR) @@ -630,7 +647,7 @@ def make_dataset_cfg(cfg): cfg.n_scenes = 2 - cfg.hardware = dict() + cfg.hardware = {} cfg.hardware.world_size = int(os.environ.get("WORLD_SIZE", 1)) cfg.hardware.rank = int(os.environ.get("RANK", 0)) cfg.hardware.n_proc_per_gpu = 3 @@ -667,14 +684,17 @@ def make_dataset_cfg(cfg): if cfg.resume_dataset is not None: logger.info(f"{Fore.RED}Resuming {cfg.resume_dataset} {Style.RESET_ALL}") resume_cfg = OmegaConf.load( - LOCAL_DATA_DIR / "blender_pbr_datasets" / cfg.resume_dataset / "config.yaml" + LOCAL_DATA_DIR / "blender_pbr_datasets" / cfg.resume_dataset / "config.yaml", ) resume_cfg = OmegaConf.merge( - resume_cfg, OmegaConf.masked_copy(cfg, ["resume_dataset", "hardware", "verbose"]) + resume_cfg, + OmegaConf.masked_copy(cfg, ["resume_dataset", "hardware", "verbose"]), ) cfg = resume_cfg else: - logger.info(f"{Fore.GREEN}Recording dataset: {cfg.dataset_id} {Style.RESET_ALL}") + logger.info( + f"{Fore.GREEN}Recording dataset: {cfg.dataset_id} {Style.RESET_ALL}", + ) if cfg.debug: cfg.camera_num_samples_per_chunk = 5 @@ -703,13 +723,13 @@ def record_chunk(cfg, ds_dir, chunk_id): # Generate data with Blender run_script(script, script_path, verbose=cfg.verbose) - chunk_info = dict( - chunk_id=chunk_id, - script_path=str(script_path), - output_dir=str(output_dir), - scene_infos=scene_infos, - scale=cfg["scale"], - ) + chunk_info = { + "chunk_id": chunk_id, + "script_path": str(script_path), + "output_dir": str(output_dir), + "scene_infos": scene_infos, + "scale": cfg["scale"], + } gt_path = output_dir / f"bop_data/train_pbr/{0:06d}/scene_gt.json" gt = json.loads(gt_path.read_text()) for im_id, im_gt in gt.items(): @@ -721,13 +741,15 @@ def record_chunk(cfg, ds_dir, chunk_id): # Generate masks and gt infos success = make_masks_and_gt_infos( - output_dir, verbose=cfg.verbose, is_shapenet=cfg.model_type == "shapenet" + output_dir, verbose=cfg.verbose, is_shapenet=cfg.model_type == "shapenet", ) # Third Party if success: chunk_scene_dir = output_dir / f"bop_data/train_pbr/{0:06d}" convert_scene_to_imagewise( - chunk_scene_dir, ds_dir / "train_pbr_v2format", f"{chunk_id:06d}_" + "{image_id:06d}" + chunk_scene_dir, + ds_dir / "train_pbr_v2format", + f"{chunk_id:06d}_" + "{image_id:06d}", ) shutil.rmtree(output_dir) return @@ -783,7 +805,9 @@ def record_chunk(cfg, ds_dir, chunk_id): def find_chunks_to_record(cfg, chunk_ids): - this_chunk_ids = np.array_split(chunk_ids, cfg.hardware.world_size)[cfg.hardware.rank].tolist() + this_chunk_ids = np.array_split(chunk_ids, cfg.hardware.world_size)[ + cfg.hardware.rank + ].tolist() chunk_ids = [] for chunk_id in this_chunk_ids: if not (Path(cfg.ds_dir) / f"train_pbr/{chunk_id:06d}").exists(): @@ -793,15 +817,15 @@ def find_chunks_to_record(cfg, chunk_ids): def main(cli_cfg): cfg = OmegaConf.create( - dict( - dataset_id="test", - resume_dataset=None, - debug=False, - verbose=False, - overwrite=False, - few=False, - chunk_ids=None, - ) + { + "dataset_id": "test", + "resume_dataset": None, + "debug": False, + "verbose": False, + "overwrite": False, + 
"few": False, + "chunk_ids": None, + }, ) if cli_cfg is not None: cfg = OmegaConf.merge( @@ -824,7 +848,8 @@ def main(cli_cfg): elif cfg.overwrite: shutil.rmtree(cfg.ds_dir) else: - raise ValueError("There is already a dataset with this name") + msg = "There is already a dataset with this name" + raise ValueError(msg) if cfg.resume_dataset is None: ds_dir.mkdir(exist_ok=cfg.chunk_ids is not None) diff --git a/happypose/pose_estimators/megapose/src/megapose/scripts/make_gso_meshes.py b/happypose/pose_estimators/megapose/src/megapose/scripts/make_gso_meshes.py index 2bbc6c5c..743ce674 100644 --- a/happypose/pose_estimators/megapose/src/megapose/scripts/make_gso_meshes.py +++ b/happypose/pose_estimators/megapose/src/megapose/scripts/make_gso_meshes.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,14 +14,10 @@ """ - # Standard Library import shutil -import time from collections import defaultdict -from concurrent.futures import ProcessPoolExecutor as Pool from copy import deepcopy -from multiprocessing import Process, Queue from pathlib import Path # Third Party @@ -32,7 +27,6 @@ # MegaPose from happypose.pose_estimators.megapose.src.megapose.config import ( - GSO_DIR, GSO_NORMALIZED_DIR, GSO_ORIG_DIR, GSO_POINTCLOUD_DIR, @@ -92,21 +86,20 @@ def rescale_mesh(mesh_path): vertices[:, 2] -= (zmax + zmin) / 2.0 vertices[:, :3] /= scale - out = elements["mtllib"][0] - + elements["mtllib"][0] - faces = elements["faces"] + elements["faces"] text = elements["mtllib"][0] text += "\n\n" for vertex_line in vertices.tolist(): - line = ["v"] + list(map(str, vertex_line)) + line = ["v", *list(map(str, vertex_line))] text += " ".join(line) text += "\n" text += "\n" for normal_line in normals.tolist(): - line = ["vn"] + list(map(str, normal_line)) + line = ["vn", *list(map(str, normal_line))] text += " ".join(line) text += "\n" @@ -140,7 +133,10 @@ def make_ply_scaled(obj_id, scale=SCALE): new_mesh_dir = Path(GSO_SCALED_DIR) / obj_id / "meshes" new_mesh_path = new_mesh_dir / "model.ply" mesh = trimesh.load( - str(mesh_dir / "model.obj"), skip_materials=True, process=False, maintain_order=True + str(mesh_dir / "model.obj"), + skip_materials=True, + process=False, + maintain_order=True, ) mesh = as_mesh(mesh) mesh.apply_scale(scale) @@ -155,7 +151,10 @@ def make_obj_pc(obj_id): new_mesh_dir = Path(GSO_POINTCLOUD_DIR) / obj_id / "meshes" new_mesh_path = new_mesh_dir / "model.obj" mesh = trimesh.load( - str(mesh_dir / "model.obj"), skip_materials=True, process=False, maintain_order=True + str(mesh_dir / "model.obj"), + skip_materials=True, + process=False, + maintain_order=True, ) mesh = as_mesh(mesh) points = trimesh.sample.sample_surface(mesh, n_points)[0] @@ -167,7 +166,7 @@ def make_obj_pc(obj_id): if __name__ == "__main__": trimesh.util.log.setLevel("ERROR") obj_dataset = make_object_dataset("gso.orig") - for n, obj in tqdm(enumerate(obj_dataset.objects)): + for _n, obj in tqdm(enumerate(obj_dataset.objects)): obj_id = obj["label"].split("gso_")[1] make_obj_normalized(obj_id) make_ply_scaled(obj_id) diff --git a/happypose/pose_estimators/megapose/src/megapose/scripts/make_gso_subsets.py b/happypose/pose_estimators/megapose/src/megapose/scripts/make_gso_subsets.py index cb0a98c4..4ed6dfc3 100644 --- 
a/happypose/pose_estimators/megapose/src/megapose/scripts/make_gso_subsets.py +++ b/happypose/pose_estimators/megapose/src/megapose/scripts/make_gso_subsets.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,19 +14,15 @@ """ - # Standard Library import json -from pathlib import Path # Third Party import numpy as np import pandas as pd -from tqdm import tqdm # MegaPose from happypose.pose_estimators.megapose.src.megapose.config import GSO_DIR -from happypose.toolbox.datasets.datasets_cfg import make_object_dataset def get_labels_split(statistics, max_model_mem_kb, max_tot_mem_kb): @@ -35,7 +30,9 @@ def get_labels_split(statistics, max_model_mem_kb, max_tot_mem_kb): print(len(statistics), np.nansum(statistics["tot_mem_kb"]) / 1e6) statistics = statistics.iloc[np.where(np.isfinite(statistics["tot_mem_kb"]))[0]] print(len(statistics), np.nansum(statistics["tot_mem_kb"]) / 1e6) - statistics = statistics.iloc[np.where(statistics["tot_mem_kb"] <= max_model_mem_kb)[0]] + statistics = statistics.iloc[ + np.where(statistics["tot_mem_kb"] <= max_model_mem_kb)[0] + ] print(len(statistics), np.nansum(statistics["tot_mem_kb"]) / 1e6) np_random = np.random.RandomState(0) @@ -63,76 +60,76 @@ def get_labels_split_max_objects(statistics, max_num_objects): statistics = pd.read_json(ds_stats_path) splits = [ - dict( - name="gso", - max_model_mem_kb=10e3, - max_num_objects=1000, - ), - dict( - name="shapenet_10mb_5k", - max_model_mem_kb=10e3, - max_num_objects=5000, - ), - dict( - name="shapenet_10mb_10k", - max_model_mem_kb=10e3, - max_num_objects=10000, - ), - dict( - name="shapenet_10mb_15k", - max_model_mem_kb=10e3, - max_num_objects=15000, - ), - dict( - name="shapenet_100mb_200gb", - max_model_mem_kb=100e3, - max_tot_mem_kb=200e6, - ), - dict( - name="shapenet_10mb_200gb", - max_model_mem_kb=10e3, - max_tot_mem_kb=200e6, - ), - dict( - name="shapenet_10mb_50gb", - max_model_mem_kb=10e3, - max_tot_mem_kb=50e6, - ), - dict( - name="shapenet_20mb_50gb", - max_model_mem_kb=20e3, - max_tot_mem_kb=50e6, - ), - dict( - name="shapenet_10mb_100gb", - max_model_mem_kb=10e3, - max_tot_mem_kb=100e6, - ), - dict( - name="shapenet_10mb_32gb", - max_model_mem_kb=10e3, - max_tot_mem_kb=32e6, - ), - dict( - name="shapenet_2mb_32gb", - max_model_mem_kb=2e3, - max_tot_mem_kb=32e6, - ), - dict( - name="shapenet_10mb_8gb", - max_model_mem_kb=10e3, - max_tot_mem_kb=8e6, - ), - dict( - name="shapenet_10mb_1gb", - max_model_mem_kb=10e3, - max_tot_mem_kb=1e6, - ), - dict( - name="shapenet_2mb_1gb", - max_model_mem_kb=2e3, - max_tot_mem_kb=1e6, - ), + { + "name": "gso", + "max_model_mem_kb": 10e3, + "max_num_objects": 1000, + }, + { + "name": "shapenet_10mb_5k", + "max_model_mem_kb": 10e3, + "max_num_objects": 5000, + }, + { + "name": "shapenet_10mb_10k", + "max_model_mem_kb": 10e3, + "max_num_objects": 10000, + }, + { + "name": "shapenet_10mb_15k", + "max_model_mem_kb": 10e3, + "max_num_objects": 15000, + }, + { + "name": "shapenet_100mb_200gb", + "max_model_mem_kb": 100e3, + "max_tot_mem_kb": 200e6, + }, + { + "name": "shapenet_10mb_200gb", + "max_model_mem_kb": 10e3, + "max_tot_mem_kb": 200e6, + }, + { + "name": "shapenet_10mb_50gb", + "max_model_mem_kb": 10e3, + "max_tot_mem_kb": 50e6, + }, + { + "name": "shapenet_20mb_50gb", + "max_model_mem_kb": 20e3, + 
"max_tot_mem_kb": 50e6, + }, + { + "name": "shapenet_10mb_100gb", + "max_model_mem_kb": 10e3, + "max_tot_mem_kb": 100e6, + }, + { + "name": "shapenet_10mb_32gb", + "max_model_mem_kb": 10e3, + "max_tot_mem_kb": 32e6, + }, + { + "name": "shapenet_2mb_32gb", + "max_model_mem_kb": 2e3, + "max_tot_mem_kb": 32e6, + }, + { + "name": "shapenet_10mb_8gb", + "max_model_mem_kb": 10e3, + "max_tot_mem_kb": 8e6, + }, + { + "name": "shapenet_10mb_1gb", + "max_model_mem_kb": 10e3, + "max_tot_mem_kb": 1e6, + }, + { + "name": "shapenet_2mb_1gb", + "max_model_mem_kb": 2e3, + "max_tot_mem_kb": 1e6, + }, ] for split in splits: diff --git a/happypose/pose_estimators/megapose/src/megapose/scripts/make_shapenet_panda3d.py b/happypose/pose_estimators/megapose/src/megapose/scripts/make_shapenet_panda3d.py index f96df5a4..960e1361 100644 --- a/happypose/pose_estimators/megapose/src/megapose/scripts/make_shapenet_panda3d.py +++ b/happypose/pose_estimators/megapose/src/megapose/scripts/make_shapenet_panda3d.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,12 +14,10 @@ """ - # Standard Library import shutil import subprocess import time -from concurrent.futures import ProcessPoolExecutor as Pool from multiprocessing import Process from pathlib import Path @@ -46,12 +43,12 @@ def fix_normals(obj_path): is_block = False def make_new_block(): - return dict( - g="", - usemtl="", - f=[], - l=[], - ) + return { + "g": "", + "usemtl": "", + "f": [], + "l": [], + } for line in lines: if line.startswith("mtllib"): @@ -105,7 +102,9 @@ def make_new_block(): for line_f in block["f"]: face = line_f[3:].split(" ") face = [f.split("/") for f in face] - face_flipped = " ".join([f"{x[0]}/{x[1]}/{int(x[2])+n_vn_orig}" for x in face]) + face_flipped = " ".join( + [f"{x[0]}/{x[1]}/{int(x[2])+n_vn_orig}" for x in face], + ) f_flipped.append(f"f {face_flipped}") block["f"] += f_flipped @@ -143,9 +142,9 @@ def convert_obj_to_gltf(obj_path): print(n, obj_path) obj_path = Path(obj_path) new_obj = fix_normals(obj_path) - binormals_obj_path = Path((str(obj_path.with_suffix("")) + "_binormals.obj")) + binormals_obj_path = Path(str(obj_path.with_suffix("")) + "_binormals.obj") binormals_obj_path.write_text(new_obj) - proc = subprocess.run(["obj2gltf", "-i", str(binormals_obj_path)]) + subprocess.run(["obj2gltf", "-i", str(binormals_obj_path)]) gltf_path = binormals_obj_path.with_suffix(".gltf") p = Process(target=convert_gltf, args=(gltf_path,)) p.start() @@ -153,7 +152,9 @@ def convert_obj_to_gltf(obj_path): bam_path = gltf_path.with_suffix(".bam") bam_exists = bam_path.exists() if bam_exists: - new_models_dir = Path(str(obj_path.parent).replace("models_orig", "models_panda3d_bam")) + new_models_dir = Path( + str(obj_path.parent).replace("models_orig", "models_panda3d_bam"), + ) Path(new_models_dir).mkdir(exist_ok=True, parents=True) img_dir = obj_path.parent.parent / "images" new_img_dir = new_models_dir diff --git a/happypose/pose_estimators/megapose/src/megapose/scripts/make_shapenet_ply_scaled.py b/happypose/pose_estimators/megapose/src/megapose/scripts/make_shapenet_ply_scaled.py index 17793605..b8387836 100644 --- a/happypose/pose_estimators/megapose/src/megapose/scripts/make_shapenet_ply_scaled.py +++ 
b/happypose/pose_estimators/megapose/src/megapose/scripts/make_shapenet_ply_scaled.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,12 +14,10 @@ """ - # Standard Library import shutil import time -from concurrent.futures import ProcessPoolExecutor as Pool -from multiprocessing import Process, Queue +from multiprocessing import Process from pathlib import Path # Third Party @@ -40,8 +37,12 @@ def make_ply_scaled(mesh_path, scale=SCALE): n, mesh_path = mesh_path mesh_path = Path(mesh_path) new_mesh_path = str(mesh_path.with_suffix("")) + "_scaled.ply" - new_mesh_path = new_mesh_path.replace("models_orig", MODELS_DIR_TEMPLATE.format(scale=scale)) - mesh = trimesh.load(str(mesh_path), skip_materials=True, process=False, maintain_order=True) + new_mesh_path = new_mesh_path.replace( + "models_orig", MODELS_DIR_TEMPLATE.format(scale=scale), + ) + mesh = trimesh.load( + str(mesh_path), skip_materials=True, process=False, maintain_order=True, + ) mesh = as_mesh(mesh) mesh.apply_scale(scale) mesh.apply_scale(1000) @@ -73,5 +74,4 @@ def make_ply_scaled_(mesh_path): for mesh_path in tqdm(mesh_paths): make_ply_scaled_(mesh_path) - time.sleep(60) diff --git a/happypose/pose_estimators/megapose/src/megapose/scripts/make_shapenet_pointclouds.py b/happypose/pose_estimators/megapose/src/megapose/scripts/make_shapenet_pointclouds.py index 31dd1273..fa2ae450 100644 --- a/happypose/pose_estimators/megapose/src/megapose/scripts/make_shapenet_pointclouds.py +++ b/happypose/pose_estimators/megapose/src/megapose/scripts/make_shapenet_pointclouds.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -15,12 +14,10 @@ """ - # Standard Library import shutil import time -from concurrent.futures import ProcessPoolExecutor as Pool -from multiprocessing import Process, Queue +from multiprocessing import Process from pathlib import Path # Third Party @@ -45,7 +42,11 @@ def make_obj_pc(mesh_path): new_mesh_path = new_mesh_path.replace("models_orig", SPLIT_NAME) mesh = trimesh.load( - mesh_path, group_material=False, process=False, skip_materials=True, maintain_order=True + mesh_path, + group_material=False, + process=False, + skip_materials=True, + maintain_order=True, ) mesh = as_mesh(mesh) points = trimesh.sample.sample_surface(mesh, n_points)[0] @@ -68,7 +69,8 @@ def make_obj_pc_(mesh_path): shutil.rmtree(TARGETS_MODEL_DIR) TARGETS_MODEL_DIR.mkdir() shutil.copy( - (SHAPENET_DIR / "models_orig" / "taxonomy.json"), TARGETS_MODEL_DIR / "taxonomy.json" + (SHAPENET_DIR / "models_orig" / "taxonomy.json"), + TARGETS_MODEL_DIR / "taxonomy.json", ) n_procs = 20 mesh_paths = [] diff --git a/happypose/pose_estimators/megapose/src/megapose/scripts/make_shapenet_statistics.py b/happypose/pose_estimators/megapose/src/megapose/scripts/make_shapenet_statistics.py index 71d304e8..547a3d4e 100644 --- a/happypose/pose_estimators/megapose/src/megapose/scripts/make_shapenet_statistics.py +++ b/happypose/pose_estimators/megapose/src/megapose/scripts/make_shapenet_statistics.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,13 +14,11 @@ """ - # Standard Library import io import json -from concurrent.futures import ProcessPoolExecutor as Pool from contextlib import redirect_stdout -from multiprocessing import Process, Queue +from multiprocessing import Process from pathlib import Path # Third Party @@ -30,8 +27,10 @@ # MegaPose from happypose.pose_estimators.megapose.src.megapose.config import SHAPENET_DIR +from happypose.pose_estimators.megapose.src.megapose.panda3d_renderer.panda3d_scene_renderer import ( + App, +) from happypose.toolbox.datasets.datasets_cfg import make_object_dataset -from happypose.pose_estimators.megapose.src.megapose.panda3d_renderer.panda3d_scene_renderer import App def measure_memory(gltf_path): @@ -60,9 +59,9 @@ def measure_memory(gltf_path): idx = [n for n, w in enumerate(l_) if w == "minimum"][0] mems.append(float(l_[idx + 1])) tot_mem_kb = sum(mems) - stats = dict( - tot_mem_kb=tot_mem_kb, - ) + stats = { + "tot_mem_kb": tot_mem_kb, + } (gltf_path.parent / "stats.json").write_text(json.dumps(stats)) return @@ -76,15 +75,15 @@ def measure_memory_(gltf_path): if __name__ == "__main__": panda3d_obj_dataset = make_object_dataset("shapenet.panda3d_bam") panda3d_map = {obj["label"]: obj for obj in panda3d_obj_dataset.objects} - panda3d_objects = set(list(panda3d_map.keys())) + panda3d_objects = set(panda3d_map.keys()) pc_obj_dataset = make_object_dataset("shapenet.pointcloud") pc_map = {obj["label"]: obj for obj in pc_obj_dataset.objects} - pc_objects = set(list(pc_map.keys())) + pc_objects = set(pc_map.keys()) vanilla_obj_dataset = make_object_dataset("shapenet.orig") - vanilla_objects = set([obj["label"] for obj in vanilla_obj_dataset.objects]) + vanilla_objects = {obj["label"] for obj in vanilla_obj_dataset.objects} stats = [] - for n, obj in enumerate(tqdm(vanilla_obj_dataset.objects)): - stats_ = dict() + for 
_n, obj in enumerate(tqdm(vanilla_obj_dataset.objects)): + stats_ = {} label = obj["label"] stats_["label"] = label stats_["has_pointcloud"] = label in pc_objects @@ -92,7 +91,11 @@ def measure_memory_(gltf_path): if stats_["has_panda3d"] and stats_["has_pointcloud"]: panda3d_obj_dir = Path(panda3d_map[label]["mesh_path"]).parent tot_mem_kb = sum( - [f.stat().st_size / 1024 for f in panda3d_obj_dir.iterdir() if f.is_file()] + [ + f.stat().st_size / 1024 + for f in panda3d_obj_dir.iterdir() + if f.is_file() + ], ) else: tot_mem_kb = np.nan diff --git a/happypose/pose_estimators/megapose/src/megapose/scripts/make_shapenet_subsets.py b/happypose/pose_estimators/megapose/src/megapose/scripts/make_shapenet_subsets.py index fe5fc3dc..c7994e78 100644 --- a/happypose/pose_estimators/megapose/src/megapose/scripts/make_shapenet_subsets.py +++ b/happypose/pose_estimators/megapose/src/megapose/scripts/make_shapenet_subsets.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,7 +14,6 @@ """ - # Standard Library import json from pathlib import Path @@ -27,15 +25,13 @@ # MegaPose from happypose.pose_estimators.megapose.src.megapose.config import SHAPENET_DIR -from happypose.toolbox.datasets.datasets_cfg import make_object_dataset def read_all_stats(obj_dataset): - gltf_paths = [] statistics = [] - for n, obj in tqdm(enumerate(obj_dataset.objects)): + for _n, obj in tqdm(enumerate(obj_dataset.objects)): mesh_path = Path(obj["mesh_path"]) - infos = dict() + infos = {} stats_path = mesh_path.parent / "stats.json" pc_path = mesh_path.parent / "model_normalized_pointcloud.obj" infos["label"] = obj["label"] @@ -56,7 +52,9 @@ def get_labels_split(statistics, max_model_mem_kb, max_tot_mem_kb): print(len(statistics), np.nansum(statistics["tot_mem_kb"]) / 1e6) statistics = statistics.iloc[np.where(np.isfinite(statistics["tot_mem_kb"]))[0]] print(len(statistics), np.nansum(statistics["tot_mem_kb"]) / 1e6) - statistics = statistics.iloc[np.where(statistics["tot_mem_kb"] <= max_model_mem_kb)[0]] + statistics = statistics.iloc[ + np.where(statistics["tot_mem_kb"] <= max_model_mem_kb)[0] + ] print(len(statistics), np.nansum(statistics["tot_mem_kb"]) / 1e6) np_random = np.random.RandomState(0) @@ -72,7 +70,9 @@ def get_labels_split_max_objects(statistics, max_model_mem_kb, max_num_objects): print(len(statistics), np.nansum(statistics["tot_mem_kb"]) / 1e6) statistics = statistics.iloc[np.where(np.isfinite(statistics["tot_mem_kb"]))[0]] print(len(statistics), np.nansum(statistics["tot_mem_kb"]) / 1e6) - statistics = statistics.iloc[np.where(statistics["tot_mem_kb"] <= max_model_mem_kb)[0]] + statistics = statistics.iloc[ + np.where(statistics["tot_mem_kb"] <= max_model_mem_kb)[0] + ] print(len(statistics), np.nansum(statistics["tot_mem_kb"]) / 1e6) np_random = np.random.RandomState(0) @@ -90,101 +90,101 @@ def get_labels_split_max_objects(statistics, max_model_mem_kb, max_num_objects): statistics = pd.read_json(ds_stats_path) splits = [ - dict( - name="shapenet_10mb_100", - max_model_mem_kb=10e3, - max_num_objects=100, - ), - dict( - name="shapenet_10mb_1k", - max_model_mem_kb=10e3, - max_num_objects=1000, - ), - dict( - name="shapenet_10mb_2k", - max_model_mem_kb=10e3, - max_num_objects=2500, - ), - dict( - name="shapenet_10mb_5k", - 
max_model_mem_kb=10e3, - max_num_objects=5000, - ), - dict( - name="shapenet_10mb_10k", - max_model_mem_kb=10e3, - max_num_objects=10000, - ), - dict( - name="shapenet_10mb_15k", - max_model_mem_kb=10e3, - max_num_objects=15000, - ), - dict( - name="shapenet_10mb_20k", - max_model_mem_kb=10e3, - max_num_objects=20000, - ), - dict( - name="shapenet_100mb_200gb", - max_model_mem_kb=100e3, - max_tot_mem_kb=200e6, - ), - dict( - name="shapenet_10mb_200gb", - max_model_mem_kb=10e3, - max_tot_mem_kb=200e6, - ), - dict( - name="shapenet_10mb_50gb", - max_model_mem_kb=10e3, - max_tot_mem_kb=50e6, - ), - dict( - name="shapenet_20mb_50gb", - max_model_mem_kb=20e3, - max_tot_mem_kb=50e6, - ), - dict( - name="shapenet_10mb_100gb", - max_model_mem_kb=10e3, - max_tot_mem_kb=100e6, - ), - dict( - name="shapenet_10mb_32gb", - max_model_mem_kb=10e3, - max_tot_mem_kb=32e6, - ), - dict( - name="shapenet_2mb_32gb", - max_model_mem_kb=2e3, - max_tot_mem_kb=32e6, - ), - dict( - name="shapenet_10mb_8gb", - max_model_mem_kb=10e3, - max_tot_mem_kb=8e6, - ), - dict( - name="shapenet_10mb_1gb", - max_model_mem_kb=10e3, - max_tot_mem_kb=1e6, - ), - dict( - name="shapenet_2mb_1gb", - max_model_mem_kb=2e3, - max_tot_mem_kb=1e6, - ), + { + "name": "shapenet_10mb_100", + "max_model_mem_kb": 10e3, + "max_num_objects": 100, + }, + { + "name": "shapenet_10mb_1k", + "max_model_mem_kb": 10e3, + "max_num_objects": 1000, + }, + { + "name": "shapenet_10mb_2k", + "max_model_mem_kb": 10e3, + "max_num_objects": 2500, + }, + { + "name": "shapenet_10mb_5k", + "max_model_mem_kb": 10e3, + "max_num_objects": 5000, + }, + { + "name": "shapenet_10mb_10k", + "max_model_mem_kb": 10e3, + "max_num_objects": 10000, + }, + { + "name": "shapenet_10mb_15k", + "max_model_mem_kb": 10e3, + "max_num_objects": 15000, + }, + { + "name": "shapenet_10mb_20k", + "max_model_mem_kb": 10e3, + "max_num_objects": 20000, + }, + { + "name": "shapenet_100mb_200gb", + "max_model_mem_kb": 100e3, + "max_tot_mem_kb": 200e6, + }, + { + "name": "shapenet_10mb_200gb", + "max_model_mem_kb": 10e3, + "max_tot_mem_kb": 200e6, + }, + { + "name": "shapenet_10mb_50gb", + "max_model_mem_kb": 10e3, + "max_tot_mem_kb": 50e6, + }, + { + "name": "shapenet_20mb_50gb", + "max_model_mem_kb": 20e3, + "max_tot_mem_kb": 50e6, + }, + { + "name": "shapenet_10mb_100gb", + "max_model_mem_kb": 10e3, + "max_tot_mem_kb": 100e6, + }, + { + "name": "shapenet_10mb_32gb", + "max_model_mem_kb": 10e3, + "max_tot_mem_kb": 32e6, + }, + { + "name": "shapenet_2mb_32gb", + "max_model_mem_kb": 2e3, + "max_tot_mem_kb": 32e6, + }, + { + "name": "shapenet_10mb_8gb", + "max_model_mem_kb": 10e3, + "max_tot_mem_kb": 8e6, + }, + { + "name": "shapenet_10mb_1gb", + "max_model_mem_kb": 10e3, + "max_tot_mem_kb": 1e6, + }, + { + "name": "shapenet_2mb_1gb", + "max_model_mem_kb": 2e3, + "max_tot_mem_kb": 1e6, + }, ] for split in splits: if "max_num_objects" in split: labels = get_labels_split_max_objects( - statistics, split["max_model_mem_kb"], split["max_num_objects"] + statistics, split["max_model_mem_kb"], split["max_num_objects"], ) else: labels = get_labels_split( - statistics, split["max_model_mem_kb"], split["max_tot_mem_kb"] + statistics, split["max_model_mem_kb"], split["max_tot_mem_kb"], ) split_path = (ds_dir / "stats" / split["name"]).with_suffix(".json") split_path.write_text(json.dumps(labels)) diff --git a/happypose/pose_estimators/megapose/src/megapose/scripts/run_full_megapose_eval.py b/happypose/pose_estimators/megapose/src/megapose/scripts/run_full_megapose_eval.py index 63baaafe..ed3114e9 100644 --- 
a/happypose/pose_estimators/megapose/src/megapose/scripts/run_full_megapose_eval.py +++ b/happypose/pose_estimators/megapose/src/megapose/scripts/run_full_megapose_eval.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,28 +14,21 @@ """ - # Standard Library import copy import os from pathlib import Path -from typing import Dict, Optional, Tuple +from typing import Dict, Tuple # Third Party from omegaconf import OmegaConf # MegaPose from happypose.pose_estimators.megapose.src.megapose.bop_config import ( - PBR_COARSE, PBR_DETECTORS, - PBR_REFINER, - SYNT_REAL_COARSE, - SYNT_REAL_DETECTORS, - SYNT_REAL_REFINER, ) from happypose.pose_estimators.megapose.src.megapose.config import ( DEBUG_RESULTS_DIR, - EXP_DIR, MODELNET_TEST_CATEGORIES, RESULTS_DIR, ) @@ -46,8 +38,15 @@ FullEvalConfig, HardwareConfig, ) -from happypose.pose_estimators.megapose.src.megapose.evaluation.evaluation import generate_save_key, run_eval -from happypose.toolbox.utils.distributed import get_rank, get_world_size, init_distributed_mode +from happypose.pose_estimators.megapose.src.megapose.evaluation.evaluation import ( + generate_save_key, + run_eval, +) +from happypose.toolbox.utils.distributed import ( + get_rank, + get_world_size, + init_distributed_mode, +) from happypose.toolbox.utils.logging import get_logger, set_logging_level logger = get_logger(__name__) @@ -75,7 +74,9 @@ ] -MODELNET_TEST_DATASETS = [f"modelnet.{category}.test" for category in MODELNET_TEST_CATEGORIES] +MODELNET_TEST_DATASETS = [ + f"modelnet.{category}.test" for category in MODELNET_TEST_CATEGORIES +] def create_eval_cfg( @@ -84,7 +85,6 @@ def create_eval_cfg( coarse_estimation_type: str, ds_name: str, ) -> Tuple[str, EvalConfig]: - cfg = copy.deepcopy(cfg) cfg.inference.detection_type = detection_type @@ -100,7 +100,8 @@ def create_eval_cfg( elif detection_type == "gt": pass else: - raise ValueError(f"Unknown detector type {cfg.detector_type}") + msg = f"Unknown detector type {cfg.detector_type}" + raise ValueError(msg) name = generate_save_key(detection_type, coarse_estimation_type) @@ -108,7 +109,6 @@ def create_eval_cfg( def run_full_eval(cfg: FullEvalConfig) -> None: - bop_eval_cfgs = [] init_distributed_mode() @@ -122,17 +122,17 @@ def run_full_eval(cfg: FullEvalConfig) -> None: # Iterate over each dataset for ds_name in cfg.ds_names: - # create the EvalConfig objects that we will call `run_eval` on - eval_configs: Dict[str, EvalConfig] = dict() - for (detection_type, coarse_estimation_type) in cfg.detection_coarse_types: - name, cfg_ = create_eval_cfg(cfg, detection_type, coarse_estimation_type, ds_name) + eval_configs: Dict[str, EvalConfig] = {} + for detection_type, coarse_estimation_type in cfg.detection_coarse_types: + name, cfg_ = create_eval_cfg( + cfg, detection_type, coarse_estimation_type, ds_name, + ) eval_configs[name] = cfg_ # For each eval_cfg run the evaluation. 
# Note that the results get saved to disk - for save_key, eval_cfg in eval_configs.items(): - + for _save_key, eval_cfg in eval_configs.items(): # Run the inference if not cfg.skip_inference: eval_out = run_eval(eval_cfg) @@ -152,17 +152,16 @@ def run_full_eval(cfg: FullEvalConfig) -> None: } assert Path( - eval_out["results_path"] + eval_out["results_path"], ).is_file(), f"The file {eval_out['results_path']} doesn't exist" # Run the bop eval for each type of prediction if cfg.run_bop_eval and get_rank() == 0: - - bop_eval_keys = set(("refiner/final", "depth_refiner")) + bop_eval_keys = {"refiner/final", "depth_refiner"} bop_eval_keys = bop_eval_keys.intersection(set(eval_out["pred_keys"])) for method in bop_eval_keys: - if not "bop19" in ds_name: + if "bop19" not in ds_name: continue bop_eval_cfg = BOPEvalConfig( diff --git a/happypose/pose_estimators/megapose/src/megapose/scripts/run_inference_on_example.py b/happypose/pose_estimators/megapose/src/megapose/scripts/run_inference_on_example.py index d2a1be58..8f0948b7 100644 --- a/happypose/pose_estimators/megapose/src/megapose/scripts/run_inference_on_example.py +++ b/happypose/pose_estimators/megapose/src/megapose/scripts/run_inference_on_example.py @@ -31,14 +31,14 @@ from happypose.toolbox.visualization.utils import make_contour_overlay # MegaPose -#from happypose.toolbox.datasets.object_dataset import RigidObject, RigidObjectDataset -#from happypose.toolbox.datasets.scene_dataset import CameraData, ObjectData - +# from happypose.toolbox.datasets.object_dataset import RigidObject, RigidObjectDataset +# from happypose.toolbox.datasets.scene_dataset import CameraData, ObjectData logger = get_logger(__name__) -device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + def load_observation( example_dir: Path, @@ -51,7 +51,10 @@ def load_observation( depth = None if load_depth: - depth = np.array(Image.open(example_dir / "image_depth.png"), dtype=np.float32) / 1000 + depth = ( + np.array(Image.open(example_dir / "image_depth.png"), dtype=np.float32) + / 1000 + ) assert depth.shape[:2] == camera_data.resolution return rgb, depth, camera_data @@ -92,7 +95,9 @@ def make_object_dataset(example_dir: Path) -> RigidObjectDataset: assert not mesh_path, f"there multiple meshes in the {label} directory" mesh_path = fn assert mesh_path, f"couldnt find a obj or ply mesh for {label}" - rigid_objects.append(RigidObject(label=label, mesh_path=mesh_path, mesh_units=mesh_units)) + rigid_objects.append( + RigidObject(label=label, mesh_path=mesh_path, mesh_units=mesh_units), + ) # TODO: fix mesh units rigid_object_dataset = RigidObjectDataset(rigid_objects) return rigid_object_dataset @@ -120,7 +125,8 @@ def save_predictions( labels = pose_estimates.infos["label"] poses = pose_estimates.poses.cpu().numpy() object_data = [ - ObjectData(label=label, TWO=Transform(pose)) for label, pose in zip(labels, poses) + ObjectData(label=label, TWO=Transform(pose)) + for label, pose in zip(labels, poses) ] object_data_json = json.dumps([x.to_json() for x in object_data]) output_fn = example_dir / "outputs" / "object_data.json" @@ -134,11 +140,10 @@ def run_inference( example_dir: Path, model_name: str, ) -> None: - model_info = NAMED_MODELS[model_name] observation = load_observation_tensor( - example_dir, load_depth=model_info["requires_depth"] + example_dir, load_depth=model_info["requires_depth"], ) if torch.cuda.is_available(): observation.cuda() @@ -147,11 +152,11 @@ def run_inference( 
logger.info(f"Loading model {model_name}.") pose_estimator = load_named_model(model_name, object_dataset).to(device) - logger.info(f"Running inference.") + logger.info("Running inference.") output, _ = pose_estimator.run_inference_pipeline( - observation, detections=detections, **model_info["inference_parameters"] + observation, detections=detections, **model_info["inference_parameters"], ) - + save_predictions(example_dir, output) return @@ -159,7 +164,6 @@ def run_inference( def make_output_visualization( example_dir: Path, ) -> None: - rgb, _, camera_data = load_observation(example_dir, load_depth=False) camera_data.TWC = Transform(np.eye(4)) object_datas = load_object_data(example_dir / "outputs" / "object_data.json") @@ -167,7 +171,9 @@ def make_output_visualization( renderer = Panda3dSceneRenderer(object_dataset) - camera_data, object_datas = convert_scene_observation_to_panda3d(camera_data, object_datas) + camera_data, object_datas = convert_scene_observation_to_panda3d( + camera_data, object_datas, + ) light_datas = [ Panda3dLightData( light_type="ambient", @@ -189,10 +195,12 @@ def make_output_visualization( fig_rgb = plotter.plot_image(rgb) fig_mesh_overlay = plotter.plot_overlay(rgb, renderings.rgb) contour_overlay = make_contour_overlay( - rgb, renderings.rgb, dilate_iterations=1, color=(0, 255, 0) + rgb, renderings.rgb, dilate_iterations=1, color=(0, 255, 0), )["img"] fig_contour_overlay = plotter.plot_image(contour_overlay) - fig_all = gridplot([[fig_rgb, fig_contour_overlay, fig_mesh_overlay]], toolbar_location=None) + fig_all = gridplot( + [[fig_rgb, fig_contour_overlay, fig_mesh_overlay]], toolbar_location=None, + ) vis_dir = example_dir / "visualizations" vis_dir.mkdir(exist_ok=True) export_png(fig_mesh_overlay, filename=vis_dir / "mesh_overlay.png") @@ -218,7 +226,9 @@ def make_output_visualization( set_logging_level("info") parser = argparse.ArgumentParser() parser.add_argument("example_name") - parser.add_argument("--model", type=str, default="megapose-1.0-RGB-multi-hypothesis") + parser.add_argument( + "--model", type=str, default="megapose-1.0-RGB-multi-hypothesis", + ) parser.add_argument("--vis-detections", action="store_true") parser.add_argument("--run-inference", action="store_true") parser.add_argument("--vis-outputs", action="store_true") diff --git a/happypose/pose_estimators/megapose/src/megapose/scripts/run_megapose_training.py b/happypose/pose_estimators/megapose/src/megapose/scripts/run_megapose_training.py index 248ea4fa..0f02e621 100644 --- a/happypose/pose_estimators/megapose/src/megapose/scripts/run_megapose_training.py +++ b/happypose/pose_estimators/megapose/src/megapose/scripts/run_megapose_training.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -48,14 +47,22 @@ # MegaPose from happypose.pose_estimators.megapose.src.megapose.bop_config import BOP_CONFIG from happypose.pose_estimators.megapose.src.megapose.config import EXP_DIR -from happypose.pose_estimators.megapose.src.megapose.training.train_megapose import DatasetConfig, train_megapose -from happypose.pose_estimators.megapose.src.megapose.training.training_config import HardwareConfig, TrainingConfig +from happypose.pose_estimators.megapose.src.megapose.training.train_megapose import ( + DatasetConfig, + train_megapose, +) +from happypose.pose_estimators.megapose.src.megapose.training.training_config import ( + HardwareConfig, + TrainingConfig, +) from happypose.toolbox.utils.logging import get_logger, set_logging_level logger = get_logger(__name__) -def train_on_bop_pbr_datasets(cfg: TrainingConfig, use_webdataset: bool = True) -> TrainingConfig: +def train_on_bop_pbr_datasets( + cfg: TrainingConfig, use_webdataset: bool = True, +) -> TrainingConfig: bop_names = ["lm", "tless", "itodd", "hb", "ycbv", "icbin", "tudl"] for bop_name in bop_names: bop_cfg = BOP_CONFIG[bop_name] @@ -80,7 +87,6 @@ def train_on_shapenet( ], remove_modelnet: bool = False, ) -> TrainingConfig: - if remove_modelnet: obj_filters.append("remove_modelnet") @@ -92,7 +98,7 @@ def train_on_shapenet( ds_name="webdataset." + ds_name, mesh_obj_ds_name=f"{obj_ds_name}.pointcloud", renderer_obj_ds_name=f"{obj_ds_name}.panda3d_bam", - ) + ), ) cfg.n_symmetries_batch = 1 return cfg @@ -103,7 +109,6 @@ def train_on_gso( ds_name: str = "gso_1M", n_objects: int = 940, ) -> TrainingConfig: - cfg.input_resize = (540, 720) obj_ds_name = f"gso.nobjects={n_objects}" cfg.train_datasets.append( @@ -111,7 +116,7 @@ def train_on_gso( ds_name="webdataset." + ds_name, mesh_obj_ds_name=f"{obj_ds_name}.pointcloud", renderer_obj_ds_name=f"{obj_ds_name}.normalized", - ) + ), ) cfg.n_symmetries_batch = 1 return cfg @@ -144,7 +149,7 @@ def make_coarse_cfg(cfg: TrainingConfig) -> TrainingConfig: def enable_depth_in_cfg(cfg: TrainingConfig) -> TrainingConfig: - """Adds flags for input depth + render depth to cfg""" + """Adds flags for input depth + render depth to cfg.""" cfg.depth_normalization_type = "tCR_scale_clamp_center" cfg.input_depth = True cfg.render_depth = True @@ -204,31 +209,33 @@ def train_on_gso_and_shapenet( elif config_id == "refiner-gso_shapenet-4views-normals-objects50p": cfg = make_refiner_cfg(cfg) cfg = train_on_gso_and_shapenet( - cfg, shapenet_obj_ds_name="shapenet_10mb_10k", gso_obj_ds_name="gso_500" + cfg, shapenet_obj_ds_name="shapenet_10mb_10k", gso_obj_ds_name="gso_500", ) elif config_id == "refiner-gso_shapenet-4views-normals-objects25p": cfg = make_refiner_cfg(cfg) cfg = train_on_gso_and_shapenet( - cfg, shapenet_obj_ds_name="shapenet_10mb_2k", gso_obj_ds_name="gso_250" + cfg, shapenet_obj_ds_name="shapenet_10mb_2k", gso_obj_ds_name="gso_250", ) elif config_id == "refiner-gso_shapenet-4views-normals-objects10p": cfg = make_refiner_cfg(cfg) cfg = train_on_gso_and_shapenet( - cfg, shapenet_obj_ds_name="shapenet_10mb_1k", gso_obj_ds_name="gso_100" + cfg, shapenet_obj_ds_name="shapenet_10mb_1k", gso_obj_ds_name="gso_100", ) elif config_id == "refiner-gso_shapenet-4views-normals-objects1p": cfg = make_refiner_cfg(cfg) cfg = train_on_gso_and_shapenet( - cfg, shapenet_obj_ds_name="shapenet_10mb_100", gso_obj_ds_name="gso_10" + cfg, shapenet_obj_ds_name="shapenet_10mb_100", gso_obj_ds_name="gso_10", ) elif config_id == "refiner-gso-4views-normals": cfg = make_refiner_cfg(cfg) - cfg = 
train_on_gso_and_shapenet(cfg, shapenet_obj_ds_name=None, gso_obj_ds_name="gso_940") + cfg = train_on_gso_and_shapenet( + cfg, shapenet_obj_ds_name=None, gso_obj_ds_name="gso_940", + ) elif config_id == "refiner-shapenet-4views-normals": cfg = make_refiner_cfg(cfg) cfg = train_on_gso_and_shapenet( - cfg, shapenet_obj_ds_name="shapenet_10mb_20k", gso_obj_ds_name=None + cfg, shapenet_obj_ds_name="shapenet_10mb_20k", gso_obj_ds_name=None, ) elif config_id == "refiner-gso_shapenet_nomodelnet-4views-normals": cfg = make_refiner_cfg(cfg) @@ -263,7 +270,8 @@ def train_on_gso_and_shapenet( cfg = train_on_gso_and_shapenet(cfg) else: - raise ValueError("Unknown config") + msg = "Unknown config" + raise ValueError(msg) if cfg.run_id is None: cfg.run_postfix = str(np.random.randint(int(1e6))) diff --git a/happypose/pose_estimators/megapose/src/megapose/scripts/test_distributed.py b/happypose/pose_estimators/megapose/src/megapose/scripts/test_distributed.py index 0667d07b..b16e99d4 100644 --- a/happypose/pose_estimators/megapose/src/megapose/scripts/test_distributed.py +++ b/happypose/pose_estimators/megapose/src/megapose/scripts/test_distributed.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,7 +14,6 @@ """ - # Standard Library import os diff --git a/happypose/pose_estimators/megapose/src/megapose/tests/__init__.py b/happypose/pose_estimators/megapose/src/megapose/tests/__init__.py index 73a7b275..09aba5e2 100644 --- a/happypose/pose_estimators/megapose/src/megapose/tests/__init__.py +++ b/happypose/pose_estimators/megapose/src/megapose/tests/__init__.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -13,4 +12,3 @@ See the License for the specific language governing permissions and limitations under the License. """ - diff --git a/happypose/pose_estimators/megapose/src/megapose/training/__init__.py b/happypose/pose_estimators/megapose/src/megapose/training/__init__.py index 73a7b275..09aba5e2 100644 --- a/happypose/pose_estimators/megapose/src/megapose/training/__init__.py +++ b/happypose/pose_estimators/megapose/src/megapose/training/__init__.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -13,4 +12,3 @@ See the License for the specific language governing permissions and limitations under the License. """ - diff --git a/happypose/pose_estimators/megapose/src/megapose/training/detector_models_cfg.py b/happypose/pose_estimators/megapose/src/megapose/training/detector_models_cfg.py index 5fea06b4..8b887c0c 100644 --- a/happypose/pose_estimators/megapose/src/megapose/training/detector_models_cfg.py +++ b/happypose/pose_estimators/megapose/src/megapose/training/detector_models_cfg.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -16,7 +15,9 @@ # MegaPose -from happypose.pose_estimators.megapose.src.megapose.models.mask_rcnn import DetectorMaskRCNN +from happypose.pose_estimators.megapose.src.megapose.models.mask_rcnn import ( + DetectorMaskRCNN, +) from happypose.toolbox.utils.logging import get_logger logger = get_logger(__name__) @@ -24,7 +25,9 @@ def check_update_config(cfg): obj_prefix = cfg.train_ds_names[0][0].split(".")[0] - cfg.label_to_category_id = {f"{obj_prefix}-{k}": v for k, v in cfg.label_to_category_id.items()} + cfg.label_to_category_id = { + f"{obj_prefix}-{k}": v for k, v in cfg.label_to_category_id.items() + } return cfg diff --git a/happypose/pose_estimators/megapose/src/megapose/training/megapose_forward_loss.py b/happypose/pose_estimators/megapose/src/megapose/training/megapose_forward_loss.py index b3eb417f..b2ec469a 100644 --- a/happypose/pose_estimators/megapose/src/megapose/training/megapose_forward_loss.py +++ b/happypose/pose_estimators/megapose/src/megapose/training/megapose_forward_loss.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -26,6 +25,17 @@ from bokeh.layouts import gridplot from torch import nn +from happypose.pose_estimators.megapose.src.megapose.models.pose_rigid import ( + PosePredictor, +) +from happypose.pose_estimators.megapose.src.megapose.training.training_config import ( + TrainingConfig, +) +from happypose.pose_estimators.megapose.src.megapose.training.utils import ( + cast, + cast_images, +) + # MegaPose from happypose.toolbox.datasets.pose_dataset import BatchPoseData from happypose.toolbox.lib3d.camera_geometry import ( @@ -38,9 +48,6 @@ from happypose.toolbox.lib3d.multiview import make_TCO_multiview from happypose.toolbox.lib3d.rigid_mesh_database import BatchedMeshes from happypose.toolbox.lib3d.transform_ops import add_noise, invert_transform_matrices -from happypose.pose_estimators.megapose.src.megapose.models.pose_rigid import PosePredictor -from happypose.pose_estimators.megapose.src.megapose.training.training_config import TrainingConfig -from happypose.pose_estimators.megapose.src.megapose.training.utils import cast, cast_images from happypose.toolbox.visualization.bokeh_plotter import BokehPlotter @@ -56,7 +63,6 @@ def megapose_forward_loss( train: bool = True, is_notebook: bool = False, ) -> torch.Tensor: - # Normalize RGB dims to be in [0,1] from [0,255] # Don't tamper with depth images = cast_images(rgb=data.rgbs, depth=data.depths) @@ -75,15 +81,19 @@ def megapose_forward_loss( torch.arange(batch_size, device=device).unsqueeze(1).repeat(1, cfg.n_hypotheses) ) hypotheses_labels = np.repeat( - np.expand_dims(np.array(labels_gt, dtype=object), axis=1), cfg.n_hypotheses, axis=1 + np.expand_dims(np.array(labels_gt, dtype=object), axis=1), + cfg.n_hypotheses, + axis=1, ).copy() if cfg.hypotheses_init_method == "coarse_z_up+auto-depth": assert cfg.n_hypotheses == 1 - points_3d = mesh_db.select(np.ravel(hypotheses_labels).tolist()).sample_points(200) + points_3d = mesh_db.select(np.ravel(hypotheses_labels).tolist()).sample_points( + 200, + ) TCO_init_zup = 
TCO_init_from_boxes_zup_autodepth(bboxes_gt, points_3d, K) TCO_init_zup = add_noise( - TCO_init_zup, euler_deg_std=[0, 0, 0], trans_std=[0.01, 0.01, 0.05] + TCO_init_zup, euler_deg_std=[0, 0, 0], trans_std=[0.01, 0.01, 0.05], ) hypotheses_TCO_init = TCO_init_zup.unsqueeze(1) is_hypothesis_positive = None @@ -106,7 +116,9 @@ def megapose_forward_loss( trans_std=cfg.init_trans_std, ) tOR = torch.zeros(batch_size, 3, device=device, dtype=dtype) - tCR = TCO_gt_noise[..., :3, [-1]] + TCO_gt_noise[..., :3, :3] @ tOR.unsqueeze(-1) + tCR = TCO_gt_noise[..., :3, [-1]] + TCO_gt_noise[..., :3, :3] @ tOR.unsqueeze( + -1, + ) tCR = tCR.squeeze(-1) TCV_O = make_TCO_multiview( TCO_gt_noise, @@ -121,7 +133,9 @@ def megapose_forward_loss( views_permutation = np.empty((2, batch_size, n_hypotheses), dtype=int) for b in range(batch_size): views_permutation[0, b, :] = b - views_permutation[1, b, :] = np.random.permutation(n_candidate_views)[:n_hypotheses] + views_permutation[1, b, :] = np.random.permutation(n_candidate_views)[ + :n_hypotheses + ] positive_idx = np.where(views_permutation[1, b] == 0)[0] is_hypothesis_positive[b, positive_idx] = 1 if len(positive_idx) == 0: @@ -152,7 +166,9 @@ def megapose_forward_loss( meshes = mesh_db.select(labels_gt) points = meshes.sample_points(cfg.n_points_loss) TCO_possible_gt = TCO_gt.unsqueeze(1) @ meshes.symmetries - TCO_possible_gt = TCO_possible_gt.unsqueeze(1).repeat(1, n_hypotheses, 1, 1, 1).flatten(0, 1) + TCO_possible_gt = ( + TCO_possible_gt.unsqueeze(1).repeat(1, n_hypotheses, 1, 1, 1).flatten(0, 1) + ) points = points.unsqueeze(1).repeat(1, n_hypotheses, 1, 1).flatten(0, 1) list_losses_pose = [] @@ -165,7 +181,10 @@ def megapose_forward_loss( loss_TCO_iter, loss_TCO_iter_data = None, None if cfg.predict_pose_update: - loss_TCO_iter, loss_TCO_iter_data = loss_refiner_CO_disentangled_reference_point( + ( + loss_TCO_iter, + loss_TCO_iter_data, + ) = loss_refiner_CO_disentangled_reference_point( TCO_possible_gt=TCO_possible_gt, points=points, TCO_input=iter_outputs.TCO_input, @@ -179,7 +198,7 @@ def megapose_forward_loss( if cfg.predict_rendered_views_logits: list_rendering_logits.append( - iter_outputs.renderings_logits.view(batch_size, n_hypotheses, -1) + iter_outputs.renderings_logits.view(batch_size, n_hypotheses, -1), ) time_render += iter_outputs.timing_dict["render"] @@ -198,7 +217,7 @@ def megapose_forward_loss( # Batch size x N hypotheses x N iterations loss_hypotheses = torch.zeros( - (batch_size, n_hypotheses, n_iterations), device=device, dtype=dtype + (batch_size, n_hypotheses, n_iterations), device=device, dtype=dtype, ) if cfg.predict_pose_update: losses_pose = torch.stack(list_losses_pose).permute(1, 2, 0) @@ -216,8 +235,12 @@ def megapose_forward_loss( rendering_logits.flatten(1, 3), torch.tensor(is_hypothesis_positive, dtype=torch.float, device=device), ).unsqueeze(-1) - meters["loss_renderings_confidence"].add(loss_renderings_confidence.mean().item()) - loss_hypotheses += cfg.loss_alpha_renderings_confidence * loss_renderings_confidence + meters["loss_renderings_confidence"].add( + loss_renderings_confidence.mean().item(), + ) + loss_hypotheses += ( + cfg.loss_alpha_renderings_confidence * loss_renderings_confidence + ) loss = loss_hypotheses.mean() @@ -226,10 +249,10 @@ def megapose_forward_loss( if make_visualization: def add_mask_to_image( - image: torch.Tensor, mask: torch.Tensor, color: str = "red" + image: torch.Tensor, mask: torch.Tensor, color: str = "red", ) -> torch.Tensor: t_color = torch.zeros_like(image) - idx = dict(red=0, 
green=1, blue=2)[color] + idx = {"red": 0, "green": 1, "blue": 2}[color] t_color[idx, mask > 0] = 1.0 output = image * 0.8 + t_color * 0.2 return output @@ -240,10 +263,16 @@ def add_mask_to_image( n_views = cfg.n_rendered_views last_iter_outputs = outputs[f"iteration={n_iterations}"] images_crop = last_iter_outputs.images_crop - images_crop = images_crop.view(batch_size, n_hypotheses, *images_crop.shape[-3:]) + images_crop = images_crop.view( + batch_size, n_hypotheses, *images_crop.shape[-3:], + ) renders = last_iter_outputs.renders renders = renders.view( - batch_size, n_hypotheses, n_views, renders.shape[1] // n_views, *renders.shape[-2:] + batch_size, + n_hypotheses, + n_views, + renders.shape[1] // n_views, + *renders.shape[-2:], ) KV_crop = last_iter_outputs.KV_crop @@ -278,16 +307,21 @@ def add_mask_to_image( TCO_ = TCV_O[[batch_idx], init_idx, view_idx] TCR_ = TCV_R[[batch_idx], init_idx, view_idx] - image_crop_ = add_mask_to_image(image_crop_[:3], image_crop_[-1]) - image_crop_ = add_mask_to_image(image_crop_[:3], render_[-1], "green") + image_crop_ = add_mask_to_image( + image_crop_[:3], render_[-1], "green", + ) f = plotter.plot_image(image_crop_) f.title.text = f"init of iteration {n_iterations}" row.append(f) n_channels = render_.shape[0] - ref_point_uv = project_points_robust(points_orig, KV_crop_, TCR_).flatten() - origin_uv = project_points_robust(points_orig, KV_crop_, TCO_).flatten() + ref_point_uv = project_points_robust( + points_orig, KV_crop_, TCR_, + ).flatten() + origin_uv = project_points_robust( + points_orig, KV_crop_, TCO_, + ).flatten() f = plotter.plot_image(render_[:3]) f.circle( [int(ref_point_uv[0])], @@ -295,15 +329,15 @@ def add_mask_to_image( color="red", ) f.circle( - [int(origin_uv[0])], [int(render_.shape[1] - origin_uv[1])], color="green" + [int(origin_uv[0])], + [int(render_.shape[1] - origin_uv[1])], + color="green", ) f.title.text = f"idx={batch_idx},view={view_idx},init={init_idx}" if cfg.predict_rendered_views_logits: assert is_hypothesis_positive is not None is_positive = is_hypothesis_positive[batch_idx, init_idx] - f.title.text = ( - f"idx={batch_idx},view={view_idx},init={init_idx},target={is_positive}" - ) + f.title.text = f"idx={batch_idx},view={view_idx},init={init_idx},target={is_positive}" row.append(f) if n_channels == 6: diff --git a/happypose/pose_estimators/megapose/src/megapose/training/pose_models_cfg.py b/happypose/pose_estimators/megapose/src/megapose/training/pose_models_cfg.py index ea653fa5..a032cea1 100644 --- a/happypose/pose_estimators/megapose/src/megapose/training/pose_models_cfg.py +++ b/happypose/pose_estimators/megapose/src/megapose/training/pose_models_cfg.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
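
The add_mask_to_image helper reformatted in the megapose_forward_loss hunk above tints the masked pixels of a CHW image crop in a single color channel. A minimal, self-contained sketch of its behaviour, with illustrative tensor sizes (not part of the patch):

    import torch

    def add_mask_to_image(image, mask, color="red"):
        # Same logic as the helper above: blend 80% image with a 20% one-channel tint.
        t_color = torch.zeros_like(image)
        idx = {"red": 0, "green": 1, "blue": 2}[color]
        t_color[idx, mask > 0] = 1.0
        return image * 0.8 + t_color * 0.2

    image_crop = torch.rand(3, 240, 320)                 # RGB crop in [0, 1]
    render_mask = (torch.rand(240, 320) > 0.5).float()   # binary render mask
    overlay = add_mask_to_image(image_crop, render_mask, "green")
    assert overlay.shape == (3, 240, 320)
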
@@ -16,18 +15,24 @@ # Standard Library -from typing import Union # MegaPose # Backbones import happypose.pose_estimators.megapose.src.megapose.models.torchvision_resnet as models -from happypose.toolbox.lib3d.rigid_mesh_database import BatchedMeshes # Pose models -from happypose.pose_estimators.megapose.src.megapose.models.pose_rigid import PosePredictor -from happypose.pose_estimators.megapose.src.megapose.models.wide_resnet import WideResNet18, WideResNet34 +from happypose.pose_estimators.megapose.src.megapose.models.pose_rigid import ( + PosePredictor, +) +from happypose.pose_estimators.megapose.src.megapose.models.wide_resnet import ( + WideResNet18, + WideResNet34, +) +from happypose.pose_estimators.megapose.src.megapose.training.training_config import ( + TrainingConfig, +) +from happypose.toolbox.lib3d.rigid_mesh_database import BatchedMeshes from happypose.toolbox.renderer.panda3d_batch_renderer import Panda3dBatchRenderer -from happypose.pose_estimators.megapose.src.megapose.training.training_config import TrainingConfig from happypose.toolbox.utils.logging import get_logger logger = get_logger(__name__) @@ -35,7 +40,6 @@ def check_update_config(cfg: TrainingConfig) -> TrainingConfig: """Useful for loading models previously trained with different configurations.""" - cfg.is_coarse_compat = False # Detect old coarse model definition if hasattr(cfg, "input_strategy") and cfg.input_strategy == "input=obs+one_render": @@ -99,13 +103,16 @@ def create_model_pose( # Assumes that if you are rendering depth you are also # inputting it from the model n_inputs = (n_channels + n_depth_channels) + ( - (n_channels + n_normals_channels + n_rendered_depth_channels) * cfg.n_rendered_views + (n_channels + n_normals_channels + n_rendered_depth_channels) + * cfg.n_rendered_views ) backbone_str = cfg.backbone_str render_size = (240, 320) if "vanilla_resnet34" == backbone_str: n_features = 512 - backbone = models.__dict__["resnet34"](num_classes=n_features, n_input_channels=n_inputs) + backbone = models.__dict__["resnet34"]( + num_classes=n_features, n_input_channels=n_inputs, + ) backbone.n_features = n_features elif "resnet34" == backbone_str: backbone = WideResNet34(n_inputs=n_inputs) @@ -115,7 +122,8 @@ def create_model_pose( width = int(backbone_str.split("resnet34_width=")[1]) backbone = WideResNet34(n_inputs=n_inputs, width=width) else: - raise ValueError("Unknown backbone", backbone_str) + msg = "Unknown backbone" + raise ValueError(msg, backbone_str) logger.debug(f"Backbone: {backbone_str}") backbone.n_inputs = n_inputs diff --git a/happypose/pose_estimators/megapose/src/megapose/training/train_megapose.py b/happypose/pose_estimators/megapose/src/megapose/training/train_megapose.py index 6cb7b597..c8e46327 100644 --- a/happypose/pose_estimators/megapose/src/megapose/training/train_megapose.py +++ b/happypose/pose_estimators/megapose/src/megapose/training/train_megapose.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
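
The create_model_pose hunk above reflows the n_inputs expression, which counts the observed channels plus the rendered channels contributed by every view. A worked example with hypothetical channel counts (the numbers below are assumptions chosen only to illustrate the formula, not values read from any config):

    # Observed image: RGB + depth; each rendered view: RGB + normals + rendered depth.
    n_channels = 3
    n_depth_channels = 1
    n_normals_channels = 3
    n_rendered_depth_channels = 1
    n_rendered_views = 2

    n_inputs = (n_channels + n_depth_channels) + (
        (n_channels + n_normals_channels + n_rendered_depth_channels) * n_rendered_views
    )
    print(n_inputs)  # (3 + 1) + (3 + 3 + 1) * 2 = 18 input planes for the backbone
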
@@ -34,8 +33,34 @@ # MegaPose from happypose.pose_estimators.megapose.src.megapose.config import EXP_DIR -from happypose.toolbox.datasets.datasets_cfg import make_object_dataset, make_scene_dataset -from happypose.toolbox.datasets.object_dataset import RigidObjectDataset, concat_object_datasets +from happypose.pose_estimators.megapose.src.megapose.panda3d_renderer.panda3d_batch_renderer import ( + Panda3dBatchRenderer, +) +from happypose.pose_estimators.megapose.src.megapose.training.megapose_forward_loss import ( + megapose_forward_loss, +) +from happypose.pose_estimators.megapose.src.megapose.training.pose_models_cfg import ( + check_update_config, + create_model_pose, +) +from happypose.pose_estimators.megapose.src.megapose.training.training_config import ( + DatasetConfig, + TrainingConfig, +) +from happypose.pose_estimators.megapose.src.megapose.training.utils import ( + CudaTimer, + make_lr_ratio_function, + make_optimizer, + write_logs, +) +from happypose.toolbox.datasets.datasets_cfg import ( + make_object_dataset, + make_scene_dataset, +) +from happypose.toolbox.datasets.object_dataset import ( + RigidObjectDataset, + concat_object_datasets, +) from happypose.toolbox.datasets.pose_dataset import PoseDataset from happypose.toolbox.datasets.scene_dataset import ( IterableMultiSceneDataset, @@ -43,18 +68,11 @@ RandomIterableSceneDataset, SceneDataset, ) -from happypose.toolbox.datasets.web_scene_dataset import IterableWebSceneDataset, WebSceneDataset -from happypose.toolbox.lib3d.rigid_mesh_database import MeshDataBase -from happypose.pose_estimators.megapose.src.megapose.panda3d_renderer.panda3d_batch_renderer import Panda3dBatchRenderer -from happypose.pose_estimators.megapose.src.megapose.training.megapose_forward_loss import megapose_forward_loss -from happypose.pose_estimators.megapose.src.megapose.training.pose_models_cfg import check_update_config, create_model_pose -from happypose.pose_estimators.megapose.src.megapose.training.training_config import DatasetConfig, TrainingConfig -from happypose.pose_estimators.megapose.src.megapose.training.utils import ( - CudaTimer, - make_lr_ratio_function, - make_optimizer, - write_logs, +from happypose.toolbox.datasets.web_scene_dataset import ( + IterableWebSceneDataset, + WebSceneDataset, ) +from happypose.toolbox.lib3d.rigid_mesh_database import MeshDataBase from happypose.toolbox.utils.distributed import ( get_rank, get_world_size, @@ -65,7 +83,11 @@ ) from happypose.toolbox.utils.logging import get_logger from happypose.toolbox.utils.random import get_unique_seed, set_seed, temp_numpy_seed -from happypose.toolbox.utils.resources import get_cuda_memory, get_gpu_memory, get_total_memory +from happypose.toolbox.utils.resources import ( + get_cuda_memory, + get_gpu_memory, + get_total_memory, +) def worker_init_fn(worker_id: int) -> None: @@ -91,23 +113,27 @@ def train_megapose(cfg: TrainingConfig) -> None: cfg.global_batch_size = world_size * cfg.batch_size assert cfg.hardware.n_gpus == world_size - def split_objects_across_gpus(obj_dataset: RigidObjectDataset) -> RigidObjectDataset: + def split_objects_across_gpus( + obj_dataset: RigidObjectDataset, + ) -> RigidObjectDataset: rank, world_size = get_rank(), get_world_size() if cfg.split_objects_across_gpus: with temp_numpy_seed(0): this_rank_labels = set( np.array_split( - np.random.permutation(np.array([obj.label for obj in obj_dataset.objects])), + np.random.permutation( + np.array([obj.label for obj in obj_dataset.objects]), + ), world_size, - )[rank].tolist() + )[rank].tolist(), ) 
else: - this_rank_labels = set([obj.label for obj in renderer_obj_dataset.objects]) + this_rank_labels = {obj.label for obj in renderer_obj_dataset.objects} if cfg.n_max_objects is not None: this_rank_labels = set(list(this_rank_labels)[: cfg.n_max_objects]) obj_dataset = RigidObjectDataset( - [obj for obj in obj_dataset.objects if obj.label in this_rank_labels] + [obj for obj in obj_dataset.objects if obj.label in this_rank_labels], ) return obj_dataset @@ -116,15 +142,15 @@ def split_objects_across_gpus(obj_dataset: RigidObjectDataset) -> RigidObjectDat [ split_objects_across_gpus(make_object_dataset(ds_cfg.renderer_obj_ds_name)) for ds_cfg in cfg.train_datasets + cfg.val_datasets - ] + ], ) mesh_obj_dataset = concat_object_datasets( [ split_objects_across_gpus(make_object_dataset(ds_cfg.mesh_obj_ds_name)) for ds_cfg in cfg.train_datasets + cfg.val_datasets - ] + ], ) - this_rank_labels = set([obj.label for obj in renderer_obj_dataset.objects]) + this_rank_labels = {obj.label for obj in renderer_obj_dataset.objects} assert len(renderer_obj_dataset) == len(mesh_obj_dataset) logger.info(f"Number of objects to train on (this rank): {len(mesh_obj_dataset)})") @@ -142,7 +168,7 @@ def make_iterable_scene_dataset( if isinstance(ds, WebSceneDataset): assert not deterministic iterator: IterableSceneDataset = IterableWebSceneDataset( - ds, buffer_size=cfg.sample_buffer_size + ds, buffer_size=cfg.sample_buffer_size, ) else: assert isinstance(ds, SceneDataset) @@ -230,7 +256,7 @@ def make_iterable_scene_dataset( ckpt = torch.load(ckpt_path) except EOFError: print( - "Unable to load checkpoint.pth.tar. Falling back to checkpoint_epoch=last.pth.tar" + "Unable to load checkpoint.pth.tar. Falling back to checkpoint_epoch=last.pth.tar", ) ckpt_path = resume_run_dir / "checkpoint_epoch=last.pth.tar" ckpt = torch.load(ckpt_path) @@ -244,7 +270,9 @@ def make_iterable_scene_dataset( model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model) model = sync_model(model) model = torch.nn.parallel.DistributedDataParallel( - model, device_ids=[torch.cuda.current_device()], output_device=torch.cuda.current_device() + model, + device_ids=[torch.cuda.current_device()], + output_device=torch.cuda.current_device(), ) optimizer = make_optimizer(model.parameters(), cfg) @@ -252,7 +280,9 @@ def make_iterable_scene_dataset( this_rank_epoch_size = cfg.epoch_size // get_world_size() this_rank_n_batch_per_epoch = this_rank_epoch_size // cfg.batch_size # NOTE: LR schedulers "epoch" actually correspond to "batch" - lr_scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, make_lr_ratio_function(cfg)) + lr_scheduler = torch.optim.lr_scheduler.LambdaLR( + optimizer, make_lr_ratio_function(cfg), + ) lr_scheduler.last_epoch = ( # type: ignore start_epoch * this_rank_epoch_size // cfg.batch_size - 1 ) @@ -266,16 +296,26 @@ def make_iterable_scene_dataset( scaler = torch.cuda.amp.GradScaler() for epoch in range(start_epoch, cfg.n_epochs + 1): - meters_train: Dict[str, AverageValueMeter] = defaultdict(lambda: AverageValueMeter()) - meters_val: Dict[str, AverageValueMeter] = defaultdict(lambda: AverageValueMeter()) + meters_train: Dict[str, AverageValueMeter] = defaultdict( + lambda: AverageValueMeter(), + ) + meters_val: Dict[str, AverageValueMeter] = defaultdict( + lambda: AverageValueMeter(), + ) if cfg.add_iteration_epoch_interval is None: n_iterations = cfg.n_iterations else: - n_iterations = min(epoch // cfg.add_iteration_epoch_interval + 1, cfg.n_iterations) + n_iterations = min( + epoch // cfg.add_iteration_epoch_interval + 
1, cfg.n_iterations, + ) forward_loss_fn = functools.partial( - megapose_forward_loss, model=model, cfg=cfg, n_iterations=n_iterations, mesh_db=mesh_db + megapose_forward_loss, + model=model, + cfg=cfg, + n_iterations=n_iterations, + mesh_db=mesh_db, ) def train() -> None: @@ -283,7 +323,9 @@ def train() -> None: set_seed(epoch * get_rank() + get_rank()) model.train() pbar = tqdm( - range(this_rank_n_batch_per_epoch), ncols=120, disable=cfg.logging_style != "tqdm" + range(this_rank_n_batch_per_epoch), + ncols=120, + disable=cfg.logging_style != "tqdm", ) for n in pbar: start_iter = time.time() @@ -293,7 +335,7 @@ def train() -> None: optimizer.zero_grad() - debug_dict: Dict[str, Any] = dict() + debug_dict: Dict[str, Any] = {} timer_forward = CudaTimer(enabled=cfg.cuda_timing) timer_forward.start() with torch.cuda.amp.autocast(): @@ -313,7 +355,7 @@ def train() -> None: scaler.scale(loss).backward() scaler.unscale_(optimizer) total_grad_norm = torch.nn.utils.clip_grad_norm_( - model.parameters(), max_norm=cfg.clip_grad_norm, norm_type=2 + model.parameters(), max_norm=cfg.clip_grad_norm, norm_type=2, ) meters["grad_norm"].add(torch.as_tensor(total_grad_norm).item()) @@ -327,14 +369,14 @@ def train() -> None: if n > 0: meters["time_iter"].add(time_iter) - infos = dict( - loss=f"{loss.item():.2e}", - tf=f"{timer_forward.elapsed():.3f}", - tb=f"{timer_backward.elapsed():.3f}", - tr=f"{time_render:.3f}", - td=f"{time_data:.3f}", - tt=f"{time_iter:.3f}", - ) + infos = { + "loss": f"{loss.item():.2e}", + "tf": f"{timer_forward.elapsed():.3f}", + "tb": f"{timer_backward.elapsed():.3f}", + "tr": f"{time_render:.3f}", + "td": f"{time_data:.3f}", + "tt": f"{time_iter:.3f}", + } infos["it/s"] = f"{1. / time_iter:.2f}" if not pbar.disable: pbar.set_postfix(**infos) @@ -360,7 +402,7 @@ def validation() -> None: iter_val = iter(ds_iter_val) n_batch = (cfg.val_size // get_world_size()) // cfg.batch_size pbar = tqdm(range(n_batch), ncols=120) - for n in pbar: + for _n in pbar: data = next(iter_val) loss = forward_loss_fn( data=data, @@ -375,7 +417,7 @@ def validation() -> None: if do_eval and ds_iter_val is not None: validation() - log_dict = dict() + log_dict = {} log_dict.update( { "grad_norm": meters_train["grad_norm"].mean, @@ -390,7 +432,7 @@ def validation() -> None: "time": time.time(), "n_iterations": epoch * cfg.epoch_size // cfg.batch_size, "n_datas": epoch * this_rank_n_batch_per_epoch * cfg.batch_size, - } + }, ) for string, meters in zip(("train", "val"), (meters_train, meters_val)): diff --git a/happypose/pose_estimators/megapose/src/megapose/training/training_config.py b/happypose/pose_estimators/megapose/src/megapose/training/training_config.py index 7d7a9163..237b7794 100644 --- a/happypose/pose_estimators/megapose/src/megapose/training/training_config.py +++ b/happypose/pose_estimators/megapose/src/megapose/training/training_config.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -47,7 +46,7 @@ class TrainingConfig(omegaconf.dictconfig.DictConfig): Two options for creating a training configuration: 1. Create it manually, and set `run_id`. 2. If `run_id` is None, then use `config_id`, `run_comment`and - `run_postfix` to create a `run_id` + `run_postfix` to create a `run_id`. 
In 2., the parameters of the config are set-up using the function `update_cfg_with_config_id`. """ diff --git a/happypose/pose_estimators/megapose/src/megapose/training/utils.py b/happypose/pose_estimators/megapose/src/megapose/training/utils.py index d1fbd6d4..827ab212 100644 --- a/happypose/pose_estimators/megapose/src/megapose/training/utils.py +++ b/happypose/pose_estimators/megapose/src/megapose/training/utils.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -24,12 +23,13 @@ import simplejson as json import torch from bokeh import document -from bokeh.io import export_png, save -from bokeh.io.export import get_screenshot_as_png +from bokeh.io import save from omegaconf import OmegaConf # MegaPose -from happypose.pose_estimators.megapose.src.megapose.training.training_config import TrainingConfig +from happypose.pose_estimators.megapose.src.megapose.training.training_config import ( + TrainingConfig, +) from happypose.toolbox.utils.distributed import get_rank, get_world_size from happypose.toolbox.utils.logging import get_logger @@ -56,10 +56,12 @@ def cast_images(rgb: torch.Tensor, depth: Optional[torch.Tensor]) -> torch.Tenso """Convert rgb and depth to a single to cuda FloatTensor. Arguments: + --------- rgb: (bsz, 3, h, w) uint8 tensor, with values in [0, 1] depth: (bsz, h, w) float tensor, or None Returns: + ------- images: (bsz, 3, h, w) RGB or (bsz, 4, h, w) RGB-D images. """ rgb_tensor = cast(rgb).float() / 255 @@ -71,9 +73,10 @@ def cast_images(rgb: torch.Tensor, depth: Optional[torch.Tensor]) -> torch.Tenso def cast_tensor_image_to_numpy(images): - """Convert images to + """Convert images to. Args: + ---- images: [B,C,H,W] """ images = (images[:, :3] * 255).to(torch.uint8) @@ -83,10 +86,10 @@ def cast_tensor_image_to_numpy(images): def cast_raw_numpy_images_to_tensor(images): - """ - Casts numpy images to tensor. + """Casts numpy images to tensor. 
Args: + ---- images: [B,H,W,C] numpy array, RGB values in [0,255], depth in meters """ @@ -99,7 +102,10 @@ def cast_raw_numpy_images_to_tensor(images): max_rgb = torch.max(images[:, RGB_DIMS]) if max_rgb < 1.5: - raise Warning("You are about to divide by 255 but the max rgb pixel value is less than 1.5") + msg = "You are about to divide by 255 but the max rgb pixel value is less than 1.5" + raise Warning( + msg, + ) # [B,C,H,W] images = images.permute(0, 3, 1, 2).cuda().float() @@ -108,17 +114,19 @@ def cast_raw_numpy_images_to_tensor(images): def make_optimizer( - parameters: Iterator[torch.nn.Parameter], - cfg: TrainingConfig + parameters: Iterator[torch.nn.Parameter], cfg: TrainingConfig, ) -> torch.optim.Optimizer: - optimizer: Optional[torch.optim.Optimizer] = None if cfg.optimizer == "adam": optimizer = torch.optim.Adam( - parameters, lr=cfg.lr, weight_decay=cfg.weight_decay) + parameters, lr=cfg.lr, weight_decay=cfg.weight_decay, + ) elif cfg.optimizer == "sgd": optimizer = torch.optim.SGD( - parameters, lr=cfg.lr, momentum=cfg.sgd_momentum, weight_decay=cfg.weight_decay + parameters, + lr=cfg.lr, + momentum=cfg.sgd_momentum, + weight_decay=cfg.weight_decay, ) else: raise ValueError(cfg.optimizer) @@ -126,7 +134,6 @@ def make_optimizer( def make_lr_ratio_function(cfg: TrainingConfig) -> Callable: - def lr_ratio(batch: int) -> float: this_rank_epoch_size = cfg.epoch_size // get_world_size() n_batch_per_epoch = this_rank_epoch_size // cfg.batch_size @@ -179,7 +186,9 @@ def save_checkpoint(model, postfix=None): if cfg.vis_save_only_last: bokeh_doc_path = bokeh_doc_dir / f"epoch=last_{bokeh_doc_postfix}.html" else: - bokeh_doc_path = bokeh_doc_dir / f"epoch={epoch}_{bokeh_doc_postfix}.html" + bokeh_doc_path = ( + bokeh_doc_dir / f"epoch={epoch}_{bokeh_doc_postfix}.html" + ) if bokeh_doc_path.exists(): bokeh_doc_path.unlink() bokeh_doc = document.Document.from_json(bokeh_doc_json) @@ -256,9 +265,15 @@ def elapsed(self) -> float: return 0.0 if not self.start_called: - raise ValueError("You must call CudaTimer.start() before querying the elapsed time") + msg = "You must call CudaTimer.start() before querying the elapsed time" + raise ValueError( + msg, + ) if not self.end_called: - raise ValueError("You must call CudaTimer.end() before querying the elapsed time") + msg = "You must call CudaTimer.end() before querying the elapsed time" + raise ValueError( + msg, + ) return self.elapsed_sec diff --git a/happypose/pose_estimators/megapose/src/megapose/utils/__init__.py b/happypose/pose_estimators/megapose/src/megapose/utils/__init__.py index 73a7b275..09aba5e2 100644 --- a/happypose/pose_estimators/megapose/src/megapose/utils/__init__.py +++ b/happypose/pose_estimators/megapose/src/megapose/utils/__init__.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -13,4 +12,3 @@ See the License for the specific language governing permissions and limitations under the License. """ - diff --git a/happypose/toolbox/datasets/augmentations.py b/happypose/toolbox/datasets/augmentations.py index 35e5db70..b4b63187 100644 --- a/happypose/toolbox/datasets/augmentations.py +++ b/happypose/toolbox/datasets/augmentations.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,7 +14,6 @@ """ - # Standard Library import dataclasses import random @@ -47,7 +45,9 @@ def __call__(self, obs: SceneObservation) -> SceneObservation: class SceneObservationAugmentation(SceneObservationTransform): def __init__( self, - transform: Union[SceneObservationTransform, List["SceneObservationAugmentation"]], + transform: Union[ + SceneObservationTransform, List["SceneObservationAugmentation"], + ], p: float = 1.0, ): self.p = p @@ -65,30 +65,40 @@ def __call__(self, obs: SceneObservation) -> SceneObservation: class PillowRGBTransform(SceneObservationTransform): - def __init__(self, pillow_fn: PIL.ImageEnhance._Enhance, factor_interval: Tuple[float, float]): + def __init__( + self, pillow_fn: PIL.ImageEnhance._Enhance, factor_interval: Tuple[float, float], + ): self.pillow_fn = pillow_fn self.factor_interval = factor_interval def __call__(self, obs: SceneObservation) -> SceneObservation: rgb_pil = PIL.Image.fromarray(obs.rgb) - rgb_pil = self.pillow_fn(rgb_pil).enhance(factor=random.uniform(*self.factor_interval)) + rgb_pil = self.pillow_fn(rgb_pil).enhance( + factor=random.uniform(*self.factor_interval), + ) obs = dataclasses.replace(obs, rgb=np.array(rgb_pil)) return obs class PillowSharpness(PillowRGBTransform): def __init__(self, factor_interval: Tuple[float, float] = (0.0, 50.0)): - super().__init__(pillow_fn=ImageEnhance.Sharpness, factor_interval=factor_interval) + super().__init__( + pillow_fn=ImageEnhance.Sharpness, factor_interval=factor_interval, + ) class PillowContrast(PillowRGBTransform): def __init__(self, factor_interval: Tuple[float, float] = (0.2, 50.0)): - super().__init__(pillow_fn=ImageEnhance.Contrast, factor_interval=factor_interval) + super().__init__( + pillow_fn=ImageEnhance.Contrast, factor_interval=factor_interval, + ) class PillowBrightness(PillowRGBTransform): def __init__(self, factor_interval: Tuple[float, float] = (0.1, 6.0)): - super().__init__(pillow_fn=ImageEnhance.Brightness, factor_interval=factor_interval) + super().__init__( + pillow_fn=ImageEnhance.Brightness, factor_interval=factor_interval, + ) class PillowColor(PillowRGBTransform): @@ -156,8 +166,12 @@ def _transform_depth(self, depth: np.ndarray) -> np.ndarray: ) small_H, small_W = (np.array([H, W]) / rescale_factor).astype(int) - additive_noise = np.random.normal(loc=0.0, scale=self.std_dev, size=(small_H, small_W)) - additive_noise = cv2.resize(additive_noise, (W, H), interpolation=cv2.INTER_CUBIC) + additive_noise = np.random.normal( + loc=0.0, scale=self.std_dev, size=(small_H, small_W), + ) + additive_noise = cv2.resize( + additive_noise, (W, H), interpolation=cv2.INTER_CUBIC, + ) depth[depth > 0] += additive_noise[depth > 0] depth = np.clip(depth, 0, np.finfo(np.float32).max) return depth @@ -178,7 +192,7 @@ def _transform_depth(self, depth: np.ndarray) -> np.ndarray: else: missing_fraction = self.max_missing_fraction dropout_ids = np.random.choice( - np.arange(len(u_idx)), int(missing_fraction * len(u_idx)), replace=False + np.arange(len(u_idx)), int(missing_fraction * len(u_idx)), replace=False, ) depth[v_idx[dropout_ids], u_idx[dropout_ids]] = 0 return depth @@ -207,15 +221,19 @@ def __init__( @staticmethod def generate_random_ellipses( - depth_img: np.ndarray, noise_params: Dict[str, float] + depth_img: np.ndarray, noise_params: Dict[str, float], ) -> 
Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: # Sample number of ellipses to dropout - num_ellipses_to_dropout = np.random.poisson(noise_params["ellipse_dropout_mean"]) + num_ellipses_to_dropout = np.random.poisson( + noise_params["ellipse_dropout_mean"], + ) # Sample ellipse centers - nonzero_pixel_indices = np.array(np.where(depth_img > 0)).T # Shape: [#nonzero_pixels x 2] + nonzero_pixel_indices = np.array( + np.where(depth_img > 0), + ).T # Shape: [#nonzero_pixels x 2] dropout_centers_indices = np.random.choice( - nonzero_pixel_indices.shape[0], size=num_ellipses_to_dropout + nonzero_pixel_indices.shape[0], size=num_ellipses_to_dropout, ) # Shape: [num_ellipses_to_dropout x 2] dropout_centers = nonzero_pixel_indices[dropout_centers_indices, :] @@ -237,7 +255,7 @@ def generate_random_ellipses( @staticmethod def dropout_random_ellipses( - depth_img: np.ndarray, noise_params: Dict[str, float] + depth_img: np.ndarray, noise_params: Dict[str, float], ) -> np.ndarray: """Randomly drop a few ellipses in the image for robustness. @@ -250,7 +268,6 @@ def dropout_random_ellipses( @param depth_img: a [H x W] set of depth z values """ - depth_img = depth_img.copy() ( @@ -259,7 +276,7 @@ def dropout_random_ellipses( angles, dropout_centers, ) = DepthEllipseDropoutTransform.generate_random_ellipses( - depth_img, noise_params=noise_params + depth_img, noise_params=noise_params, ) num_ellipses_to_dropout = x_radii.shape[0] @@ -314,12 +331,14 @@ def _transform_depth(self, depth: np.ndarray) -> np.ndarray: angles, dropout_centers, ) = DepthEllipseDropoutTransform.generate_random_ellipses( - depth_img, noise_params=self._noise_params + depth_img, noise_params=self._noise_params, ) num_ellipses_to_dropout = x_radii.shape[0] - additive_noise = np.random.normal(loc=0.0, scale=self.std_dev, size=x_radii.shape) + additive_noise = np.random.normal( + loc=0.0, scale=self.std_dev, size=x_radii.shape, + ) # Dropout ellipses noise = np.zeros_like(depth) @@ -444,7 +463,12 @@ def __call__(self, obs: SceneObservation) -> SceneObservation: x0, y0 = w / 2, h / 2 crop_box_size = (crop_h, w) crop_h, crop_w = min(crop_box_size), max(crop_box_size) - x1, y1, x2, y2 = x0 - crop_w / 2, y0 - crop_h / 2, x0 + crop_w / 2, y0 + crop_h / 2 + x1, y1, x2, y2 = ( + x0 - crop_w / 2, + y0 - crop_h / 2, + x0 + crop_w / 2, + y0 + crop_h / 2, + ) box = (x1, y1, x2, y2) rgb_pil = rgb_pil.crop(box) segmentation_pil = segmentation_pil.crop(box) @@ -463,9 +487,13 @@ def __call__(self, obs: SceneObservation) -> SceneObservation: w, h = rgb_pil.size w_resize, h_resize = max(self.resize), min(self.resize) rgb_pil = rgb_pil.resize((w_resize, h_resize), resample=PIL.Image.BILINEAR) - segmentation_pil = segmentation_pil.resize((w_resize, h_resize), resample=PIL.Image.NEAREST) + segmentation_pil = segmentation_pil.resize( + (w_resize, h_resize), resample=PIL.Image.NEAREST, + ) if depth_pil is not None: - depth_pil = depth_pil.resize((w_resize, h_resize), resample=PIL.Image.NEAREST) + depth_pil = depth_pil.resize( + (w_resize, h_resize), resample=PIL.Image.NEAREST, + ) box = (0, 0, w, h) new_K = get_K_crop_resize( torch.tensor(new_K).unsqueeze(0), @@ -488,7 +516,10 @@ def __call__(self, obs: SceneObservation) -> SceneObservation: for obj in obs.object_datas: if obj.unique_id in dets_gt: new_obj = dataclasses.replace( - obj, bbox_modal=dets_gt[obj.unique_id], bbox_amodal=None, visib_fract=None + obj, + bbox_modal=dets_gt[obj.unique_id], + bbox_amodal=None, + visib_fract=None, ) new_object_datas.append(new_obj) new_obs.object_datas = 
new_object_datas diff --git a/happypose/toolbox/datasets/bop_object_datasets.py b/happypose/toolbox/datasets/bop_object_datasets.py index d044bc85..ec7df61a 100644 --- a/happypose/toolbox/datasets/bop_object_datasets.py +++ b/happypose/toolbox/datasets/bop_object_datasets.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,7 +14,6 @@ """ - # Standard Library import json from pathlib import Path @@ -23,12 +21,12 @@ # Third Party import numpy as np -# MegaPose -from happypose.toolbox.lib3d.symmetries import ContinuousSymmetry, DiscreteSymmetry - # Local Folder from happypose.toolbox.datasets.object_dataset import RigidObject, RigidObjectDataset +# MegaPose +from happypose.toolbox.lib3d.symmetries import ContinuousSymmetry, DiscreteSymmetry + class BOPObjectDataset(RigidObjectDataset): def __init__(self, ds_dir: Path, label_format: str = "{label}"): diff --git a/happypose/toolbox/datasets/bop_scene_dataset.py b/happypose/toolbox/datasets/bop_scene_dataset.py index 3062bccc..deacc8af 100644 --- a/happypose/toolbox/datasets/bop_scene_dataset.py +++ b/happypose/toolbox/datasets/bop_scene_dataset.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -67,15 +66,14 @@ def build_index_and_annotations( save_file_annotations=None, make_per_view_annotations=True, ): - scene_ids, view_ids = [], [] - annotations = dict() + annotations = {} base_dir = ds_dir / split for scene_dir in tqdm(base_dir.iterdir()): scene_id = scene_dir.name - annotations_scene = dict() + annotations_scene = {} for f in ("scene_camera.json", "scene_gt_info.json", "scene_gt.json"): path = scene_dir / f if path.exists(): @@ -87,7 +85,7 @@ def build_index_and_annotations( scene_annotation = annotations_scene for view_id in scene_annotation["scene_camera"].keys(): if make_per_view_annotations: - this_annotation = dict() + this_annotation = {} this_annotation["camera"] = scene_annotation["scene_camera"][ str(view_id) ] @@ -99,7 +97,7 @@ def build_index_and_annotations( annotation_dir = base_dir / scene_id / "per_view_annotations" annotation_dir.mkdir(exist_ok=True) (annotation_dir / f"view={view_id}.json").write_text( - json.dumps(this_annotation) + json.dumps(this_annotation), ) scene_ids.append(int(scene_id)) view_ids.append(int(view_id)) @@ -192,7 +190,7 @@ def data_from_bop_obs( class BOPDataset(SceneDataset): """Read a dataset in the BOP format. - See https://github.com/thodan/bop_toolkit/blob/master/docs/bop_datasets_format.md + See https://github.com/thodan/bop_toolkit/blob/master/docs/bop_datasets_format.md. # TODO: Document whats happening with the per-view annotations. # TODO: Remove per-view annotations, recommend using WebDataset for performance ? @@ -208,7 +206,6 @@ def __init__( allow_cache: bool = False, per_view_annotations: bool = False, ): - self.ds_dir = ds_dir assert ds_dir.exists(), "Dataset does not exists." 
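
The BOPDataset hunks above and below only reflow existing code. For context, a minimal usage sketch follows; the constructor arguments mirror the datasets_cfg.py calls later in this patch, while indexing into a SceneObservation is assumed from the SceneDataset base class and is not shown in the diff:

    from pathlib import Path

    from happypose.toolbox.datasets.bop_scene_dataset import BOPDataset

    # Hypothetical local checkout of a BOP-format dataset.
    ds_dir = Path("local_data/bop_datasets/ycbv")

    ds = BOPDataset(ds_dir, split="test", label_format="ycbv-{label}")

    print(len(ds.frame_index))  # one row per (scene_id, view_id) pair
    obs = ds[0]                 # assumed SceneDataset indexing -> SceneObservation
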
@@ -232,7 +229,7 @@ def __init__( self.annotations = pickle.loads(save_file_annotations.read_bytes()) else: frame_index, self.annotations = build_index_and_annotations( - ds_dir, split, make_per_view_annotations=per_view_annotations + ds_dir, split, make_per_view_annotations=per_view_annotations, ) self.use_raw_object_id = use_raw_object_id @@ -245,7 +242,7 @@ def __init__( ) def _load_scene_observation( - self, image_infos: ObservationInfos + self, image_infos: ObservationInfos, ) -> SceneObservation: scene_id, view_id = image_infos.scene_id, image_infos.view_id view_id = int(view_id) @@ -257,7 +254,7 @@ def _load_scene_observation( # TODO: Also change the pandas numpy arrays to np.string_ instead of np.object # See https://github.com/pytorch/pytorch/issues/13246#issuecomment-905703662 this_annotation_path = ( - scene_dir / "per_view_annotations" / f"view={str(view_id)}.json" + scene_dir / "per_view_annotations" / f"view={view_id!s}.json" ) if this_annotation_path.exists(): this_annotation = json.loads(this_annotation_path.read_text()) @@ -357,8 +354,8 @@ def _load_scene_observation( for n in range(n_objects): binary_mask_n = np.array( Image.open( - scene_dir / "mask_visib" / f"{view_id_str}_{n:06d}.png" - ) + scene_dir / "mask_visib" / f"{view_id_str}_{n:06d}.png", + ), ) segmentation[binary_mask_n == 255] = n + 1 diff --git a/happypose/toolbox/datasets/datasets_cfg.py b/happypose/toolbox/datasets/datasets_cfg.py index 48012698..a6746373 100644 --- a/happypose/toolbox/datasets/datasets_cfg.py +++ b/happypose/toolbox/datasets/datasets_cfg.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -15,7 +14,6 @@ """ - # Standard Library import json from typing import List, Optional, Tuple @@ -24,10 +22,6 @@ import numpy as np import pandas as pd -# HappyPose -from happypose.toolbox.datasets.object_dataset import RigidObjectDataset -from happypose.toolbox.datasets.scene_dataset import SceneDataset - # MegaPose from happypose.pose_estimators.megapose.src.megapose.config import ( BOP_DS_DIR, @@ -44,6 +38,10 @@ from happypose.toolbox.datasets.deepim_modelnet import DeepImModelNetDataset from happypose.toolbox.datasets.gso_dataset import GoogleScannedObjectDataset from happypose.toolbox.datasets.modelnet_object_dataset import ModelNetObjectDataset + +# HappyPose +from happypose.toolbox.datasets.object_dataset import RigidObjectDataset +from happypose.toolbox.datasets.scene_dataset import SceneDataset from happypose.toolbox.datasets.shapenet_object_dataset import ShapeNetObjectDataset from happypose.toolbox.datasets.urdf_dataset import UrdfDataset from happypose.toolbox.datasets.web_scene_dataset import WebSceneDataset @@ -58,7 +56,9 @@ def keep_bop19(ds: SceneDataset) -> SceneDataset: targets = pd.read_json(ds.ds_dir / "test_targets_bop19.json") targets = remap_bop_targets(targets) targets = targets.loc[:, ["scene_id", "view_id"]].drop_duplicates() - index = ds.frame_index.merge(targets, on=["scene_id", "view_id"]).reset_index(drop=True) + index = ds.frame_index.merge(targets, on=["scene_id", "view_id"]).reset_index( + drop=True, + ) assert len(index) == len(targets) ds.frame_index = index return ds @@ -69,11 +69,12 @@ def make_scene_dataset( load_depth: bool = False, n_frames: Optional[int] = None, ) -> SceneDataset: - # BOP challenge splits if ds_name == "hb.bop19": ds_dir = BOP_DS_DIR / "hb" - ds: SceneDataset = BOPDataset(ds_dir, split="test_primesense", label_format="hb-{label}") + ds: SceneDataset = BOPDataset( + ds_dir, split="test_primesense", label_format="hb-{label}", + ) ds = keep_bop19(ds) elif ds_name == "icbin.bop19": ds_dir = BOP_DS_DIR / "icbin" @@ -126,29 +127,31 @@ def make_scene_dataset( elif ds_name == "ycbv.train.real": ds_dir = BOP_DS_DIR / "ycbv" ds = BOPDataset(ds_dir, split="train_real", label_format="ycbv-{label}") - elif ds_name == 'ycbv.train.synt': - ds_dir = BOP_DS_DIR / 'ycbv' - ds = BOPDataset(ds_dir, split='train_synt', label_format="ycbv-{label}") + elif ds_name == "ycbv.train.synt": + ds_dir = BOP_DS_DIR / "ycbv" + ds = BOPDataset(ds_dir, split="train_synt", label_format="ycbv-{label}") elif ds_name == "ycbv.real.train": ds_dir = BOP_DS_DIR / "ycbv" ds = BOPDataset(ds_dir, split="train_real", label_format="ycbv-{label}") - elif ds_name == 'ycbv.synt.train': - ds_dir = BOP_DS_DIR / 'ycbv' + elif ds_name == "ycbv.synt.train": + ds_dir = BOP_DS_DIR / "ycbv" ds = BOPDataset(ds_dir, split="train_synt", label_format="ycbv-{label}") elif ds_name == "ycbv.test": ds_dir = BOP_DS_DIR / "ycbv" ds = BOPDataset(ds_dir, split="test", label_format="ycbv-{label}") - elif ds_name == 'ycbv.test.keyframes': - ds_dir = BOP_DS_DIR / 'ycbv' - ds = BOPDataset(ds_dir, split='test', label_format="ycbv-{label}") - keyframes_path = ds_dir / 'keyframe.txt' - ls = keyframes_path.read_text().split('\n')[:-1] + elif ds_name == "ycbv.test.keyframes": + ds_dir = BOP_DS_DIR / "ycbv" + ds = BOPDataset(ds_dir, split="test", label_format="ycbv-{label}") + keyframes_path = ds_dir / "keyframe.txt" + ls = keyframes_path.read_text().split("\n")[:-1] frame_index = ds.frame_index ids = [] for l_n in ls: - scene_id, view_id = l_n.split('/') + scene_id, view_id = l_n.split("/") scene_id, 
view_id = int(scene_id), int(view_id) - mask = (frame_index['scene_id'] == scene_id) & (frame_index['view_id'] == view_id) + mask = (frame_index["scene_id"] == scene_id) & ( + frame_index["view_id"] == view_id + ) ids.append(np.where(mask)[0].item()) ds.frame_index = frame_index.iloc[ids].reset_index(drop=True) elif ds_name == "lmo.test": @@ -193,7 +196,15 @@ def make_scene_dataset( n_objects = ( 30 if category - in {"bathtub", "bookshelf", "guitar", "range_hood", "sofa", "wardrobe", "tv_stand"} + in { + "bathtub", + "bookshelf", + "guitar", + "range_hood", + "sofa", + "wardrobe", + "tv_stand", + } else 50 ) ds = DeepImModelNetDataset( @@ -210,14 +221,18 @@ def make_scene_dataset( ds = WebSceneDataset(WDS_DS_DIR / ds_name) # Synthetic datasets - elif 'synthetic.' in ds_name: - from happypose.pose_estimators.cosypose.cosypose.datasets.synthetic_dataset import SyntheticSceneDataset - assert '.train' in ds_name or '.val' in ds_name - is_train = 'train' in ds_name.split('.')[-1] - ds_name = ds_name.split('.')[1] - print("ds_name synthetic =", ds_name) - ds = SyntheticSceneDataset(ds_dir=LOCAL_DATA_DIR / 'synt_datasets' / ds_name, train=is_train) + elif "synthetic." in ds_name: + from happypose.pose_estimators.cosypose.cosypose.datasets.synthetic_dataset import ( + SyntheticSceneDataset, + ) + assert ".train" in ds_name or ".val" in ds_name + is_train = "train" in ds_name.split(".")[-1] + ds_name = ds_name.split(".")[1] + print("ds_name synthetic =", ds_name) + ds = SyntheticSceneDataset( + ds_dir=LOCAL_DATA_DIR / "synt_datasets" / ds_name, train=is_train, + ) else: raise ValueError(ds_name) @@ -233,12 +248,16 @@ def make_object_dataset(ds_name: str) -> RigidObjectDataset: # BOP original models if ds_name == "tless.cad": ds: RigidObjectDataset = BOPObjectDataset( - BOP_DS_DIR / "tless/models_cad", label_format="tless-{label}" + BOP_DS_DIR / "tless/models_cad", label_format="tless-{label}", ) elif ds_name == "tless.eval": - ds = BOPObjectDataset(BOP_DS_DIR / "tless/models_eval", label_format="tless-{label}") + ds = BOPObjectDataset( + BOP_DS_DIR / "tless/models_eval", label_format="tless-{label}", + ) elif ds_name == "tless.reconst": - ds = BOPObjectDataset(BOP_DS_DIR / "tless/models_reconst", label_format="tless-{label}") + ds = BOPObjectDataset( + BOP_DS_DIR / "tless/models_reconst", label_format="tless-{label}", + ) elif ds_name == "ycbv": ds = BOPObjectDataset(BOP_DS_DIR / "ycbv/models", label_format="ycbv-{label}") elif ds_name == "hb": @@ -261,25 +280,45 @@ def make_object_dataset(ds_name: str) -> RigidObjectDataset: # BOP models converted for Panda3D # TODO: Is this necessary ? 
elif ds_name == "hb.panda3d": - ds = BOPObjectDataset(BOP_PANDA3D_DS_DIR / "hb/models", label_format="hb-{label}") + ds = BOPObjectDataset( + BOP_PANDA3D_DS_DIR / "hb/models", label_format="hb-{label}", + ) elif ds_name == "icbin.panda3d": - ds = BOPObjectDataset(BOP_PANDA3D_DS_DIR / "icbin/models", label_format="icbin-{label}") + ds = BOPObjectDataset( + BOP_PANDA3D_DS_DIR / "icbin/models", label_format="icbin-{label}", + ) elif ds_name == "itodd.panda3d": - ds = BOPObjectDataset(BOP_PANDA3D_DS_DIR / "itodd/models", label_format="itodd-{label}") + ds = BOPObjectDataset( + BOP_PANDA3D_DS_DIR / "itodd/models", label_format="itodd-{label}", + ) elif ds_name == "lm.panda3d": - ds = BOPObjectDataset(BOP_PANDA3D_DS_DIR / "lm/models", label_format="lm-{label}") + ds = BOPObjectDataset( + BOP_PANDA3D_DS_DIR / "lm/models", label_format="lm-{label}", + ) elif ds_name == "tless.cad.panda3d": - ds = BOPObjectDataset(BOP_PANDA3D_DS_DIR / "tless/models_cad", label_format="tless-{label}") + ds = BOPObjectDataset( + BOP_PANDA3D_DS_DIR / "tless/models_cad", label_format="tless-{label}", + ) elif ds_name == "ycbv.panda3d": - ds = BOPObjectDataset(BOP_PANDA3D_DS_DIR / "ycbv/models", label_format="ycbv-{label}") + ds = BOPObjectDataset( + BOP_PANDA3D_DS_DIR / "ycbv/models", label_format="ycbv-{label}", + ) elif ds_name == "tudl.panda3d": - ds = BOPObjectDataset(BOP_PANDA3D_DS_DIR / "tudl/models", label_format="tudl-{label}") + ds = BOPObjectDataset( + BOP_PANDA3D_DS_DIR / "tudl/models", label_format="tudl-{label}", + ) elif ds_name == "tyol.panda3d": - ds = BOPObjectDataset(BOP_PANDA3D_DS_DIR / "tyol/models", label_format="tyol-{label}") + ds = BOPObjectDataset( + BOP_PANDA3D_DS_DIR / "tyol/models", label_format="tyol-{label}", + ) elif ds_name == "ruapc.panda3d": - ds = BOPObjectDataset(BOP_PANDA3D_DS_DIR / "ruapc/models", label_format="ruapc-{label}") + ds = BOPObjectDataset( + BOP_PANDA3D_DS_DIR / "ruapc/models", label_format="ruapc-{label}", + ) elif ds_name == "hope.panda3d": - ds = BOPObjectDataset(BOP_PANDA3D_DS_DIR / "hope/models", label_format="hope-{label}") + ds = BOPObjectDataset( + BOP_PANDA3D_DS_DIR / "hope/models", label_format="hope-{label}", + ) # GSO elif ds_name == "gso.orig": @@ -296,7 +335,15 @@ def make_object_dataset(ds_name: str) -> RigidObjectDataset: n_objects = ( 30 if category - in {"bathtub", "bookshelf", "guitar", "range_hood", "sofa", "wardrobe", "tv_stand"} + in { + "bathtub", + "bookshelf", + "guitar", + "range_hood", + "sofa", + "wardrobe", + "tv_stand", + } else 50 ) ds = ModelNetObjectDataset( @@ -323,20 +370,18 @@ def make_object_dataset(ds_name: str) -> RigidObjectDataset: for filter_str in filters_list: if filter_str == "remove_modelnet": - keep_labels = set( - [ - obj.label + keep_labels = { + obj.label for obj in ds.objects if obj.category not in SHAPENET_MODELNET_CATEGORIES - ] - ) + } else: keep_labels = set( json.loads( (SHAPENET_DIR / "stats" / ("shapenet_" + filter_str)) .with_suffix(".json") - .read_text() - ) + .read_text(), + ), ) ds = ds.filter_objects(keep_labels) @@ -357,8 +402,8 @@ def make_object_dataset(ds_name: str) -> RigidObjectDataset: np_random = np.random.RandomState(0) keep_labels = set( np_random.choice( - [obj.label for obj in ds.objects], n_objects_, replace=False - ).tolist() + [obj.label for obj in ds.objects], n_objects_, replace=False, + ).tolist(), ) ds = ds.filter_objects(keep_labels) @@ -371,7 +416,9 @@ def make_urdf_dataset(ds_name: str) -> RigidObjectDataset: # BOP if ds_name == "tless.cad": ds = UrdfDataset( - LOCAL_DATA_DIR / "urdfs" 
/ "tless.cad", mesh_units="mm", label_format="tless-{label}" + LOCAL_DATA_DIR / "urdfs" / "tless.cad", + mesh_units="mm", + label_format="tless-{label}", ) elif ds_name == "tless.reconst": ds = UrdfDataset( @@ -382,31 +429,41 @@ def make_urdf_dataset(ds_name: str) -> RigidObjectDataset: elif ds_name == "tless": ds = UrdfDataset( - LOCAL_DATA_DIR / "urdfs" / "tless.cad", mesh_units="mm", label_format="tless-{label}" + LOCAL_DATA_DIR / "urdfs" / "tless.cad", + mesh_units="mm", + label_format="tless-{label}", ) elif ds_name == "ycbv": ds = UrdfDataset( - LOCAL_DATA_DIR / "urdfs" / "ycbv", mesh_units="mm", label_format="ycbv-{label}" + LOCAL_DATA_DIR / "urdfs" / "ycbv", + mesh_units="mm", + label_format="ycbv-{label}", ) elif ds_name == "hb": ds = UrdfDataset( - LOCAL_DATA_DIR / "urdfs" / "hb", mesh_units="mm", label_format="hb-{label}" + LOCAL_DATA_DIR / "urdfs" / "hb", mesh_units="mm", label_format="hb-{label}", ) elif ds_name == "icbin": ds = UrdfDataset( - LOCAL_DATA_DIR / "urdfs" / "icbin", mesh_units="mm", label_format="icbin-{label}" + LOCAL_DATA_DIR / "urdfs" / "icbin", + mesh_units="mm", + label_format="icbin-{label}", ) elif ds_name == "itodd": ds = UrdfDataset( - LOCAL_DATA_DIR / "urdfs" / "itodd", mesh_units="mm", label_format="itodd-{label}" + LOCAL_DATA_DIR / "urdfs" / "itodd", + mesh_units="mm", + label_format="itodd-{label}", ) elif ds_name == "lm": ds = UrdfDataset( - LOCAL_DATA_DIR / "urdfs" / "lm", mesh_units="mm", label_format="lm-{label}" + LOCAL_DATA_DIR / "urdfs" / "lm", mesh_units="mm", label_format="lm-{label}", ) elif ds_name == "tudl": ds = UrdfDataset( - LOCAL_DATA_DIR / "urdfs" / "tudl", mesh_units="mm", label_format="tudl-{label}" + LOCAL_DATA_DIR / "urdfs" / "tudl", + mesh_units="mm", + label_format="tudl-{label}", ) else: @@ -415,7 +472,7 @@ def make_urdf_dataset(ds_name: str) -> RigidObjectDataset: def get_obj_ds_info(ds_name: str) -> Tuple[Optional[str], str]: - urdf_ds_name = None # Only used for bullet compatibility + urdf_ds_name = None # Only used for bullet compatibility if ds_name == "ycbv.bop19": ds_name = "ycbv" urdf_ds_name = "ycbv" @@ -437,7 +494,8 @@ def get_obj_ds_info(ds_name: str) -> Tuple[Optional[str], str]: category = ds_name.split(".")[1] obj_ds_name = f"modelnet.{category}.test.rescaled" else: - raise ValueError("Unknown dataset") + msg = "Unknown dataset" + raise ValueError(msg) return urdf_ds_name, obj_ds_name @@ -447,10 +505,12 @@ def get_object_label(ds_name, description): if ds_name == "ycbv": df = YCBV_OBJECT_NAMES else: - raise ValueError(f"Unknown dataset {ds_name}") + msg = f"Unknown dataset {ds_name}" + raise ValueError(msg) x = df[df.description == description] if len(x) == 0: - raise ValueError(f"Couldn't find object '{description}' in ds {ds_name}") + msg = f"Couldn't find object '{description}' in ds {ds_name}" + raise ValueError(msg) return x.iloc[0].label diff --git a/happypose/toolbox/datasets/deepim_modelnet.py b/happypose/toolbox/datasets/deepim_modelnet.py index 2988deb0..8b481a11 100644 --- a/happypose/toolbox/datasets/deepim_modelnet.py +++ b/happypose/toolbox/datasets/deepim_modelnet.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -15,7 +14,6 @@ """ - # Standard Library from pathlib import Path @@ -25,10 +23,6 @@ import torch from PIL import Image -# MegaPose -from happypose.toolbox.datasets.utils import make_detections_from_segmentation -from happypose.toolbox.lib3d.transform import Transform - # Local Folder from happypose.toolbox.datasets.scene_dataset import ( CameraData, @@ -38,6 +32,10 @@ SceneObservation, ) +# MegaPose +from happypose.toolbox.datasets.utils import make_detections_from_segmentation +from happypose.toolbox.lib3d.transform import Transform + def parse_pose(pose_str: str) -> np.ndarray: pose_str_split = pose_str.split("\n")[-3:] @@ -58,7 +56,6 @@ def __init__( n_images_per_object: int = 50, load_depth: bool = False, ): - self.test_template_im = ( modelnet_dir / "modelnet_render_v1/data/real/{category}/{split}/{obj_id}_{im_id:04d}-color.png" @@ -116,29 +113,36 @@ def __init__( load_depth=load_depth, ) - def _load_scene_observation(self, image_infos: ObservationInfos) -> SceneObservation: - infos_dict = dict( - category=self.category, - split=self.split, - obj_id=image_infos.scene_id, - im_id=image_infos.view_id, - ) + def _load_scene_observation( + self, image_infos: ObservationInfos, + ) -> SceneObservation: + infos_dict = { + "category": self.category, + "split": self.split, + "obj_id": image_infos.scene_id, + "im_id": image_infos.view_id, + } obj_label = image_infos.scene_id rgb = np.array(Image.open(str(self.test_template_im).format(**infos_dict))) if self.load_depth: - depth = np.array(Image.open(str(self.test_template_depth).format(**infos_dict))) + depth = np.array( + Image.open(str(self.test_template_depth).format(**infos_dict)), + ) depth = torch.as_tensor(depth) / self.depth_im_scale else: depth = None segmentation = np.array( - Image.open(str(self.test_template_label).format(**infos_dict)), dtype=np.int_ + Image.open(str(self.test_template_label).format(**infos_dict)), + dtype=np.int_, ) pose_str = Path(str(self.test_template_pose).format(**infos_dict)).read_text() pose = Transform(parse_pose(pose_str)) - init_pose_str = Path(str(self.init_template_pose).format(**infos_dict)).read_text() + init_pose_str = Path( + str(self.init_template_pose).format(**infos_dict), + ).read_text() init_pose = Transform(parse_pose(init_pose_str)) obj_label = self.label_format.format(label=obj_label) @@ -154,7 +158,7 @@ def _load_scene_observation(self, image_infos: ObservationInfos) -> SceneObserva visib_fract=1.0, unique_id=1, bbox_modal=dets[1], - ) + ), ] K = np.array([[572.4114, 0, 325.2611], [0, 573.57043, 242.04899], [0, 0, 1]]) diff --git a/happypose/toolbox/datasets/gso_dataset.py b/happypose/toolbox/datasets/gso_dataset.py index c5380f32..0d4e14a1 100644 --- a/happypose/toolbox/datasets/gso_dataset.py +++ b/happypose/toolbox/datasets/gso_dataset.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -43,9 +42,9 @@ def make_gso_infos(gso_dir: Path, model_name: str = "model.obj") -> List[str]: def load_object_infos(models_infos_path): - with open(models_infos_path, "r") as f: + with open(models_infos_path) as f: infos = json.load(f) - itos = dict() + itos = {} for info in infos: k = f"gso_{info['gso_id']}" itos[info["obj_id"]] = k diff --git a/happypose/toolbox/datasets/modelnet_object_dataset.py b/happypose/toolbox/datasets/modelnet_object_dataset.py index 157ab6fe..ef6519e7 100644 --- a/happypose/toolbox/datasets/modelnet_object_dataset.py +++ b/happypose/toolbox/datasets/modelnet_object_dataset.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -31,7 +30,6 @@ def __init__( rescaled: bool = True, n_objects: int = 30, ): - object_ids = ( Path(modelnet_dir / "model_set" / f"{category}_{split}.txt") .read_text() @@ -42,10 +40,16 @@ def __init__( for object_id in object_ids: if rescaled: mesh_path = ( - modelnet_dir / "ModelNet40" / category / split / f"{object_id}_rescaled.obj" + modelnet_dir + / "ModelNet40" + / category + / split + / f"{object_id}_rescaled.obj" ) else: - mesh_path = modelnet_dir / "ModelNet40" / category / split / f"{object_id}.obj" + mesh_path = ( + modelnet_dir / "ModelNet40" / category / split / f"{object_id}.obj" + ) obj = RigidObject( label=object_id, category=category, diff --git a/happypose/toolbox/datasets/object_dataset.py b/happypose/toolbox/datasets/object_dataset.py index 61bc48da..a1bb28a2 100644 --- a/happypose/toolbox/datasets/object_dataset.py +++ b/happypose/toolbox/datasets/object_dataset.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,7 +14,6 @@ """ - # Standard Library import itertools from pathlib import Path @@ -42,12 +40,12 @@ def __init__( mesh_units: str = "m", symmetries_discrete: List[DiscreteSymmetry] = [], symmetries_continuous: List[ContinuousSymmetry] = [], - ypr_offset_deg: Tuple[float, float, float] = (0., 0., 0.), + ypr_offset_deg: Tuple[float, float, float] = (0.0, 0.0, 0.0), scaling_factor: float = 1.0, scaling_factor_mesh_units_to_meters: Optional[float] = None, ): - """ - Args: + """Args: + ---- label (str): A unique label to identify an object. mesh_path (Path): Path to a mesh. Multiple object types are supported. Please refer to downstream usage of this class for the supported formats. @@ -87,14 +85,15 @@ def __init__( instead of the mesh_units argument. This is the scale that converts mesh units to meters. 
""" - self.label = label self.category = category self.mesh_path = mesh_path self.mesh_units = mesh_units if scaling_factor_mesh_units_to_meters is not None: - self.scaling_factor_mesh_units_to_meters = scaling_factor_mesh_units_to_meters + self.scaling_factor_mesh_units_to_meters = ( + scaling_factor_mesh_units_to_meters + ) else: self.scaling_factor_mesh_units_to_meters = { "m": 1.0, @@ -107,7 +106,9 @@ def __init__( if self._mesh_diameter is not None: self.mesh_diameter = mesh_diameter - self.diameter_meters = mesh_diameter * self.scaling_factor_mesh_units_to_meters + self.diameter_meters = ( + mesh_diameter * self.scaling_factor_mesh_units_to_meters + ) self.symmetries_discrete = symmetries_discrete self.symmetries_continuous = symmetries_continuous @@ -122,11 +123,11 @@ def scale(self) -> float: """Returns the scale factor that converts the mesh to desired units.""" return self.scaling_factor_mesh_units_to_meters * self.scaling_factor - def make_symmetry_poses( - self, n_symmetries_continuous: int = 64) -> np.ndarray: + def make_symmetry_poses(self, n_symmetries_continuous: int = 64) -> np.ndarray: """Generates the set of object symmetries. - Returns: + Returns + ------- (num_symmetries, 4, 4) array """ return make_symmetries_poses( @@ -145,7 +146,8 @@ def __init__( self.list_objects = objects self.label_to_objects = {obj.label: obj for obj in objects} if len(self.list_objects) != len(self.label_to_objects): - raise RuntimeError("There are objects with duplicate labels") + msg = "There are objects with duplicate labels" + raise RuntimeError(msg) def __getitem__(self, idx: int) -> RigidObject: return self.list_objects[idx] @@ -167,7 +169,7 @@ def filter_objects(self, keep_labels: Set[str]) -> "RigidObjectDataset": def append_dataset_name_to_object_labels( - ds_name: str, object_dataset: RigidObjectDataset + ds_name: str, object_dataset: RigidObjectDataset, ) -> RigidObjectDataset: for obj in object_dataset.list_objects: obj.label = f"ds_name={ds_name}_{obj.label}" diff --git a/happypose/toolbox/datasets/pickle_dataset.py b/happypose/toolbox/datasets/pickle_dataset.py index c8ca47b9..50e6acfb 100644 --- a/happypose/toolbox/datasets/pickle_dataset.py +++ b/happypose/toolbox/datasets/pickle_dataset.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -45,8 +44,13 @@ def __getitem__(self, idx): data["scene_id"] = 0 data["im_idx"] = idx mask = None - infos = dict() + infos = {} infos["camera"] = {"TWC": data["world_t_camera"], "K": data["intrinsics"]} - infos["frame_info"] = {"scene_id": 0, "view_id": idx, "cam_name": "cam", "cam_id": "cam"} + infos["frame_info"] = { + "scene_id": 0, + "view_id": idx, + "cam_name": "cam", + "cam_id": "cam", + } scene_data = SceneData(data["rgb"], data["depth"], mask, infos) return scene_data diff --git a/happypose/toolbox/datasets/pose_dataset.py b/happypose/toolbox/datasets/pose_dataset.py index f034e0d0..d353e762 100644 --- a/happypose/toolbox/datasets/pose_dataset.py +++ b/happypose/toolbox/datasets/pose_dataset.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,7 +14,6 @@ """ - # Standard Library import random import time @@ -26,14 +24,6 @@ import numpy as np import torch -# HappyPose -from happypose.toolbox.datasets.scene_dataset import ( - IterableSceneDataset, - ObjectData, - SceneDataset, - SceneObservation, -) - # MegaPose from happypose.pose_estimators.megapose.src.megapose.config import LOCAL_DATA_DIR from happypose.toolbox.datasets.augmentations import ( @@ -52,20 +42,29 @@ PillowContrast, PillowSharpness, ) -from happypose.toolbox.datasets.augmentations import SceneObservationAugmentation as SceneObsAug +from happypose.toolbox.datasets.augmentations import ( + SceneObservationAugmentation as SceneObsAug, +) from happypose.toolbox.datasets.augmentations import VOCBackgroundAugmentation + +# HappyPose +from happypose.toolbox.datasets.scene_dataset import ( + IterableSceneDataset, + ObjectData, + SceneDataset, + SceneObservation, +) from happypose.toolbox.datasets.scene_dataset_wrappers import remove_invisible_objects from happypose.toolbox.utils.types import Resolution @dataclass class PoseData: - """ - rgb: (h, w, 3) uint8 + """rgb: (h, w, 3) uint8 depth: (bsz, h, w) float32 bbox: (4, ) int K: (3, 3) float32 - TCO: (4, 4) float32 + TCO: (4, 4) float32. """ rgb: np.ndarray @@ -78,12 +77,11 @@ class PoseData: @dataclass class BatchPoseData: - """ - rgbs: (bsz, 3, h, w) uint8 + """rgbs: (bsz, 3, h, w) uint8 depths: (bsz, h, w) float32 bboxes: (bsz, 4) int TCO: (bsz, 4, 4) float32 - K: (bsz, 3, 3) float32 + K: (bsz, 3, 3) float32. """ rgbs: torch.Tensor @@ -123,7 +121,6 @@ def __init__( keep_labels_set: Optional[Set[str]] = None, depth_augmentation_level: int = 1, ): - self.scene_ds = scene_ds self.resize_transform = CropResizeToAspectTransform(resize=resize) self.min_area = min_area @@ -131,7 +128,11 @@ def __init__( self.background_augmentations = [] if apply_background_augmentation: self.background_augmentations += [ - (SceneObsAug(VOCBackgroundAugmentation(LOCAL_DATA_DIR / "VOC2012"), p=0.3)) + ( + SceneObsAug( + VOCBackgroundAugmentation(LOCAL_DATA_DIR / "VOC2012"), p=0.3, + ) + ), ] self.rgb_augmentations = [] @@ -140,13 +141,17 @@ def __init__( SceneObsAug( [ SceneObsAug(PillowBlur(factor_interval=(1, 3)), p=0.4), - SceneObsAug(PillowSharpness(factor_interval=(0.0, 50.0)), p=0.3), + SceneObsAug( + PillowSharpness(factor_interval=(0.0, 50.0)), p=0.3, + ), SceneObsAug(PillowContrast(factor_interval=(0.2, 50.0)), p=0.3), - SceneObsAug(PillowBrightness(factor_interval=(0.1, 6.0)), p=0.5), + SceneObsAug( + PillowBrightness(factor_interval=(0.1, 6.0)), p=0.5, + ), SceneObsAug(PillowColor(factor_interval=(0.0, 20.0)), p=0.3), ], p=0.8, - ) + ), ] self.depth_augmentations = [] @@ -167,7 +172,9 @@ def __init__( SceneObsAug(DepthBlurTransform(), p=0.3), SceneObsAug( DepthCorrelatedGaussianNoiseTransform( - gp_rescale_factor_min=15.0, gp_rescale_factor_max=40.0, std_dev=0.01 + gp_rescale_factor_min=15.0, + gp_rescale_factor_max=40.0, + std_dev=0.01, ), p=0.3, ), @@ -194,13 +201,20 @@ def __init__( # Set the depth image to zero occasionally. 
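# Minimal sketch of the probability-gated composition used by the augmentation
# lists above: each wrapper applies its transform with probability p. The
# classes and values are simplified stand-ins for SceneObservationAugmentation,
# not the real implementation.
import random

class MaybeApply:
    def __init__(self, transform, p):
        self.transform, self.p = transform, p

    def __call__(self, obs):
        return self.transform(obs) if random.random() <= self.p else obs

def brighten(x):
    return x + 1

def blur(x):
    return x - 1

rgb_augmentations = [MaybeApply(brighten, p=0.5), MaybeApply(blur, p=0.4)]
obs = 0
for aug in rgb_augmentations:
    obs = aug(obs)   # each stage fires independently with its own probability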
if depth_augmentation_level == 2: - self.depth_augmentations.append(SceneObsAug(DepthDropoutTransform(), p=0.3)) self.depth_augmentations.append( - SceneObsAug(DepthBackgroundDropoutTransform(), p=0.2) + SceneObsAug(DepthDropoutTransform(), p=0.3), + ) + self.depth_augmentations.append( + SceneObsAug(DepthBackgroundDropoutTransform(), p=0.2), ) - self.depth_augmentations = [SceneObsAug(self.depth_augmentations, p=0.8)] + self.depth_augmentations = [ + SceneObsAug(self.depth_augmentations, p=0.8), + ] else: - raise ValueError(f"Unknown depth augmentation type {depth_augmentation_level}") + msg = f"Unknown depth augmentation type {depth_augmentation_level}" + raise ValueError( + msg, + ) self.return_first_object = return_first_object @@ -210,7 +224,9 @@ def __init__( def collate_fn(self, list_data: List[PoseData]) -> BatchPoseData: batch_data = BatchPoseData( - rgbs=torch.from_numpy(np.stack([d.rgb for d in list_data])).permute(0, 3, 1, 2), + rgbs=torch.from_numpy(np.stack([d.rgb for d in list_data])).permute( + 0, 3, 1, 2, + ), bboxes=torch.from_numpy(np.stack([d.bbox for d in list_data])), K=torch.from_numpy(np.stack([d.K for d in list_data])), TCO=torch.from_numpy(np.stack([d.TCO for d in list_data])), @@ -229,11 +245,10 @@ def make_data_from_obs(self, obs: SceneObservation) -> Union[PoseData, None]: 2. if `keep_objects_set` isn't None, the object must belong to this set If there are no objects that satisfy this condition in the observation, returns None. """ - obs = remove_invisible_objects(obs) start = time.time() - timings = dict() + timings = {} s = time.time() obs = self.resize_transform(obs) @@ -326,7 +341,8 @@ def find_valid_data(self, iterator: Iterator[SceneObservation]) -> PoseData: return data n_attempts += 1 if n_attempts > 200: - raise ValueError("Cannot find valid image in the dataset") + msg = "Cannot find valid image in the dataset" + raise ValueError(msg) def __iter__(self) -> Iterator[PoseData]: assert isinstance(self.scene_ds, IterableSceneDataset) diff --git a/happypose/toolbox/datasets/samplers.py b/happypose/toolbox/datasets/samplers.py index 4ea6449d..7228ff73 100644 --- a/happypose/toolbox/datasets/samplers.py +++ b/happypose/toolbox/datasets/samplers.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,7 +14,6 @@ """ - # Third Party import numpy as np import torch @@ -71,7 +69,9 @@ def __init__(self, ds, num_replicas, rank, epoch_size, seed=0, shuffle=True): # NOTE: Epoch size is local. 
total_epoch_size = epoch_size * num_replicas n_repeats = 1 + total_epoch_size // len(ds) - self.all_indices = np.concatenate([np.arange(len(ds)) for _ in range(n_repeats)]) + self.all_indices = np.concatenate( + [np.arange(len(ds)) for _ in range(n_repeats)], + ) assert len(self.all_indices) >= total_epoch_size self.total_epoch_size = total_epoch_size self.seed = seed @@ -86,6 +86,10 @@ def __len__(self): def __iter__(self): self.epoch += 1 with temp_numpy_seed(self.epoch + self.seed): - indices_shuffled = np.random.permutation(self.all_indices)[: self.total_epoch_size] - local_indices = np.array_split(indices_shuffled, self.num_replicas)[self.rank] + indices_shuffled = np.random.permutation(self.all_indices)[ + : self.total_epoch_size + ] + local_indices = np.array_split(indices_shuffled, self.num_replicas)[ + self.rank + ] return iter(local_indices) diff --git a/happypose/toolbox/datasets/scene_dataset.py b/happypose/toolbox/datasets/scene_dataset.py index 3da06abb..1f0ad2ef 100644 --- a/happypose/toolbox/datasets/scene_dataset.py +++ b/happypose/toolbox/datasets/scene_dataset.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -24,7 +23,7 @@ import random import time from dataclasses import dataclass -from typing import Any, Dict, Iterator, List, Optional, Union +from typing import Any, Dict, Iterator, List, Union # Third Party import numpy as np @@ -72,18 +71,16 @@ def transform_to_list(T: Transform) -> ListPose: class ObjectData: # NOTE (Yann): bbox_amodal, bbox_modal, visib_fract should be moved to SceneObservation label: str - TWO: Optional[Transform] = None - unique_id: Optional[int] = None - bbox_amodal: Optional[np.ndarray] = None # (4, ) array [xmin, ymin, xmax, ymax] - bbox_modal: Optional[np.ndarray] = None # (4, ) array [xmin, ymin, xmax, ymax] - visib_fract: Optional[float] = None - TWO_init: Optional[ - Transform - ] = None # Some pose estimation datasets (ModelNet) provide an initial pose estimate + TWO: Transform | None = None + unique_id: int | None = None + bbox_amodal: np.ndarray | None = None # (4, ) array [xmin, ymin, xmax, ymax] + bbox_modal: np.ndarray | None = None # (4, ) array [xmin, ymin, xmax, ymax] + visib_fract: float | None = None + TWO_init: Transform | None = None # Some pose estimation datasets (ModelNet) provide an initial pose estimate # NOTE: This should be loaded externally - def to_json(self) -> Dict[str, SingleDataJsonType]: - d: Dict[str, SingleDataJsonType] = dict(label=self.label) + def to_json(self) -> dict[str, SingleDataJsonType]: + d: dict[str, SingleDataJsonType] = {"label": self.label} for k in ("TWO", "TWO_init"): if getattr(self, k) is not None: d[k] = transform_to_list(getattr(self, k)) @@ -96,7 +93,7 @@ def to_json(self) -> Dict[str, SingleDataJsonType]: return d @staticmethod - def from_json(d: DataJsonType) -> "ObjectData": + def from_json(d: DataJsonType) -> ObjectData: assert isinstance(d, dict) label = d["label"] assert isinstance(label, str) @@ -122,17 +119,15 @@ def from_json(d: DataJsonType) -> "ObjectData": @dataclass class CameraData: - K: Optional[np.ndarray] = None - resolution: Optional[Resolution] = None - TWC: Optional[Transform] = None - camera_id: Optional[str] = None - TWC_init: Optional[ - Transform - ] = None # Some pose estimation datasets 
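# Standalone sketch of the per-epoch sharding done in __iter__ above: shuffle
# with an epoch-dependent seed, truncate to the global epoch size, then hand
# each replica its split. The sizes below are hypothetical example values.
import numpy as np

num_replicas, rank, epoch, seed = 4, 1, 3, 0
all_indices = np.arange(1000)
total_epoch_size = 400

rng = np.random.RandomState(epoch + seed)        # same seed on every replica
shuffled = rng.permutation(all_indices)[:total_epoch_size]
local_indices = np.array_split(shuffled, num_replicas)[rank]
assert len(local_indices) == total_epoch_size // num_replicas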
(ModelNet) provide an initial pose estimate + K: np.ndarray | None = None + resolution: Resolution | None = None + TWC: Transform | None = None + camera_id: str | None = None + TWC_init: Transform | None = None # Some pose estimation datasets (ModelNet) provide an initial pose estimate # NOTE: This should be loaded externally def to_json(self) -> str: - d: Dict[str, SingleDataJsonType] = dict() + d: dict[str, SingleDataJsonType] = {} for k in ("TWC", "TWC_init"): if getattr(self, k) is not None: d[k] = transform_to_list(getattr(self, k)) @@ -145,7 +140,7 @@ def to_json(self) -> str: return json.dumps(d) @staticmethod - def from_json(data_str: str) -> "CameraData": + def from_json(data_str: str) -> CameraData: d: DataJsonType = json.loads(data_str) assert isinstance(d, dict) data = CameraData() @@ -183,7 +178,7 @@ def to_json(self) -> str: return json.dumps(self.__dict__) @staticmethod - def from_json(data_str: str) -> "ObservationInfos": + def from_json(data_str: str) -> ObservationInfos: d = json.loads(data_str) assert "scene_id" in d assert "view_id" in d @@ -192,27 +187,27 @@ def from_json(data_str: str) -> "ObservationInfos": @dataclass class SceneObservation: - rgb: Optional[np.ndarray] = None # (h,w,3) uint8 numpy array - depth: Optional[np.ndarray] = None # (h, w), np.float32 - segmentation: Optional[np.ndarray] = None # (h, w), np.uint32 (important); + rgb: np.ndarray | None = None # (h,w,3) uint8 numpy array + depth: np.ndarray | None = None # (h, w), np.float32 + segmentation: np.ndarray | None = None # (h, w), np.uint32 (important); # contains objects unique ids. int64 are not handled and can be dangerous when used with PIL - infos: Optional[ObservationInfos] = None - object_datas: Optional[List[ObjectData]] = None - camera_data: Optional[CameraData] = None - binary_masks: Optional[ - Dict[int, np.ndarray] - ] = None # dict mapping unique id to (h, w) np.bool_ + infos: ObservationInfos | None = None + object_datas: list[ObjectData] | None = None + camera_data: CameraData | None = None + binary_masks: dict[int, np.ndarray] | None = None # dict mapping unique id to (h, w) np.bool_ @staticmethod def collate_fn( - batch: List[SceneObservation], object_labels: Optional[List[str]] = None - ) -> Dict[Any, Any]: + batch: list[SceneObservation], object_labels: list[str] | None = None, + ) -> dict[Any, Any]: """Collate a batch of SceneObservation objects. Args: + ---- object_labels: If passed in parse only those object labels. 
Returns: + ------- A dict with fields cameras: PandasTensorCollection rgb: torch.tensor [B,3,H,W] torch.uint8 @@ -235,21 +230,21 @@ def collate_fn( rgb_images = [] depth_images = [] - for n, data in enumerate(batch): + for _n, data in enumerate(batch): # data is of type SceneObservation batch_im_id += 1 - im_info = dict( - scene_id=data.infos.scene_id, - view_id=data.infos.view_id, - batch_im_id=batch_im_id, - ) + im_info = { + "scene_id": data.infos.scene_id, + "view_id": data.infos.view_id, + "batch_im_id": batch_im_id, + } im_infos.append(im_info) K.append(data.camera_data.K) - cam_info = dict( - TWC=data.camera_data.TWC, - resolution=data.camera_data.resolution, - ) + cam_info = { + "TWC": data.camera_data.TWC, + "resolution": data.camera_data.resolution, + } cam_infos.append(cam_info) # [3,H,W] @@ -288,19 +283,19 @@ def collate_fn( infos=pd.DataFrame(cam_infos), K=torch.as_tensor(np.stack(K)), ) - return dict( - cameras=cameras, - rgb=torch.stack(rgb_images), # [B,3,H,W] - depth=torch.as_tensor(np.stack(depth_images)), # [B,1,H,W] or [B,0] - im_infos=im_infos, - gt_detections=gt_detections, - gt_data=gt_data, - initial_data=initial_data, - ) + return { + "cameras": cameras, + "rgb": torch.stack(rgb_images), # [B,3,H,W] + "depth": torch.as_tensor(np.stack(depth_images)), # [B,1,H,W] or [B,0] + "im_infos": im_infos, + "gt_detections": gt_detections, + "gt_data": gt_data, + "initial_data": initial_data, + } def as_pandas_tensor_collection( self, - object_labels: Optional[List[str]] = None, + object_labels: list[str] | None = None, ) -> SceneObservationTensorCollection: """Convert SceneData to a PandasTensorCollection representation.""" obs = self @@ -319,15 +314,15 @@ def as_pandas_tensor_collection( if obs.camera_data.TWC_init is not None: TWC_init = torch.as_tensor(obs.camera_data.TWC_init.matrix).float() - for n, obj_data in enumerate(obs.object_datas): + for _n, obj_data in enumerate(obs.object_datas): if object_labels is not None and obj_data.label not in object_labels: continue - info = dict( - label=obj_data.label, - scene_id=obs.infos.scene_id, - view_id=obs.infos.view_id, - visib_fract=getattr(obj_data, "visib_fract", 1), - ) + info = { + "label": obj_data.label, + "scene_id": obs.infos.scene_id, + "view_id": obs.infos.view_id, + "visib_fract": getattr(obj_data, "visib_fract", 1), + } infos.append(info) TWO.append(torch.tensor(obj_data.TWO.matrix).float()) bboxes.append(torch.tensor(obj_data.bbox_modal).float()) @@ -384,7 +379,7 @@ def as_pandas_tensor_collection( class SceneDataset(torch.utils.data.Dataset): def __init__( self, - frame_index: Optional[pd.DataFrame], + frame_index: pd.DataFrame | None, load_depth: bool = False, load_segmentation: bool = True, ): @@ -392,18 +387,20 @@ def __init__( Can be an IterableDataset or a map-style Dataset. Args: + ---- frame_index (pd.DataFrame): Must contain the following columns: scene_id, view_id load_depth (bool, optional): Whether to load depth images. Defaults to False. load_segmentation (bool, optional): Whether to load image segmentation. Defaults to True. Defaults to f'{label}'. 
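# Minimal sketch of the frame index a SceneDataset subclass is expected to
# provide, per the docstring above: one row per (scene_id, view_id) pair.
# The identifiers are made-up example values.
import pandas as pd

frame_index = pd.DataFrame({"scene_id": [0, 0, 1], "view_id": [0, 1, 0]})
print(len(frame_index))   # number of observations the dataset exposes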
""" - self.frame_index = frame_index self.load_depth = load_depth self.load_segmentation = load_segmentation - def _load_scene_observation(self, image_infos: ObservationInfos) -> SceneObservation: + def _load_scene_observation( + self, image_infos: ObservationInfos, + ) -> SceneObservation: raise NotImplementedError def __getitem__(self, idx: int) -> SceneObservation: @@ -461,7 +458,7 @@ def __iter__(self) -> Iterator[SceneObservation]: class IterableMultiSceneDataset(IterableSceneDataset): def __init__( self, - list_iterable_scene_ds: List[IterableSceneDataset], + list_iterable_scene_ds: list[IterableSceneDataset], deterministic: bool = False, ): self.list_iterable_scene_ds = list_iterable_scene_ds @@ -483,4 +480,3 @@ def __iter__(self) -> Iterator[SceneObservation]: while True: idx = self.rng.randint(0, len(self.iterators) - 1) yield next(self.iterators[idx]) - diff --git a/happypose/toolbox/datasets/scene_dataset_wrappers.py b/happypose/toolbox/datasets/scene_dataset_wrappers.py index ecc55e23..1352d70e 100644 --- a/happypose/toolbox/datasets/scene_dataset_wrappers.py +++ b/happypose/toolbox/datasets/scene_dataset_wrappers.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/happypose/toolbox/datasets/shapenet_object_dataset.py b/happypose/toolbox/datasets/shapenet_object_dataset.py index 787d02d4..82d9cf6a 100644 --- a/happypose/toolbox/datasets/shapenet_object_dataset.py +++ b/happypose/toolbox/datasets/shapenet_object_dataset.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -43,9 +42,9 @@ def __init__(self, synset_id, source_id): def load_object_infos(models_infos_path): - with open(models_infos_path, "r") as f: + with open(models_infos_path) as f: infos = json.load(f) - itos = dict() + itos = {} for info in infos: k = f"shapenet_{info['shapenet_synset_id']}_{info['shapenet_source_id']}" itos[info["obj_id"]] = k @@ -59,7 +58,7 @@ def make_shapenet_infos(shapenet_dir, model_name): taxonomy_path = shapenet_dir / "taxonomy.json" taxonomy = json.loads(taxonomy_path.read_text()) - synset_id_to_synset = dict() + synset_id_to_synset = {} def get_synset(synset_id): if synset_id not in synset_id_to_synset: @@ -120,7 +119,8 @@ def __init__( model_name = "model_normalized_pointcloud.obj" ypr_offset_deg = (0.0, 0.0, 0.0) else: - raise ValueError("split") + msg = "split" + raise ValueError(msg) synsets = make_shapenet_infos(self.shapenet_dir, model_name) main_synsets = [ @@ -131,7 +131,6 @@ def __init__( objects = [] for synset in main_synsets: - for source_id in synset.models_descendants: model_path = ( self.shapenet_dir diff --git a/happypose/toolbox/datasets/urdf_dataset.py b/happypose/toolbox/datasets/urdf_dataset.py index 80534f3c..c5f3a2c8 100644 --- a/happypose/toolbox/datasets/urdf_dataset.py +++ b/happypose/toolbox/datasets/urdf_dataset.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,7 +14,6 @@ """ - # Standard Library from pathlib import Path @@ -24,7 +22,9 @@ class UrdfDataset(RigidObjectDataset): - def __init__(self, ds_dir: Path, mesh_units: str = "m", label_format: str = "{label}"): + def __init__( + self, ds_dir: Path, mesh_units: str = "m", label_format: str = "{label}", + ): objects = [] for urdf_dir in ds_dir.iterdir(): urdf_paths = list(urdf_dir.glob("*.urdf")) @@ -33,6 +33,6 @@ def __init__(self, ds_dir: Path, mesh_units: str = "m", label_format: str = "{la label = urdf_dir.name label = label_format.format(label=label) objects.append( - RigidObject(label=label, mesh_path=urdf_path, mesh_units=mesh_units) + RigidObject(label=label, mesh_path=urdf_path, mesh_units=mesh_units), ) super().__init__(objects) diff --git a/happypose/toolbox/datasets/utils.py b/happypose/toolbox/datasets/utils.py index 418d854a..19802615 100644 --- a/happypose/toolbox/datasets/utils.py +++ b/happypose/toolbox/datasets/utils.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,7 +14,6 @@ """ - # Standard Library from typing import Dict, List @@ -26,17 +24,19 @@ def make_detections_from_segmentation( segmentations: np.ndarray, ) -> List[Dict[int, np.ndarray]]: - """ - segmentations: (n, h, w) int np.ndarray - """ + """segmentations: (n, h, w) int np.ndarray.""" assert segmentations.ndim == 3 detections = [] for segmentation_n in segmentations: - dets_n = dict() + dets_n = {} for unique_id in np.unique(segmentation_n): ids = np.where(segmentation_n == unique_id) - x1, y1, x2, y2 = np.min(ids[1]), np.min(ids[0]), np.max(ids[1]), np.max(ids[0]) + x1, y1, x2, y2 = ( + np.min(ids[1]), + np.min(ids[0]), + np.max(ids[1]), + np.max(ids[0]), + ) dets_n[int(unique_id)] = np.array([x1, y1, x2, y2]) detections.append(dets_n) return detections - diff --git a/happypose/toolbox/datasets/web_scene_dataset.py b/happypose/toolbox/datasets/web_scene_dataset.py index efea72b7..a2574b85 100644 --- a/happypose/toolbox/datasets/web_scene_dataset.py +++ b/happypose/toolbox/datasets/web_scene_dataset.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
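# Standalone sketch of the box extraction in make_detections_from_segmentation
# above: for each unique id in an integer mask, take min/max column and row.
# The mask below is a tiny made-up example.
import numpy as np

segmentation = np.zeros((4, 6), dtype=np.int32)
segmentation[1:3, 2:5] = 7                        # one object with unique id 7
rows, cols = np.where(segmentation == 7)
x1, y1, x2, y2 = cols.min(), rows.min(), cols.max(), rows.max()
print([x1, y1, x2, y2])                           # [2, 1, 4, 2]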
@@ -18,7 +17,6 @@ # Standard Library import io import json -import tarfile from collections import defaultdict from functools import partial from hashlib import sha1 @@ -65,13 +63,12 @@ def write_scene_ds_as_wds( frame_ids: Optional[List[int]] = None, depth_scale: int = 1000, ) -> None: - assert scene_ds.frame_index is not None wds_dir.mkdir(exist_ok=True, parents=True) frame_index = scene_ds.frame_index.copy() shard_writer = wds.ShardWriter( - str(wds_dir / shard_format), maxcount=maxcount, start_shard=0 + str(wds_dir / shard_format), maxcount=maxcount, start_shard=0, ) sampler = None @@ -96,7 +93,7 @@ def write_scene_ds_as_wds( if keep_labels_set is not None: assert obs.object_datas is not None - object_labels = set([obj.label for obj in obs.object_datas]) + object_labels = {obj.label for obj in obs.object_datas} n_objects_valid = len(object_labels.intersection(keep_labels_set)) if n_objects_valid == 0: continue @@ -127,9 +124,9 @@ def write_scene_ds_as_wds( frame_index = frame_index.loc[:, ["scene_id", "view_id", "key", "shard_fname"]] shard_writer.close() frame_index.to_feather(wds_dir / "frame_index.feather") - ds_infos = dict( - depth_scale=depth_scale, - ) + ds_infos = { + "depth_scale": depth_scale, + } (wds_dir / "infos.json").write_text(json.dumps(ds_infos)) return @@ -140,7 +137,6 @@ def load_scene_ds_obs( load_depth: bool = False, label_format: str = "{label}", ) -> SceneObservation: - assert isinstance(sample["rgb.png"], bytes) assert isinstance(sample["segmentation.png"], bytes) assert isinstance(sample["depth.png"], bytes) diff --git a/happypose/toolbox/inference/detector.py b/happypose/toolbox/inference/detector.py index b9587d15..a158d080 100644 --- a/happypose/toolbox/inference/detector.py +++ b/happypose/toolbox/inference/detector.py @@ -1,4 +1,3 @@ - # Standard Library from abc import ABCMeta, abstractmethod @@ -10,9 +9,6 @@ class DetectorModule(torch.nn.Module, metaclass=ABCMeta): - @abstractmethod - def get_detections( - self - ) -> DetectionsType: + def get_detections(self) -> DetectionsType: pass diff --git a/happypose/toolbox/inference/pose_estimator.py b/happypose/toolbox/inference/pose_estimator.py index 0792b7b4..7d903776 100644 --- a/happypose/toolbox/inference/pose_estimator.py +++ b/happypose/toolbox/inference/pose_estimator.py @@ -10,21 +10,14 @@ class PoseEstimationModule(torch.nn.Module, metaclass=ABCMeta): - @abstractmethod - def forward_coarse_model( - self - ) -> Tuple[PoseEstimatesType, dict]: + def forward_coarse_model(self) -> Tuple[PoseEstimatesType, dict]: pass @abstractmethod - def forward_refiner( - self - ) -> Tuple[dict, dict]: + def forward_refiner(self) -> Tuple[dict, dict]: pass @abstractmethod - def run_inference_pipeline( - self - ) -> Tuple[PoseEstimatesType, dict]: + def run_inference_pipeline(self) -> Tuple[PoseEstimatesType, dict]: pass diff --git a/happypose/toolbox/inference/types.py b/happypose/toolbox/inference/types.py index 448ee63a..1781ff67 100644 --- a/happypose/toolbox/inference/types.py +++ b/happypose/toolbox/inference/types.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
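# Sketch of the abstract-interface pattern used by DetectorModule and
# PoseEstimationModule above: an ABCMeta-backed torch.nn.Module whose
# subclasses must implement the declared methods. The class names here are
# hypothetical, not part of the library.
from abc import ABCMeta, abstractmethod

import torch

class BaseHead(torch.nn.Module, metaclass=ABCMeta):
    @abstractmethod
    def get_outputs(self) -> dict:
        ...

class ConstantHead(BaseHead):
    def get_outputs(self) -> dict:
        return {"score": torch.tensor(1.0)}

head = ConstantHead()          # instantiating BaseHead itself would raise TypeError
print(head.get_outputs())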
@@ -19,7 +18,6 @@ # Standard Library from dataclasses import dataclass -from typing import Optional, Tuple # Third Party import numpy as np @@ -95,23 +93,21 @@ class InferenceConfig: n_refiner_iterations: int = 5 n_pose_hypotheses: int = 5 run_depth_refiner: bool = False - depth_refiner: Optional[str] = None # ['icp', 'teaserpp'] + depth_refiner: str | None = None # ['icp', 'teaserpp'] bsz_objects: int = 16 # How many parallel refiners to run bsz_images: int = 576 # How many images to push through coarse model @dataclass class ObservationTensor: - """ - - images: [B,C,H,W] with C=3 (rgb) or C=4 (rgbd). RGB dimensions should already - be normalized to be in [0,1] by diving the uint8 values by 255 + """images: [B,C,H,W] with C=3 (rgb) or C=4 (rgbd). RGB dimensions should already + be normalized to be in [0,1] by diving the uint8 values by 255. K: [B,3,3] camera intrinsics """ images: torch.Tensor # [B,C,H,W] - K: Optional[torch.Tensor] = None # [B,3,3] + K: torch.Tensor | None = None # [B,3,3] def cuda(self) -> ObservationTensor: self.images = self.images.cuda() @@ -122,14 +118,14 @@ def cuda(self) -> ObservationTensor: @property def batch_size(self) -> int: """Returns the batch size.""" - return self.images.shape[0] @property def depth(self) -> torch.tensor: """Returns depth tensor. - Returns: + Returns + ------- torch.tensor with shape [B,H,W] """ assert self.channel_dim == 4 @@ -141,7 +137,6 @@ def channel_dim(self) -> int: return self.images.shape[1] def is_valid(self) -> bool: - if not self.images.ndim == 4: return False @@ -169,18 +164,18 @@ def is_valid(self) -> bool: @staticmethod def from_numpy( rgb: np.ndarray, - depth: Optional[np.ndarray] = None, - K: Optional[np.ndarray] = None, + depth: np.ndarray | None = None, + K: np.ndarray | None = None, ) -> ObservationTensor: """Create an ObservationData type from numpy data. Args: + ---- rgb: [H,W,3] np.uint8 depth: [H,W] np.float K: [3,3] np.float """ - assert rgb.dtype == np.uint8 rgb_tensor = torch.as_tensor(rgb).float() / 255 @@ -201,17 +196,15 @@ def from_numpy( @staticmethod def from_torch_batched( - rgb: torch.Tensor, depth: torch.Tensor, K: torch.Tensor + rgb: torch.Tensor, depth: torch.Tensor, K: torch.Tensor, ) -> ObservationTensor: - """ - - Args: + """Args: + ---- rgb: [B,3,H,W] torch.uint8 depth: [B,1,H,W] torch.float - K: [B,3,3] torch.float + K: [B,3,3] torch.float. """ - assert rgb.dtype == torch.uint8 # [B,3,H,W] @@ -221,7 +214,6 @@ def from_torch_batched( # [C,H,W] if depth is not None: - if depth.ndim == 3: depth.unsqueeze(1) diff --git a/happypose/toolbox/inference/utils.py b/happypose/toolbox/inference/utils.py index 08084143..bde10134 100644 --- a/happypose/toolbox/inference/utils.py +++ b/happypose/toolbox/inference/utils.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
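# Small sketch of the tensor layout documented for ObservationTensor above:
# RGB scaled to [0, 1], an optional depth plane appended as a fourth channel,
# and per-image intrinsics K. Shapes and values are example data only.
import numpy as np
import torch

h, w = 480, 640
rgb = np.zeros((h, w, 3), dtype=np.uint8)
depth = np.ones((h, w), dtype=np.float32)
K = np.array([[600.0, 0.0, w / 2], [0.0, 600.0, h / 2], [0.0, 0.0, 1.0]])

rgb_t = torch.as_tensor(rgb).float().permute(2, 0, 1) / 255   # [3,H,W] in [0,1]
depth_t = torch.as_tensor(depth).unsqueeze(0)                 # [1,H,W]
images = torch.cat([rgb_t, depth_t], dim=0).unsqueeze(0)      # [1,4,H,W]
K_t = torch.as_tensor(K).unsqueeze(0)                         # [1,3,3]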
@@ -17,7 +16,7 @@ # Standard Library from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import List, Optional, Tuple, Union # Third Party import numpy as np @@ -26,36 +25,44 @@ import yaml from omegaconf import OmegaConf -# HappyPose -from happypose.toolbox.datasets.object_dataset import RigidObject, RigidObjectDataset -from happypose.toolbox.datasets.scene_dataset import CameraData, ObjectData - # MegaPose import happypose.pose_estimators.megapose.src.megapose import happypose.toolbox.utils.tensor_collection as tc from happypose.pose_estimators.megapose.src.megapose.config import EXP_DIR -from happypose.toolbox.datasets.datasets_cfg import make_object_dataset from happypose.pose_estimators.megapose.src.megapose.inference.detector import Detector -from happypose.toolbox.inference.types import DetectionsType, PoseEstimatesType -from happypose.toolbox.lib3d.rigid_mesh_database import MeshDataBase -from happypose.pose_estimators.megapose.src.megapose.models.pose_rigid import PosePredictor -from happypose.toolbox.renderer.panda3d_batch_renderer import Panda3dBatchRenderer +from happypose.pose_estimators.megapose.src.megapose.models.pose_rigid import ( + PosePredictor, +) from happypose.pose_estimators.megapose.src.megapose.training.detector_models_cfg import ( check_update_config as check_update_config_detector, ) -from happypose.pose_estimators.megapose.src.megapose.training.detector_models_cfg import create_model_detector +from happypose.pose_estimators.megapose.src.megapose.training.detector_models_cfg import ( + create_model_detector, +) from happypose.pose_estimators.megapose.src.megapose.training.pose_models_cfg import ( check_update_config as check_update_config_pose, ) -from happypose.pose_estimators.megapose.src.megapose.training.pose_models_cfg import create_model_pose -from happypose.pose_estimators.megapose.src.megapose.training.training_config import TrainingConfig +from happypose.pose_estimators.megapose.src.megapose.training.pose_models_cfg import ( + create_model_pose, +) +from happypose.pose_estimators.megapose.src.megapose.training.training_config import ( + TrainingConfig, +) + +# HappyPose +from happypose.toolbox.datasets.object_dataset import RigidObjectDataset +from happypose.toolbox.datasets.scene_dataset import CameraData, ObjectData +from happypose.toolbox.inference.types import DetectionsType, PoseEstimatesType +from happypose.toolbox.lib3d.rigid_mesh_database import MeshDataBase +from happypose.toolbox.renderer.panda3d_batch_renderer import Panda3dBatchRenderer from happypose.toolbox.utils.logging import get_logger from happypose.toolbox.utils.models_compat import change_keys_of_older_models from happypose.toolbox.utils.tensor_collection import PandasTensorCollection logger = get_logger(__name__) -device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + def load_detector(run_id: str) -> torch.nn.Module: run_dir = EXP_DIR / run_id @@ -63,7 +70,7 @@ def load_detector(run_id: str) -> torch.nn.Module: cfg = check_update_config_detector(cfg) label_to_category_id = cfg.label_to_category_id model = create_model_detector(cfg, len(label_to_category_id)) - ckpt = torch.load(run_dir / "checkpoint.pth.tar", map_location=torch.device('cpu')) + ckpt = torch.load(run_dir / "checkpoint.pth.tar", map_location=torch.device("cpu")) ckpt = ckpt["state_dict"] model.load_state_dict(ckpt) model = model.to(device).eval() @@ -87,8 +94,11 @@ def 
load_pose_models( force_panda3d_renderer: bool = False, renderer_kwargs: Optional[dict] = None, models_root: Path = EXP_DIR, -) -> Tuple[torch.nn.Module, torch.nn.Module, happypose.toolbox.lib3d.rigid_mesh_database.BatchedMeshes]: - +) -> Tuple[ + torch.nn.Module, + torch.nn.Module, + happypose.toolbox.lib3d.rigid_mesh_database.BatchedMeshes, +]: coarse_run_dir = models_root / coarse_run_id coarse_cfg: TrainingConfig = load_cfg(coarse_run_dir / "config.yaml") coarse_cfg = check_update_config_pose(coarse_cfg) @@ -107,7 +117,7 @@ def load_pose_models( def make_renderer(renderer_type: str) -> Panda3dBatchRenderer: logger.debug("renderer_kwargs", renderer_kwargs) if renderer_kwargs is None: - renderer_kwargs_ = dict() + renderer_kwargs_ = {} else: renderer_kwargs_ = renderer_kwargs @@ -116,7 +126,9 @@ def make_renderer(renderer_type: str) -> Panda3dBatchRenderer: renderer_kwargs_.setdefault("n_workers", 4) if renderer_type == "panda3d" or force_panda3d_renderer: - renderer = Panda3dBatchRenderer(object_dataset=object_dataset, **renderer_kwargs_) + renderer = Panda3dBatchRenderer( + object_dataset=object_dataset, **renderer_kwargs_, + ) else: raise ValueError(renderer_type) return renderer @@ -131,12 +143,14 @@ def make_renderer(renderer_type: str) -> Panda3dBatchRenderer: def load_model(run_id: str, renderer: Panda3dBatchRenderer) -> PosePredictor: if run_id is None: - return + return None run_dir = models_root / run_id cfg: TrainingConfig = load_cfg(run_dir / "config.yaml") cfg = check_update_config_pose(cfg) model = create_model_pose(cfg, renderer=renderer, mesh_db=mesh_db_batched) - ckpt = torch.load(run_dir / "checkpoint.pth.tar", map_location=torch.device('cpu')) + ckpt = torch.load( + run_dir / "checkpoint.pth.tar", map_location=torch.device("cpu"), + ) ckpt = ckpt["state_dict"] ckpt = change_keys_of_older_models(ckpt) model.load_state_dict(ckpt) @@ -152,7 +166,7 @@ def load_model(run_id: str, renderer: Panda3dBatchRenderer) -> PosePredictor: def add_instance_id( - inputs: Union[PoseEstimatesType, DetectionsType] + inputs: Union[PoseEstimatesType, DetectionsType], ) -> Union[PoseEstimatesType, DetectionsType]: """Adds a column with instance_id to the provided detections. @@ -168,7 +182,7 @@ def create_instance_id(df: pd.DataFrame) -> pd.DataFrame: df = inputs.infos df = df.groupby(["batch_im_id", "label"], group_keys=False).apply( - lambda df: create_instance_id(df) + lambda df: create_instance_id(df), ) inputs.infos = df return inputs @@ -180,7 +194,6 @@ def filter_detections( one_instance_per_class: bool = False, ) -> DetectionsType: """Filter detections based on kwargs.""" - if labels is not None: df = detections.infos df = df[df.label.isin(labels)] @@ -200,7 +213,8 @@ def filter_detections( def make_cameras(camera_data: List[CameraData]) -> PandasTensorCollection: """Creates a PandasTensorCollection from list of camera data. - Returns: + Returns + ------- PandasTensorCollection. infos: pd.DataFrame with columns ['batch_im_id', 'resolution'] tensor: K with shape [B,3,3] of camera intrinsics matrices. 
@@ -209,18 +223,18 @@ def make_cameras(camera_data: List[CameraData]) -> PandasTensorCollection: K = [] for n, cam_data in enumerate(camera_data): K.append(torch.tensor(cam_data.K)) - infos.append(dict(batch_im_id=n, resolution=cam_data.resolution)) + infos.append({"batch_im_id": n, "resolution": cam_data.resolution}) return tc.PandasTensorCollection(infos=pd.DataFrame(infos), K=torch.stack(K)) def make_detections_from_object_data(object_data: List[ObjectData]) -> DetectionsType: infos = pd.DataFrame( - dict( - label=[data.label for data in object_data], - batch_im_id=0, - instance_id=np.arange(len(object_data)), - ) + { + "label": [data.label for data in object_data], + "batch_im_id": 0, + "instance_id": np.arange(len(object_data)), + }, ) bboxes = torch.as_tensor( np.stack([data.bbox_modal for data in object_data]), diff --git a/happypose/toolbox/lib3d/camera_geometry.py b/happypose/toolbox/lib3d/camera_geometry.py index bc1c831d..f9f1ae37 100644 --- a/happypose/toolbox/lib3d/camera_geometry.py +++ b/happypose/toolbox/lib3d/camera_geometry.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,7 +14,6 @@ """ - # Standard Library from typing import Tuple @@ -30,7 +28,9 @@ def project_points(points_3d, K, TCO): n_points = points_3d.shape[1] device = points_3d.device if points_3d.shape[-1] == 3: - points_3d = torch.cat((points_3d, torch.ones(batch_size, n_points, 1).to(device)), dim=-1) + points_3d = torch.cat( + (points_3d, torch.ones(batch_size, n_points, 1).to(device)), dim=-1, + ) P = K @ TCO[:, :3] suv = (P.unsqueeze(1) @ points_3d.unsqueeze(-1)).squeeze(-1) suv = suv / suv[..., [-1]] @@ -44,7 +44,9 @@ def project_points_robust(points_3d, K, TCO, z_min=0.1): n_points = points_3d.shape[1] device = points_3d.device if points_3d.shape[-1] == 3: - points_3d = torch.cat((points_3d, torch.ones(batch_size, n_points, 1).to(device)), dim=-1) + points_3d = torch.cat( + (points_3d, torch.ones(batch_size, n_points, 1).to(device)), dim=-1, + ) P = K @ TCO[:, :3] suv = (P.unsqueeze(1) @ points_3d.unsqueeze(-1)).squeeze(-1) z = suv[..., -1] @@ -65,14 +67,18 @@ def boxes_from_uv(uv): def get_K_crop_resize( - K: torch.Tensor, boxes: torch.Tensor, orig_size: Tuple[int, int], crop_resize: Tuple[int, int] + K: torch.Tensor, + boxes: torch.Tensor, + orig_size: Tuple[int, int], + crop_resize: Tuple[int, int], ) -> torch.Tensor: - """ - Adapted from https://github.com/BerkeleyAutomation/perception/blob/master/perception/camera_intrinsics.py + """Adapted from https://github.com/BerkeleyAutomation/perception/blob/master/perception/camera_intrinsics.py Skew is not handled. + Args: + ---- K: (bsz, 3, 3) float - boxes: (bsz, 4) float + boxes: (bsz, 4) float. 
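# Standalone sketch of the pinhole projection performed by project_points
# above: homogenize the points, apply P = K @ TCO[:, :3], then divide by the
# last coordinate. All tensors are random example data.
import torch

bsz, n_points = 2, 5
points_3d = torch.rand(bsz, n_points, 3) + torch.tensor([0.0, 0.0, 1.0])  # keep z > 0
K = torch.eye(3).expand(bsz, 3, 3)
TCO = torch.eye(4).expand(bsz, 4, 4)

points_h = torch.cat((points_3d, torch.ones(bsz, n_points, 1)), dim=-1)   # (bsz, n, 4)
P = K @ TCO[:, :3]                                                        # (bsz, 3, 4)
suv = (P.unsqueeze(1) @ points_h.unsqueeze(-1)).squeeze(-1)               # (bsz, n, 3)
uv = (suv / suv[..., [-1]])[..., :2]                                      # (bsz, n, 2)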
""" assert K.dim() == 3 assert K.shape[1:] == (3, 3) @@ -115,12 +121,16 @@ def get_K_crop_resize( return new_K -def cropresize_backtransform_points2d(input_wh, boxes_2d_crop, output_wh, points_2d_in_output): +def cropresize_backtransform_points2d( + input_wh, boxes_2d_crop, output_wh, points_2d_in_output, +): bsz = input_wh.shape[0] assert output_wh.shape == (bsz, 2) assert input_wh.shape == (bsz, 2) assert points_2d_in_output.dim() == 3 points_2d_normalized = points_2d_in_output / output_wh.unsqueeze(1) - points_2d = boxes_2d_crop[:, [0, 1]].unsqueeze(1) + points_2d_normalized * input_wh.unsqueeze(1) + points_2d = boxes_2d_crop[:, [0, 1]].unsqueeze( + 1, + ) + points_2d_normalized * input_wh.unsqueeze(1) return points_2d diff --git a/happypose/toolbox/lib3d/cropping.py b/happypose/toolbox/lib3d/cropping.py index 00621e5b..4792d020 100644 --- a/happypose/toolbox/lib3d/cropping.py +++ b/happypose/toolbox/lib3d/cropping.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,7 +14,6 @@ """ - # Third Party import torch import torchvision @@ -27,10 +25,11 @@ from .camera_geometry import boxes_from_uv, project_points, project_points_robust -def deepim_boxes(rend_center_uv, obs_boxes, rend_boxes, lamb=1.4, im_size=(240, 320), clamp=False): - """ - gt_boxes: N x 4 - crop_boxes: N x 4 +def deepim_boxes( + rend_center_uv, obs_boxes, rend_boxes, lamb=1.4, im_size=(240, 320), clamp=False, +): + """gt_boxes: N x 4 + crop_boxes: N x 4. """ lobs, robs, uobs, dobs = obs_boxes[:, [0, 2, 1, 3]].t() lrend, rrend, urend, drend = rend_boxes[:, [0, 2, 1, 3]].t() @@ -47,10 +46,12 @@ def deepim_boxes(rend_center_uv, obs_boxes, rend_boxes, lamb=1.4, im_size=(240, r = w / h xdists = torch.cat( - ((lobs - xc).abs(), (lrend - xc).abs(), (robs - xc).abs(), (rrend - xc).abs()), dim=1 + ((lobs - xc).abs(), (lrend - xc).abs(), (robs - xc).abs(), (rrend - xc).abs()), + dim=1, ) ydists = torch.cat( - ((uobs - yc).abs(), (urend - yc).abs(), (dobs - yc).abs(), (drend - yc).abs()), dim=1 + ((uobs - yc).abs(), (urend - yc).abs(), (dobs - yc).abs(), (drend - yc).abs()), + dim=1, ) xdist = xdists.max(dim=1)[0] ydist = ydists.max(dim=1)[0] @@ -59,7 +60,9 @@ def deepim_boxes(rend_center_uv, obs_boxes, rend_boxes, lamb=1.4, im_size=(240, xc, yc = xc.squeeze(-1), yc.squeeze(-1) x1, y1, x2, y2 = xc - width / 2, yc - height / 2, xc + width / 2, yc + height / 2 - boxes = torch.cat((x1.unsqueeze(1), y1.unsqueeze(1), x2.unsqueeze(1), y2.unsqueeze(1)), dim=1) + boxes = torch.cat( + (x1.unsqueeze(1), y1.unsqueeze(1), x2.unsqueeze(1), y2.unsqueeze(1)), dim=1, + ) assert not clamp if clamp: boxes[:, [0, 2]] = torch.clamp(boxes[:, [0, 2]], 0, w - 1) @@ -67,16 +70,24 @@ def deepim_boxes(rend_center_uv, obs_boxes, rend_boxes, lamb=1.4, im_size=(240, return boxes -def deepim_crops(images, obs_boxes, K, TCO_pred, O_vertices, output_size=None, lamb=1.4): +def deepim_crops( + images, obs_boxes, K, TCO_pred, O_vertices, output_size=None, lamb=1.4, +): batch_size, _, h, w = images.shape device = images.device if output_size is None: output_size = (h, w) uv = project_points(O_vertices, K, TCO_pred) rend_boxes = boxes_from_uv(uv) - rend_center_uv = project_points(torch.zeros(batch_size, 1, 3).to(device), K, TCO_pred) - boxes = deepim_boxes(rend_center_uv, obs_boxes, rend_boxes, 
im_size=(h, w), lamb=lamb) - boxes = torch.cat((torch.arange(batch_size).unsqueeze(1).to(device).float(), boxes), dim=1) + rend_center_uv = project_points( + torch.zeros(batch_size, 1, 3).to(device), K, TCO_pred, + ) + boxes = deepim_boxes( + rend_center_uv, obs_boxes, rend_boxes, im_size=(h, w), lamb=lamb, + ) + boxes = torch.cat( + (torch.arange(batch_size).unsqueeze(1).to(device).float(), boxes), dim=1, + ) crops = crop_images(images, boxes, output_size=output_size, sampling_ratio=4) return boxes[:, 1:], crops @@ -101,9 +112,15 @@ def deepim_crops_robust( rend_boxes = boxes_from_uv(uv) TCR = TCO_pred.clone() TCR[:, :3, -1] = tCR_in - rend_center_uv = project_points_robust(torch.zeros(batch_size, 1, 3).to(device), K, TCR) - boxes = deepim_boxes(rend_center_uv, obs_boxes, rend_boxes, im_size=(h, w), lamb=lamb) - boxes = torch.cat((torch.arange(batch_size).unsqueeze(1).to(device).float(), boxes), dim=1) + rend_center_uv = project_points_robust( + torch.zeros(batch_size, 1, 3).to(device), K, TCR, + ) + boxes = deepim_boxes( + rend_center_uv, obs_boxes, rend_boxes, im_size=(h, w), lamb=lamb, + ) + boxes = torch.cat( + (torch.arange(batch_size).unsqueeze(1).to(device).float(), boxes), dim=1, + ) crops = None if return_crops: crops = crop_images(images, boxes, output_size=output_size, sampling_ratio=4) @@ -123,10 +140,12 @@ def crop_images(images, boxes, output_size, sampling_ratio): if not has_depth: crops = torchvision.ops.roi_align( - images, boxes, output_size=output_size, sampling_ratio=sampling_ratio + images, boxes, output_size=output_size, sampling_ratio=sampling_ratio, ) else: - crops = torchvision.ops.roi_align(images, boxes, output_size=output_size, sampling_ratio=4) + crops = torchvision.ops.roi_align( + images, boxes, output_size=output_size, sampling_ratio=4, + ) # roi_align can result in invalid depth measurements # since it does interpolation. Simply set those to zero @@ -135,7 +154,7 @@ def crop_images(images, boxes, output_size, sampling_ratio): depth_valid = torch.zeros_like(images[:, DEPTH_DIMS]) depth_valid[depth > 0] = 1 depth_valid_crops = torchvision.ops.roi_align( - depth_valid, boxes, output_size=output_size, sampling_ratio=4 + depth_valid, boxes, output_size=output_size, sampling_ratio=4, ) depth_mask = torch.ones_like(depth_valid_crops) depth_mask[depth_valid_crops < 0.99] = 0 diff --git a/happypose/toolbox/lib3d/distances.py b/happypose/toolbox/lib3d/distances.py index 871d5dd4..4894447d 100644 --- a/happypose/toolbox/lib3d/distances.py +++ b/happypose/toolbox/lib3d/distances.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,7 +14,6 @@ """ - # Third Party import torch diff --git a/happypose/toolbox/lib3d/mesh_losses.py b/happypose/toolbox/lib3d/mesh_losses.py index bde4a9c1..907fd50c 100644 --- a/happypose/toolbox/lib3d/mesh_losses.py +++ b/happypose/toolbox/lib3d/mesh_losses.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
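# Sketch of the roi_align-based cropping used by crop_images above: each box
# carries its batch index in the first column, matching the torch.arange
# prefix added before cropping. Sizes below are example values.
import torch
import torchvision

images = torch.rand(2, 3, 240, 320)
boxes = torch.tensor(
    [
        [0.0, 10.0, 20.0, 110.0, 120.0],   # [batch_index, x1, y1, x2, y2]
        [1.0, 30.0, 40.0, 130.0, 140.0],
    ]
)
crops = torchvision.ops.roi_align(
    images, boxes, output_size=(64, 64), sampling_ratio=4,
)
print(crops.shape)   # torch.Size([2, 3, 64, 64])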
@@ -15,7 +14,6 @@ """ - # Third Party import torch @@ -44,7 +42,9 @@ def compute_ADD_L1_loss(TCO_gt, TCO_pred, points): assert TCO_pred.shape == (bsz, 4, 4) and TCO_gt.shape == (bsz, 4, 4) assert points.dim() == 3 and points.shape[-1] == 3 dists = ( - (transform_pts(TCO_gt, points) - transform_pts(TCO_pred, points)).abs().mean(dim=(-1, -2)) + (transform_pts(TCO_gt, points) - transform_pts(TCO_pred, points)) + .abs() + .mean(dim=(-1, -2)) ) return dists diff --git a/happypose/toolbox/lib3d/mesh_ops.py b/happypose/toolbox/lib3d/mesh_ops.py index c29b06f5..e9a3eca0 100644 --- a/happypose/toolbox/lib3d/mesh_ops.py +++ b/happypose/toolbox/lib3d/mesh_ops.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,7 +14,6 @@ """ - # Third Party import numpy as np import torch diff --git a/happypose/toolbox/lib3d/multiview.py b/happypose/toolbox/lib3d/multiview.py index ceb08f15..edd68f83 100644 --- a/happypose/toolbox/lib3d/multiview.py +++ b/happypose/toolbox/lib3d/multiview.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,7 +14,6 @@ """ - # Third Party import numpy as np import torch @@ -37,7 +35,9 @@ def _get_views_TCO_pos_sphere(TCO, tCR, cam_positions_wrt_cam0): obj.reparentTo(root) obj.setPos(0, 0, 0) - TCCGL = np.array([[1, 0, 0, 0], [0, 0, -1, 0], [0, 1, 0, 0], [0, 0, 0, 1]], dtype=float) + TCCGL = np.array( + [[1, 0, 0, 0], [0, 0, -1, 0], [0, 1, 0, 0], [0, 0, 0, 1]], dtype=float, + ) tCR = np.array(tCR.tolist()) TOC = Transform(np.array(TCO.tolist())).inverse().toHomogeneousMatrix() @@ -96,7 +96,7 @@ def get_1_view_TCO_pos_front(TCO, tCR): cam_positions_wrt_cam0 = np.array( [ [0, 0, 0], - ] + ], ) return _get_views_TCO_pos_sphere(TCO, tCR, cam_positions_wrt_cam0) @@ -107,7 +107,7 @@ def get_3_views_TCO_pos_front(TCO, tCR): [0, 0, 0], [1, 0, 0], [-1, 0, 0], - ] + ], ) return _get_views_TCO_pos_sphere(TCO, tCR, cam_positions_wrt_cam0) @@ -120,7 +120,7 @@ def get_5_views_TCO_pos_front(TCO, tCR): [-1, 0, 0], [0, 0, 1], [0, 0, -1], - ] + ], ) return _get_views_TCO_pos_sphere(TCO, tCR, cam_positions_wrt_cam0) @@ -131,7 +131,7 @@ def get_3_views_TCO_pos_sphere(TCO, tCR): [0, 0, 0], [1, 0, 0], [-1, 0, 0], - ] + ], ) return _get_views_TCO_pos_sphere(TCO, tCR, cam_positions_wrt_cam0) @@ -145,7 +145,7 @@ def get_6_views_TCO_pos_sphere(TCO, tCR): [0, 1, 1], [-1, 1, 0], [0, 1, -1], - ] + ], ) return _get_views_TCO_pos_sphere(TCO, tCR, cam_positions_wrt_cam0) @@ -162,6 +162,7 @@ def get_26_views_TCO_pos_sphere(TCO, tCR): cam_positions_wrt_cam0 = np.array(cam_positions_wrt_cam0, dtype=float) return _get_views_TCO_pos_sphere(TCO, tCR, cam_positions_wrt_cam0) + def make_TCO_multiview( TCO: torch.Tensor, tCR: torch.Tensor, @@ -170,14 +171,16 @@ def make_TCO_multiview( remove_TCO_rendering: bool = False, views_inplane_rotations: bool = False, ): - """_summary_ + """_summary_. 
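# Self-contained sketch of the ADD-L1 distance computed above: transform the
# same model points with the ground-truth and predicted poses, then average
# the absolute difference. transform_points is a simplified stand-in for
# transform_pts; poses and points are random example data.
import torch

def transform_points(T, pts):
    return pts @ T[:, :3, :3].transpose(1, 2) + T[:, None, :3, 3]

bsz, n_pts = 4, 100
points = torch.rand(bsz, n_pts, 3)
TCO_gt = torch.eye(4).repeat(bsz, 1, 1)
TCO_pred = TCO_gt.clone()
TCO_pred[:, :3, 3] += 0.01            # small translation error

dists = (
    (transform_points(TCO_gt, points) - transform_points(TCO_pred, points))
    .abs()
    .mean(dim=(-1, -2))
)                                      # (bsz,) values, here all ~0.01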
Args: + ---- TCO (torch.Tensor): (bsz, 4, 4) tCR (torch.Tensor): (bsz, 3) Returns: + ------- _type_: _description_ """ bsz = TCO.shape[0] @@ -188,7 +191,7 @@ def make_TCO_multiview( if n_views == 1: TC0_CV = [] - for b in range(bsz): + for _b in range(bsz): TC0_CV_ = [np.eye(4)] TC0_CV.append(TC0_CV_) TC0_CV = torch.as_tensor(np.stack(TC0_CV), device=device, dtype=dtype) @@ -239,7 +242,7 @@ def make_TCO_multiview( for idx, angle in enumerate([np.pi / 2, np.pi, 3 * np.pi / 2]): idx = idx + 1 dR = torch.as_tensor( - transforms3d.euler.euler2mat(0, 0, angle), device=device, dtype=dtype + transforms3d.euler.euler2mat(0, 0, angle), device=device, dtype=dtype, ) TCV_O[:, :, idx, :3, :3] = dR @ TCV_O[:, :, idx, :3, :3] TCV_O = TCV_O.flatten(1, 2) diff --git a/happypose/toolbox/lib3d/rigid_mesh_database.py b/happypose/toolbox/lib3d/rigid_mesh_database.py index 6e4f5d9a..a7d272b2 100644 --- a/happypose/toolbox/lib3d/rigid_mesh_database.py +++ b/happypose/toolbox/lib3d/rigid_mesh_database.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,7 +14,6 @@ """ - # Standard Library from copy import deepcopy from typing import List @@ -27,16 +25,12 @@ # MegaPose from happypose.toolbox.datasets.object_dataset import RigidObject - - from happypose.toolbox.lib3d.mesh_ops import get_meshes_bounding_boxes, sample_points -from happypose.toolbox.lib3d.symmetries import make_symmetries_poses from happypose.toolbox.utils.tensor_collection import TensorCollection def as_mesh(scene_or_mesh): - """ - Convert a possible scene to a mesh. + """Convert a possible scene to a mesh. If conversion occurs, the returned mesh has only vertex and face data. 
""" @@ -49,7 +43,7 @@ def as_mesh(scene_or_mesh): tuple( trimesh.Trimesh(vertices=g.vertices, faces=g.faces) for g in scene_or_mesh.geometry.values() - ) + ), ) else: mesh = scene_or_mesh @@ -60,7 +54,7 @@ class MeshDataBase: def __init__(self, obj_list: List[RigidObject]): self.obj_dict = {obj.label: obj for obj in obj_list} self.obj_list = obj_list - self.infos = {obj.label: dict() for obj in obj_list} + self.infos = {obj.label: {} for obj in obj_list} self.meshes = { l: as_mesh( trimesh.load( @@ -69,14 +63,13 @@ def __init__(self, obj_list: List[RigidObject]): process=False, skip_materials=True, maintain_order=True, - ) + ), ) for l, obj in self.obj_dict.items() } for label, obj in self.obj_dict.items(): if obj.diameter_meters is None: - mesh = self.meshes[label] points = np.array(mesh.vertices) * obj.scale extent = points.max(0) - points.min(0) @@ -97,7 +90,9 @@ def batched(self, aabb=False, resample_n_points=None, n_sym=64): new_infos = deepcopy(self.infos) for label, mesh in self.meshes.items(): if aabb: - points_n = get_meshes_bounding_boxes(torch.as_tensor(mesh.vertices).unsqueeze(0))[0] + points_n = get_meshes_bounding_boxes( + torch.as_tensor(mesh.vertices).unsqueeze(0), + )[0] elif resample_n_points: if isinstance(mesh, trimesh.PointCloud): points_n = sample_points( @@ -107,7 +102,7 @@ def batched(self, aabb=False, resample_n_points=None, n_sym=64): )[0] else: points_n = torch.tensor( - trimesh.sample.sample_surface(mesh, resample_n_points)[0] + trimesh.sample.sample_surface(mesh, resample_n_points)[0], ) else: points_n = torch.tensor(mesh.vertices) @@ -128,7 +123,9 @@ def batched(self, aabb=False, resample_n_points=None, n_sym=64): labels = np.array(labels) points = pad_stack_tensors(points, fill="select_random", deterministic=True) - symmetries = pad_stack_tensors(symmetries, fill=torch.eye(4), deterministic=True) + symmetries = pad_stack_tensors( + symmetries, fill=torch.eye(4), deterministic=True, + ) return BatchedMeshes(new_infos, labels, points, symmetries).float() diff --git a/happypose/toolbox/lib3d/rotations.py b/happypose/toolbox/lib3d/rotations.py index 326abac0..ca586017 100644 --- a/happypose/toolbox/lib3d/rotations.py +++ b/happypose/toolbox/lib3d/rotations.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,7 +14,6 @@ """ - # Third Party import numpy as np import torch @@ -23,11 +21,10 @@ def compute_rotation_matrix_from_ortho6d(poses): - """ - Code from https://github.com/papagina/RotationContinuity + """Code from https://github.com/papagina/RotationContinuity On the Continuity of Rotation Representations in Neural Networks Zhou et al. CVPR19 - https://zhouyisjtu.github.io/project_rotation/rotation.html + https://zhouyisjtu.github.io/project_rotation/rotation.html. """ assert poses.shape[-1] == 6 x_raw = poses[..., 0:3] @@ -41,9 +38,8 @@ def compute_rotation_matrix_from_ortho6d(poses): def euler2quat(xyz, axes="sxyz"): - """ - euler: sxyz - quaternion: xyzw + """euler: sxyz + quaternion: xyzw. 
""" wxyz = transforms3d.euler.euler2quat(*xyz, axes=axes) xyzw = [*wxyz[1:], wxyz[0]] @@ -51,12 +47,14 @@ def euler2quat(xyz, axes="sxyz"): def angle_axis_to_rotation_matrix(angle_axis): - """Convert 3d vector of axis-angle rotation to 4x4 rotation matrix + """Convert 3d vector of axis-angle rotation to 4x4 rotation matrix. Args: + ---- angle_axis (Tensor): tensor of 3d vector of axis-angle rotations. Returns: + ------- Tensor: tensor of 4x4 rotation matrices. Shape: @@ -64,6 +62,7 @@ def angle_axis_to_rotation_matrix(angle_axis): - Output: :math:`(N, 4, 4)` Example: + ------- >>> input = torch.rand(1, 3) # Nx3 >>> output = tgm.angle_axis_to_rotation_matrix(input) # Nx4x4 """ @@ -88,13 +87,17 @@ def _compute_rotation_matrix(angle_axis, theta2, eps=1e-6): r02 = wy * sin_theta + wx * wz * (k_one - cos_theta) r12 = -wx * sin_theta + wy * wz * (k_one - cos_theta) r22 = cos_theta + wz * wz * (k_one - cos_theta) - rotation_matrix = torch.cat([r00, r01, r02, r10, r11, r12, r20, r21, r22], dim=1) + rotation_matrix = torch.cat( + [r00, r01, r02, r10, r11, r12, r20, r21, r22], dim=1, + ) return rotation_matrix.view(-1, 3, 3) def _compute_rotation_matrix_taylor(angle_axis): rx, ry, rz = torch.chunk(angle_axis, 3, dim=1) k_one = torch.ones_like(rx) - rotation_matrix = torch.cat([k_one, -rz, ry, rz, k_one, -rx, -ry, rx, k_one], dim=1) + rotation_matrix = torch.cat( + [k_one, -rz, ry, rz, k_one, -rx, -ry, rx, k_one], dim=1, + ) return rotation_matrix.view(-1, 3, 3) # stolen from ceres/rotation.h @@ -130,9 +133,11 @@ def quaternion_to_angle_axis(quaternion: torch.Tensor) -> torch.Tensor: Adapted from ceres C++ library: ceres-solver/include/ceres/rotation.h Args: + ---- quaternion (torch.Tensor): tensor with quaternions. Return: + ------ torch.Tensor: tensor with angle axis of rotation. Shape: @@ -140,15 +145,20 @@ def quaternion_to_angle_axis(quaternion: torch.Tensor) -> torch.Tensor: - Output: :math:`(*, 3)` Example: + ------- >>> quaternion = torch.rand(2, 4) # Nx4 >>> angle_axis = tgm.quaternion_to_angle_axis(quaternion) # Nx3 """ if not torch.is_tensor(quaternion): - raise TypeError("Input type is not a torch.Tensor. Got {}".format(type(quaternion))) + msg = f"Input type is not a torch.Tensor. Got {type(quaternion)}" + raise TypeError( + msg, + ) if not quaternion.shape[-1] == 4: + msg = f"Input must be a tensor of shape Nx4 or 4. Got {quaternion.shape}" raise ValueError( - "Input must be a tensor of shape Nx4 or 4. Got {}".format(quaternion.shape) + msg, ) # unpack input and compute conversion q1: torch.Tensor = quaternion[..., 1] @@ -159,7 +169,9 @@ def quaternion_to_angle_axis(quaternion: torch.Tensor) -> torch.Tensor: sin_theta: torch.Tensor = torch.sqrt(sin_squared_theta) cos_theta: torch.Tensor = quaternion[..., 0] two_theta: torch.Tensor = 2.0 * torch.where( - cos_theta < 0.0, torch.atan2(-sin_theta, -cos_theta), torch.atan2(sin_theta, cos_theta) + cos_theta < 0.0, + torch.atan2(-sin_theta, -cos_theta), + torch.atan2(sin_theta, cos_theta), ) k_pos: torch.Tensor = two_theta / sin_theta diff --git a/happypose/toolbox/lib3d/symmetries.py b/happypose/toolbox/lib3d/symmetries.py index 79e3d9d0..23b03f76 100644 --- a/happypose/toolbox/lib3d/symmetries.py +++ b/happypose/toolbox/lib3d/symmetries.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,7 +14,6 @@ """ - # Standard Library from dataclasses import dataclass from typing import List, Optional @@ -42,9 +40,7 @@ class ContinuousSymmetry: @dataclass class DiscreteSymmetry: - """ - pose: (4, 4) homogeneous matrix - """ + """pose: (4, 4) homogeneous matrix.""" pose: npt.NDArray[np.float_] @@ -58,7 +54,8 @@ def make_symmetries_poses( ) -> np.ndarray: """Generates the set of object symmetries. - Returns: + Returns + ------- (num_symmetries, 4, 4) array """ # Note: See https://github.com/thodan/bop_toolkit/blob/master/bop_toolkit_lib/misc.py diff --git a/happypose/toolbox/lib3d/transform.py b/happypose/toolbox/lib3d/transform.py index 71ad8392..001a19c2 100644 --- a/happypose/toolbox/lib3d/transform.py +++ b/happypose/toolbox/lib3d/transform.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -38,14 +37,13 @@ def __init__( Tuple[float, float, float, float], ], # rotation Union[np.ndarray, torch.Tensor, Tuple[float, float, float]], # translation - ] + ], ): - """ - - Transform(T): SE3 or (4, 4) array + """- Transform(T): SE3 or (4, 4) array - Transform(quaternion, translation), where quaternion: pin.Quaternion, 4-array representing a xyzw quaternion, or a 3x3 rotation matrix - translation: 3-array + translation: 3-array. """ if len(args) == 1: arg_T = args[0] @@ -118,5 +116,5 @@ def quaternion(self) -> pin.Quaternion: @property def matrix(self) -> np.ndarray: - """Returns 4x4 homogeneous matrix representations""" + """Returns 4x4 homogeneous matrix representations.""" return self._T.homogeneous diff --git a/happypose/toolbox/lib3d/transform_ops.py b/happypose/toolbox/lib3d/transform_ops.py index f9d20b75..95348050 100644 --- a/happypose/toolbox/lib3d/transform_ops.py +++ b/happypose/toolbox/lib3d/transform_ops.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,7 +14,6 @@ """ - # Standard Library from typing import Tuple @@ -29,16 +27,17 @@ def transform_pts(T: torch.Tensor, pts: torch.Tensor) -> torch.Tensor: - """ - - Args: + """Args: + ---- T (torch.Tensor): (bsz, 4, 4) or (bsz, dim2, 4, 4) - pts (torch.Tensor): (bsz, n_pts, 3) + pts (torch.Tensor): (bsz, n_pts, 3). 
- Raises: + Raises + ------ ValueError: _description_ - Returns: + Returns + ------- torch.Tensor: _description_ """ bsz = T.shape[0] @@ -50,7 +49,8 @@ def transform_pts(T: torch.Tensor, pts: torch.Tensor) -> torch.Tensor: elif T.dim() == 3: assert T.shape == (bsz, 4, 4) else: - raise ValueError("Unsupported shape for T", T.shape) + msg = "Unsupported shape for T" + raise ValueError(msg, T.shape) pts = pts.unsqueeze(-1) T = T.unsqueeze(-3) pts_transformed = T[..., :3, :3] @ pts + T[..., :3, [-1]] @@ -85,7 +85,9 @@ def add_noise( ) euler_noise_rad = euler_noise_deg * np.pi / 180 R_noise = ( - torch.tensor(np.stack([transforms3d.euler.euler2mat(*xyz) for xyz in euler_noise_rad])) + torch.tensor( + np.stack([transforms3d.euler.euler2mat(*xyz) for xyz in euler_noise_rad]), + ) .float() .to(device) ) diff --git a/happypose/toolbox/renderer/geometry.py b/happypose/toolbox/renderer/geometry.py index 188d8f03..08a65106 100644 --- a/happypose/toolbox/renderer/geometry.py +++ b/happypose/toolbox/renderer/geometry.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -59,7 +58,8 @@ class ViewerClosedError(ViewerError): def make_axes(): """Make an axes geometry. - Returns: + Returns + ------- Geom -- p3d geometry """ vformat = GeomVertexFormat.get_v3c4() @@ -87,10 +87,12 @@ def make_grid(num_ticks=10, step=1.0): """Make a grid geometry. Keyword Arguments: + ----------------- step {float} -- step in meters (default: {1.0}) num_ticks {int} -- ticks number per axis (default: {5}) Returns: + ------- Geom -- p3d geometry """ ticks = np.arange(-num_ticks // 2, num_ticks // 2 + 1) * step @@ -119,14 +121,17 @@ def make_capsule(radius, length, num_segments=16, num_rings=16): """Make capsule geometry. Arguments: + --------- radius {float} -- capsule radius length {float} -- capsule length Keyword Arguments: + ----------------- num_segments {int} -- segments number (default: {16}) num_rings {int} -- rings number (default: {16}) Returns: + ------- Geom -- p3d geometry """ vformat = GeomVertexFormat.get_v3n3t2() @@ -164,10 +169,12 @@ def make_cylinder(num_segments=16, closed=True): """Make a uniform cylinder geometry. Keyword Arguments: + ----------------- num_segments {int} -- segments number (default: {16}) closed {bool} -- add caps (default: {True}) Returns: + ------- Geom -- p3d geometry """ vformat = GeomVertexFormat.get_v3n3t2() @@ -222,7 +229,8 @@ def make_cylinder(num_segments=16, closed=True): def make_box(): """Make a uniform box geometry. - Returns: + Returns + ------- Geom -- p3d geometry """ vformat = GeomVertexFormat.get_v3n3t2() @@ -257,9 +265,11 @@ def make_plane(size=(1.0, 1.0)): """Make a plane geometry. Arguments: + --------- size {tuple} -- plane size x,y Returns: + ------- Geom -- p3d geometry """ vformat = GeomVertexFormat.get_v3n3t2() @@ -290,10 +300,12 @@ def make_sphere(num_segments=16, num_rings=16): """Make a uniform UV sphere geometry. Keyword Arguments: + ----------------- num_segments {int} -- segments number (default: {16}) num_rings {int} -- rings number (default: {16}) Returns: + ------- Geom -- p3d geometry """ return make_capsule(1.0, 0.0, num_segments, num_rings) @@ -303,16 +315,19 @@ def make_points(vertices, colors=None, texture_coords=None, geom=None): """Make or update existing points set geometry. 
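The transform_pts hunk above applies a batch of (4, 4) poses to a (bsz, n_pts, 3) cloud with one broadcasted matmul. A self-contained sketch of that shape juggling (illustrative; tensor values are placeholders):

```python
import torch

bsz, n_pts = 2, 5
T = torch.eye(4).repeat(bsz, 1, 1)
T[:, :3, 3] = torch.tensor([0.0, 0.0, 1.0])  # translate 1 m along z
pts = torch.rand(bsz, n_pts, 3)

pts_h = pts.unsqueeze(-1)   # (bsz, n_pts, 3, 1)
T_b = T.unsqueeze(-3)       # (bsz, 1, 4, 4), broadcasts over the points
out = (T_b[..., :3, :3] @ pts_h + T_b[..., :3, [-1]]).squeeze(-1)
assert out.shape == (bsz, n_pts, 3)
```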
Arguments: + --------- root_path {str} -- path to the group's root node name {str} -- node name within a group vertices {list} -- point coordinates (and other data in a point cloud format) Keyword Arguments: + ----------------- colors {list} -- colors (default: {None}) texture_coords {list} -- texture coordinates (default: {None}) geom {Geom} -- geometry to update (default: {None}) Returns: + ------- Geom -- p3d geometry """ if not isinstance(vertices, np.ndarray): @@ -324,7 +339,10 @@ def make_points(vertices, colors=None, texture_coords=None, geom=None): if colors.dtype != np.uint8: colors = np.uint8(colors * 255) vertices = np.column_stack( - (vertices.view(dtype=np.uint32).reshape(-1, 3), colors.view(dtype=np.uint32)) + ( + vertices.view(dtype=np.uint32).reshape(-1, 3), + colors.view(dtype=np.uint32), + ), ) if texture_coords is not None: @@ -334,7 +352,7 @@ def make_points(vertices, colors=None, texture_coords=None, geom=None): ( vertices.view(dtype=np.uint32).reshape(-1, 3), texture_coords.view(dtype=np.uint32).reshape(-1, 2), - ) + ), ) data = vertices.tostring() @@ -347,8 +365,9 @@ def make_points(vertices, colors=None, texture_coords=None, geom=None): elif vertices.strides[0] == 20: vformat = GeomVertexFormat.get_v3t2() else: + msg = f"Incompatible point clout format: {vertices.dtype},{vertices.shape}" raise ViewerError( - "Incompatible point clout format: {},{}".format(vertices.dtype, vertices.shape) + msg, ) vdata = GeomVertexData("vdata", vformat, Geom.UHDynamic) diff --git a/happypose/toolbox/renderer/panda3d_batch_renderer.py b/happypose/toolbox/renderer/panda3d_batch_renderer.py index 7e3e7698..a4a2ebd4 100644 --- a/happypose/toolbox/renderer/panda3d_batch_renderer.py +++ b/happypose/toolbox/renderer/panda3d_batch_renderer.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,7 +14,6 @@ """ - # Standard Library from dataclasses import dataclass from typing import List, Optional, Set, Union @@ -28,7 +26,6 @@ # HappyPose from happypose.toolbox.datasets.object_dataset import RigidObjectDataset - # MegaPose from happypose.toolbox.lib3d.transform import Transform from happypose.toolbox.lib3d.transform_ops import invert_transform_matrices @@ -49,10 +46,9 @@ @dataclass class RenderOutput: - """ - rgb: (h, w, 3) uint8 + """rgb: (h, w, 3) uint8 normals: (h, w, 3) uint8 - depth: (h, w, 1) float32 + depth: (h, w, 1) float32. """ data_id: int @@ -63,8 +59,7 @@ class RenderOutput: @dataclass class BatchRenderOutput: - """ - rgb: (bsz, 3, h, w) float, values in [0, 1] + """rgb: (bsz, 3, h, w) float, values in [0, 1] normals: (bsz, 3, h, w) float, values in [0, 1] depth: (bsz, 1, h, w) float, in meters. """ @@ -96,7 +91,6 @@ def worker_loop( object_dataset: RigidObjectDataset, preload_labels: Set[str] = set(), ) -> None: - logger.debug(f"Init worker: {worker_id}") renderer = Panda3dSceneRenderer( asset_dataset=object_dataset, @@ -161,7 +155,6 @@ def __init__( preload_cache: bool = True, split_objects: bool = False, ): - assert n_workers >= 1 self._object_dataset = object_dataset self._n_workers = n_workers @@ -178,9 +171,10 @@ def make_scene_data( light_datas: List[List[Panda3dLightData]], resolution: Resolution, ) -> List[SceneData]: - """_summary_ + """_summary_. 
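The make_points hunk above interleaves float32 positions and uint8 RGBA colors into one vertex buffer by reinterpreting both as 32-bit words; the resulting 16-byte stride is what the v3c4 format check later relies on. A standalone numpy sketch of that packing (illustrative only):

```python
import numpy as np

vertices = np.random.rand(4, 3).astype(np.float32)       # xyz positions
colors = (np.random.rand(4, 4) * 255).astype(np.uint8)   # RGBA per vertex

packed = np.column_stack(
    (
        vertices.view(dtype=np.uint32).reshape(-1, 3),  # 3 words of position
        colors.view(dtype=np.uint32),                   # 1 word holding RGBA
    ),
)
print(packed.dtype, packed.shape, packed.strides[0])  # uint32 (4, 4) 16
```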
Args: + ---- labels (List[str]): _description_ TCO (torch.Tensor): (bsz, 4, 4) float K (torch.Tensor): (bsz, 3, 3) float @@ -188,6 +182,7 @@ def make_scene_data( resolution (Resolution): _description_ Returns: + ------- List[SceneData]: _description_ """ bsz = TCO.shape[0] @@ -210,7 +205,7 @@ def make_scene_data( Panda3dObjectData( label=label_n, TWO=TWO, - ) + ), ], light_datas=lights_n, ) @@ -228,7 +223,6 @@ def render( render_mask: bool = False, render_normals: bool = False, ) -> BatchRenderOutput: - if render_mask: raise NotImplementedError @@ -265,7 +259,7 @@ def render( rgbs = torch.stack(list_rgbs).pin_memory().cuda(non_blocking=True) else: rgbs = torch.stack(list_rgbs) - + rgbs = rgbs.float().permute(0, 3, 1, 2) / 255 if render_depth: @@ -294,7 +288,7 @@ def render( depths=depths, normals=normals, ) - + def _init_renderers(self, preload_cache: bool) -> None: object_labels = [obj.label for obj in self._object_dataset.list_objects] @@ -303,9 +297,11 @@ def _init_renderers(self, preload_cache: bool) -> None: self._in_queues: List[torch.multiprocessing.Queue] = [ torch.multiprocessing.Queue() for _ in range(self._n_workers) ] - self._worker_id_to_queue = {n: self._in_queues[n] for n in range(self._n_workers)} + self._worker_id_to_queue = { + n: self._in_queues[n] for n in range(self._n_workers) + } object_labels_split = np.array_split(object_labels, self._n_workers) - self._object_label_to_queue = dict() + self._object_label_to_queue = {} for n, split in enumerate(object_labels_split): for label in split: self._object_label_to_queue[label] = self._in_queues[n] @@ -313,7 +309,9 @@ def _init_renderers(self, preload_cache: bool) -> None: object_labels_split = [object_labels for _ in range(self._n_workers)] self._in_queues = [torch.multiprocessing.Queue()] self._object_label_to_queue = {k: self._in_queues[0] for k in object_labels} - self._worker_id_to_queue = {n: self._in_queues[0] for n in range(self._n_workers)} + self._worker_id_to_queue = { + n: self._in_queues[0] for n in range(self._n_workers) + } self._out_queue: torch.multiprocessing.Queue = torch.multiprocessing.Queue() @@ -324,13 +322,13 @@ def _init_renderers(self, preload_cache: bool) -> None: preload_labels = set() renderer_process = torch.multiprocessing.Process( target=worker_loop, - kwargs=dict( - worker_id=n, - in_queue=self._worker_id_to_queue[n], - out_queue=self._out_queue, - object_dataset=self._object_dataset, - preload_labels=preload_labels, - ), + kwargs={ + "worker_id": n, + "in_queue": self._worker_id_to_queue[n], + "out_queue": self._out_queue, + "object_dataset": self._object_dataset, + "preload_labels": preload_labels, + }, ) renderer_process.start() self._renderers.append(renderer_process) diff --git a/happypose/toolbox/renderer/panda3d_scene_renderer.py b/happypose/toolbox/renderer/panda3d_scene_renderer.py index 98536600..cb31226e 100644 --- a/happypose/toolbox/renderer/panda3d_scene_renderer.py +++ b/happypose/toolbox/renderer/panda3d_scene_renderer.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
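The batch renderer above fans render requests out to worker processes through per-worker input queues and a shared output queue, with None acting as the shutdown signal. A minimal stand-in for that pattern (illustrative; the real workers build a Panda3dSceneRenderer and return RenderOutput objects):

```python
import torch.multiprocessing as mp

def worker_loop(worker_id, in_queue, out_queue):
    while True:
        request = in_queue.get()
        if request is None:          # poison pill: shut the worker down
            break
        out_queue.put((worker_id, request * 2))  # stand-in for rendering

if __name__ == "__main__":
    in_q, out_q = mp.Queue(), mp.Queue()
    proc = mp.Process(target=worker_loop, args=(0, in_q, out_q))
    proc.start()
    in_q.put(21)
    print(out_q.get())  # (0, 42)
    in_q.put(None)
    proc.join()
```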
@@ -15,7 +14,6 @@ """ - # Standard Library import builtins import os @@ -23,10 +21,9 @@ import time import xml.etree.ElementTree as ET from collections import defaultdict -from typing import Dict, List, Set from dataclasses import dataclass from functools import partial -from typing import Dict, List, Optional, Set +from typing import Dict, List, Set # Third Party import numpy as np @@ -85,7 +82,7 @@ def __init__(self) -> None: assert len(devices) == 1 if "EGL_VISIBLE_DEVICES" not in os.environ: out = subprocess.check_output( - ["nvidia-smi", "--id=" + str(devices[0]), "-q", "--xml-format"] + ["nvidia-smi", "--id=" + str(devices[0]), "-q", "--xml-format"], ) tree = ET.fromstring(out) gpu = tree.findall("gpu")[0] @@ -114,11 +111,11 @@ def make_scene_lights( [0, -1, 0], [0, 0, 1], [0, 0, -1], - ] + ], ) def pos_fn( - root_node: p3d.core.NodePath, light_node: p3d.core.NodePath, pos: np.ndarray + root_node: p3d.core.NodePath, light_node: p3d.core.NodePath, pos: np.ndarray, ) -> None: radius = root_node.getBounds().radius xyz_ = pos * radius * 10 @@ -130,8 +127,10 @@ def pos_fn( pos_fn_ = partial(pos_fn, pos=pos_n) light_datas.append( Panda3dLightData( - light_type="point", color=point_lights_color, positioning_function=pos_fn_ - ) + light_type="point", + color=point_lights_color, + positioning_function=pos_fn_, + ), ) return light_datas @@ -149,12 +148,11 @@ def __init__( debug: bool = False, verbose: bool = False, ): - self._asset_dataset = asset_dataset - self._label_to_node: Dict[str, p3d.core.NodePath] = dict() + self._label_to_node: Dict[str, p3d.core.NodePath] = {} self.verbose = verbose self.debug = debug - self.debug_data = Panda3dDebugData(timings=dict()) + self.debug_data = Panda3dDebugData(timings={}) self._cameras_pool: Dict[Resolution, List[Panda3dCamera]] = defaultdict(list) if hasattr(builtins, "base"): @@ -171,12 +169,16 @@ def __init__( def create_new_camera(self, resolution: Resolution) -> Panda3dCamera: idx = sum([len(x) for x in self._cameras_pool.values()]) - cam = Panda3dCamera.create(f"camera={idx}", resolution=resolution, app=self._app) + cam = Panda3dCamera.create( + f"camera={idx}", resolution=resolution, app=self._app, + ) self._cameras_pool[resolution].append(cam) return cam def get_cameras(self, data_cameras: List[Panda3dCameraData]) -> List[Panda3dCamera]: - resolution_to_data_cameras: Dict[Resolution, List[Panda3dCameraData]] = defaultdict(list) + resolution_to_data_cameras: Dict[ + Resolution, List[Panda3dCameraData], + ] = defaultdict(list) for data_camera in data_cameras: resolution_to_data_cameras[data_camera.resolution].append(data_camera) @@ -211,12 +213,14 @@ def use_normals_texture(self, obj_node: p3d.core.NodePath) -> p3d.core.NodePath: obj_node.setMaterialOff(1) obj_node.set_color(p3d.core.Vec4((1.0, 1.0, 1.0, 1.0))) obj_node.setTextureOff(1) - obj_node.setTexGen(p3d.core.TextureStage.getDefault(), p3d.core.TexGenAttrib.MEyeNormal) + obj_node.setTexGen( + p3d.core.TextureStage.getDefault(), p3d.core.TexGenAttrib.MEyeNormal, + ) obj_node.setTexture(self._rgb_texture) return obj_node def setup_scene( - self, root_node: p3d.core.NodePath, data_objects: List[Panda3dObjectData] + self, root_node: p3d.core.NodePath, data_objects: List[Panda3dObjectData], ) -> List[p3d.core.NodePath]: obj_nodes = [] for n, data_obj in enumerate(data_objects): @@ -236,7 +240,7 @@ def setup_scene( return obj_nodes def setup_cameras( - self, root_node: p3d.core.NodePath, data_cameras: List[Panda3dCameraData] + self, root_node: p3d.core.NodePath, data_cameras: 
List[Panda3dCameraData], ) -> List[Panda3dCamera]: cameras = self.get_cameras(data_cameras) @@ -253,9 +257,11 @@ def setup_cameras( return cameras def render_images( - self, cameras: List[Panda3dCamera], copy_arrays: bool = True, render_depth: bool = False + self, + cameras: List[Panda3dCamera], + copy_arrays: bool = True, + render_depth: bool = False, ) -> List[CameraRenderingData]: - self._app.graphicsEngine.renderFrame() self._app.graphicsEngine.syncFrame() @@ -272,7 +278,7 @@ def render_images( return renderings def setup_lights( - self, root_node: p3d.core, light_datas: List[Panda3dLightData] + self, root_node: p3d.core, light_datas: List[Panda3dLightData], ) -> List[p3d.core.NodePath]: light_node_paths = [] for n, light_data in enumerate(light_datas): @@ -306,7 +312,6 @@ def render_scene( render_normals: bool = False, clear: bool = True, ) -> List[CameraRenderingData]: - start = time.time() root_node = self._app.render.attachNewNode("world") object_nodes = self.setup_scene(root_node, object_datas) @@ -315,12 +320,16 @@ def render_scene( setup_time = time.time() - start start = time.time() - renderings = self.render_images(cameras, copy_arrays=copy_arrays, render_depth=render_depth) + renderings = self.render_images( + cameras, copy_arrays=copy_arrays, render_depth=render_depth, + ) if render_normals: for object_node in object_nodes: self.use_normals_texture(object_node) root_node.clear_light() - light_data = Panda3dLightData(light_type="ambient", color=(1.0, 1.0, 1.0, 1.0)) + light_data = Panda3dLightData( + light_type="ambient", color=(1.0, 1.0, 1.0, 1.0), + ) light_nodes += self.setup_lights(root_node, [light_data]) normals_renderings = self.render_images(cameras, copy_arrays=copy_arrays) for n, rendering in enumerate(renderings): diff --git a/happypose/toolbox/renderer/types.py b/happypose/toolbox/renderer/types.py index 5e92f69d..aa532156 100644 --- a/happypose/toolbox/renderer/types.py +++ b/happypose/toolbox/renderer/types.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -33,20 +32,21 @@ RgbaColor = Tuple[float, float, float, float] NodeFunction = Callable[ - [p3d.core.NodePath, p3d.core.NodePath], None + [p3d.core.NodePath, p3d.core.NodePath], None, ] # (root_node_path, object_node_path) Resolution = Tuple[int, int] -TCCGL = Transform(np.array([[1, 0, 0, 0], [0, 0, -1, 0], [0, 1, 0, 0], [0, 0, 0, 1]], dtype=float)) +TCCGL = Transform( + np.array([[1, 0, 0, 0], [0, 0, -1, 0], [0, 1, 0, 0], [0, 0, 0, 1]], dtype=float), +) @dataclass class CameraRenderingData: - """ - rgb: (h, w, 3) uint8 + """rgb: (h, w, 3) uint8 normals: (h, w, 3) uint8 depth: (h, w, 1) float32 - binary_mask: (h, w, 1) np.bool_ + binary_mask: (h, w, 1) np.bool_. """ rgb: np.ndarray @@ -92,7 +92,7 @@ def set_lens_parameters(self, lens: p3d.core.Lens) -> p3d.core.Lens: [0, 0, A, 1], [0, fy, 0, 0], [0, 0, B, 0], - ] + ], ) lens.setFilmSize(w, h) @@ -106,7 +106,7 @@ class Panda3dLightData: """Data used to to define a light in a panda3d scene. light_type: ambient, point, or directional NOTE: Alpha is largely irrelevant - https://docs.panda3d.org/1.10/python/programming/render-attributes/lighting#colored-lights + https://docs.panda3d.org/1.10/python/programming/render-attributes/lighting#colored-lights. 
""" light_type: str @@ -185,7 +185,9 @@ def create( depth_texture = p3d.core.Texture() depth_texture.setFormat(p3d.core.Texture.FDepthComponent) graphics_buffer.addRenderTexture( - depth_texture, p3d.core.GraphicsOutput.RTMCopyRam, p3d.core.GraphicsOutput.RTPDepth + depth_texture, + p3d.core.GraphicsOutput.RTMCopyRam, + p3d.core.GraphicsOutput.RTPDepth, ) cam_node = p3d.core.Camera(f"Camera [{name}]") @@ -210,9 +212,10 @@ def create( ) def get_rgb_image(self) -> np.ndarray: - """_summary_ + """_summary_. - Returns: + Returns + ------- np.ndarray: (h, w, 3) uint8 array """ # TODO : Extract data from the rgb texture ? @@ -234,7 +237,8 @@ def _get_depth_buffer(self) -> np.ndarray: https://developer.nvidia.com/content/depth-precision-visualized#:~:text=GPU%20hardware%20depth%20buffers%20don,reciprocal%20of%20world%2Dspace%20depth. - Returns: + Returns + ------- depth_buffer: [H,W,1] numpy array with values in [0,1] """ diff --git a/happypose/toolbox/renderer/utils.py b/happypose/toolbox/renderer/utils.py index 204c9785..70e997d4 100644 --- a/happypose/toolbox/renderer/utils.py +++ b/happypose/toolbox/renderer/utils.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,9 +14,7 @@ """ - # Standard Library -from typing import List # Third Party import numpy as np @@ -28,8 +25,11 @@ from happypose.toolbox.lib3d.transform import Transform from happypose.toolbox.renderer.geometry import make_axes, make_box, make_sphere + def compute_view_mat(TWC): - TCCGL = np.array([[1, 0, 0, 0], [0, 0, -1, 0], [0, 1, 0, 0], [0, 0, 0, 1]], dtype=float) + TCCGL = np.array( + [[1, 0, 0, 0], [0, 0, -1, 0], [0, 1, 0, 0], [0, 0, 0, 1]], dtype=float, + ) TCCGL = Transform(TCCGL) TWC = Transform(TWC) TWCGL = TWC * TCCGL @@ -37,12 +37,16 @@ def compute_view_mat(TWC): view_mat = p3d.core.LMatrix4f(*view_mat.transpose().flatten().tolist()) return view_mat + def np_to_lmatrix4(np_array: npt.NDArray) -> p3d.core.LMatrix4f: return p3d.core.LMatrix4f(*np_array.transpose().flatten().tolist()) def depth_image_from_depth_buffer( - depth_buffer: npt.NDArray[np.float32], z_near: float, z_far: float, eps: float = 0.001 + depth_buffer: npt.NDArray[np.float32], + z_near: float, + z_far: float, + eps: float = 0.001, ) -> npt.NDArray[np.float32]: """Convert depth image to depth buffer. @@ -57,7 +61,9 @@ def depth_image_from_depth_buffer( def make_rgb_texture_normal_map(size: int = 32) -> p3d.core.Texture: tex = p3d.core.Texture() - tex.setup3dTexture(size, size, size, p3d.core.Texture.T_unsigned_byte, p3d.core.Texture.F_rgb8) + tex.setup3dTexture( + size, size, size, p3d.core.Texture.T_unsigned_byte, p3d.core.Texture.F_rgb8, + ) im = np.ones((size, size, size, 3), dtype=np.uint8) * 255 for x in range(size): for y in range(size): diff --git a/happypose/toolbox/utils/conversion.py b/happypose/toolbox/utils/conversion.py index ad3b7eaf..fa2cad2c 100644 --- a/happypose/toolbox/utils/conversion.py +++ b/happypose/toolbox/utils/conversion.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -24,9 +23,8 @@ def convert_scene_observation_to_panda3d( - camera_data: CameraData, object_datas: List[ObjectData] + camera_data: CameraData, object_datas: List[ObjectData], ) -> Tuple[Panda3dCameraData, List[Panda3dObjectData]]: - assert camera_data.TWC is not None assert camera_data.K is not None assert camera_data.resolution is not None @@ -44,6 +42,6 @@ def convert_scene_observation_to_panda3d( Panda3dObjectData( label=object_data.label, TWO=object_data.TWO, - ) + ), ) return panda3d_camera_data, panda3d_object_datas diff --git a/happypose/toolbox/utils/distributed.py b/happypose/toolbox/utils/distributed.py index 72750675..d865126b 100644 --- a/happypose/toolbox/utils/distributed.py +++ b/happypose/toolbox/utils/distributed.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,7 +14,6 @@ """ - # Standard Library import datetime import os @@ -23,11 +21,12 @@ from pathlib import Path from typing import Any, Dict, List +import omegaconf + # Third Party import torch import torch.distributed as dist from omegaconf import OmegaConf -import omegaconf # MegaPose from happypose.toolbox.utils.logging import get_logger @@ -46,7 +45,7 @@ def get_tmp_dir() -> Path: def sync_config( - cfg: omegaconf.dictconfig.DictConfig, local_fields: List[str] = [] + cfg: omegaconf.dictconfig.DictConfig, local_fields: List[str] = [], ) -> omegaconf.dictconfig.DictConfig: cfg_path = get_tmp_dir() / "config.yaml" if get_rank() == 0: @@ -103,12 +102,8 @@ def get_world_size() -> int: return world_size -def reduce_dict( - input_dict: Dict[str, Any], - average: bool = True -) -> Dict[str, Any]: - """ - https://github.com/pytorch/vision/blob/master/references/detection/utils.py +def reduce_dict(input_dict: Dict[str, Any], average: bool = True) -> Dict[str, Any]: + """https://github.com/pytorch/vision/blob/master/references/detection/utils.py Args: input_dict (dict): all the values will be reduced average (bool): whether to do average or sum diff --git a/happypose/toolbox/utils/download.py b/happypose/toolbox/utils/download.py index bd0e1848..b7a8a003 100644 --- a/happypose/toolbox/utils/download.py +++ b/happypose/toolbox/utils/download.py @@ -16,138 +16,162 @@ logger = get_logger(__name__) -RCLONE_CFG_PATH = (PROJECT_DIR / 'rclone.conf') -RCLONE_ROOT = 'happypose:' -DOWNLOAD_DIR = LOCAL_DATA_DIR / 'downloads' +RCLONE_CFG_PATH = PROJECT_DIR / "rclone.conf" +RCLONE_ROOT = "happypose:" +DOWNLOAD_DIR = LOCAL_DATA_DIR / "downloads" DOWNLOAD_DIR.mkdir(exist_ok=True) -BOP_SRC = 'https://bop.felk.cvut.cz/media/data/bop_datasets/' +BOP_SRC = "https://bop.felk.cvut.cz/media/data/bop_datasets/" BOP_DATASETS = { - 'ycbv': { - 'splits': ['train_real', 'train_synt', 'test_all'] + "ycbv": {"splits": ["train_real", "train_synt", "test_all"]}, + "tless": { + "splits": ["test_primesense_all", "train_primesense"], }, - - 'tless': { - 'splits': ['test_primesense_all', 'train_primesense'], - }, - - 'hb': { - 'splits': ['test_primesense_all', 'val_primesense'], - }, - - 'icbin': { - 'splits': ['test_all'], + "hb": { + "splits": ["test_primesense_all", "val_primesense"], }, - - 'itodd': { - 'splits': ['val', 'test_all'], + "icbin": { + "splits": ["test_all"], }, - - 'lm': { - 'splits': ['test_all'], + "itodd": { + "splits": ["val", "test_all"], }, - - 'lmo': { - 
'splits': ['test_all'], - 'has_pbr': False, + "lm": { + "splits": ["test_all"], }, - - 'tudl': { - 'splits': ['test_all', 'train_real'] + "lmo": { + "splits": ["test_all"], + "has_pbr": False, }, + "tudl": {"splits": ["test_all", "train_real"]}, } BOP_DS_NAMES = list(BOP_DATASETS.keys()) def main(): - parser = argparse.ArgumentParser('CosyPose download utility') - parser.add_argument('--bop_dataset', default='', type=str, choices=BOP_DS_NAMES) - parser.add_argument('--bop_src', default='bop', type=str, choices=['bop', 'gdrive']) - parser.add_argument('--bop_extra_files', default='', type=str, choices=['ycbv', 'tless']) - parser.add_argument('--cosypose_models', default='', type=str) + parser = argparse.ArgumentParser("CosyPose download utility") + parser.add_argument("--bop_dataset", default="", type=str, choices=BOP_DS_NAMES) + parser.add_argument("--bop_src", default="bop", type=str, choices=["bop", "gdrive"]) + parser.add_argument( + "--bop_extra_files", default="", type=str, choices=["ycbv", "tless"], + ) + parser.add_argument("--cosypose_models", default="", type=str) parser.add_argument("--megapose_models", action="store_true") - parser.add_argument('--urdf_models', default='', type=str) - parser.add_argument('--ycbv_compat_models', action='store_true') - parser.add_argument('--texture_dataset', action='store_true') - parser.add_argument('--result_id', default='', type=str) - parser.add_argument('--bop_result_id', default='', type=str) - parser.add_argument('--synt_dataset', default='', type=str) - parser.add_argument('--detections', default='', type=str) - parser.add_argument('--example_scenario', action='store_true') - parser.add_argument('--pbr_training_images', action='store_true') - parser.add_argument('--all_bop20_results', action='store_true') - parser.add_argument('--all_bop20_models', action='store_true') - - parser.add_argument('--debug', action='store_true') + parser.add_argument("--urdf_models", default="", type=str) + parser.add_argument("--ycbv_compat_models", action="store_true") + parser.add_argument("--texture_dataset", action="store_true") + parser.add_argument("--result_id", default="", type=str) + parser.add_argument("--bop_result_id", default="", type=str) + parser.add_argument("--synt_dataset", default="", type=str) + parser.add_argument("--detections", default="", type=str) + parser.add_argument("--example_scenario", action="store_true") + parser.add_argument("--pbr_training_images", action="store_true") + parser.add_argument("--all_bop20_results", action="store_true") + parser.add_argument("--all_bop20_models", action="store_true") + + parser.add_argument("--debug", action="store_true") args = parser.parse_args() if args.debug: logger.setLevel(logging.DEBUG) if args.bop_dataset: - if args.bop_src == 'bop': - download_bop_original(args.bop_dataset, args.pbr_training_images and BOP_DATASETS[args.bop_dataset].get('has_pbr', True)) - elif args.bop_src == 'gdrive': + if args.bop_src == "bop": + download_bop_original( + args.bop_dataset, + args.pbr_training_images + and BOP_DATASETS[args.bop_dataset].get("has_pbr", True), + ) + elif args.bop_src == "gdrive": download_bop_gdrive(args.bop_dataset) if args.bop_extra_files: - if args.bop_extra_files == 'tless': + if args.bop_extra_files == "tless": # https://github.com/kirumang/Pix2Pose#download-pre-trained-weights - download(f'cosypose/bop_datasets/tless/all_target_tless.json', BOP_DS_DIR / 'tless') - os.symlink(BOP_DS_DIR / 'tless/models_eval', BOP_DS_DIR / 'tless/models') - elif args.bop_extra_files == 'ycbv': + 
download( + "cosypose/bop_datasets/tless/all_target_tless.json", + BOP_DS_DIR / "tless", + ) + os.symlink(BOP_DS_DIR / "tless/models_eval", BOP_DS_DIR / "tless/models") + elif args.bop_extra_files == "ycbv": # Friendly names used with YCB-Video - download(f'cosypose/bop_datasets/ycbv/ycbv_friendly_names.txt', BOP_DS_DIR / 'ycbv') + download( + "cosypose/bop_datasets/ycbv/ycbv_friendly_names.txt", + BOP_DS_DIR / "ycbv", + ) # Offsets between YCB-Video and BOP (extracted from BOP readme) - download(f'cosypose/bop_datasets/ycbv/offsets.txt', BOP_DS_DIR / 'ycbv') + download("cosypose/bop_datasets/ycbv/offsets.txt", BOP_DS_DIR / "ycbv") # Evaluation models for YCB-Video (used by other works) - download(f'cosypose/bop_datasets/ycbv/models_original', BOP_DS_DIR / 'ycbv') + download("cosypose/bop_datasets/ycbv/models_original", BOP_DS_DIR / "ycbv") # Keyframe definition - download(f'cosypose/bop_datasets/ycbv/keyframe.txt', BOP_DS_DIR / 'ycbv') + download("cosypose/bop_datasets/ycbv/keyframe.txt", BOP_DS_DIR / "ycbv") if args.urdf_models: - download(f'cosypose/urdfs/{args.urdf_models}', LOCAL_DATA_DIR / 'urdfs') + download(f"cosypose/urdfs/{args.urdf_models}", LOCAL_DATA_DIR / "urdfs") if args.ycbv_compat_models: - download(f'cosypose/bop_datasets/ycbv/models_bop-compat', BOP_DS_DIR / 'ycbv') - download(f'cosypose/bop_datasets/ycbv/models_bop-compat_eval', BOP_DS_DIR / 'ycbv') + download("cosypose/bop_datasets/ycbv/models_bop-compat", BOP_DS_DIR / "ycbv") + download( + "cosypose/bop_datasets/ycbv/models_bop-compat_eval", BOP_DS_DIR / "ycbv", + ) if args.cosypose_models: - download(f'cosypose/experiments/{args.cosypose_models}', LOCAL_DATA_DIR / 'experiments') - + download( + f"cosypose/experiments/{args.cosypose_models}", + LOCAL_DATA_DIR / "experiments", + ) + if args.megapose_models: # rclone copyto inria_data:megapose-models/ megapose-models/ # --exclude="**epoch**" --config $MEGAPOSE_DIR/rclone.conf -P download( - f"megapose/megapose-models/", + "megapose/megapose-models/", LOCAL_DATA_DIR / "megapose-models/", flags=["--exclude", "*epoch*"], ) if args.detections: - download(f'cosypose/saved_detections/{args.detections}.pkl', LOCAL_DATA_DIR / 'saved_detections') + download( + f"cosypose/saved_detections/{args.detections}.pkl", + LOCAL_DATA_DIR / "saved_detections", + ) if args.result_id: - download(f'cosypose/results/{args.result_id}', LOCAL_DATA_DIR / 'results') + download(f"cosypose/results/{args.result_id}", LOCAL_DATA_DIR / "results") if args.bop_result_id: - csv_name = args.bop_result_id + '.csv' - download(f'cosypose/bop_predictions/{csv_name}', LOCAL_DATA_DIR / 'bop_predictions') - download(f'cosypose/bop_eval_outputs/{args.bop_result_id}', LOCAL_DATA_DIR / 'bop_predictions') + csv_name = args.bop_result_id + ".csv" + download( + f"cosypose/bop_predictions/{csv_name}", LOCAL_DATA_DIR / "bop_predictions", + ) + download( + f"cosypose/bop_eval_outputs/{args.bop_result_id}", + LOCAL_DATA_DIR / "bop_predictions", + ) if args.texture_dataset: - download('cosypose/zip_files/textures.zip', DOWNLOAD_DIR) - logger.info('Extracting textures ...') - zipfile.ZipFile(DOWNLOAD_DIR / 'textures.zip').extractall(LOCAL_DATA_DIR / 'texture_datasets') + download("cosypose/zip_files/textures.zip", DOWNLOAD_DIR) + logger.info("Extracting textures ...") + zipfile.ZipFile(DOWNLOAD_DIR / "textures.zip").extractall( + LOCAL_DATA_DIR / "texture_datasets", + ) if args.synt_dataset: - zip_name = f'{args.synt_dataset}.zip' - download(f'cosypose/zip_files/{zip_name}', DOWNLOAD_DIR) - logger.info('Extracting textures 
...') - zipfile.ZipFile(DOWNLOAD_DIR / zip_name).extractall(LOCAL_DATA_DIR / 'synt_datasets') + zip_name = f"{args.synt_dataset}.zip" + download(f"cosypose/zip_files/{zip_name}", DOWNLOAD_DIR) + logger.info("Extracting textures ...") + zipfile.ZipFile(DOWNLOAD_DIR / zip_name).extractall( + LOCAL_DATA_DIR / "synt_datasets", + ) if args.example_scenario: - download(f'cosypose/custom_scenarios/example/candidates.csv', LOCAL_DATA_DIR / 'custom_scenarios/example') - download(f'cosypose/custom_scenarios/example/scene_camera.json', LOCAL_DATA_DIR / 'custom_scenarios/example') + download( + "cosypose/custom_scenarios/example/candidates.csv", + LOCAL_DATA_DIR / "custom_scenarios/example", + ) + download( + "cosypose/custom_scenarios/example/scene_camera.json", + LOCAL_DATA_DIR / "custom_scenarios/example", + ) if args.all_bop20_models: from happypose.pose_estimators.cosypose.cosypose.bop_config import ( @@ -158,10 +182,19 @@ def main(): SYNT_REAL_DETECTORS, SYNT_REAL_REFINER, ) - for model_dict in (PBR_DETECTORS, PBR_COARSE, PBR_REFINER, - SYNT_REAL_DETECTORS, SYNT_REAL_COARSE, SYNT_REAL_REFINER): + + for model_dict in ( + PBR_DETECTORS, + PBR_COARSE, + PBR_REFINER, + SYNT_REAL_DETECTORS, + SYNT_REAL_COARSE, + SYNT_REAL_REFINER, + ): for model in model_dict.values(): - download(f'cosypose/experiments/{model}', LOCAL_DATA_DIR / 'experiments') + download( + f"cosypose/experiments/{model}", LOCAL_DATA_DIR / "experiments", + ) if args.all_bop20_results: from happypose.pose_estimators.cosypose.cosypose.bop_config import ( @@ -171,14 +204,20 @@ def main(): SYNT_REAL_ICP_INFERENCE_ID, SYNT_REAL_INFERENCE_ID, ) - for result_id in (PBR_INFERENCE_ID, SYNT_REAL_INFERENCE_ID, SYNT_REAL_ICP_INFERENCE_ID, - SYNT_REAL_4VIEWS_INFERENCE_ID, SYNT_REAL_8VIEWS_INFERENCE_ID): - download(f'cosypose/results/{result_id}', LOCAL_DATA_DIR / 'results') + + for result_id in ( + PBR_INFERENCE_ID, + SYNT_REAL_INFERENCE_ID, + SYNT_REAL_ICP_INFERENCE_ID, + SYNT_REAL_4VIEWS_INFERENCE_ID, + SYNT_REAL_8VIEWS_INFERENCE_ID, + ): + download(f"cosypose/results/{result_id}", LOCAL_DATA_DIR / "results") def run_rclone(cmd, args, flags): - rclone_cmd = ['rclone', cmd] + args + flags + ['--config', str(RCLONE_CFG_PATH)] - logger.debug(' '.join(rclone_cmd)) + rclone_cmd = ["rclone", cmd, *args, *flags] + ["--config", str(RCLONE_CFG_PATH)] + logger.debug(" ".join(rclone_cmd)) print(rclone_cmd) subprocess.run(rclone_cmd) @@ -187,41 +226,43 @@ def download(download_path, local_path, flags=[]): download_path = Path(download_path) if download_path.name != local_path.name: local_path = local_path / download_path.name - if '.' in str(download_path): + if "." 
in str(download_path): rclone_path = RCLONE_ROOT + str(download_path) else: rclone_path = RCLONE_ROOT + str(download_path) + "/" local_path = str(local_path) logger.info(f"Copying {rclone_path} to {local_path}") - run_rclone("copyto", [rclone_path, local_path], flags=flags + ["-P"]) + run_rclone("copyto", [rclone_path, local_path], flags=[*flags, "-P"]) def download_bop_original(ds_name, download_pbr): - filename = f'{ds_name}_base.zip' + filename = f"{ds_name}_base.zip" wget_download_and_extract(BOP_SRC + filename, BOP_DS_DIR) - suffixes = ['models'] + BOP_DATASETS[ds_name]['splits'] + suffixes = ["models"] + BOP_DATASETS[ds_name]["splits"] if download_pbr: - suffixes += ['train_pbr'] + suffixes += ["train_pbr"] for suffix in suffixes: - wget_download_and_extract(BOP_SRC + f'{ds_name}_{suffix}.zip', BOP_DS_DIR / ds_name) + wget_download_and_extract( + BOP_SRC + f"{ds_name}_{suffix}.zip", BOP_DS_DIR / ds_name, + ) def download_bop_gdrive(ds_name): - download(f'bop_datasets/{ds_name}', BOP_DS_DIR / ds_name) + download(f"bop_datasets/{ds_name}", BOP_DS_DIR / ds_name) def wget_download_and_extract(url, out): - tmp_path = DOWNLOAD_DIR / url.split('/')[-1] + tmp_path = DOWNLOAD_DIR / url.split("/")[-1] if tmp_path.exists(): - logger.info(f'{url} already downloaded: {tmp_path}...') + logger.info(f"{url} already downloaded: {tmp_path}...") else: - logger.info(f'Download {url} at {tmp_path}...') + logger.info(f"Download {url} at {tmp_path}...") wget.download(url, out=tmp_path.as_posix()) - logger.info(f'Extracting {tmp_path} at {out}.') + logger.info(f"Extracting {tmp_path} at {out}.") zipfile.ZipFile(tmp_path).extractall(out) -if __name__ == '__main__': +if __name__ == "__main__": loggers = [logging.getLogger(name) for name in logging.root.manager.loggerDict] main() diff --git a/happypose/toolbox/utils/load_model.py b/happypose/toolbox/utils/load_model.py index 7a39dc88..ce11c6f8 100644 --- a/happypose/toolbox/utils/load_model.py +++ b/happypose/toolbox/utils/load_model.py @@ -1,8 +1,12 @@ # MegaPose from happypose.pose_estimators.megapose.src.megapose.config import LOCAL_DATA_DIR +from happypose.pose_estimators.megapose.src.megapose.inference.icp_refiner import ( + ICPRefiner, +) +from happypose.pose_estimators.megapose.src.megapose.inference.pose_estimator import ( + PoseEstimator, +) from happypose.toolbox.datasets.object_dataset import RigidObjectDataset -from happypose.pose_estimators.megapose.src.megapose.inference.icp_refiner import ICPRefiner -from happypose.pose_estimators.megapose.src.megapose.inference.pose_estimator import PoseEstimator from happypose.toolbox.inference.utils import load_pose_models NAMED_MODELS = { @@ -53,7 +57,6 @@ def load_named_model( n_workers: int = 4, bsz_images: int = 128, ) -> PoseEstimator: - model = NAMED_MODELS[model_name] renderer_kwargs = { diff --git a/happypose/toolbox/utils/logging.py b/happypose/toolbox/utils/logging.py index 069ad638..5a54888f 100644 --- a/happypose/toolbox/utils/logging.py +++ b/happypose/toolbox/utils/logging.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -15,14 +14,10 @@ """ - # Standard Library -import contextlib import logging import time from datetime import timedelta -from io import StringIO -from typing import Optional class ElapsedFormatter: @@ -32,7 +27,7 @@ def __init__(self): def format(self, record): elapsed_seconds = record.created - self.start_time elapsed = timedelta(seconds=elapsed_seconds) - return "{} - {}".format(elapsed, record.getMessage()) + return f"{elapsed} - {record.getMessage()}" def get_logger(name: str): diff --git a/happypose/toolbox/utils/logs_bokeh.py b/happypose/toolbox/utils/logs_bokeh.py index 959e1ec5..9bcbdc65 100644 --- a/happypose/toolbox/utils/logs_bokeh.py +++ b/happypose/toolbox/utils/logs_bokeh.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,7 +14,6 @@ """ - # Standard Library import datetime import textwrap @@ -41,10 +39,7 @@ class Plotter: - def __init__( - self, - log_dir: Path - ): + def __init__(self, log_dir: Path): self.fill_config_fn = check_update_config_pose self.log_dir = Path(log_dir) assert self.log_dir.exists() @@ -77,16 +72,19 @@ def load_logs(self, run_ids): log_path = run_dir / "log.txt" if log_path.exists(): log_df = pd.read_json(run_dir / "log.txt", lines=True) - last_write = datetime.datetime.fromtimestamp((run_dir / "log.txt").stat().st_mtime) + last_write = datetime.datetime.fromtimestamp( + (run_dir / "log.txt").stat().st_mtime, + ) else: log_df = None last_write = datetime.datetime.now() configs[run_id]["delta_t"] = ( - f"{(datetime.datetime.now() - last_write).seconds / 60:.1f}" + f"({len(log_df)})" + f"{(datetime.datetime.now() - last_write).seconds / 60:.1f}" + + f"({len(log_df)})" ) log_dicts[run_id] = log_df - ds_eval = dict() + ds_eval = {} for f in run_dir.iterdir(): if "errors_" in f.name: ds = f.with_suffix("").name.split("errors_")[1] @@ -220,7 +218,9 @@ def plot_eval_field( name = f"{run_num}/{dataset}" name = "\n ".join(textwrap.wrap(name, width=20)) if len(x) == 1: - f.circle(x, y, color=color, line_dash=dash_pattern, name=name) + f.circle( + x, y, color=color, line_dash=dash_pattern, name=name, + ) x = np.concatenate(([0], x)) y = np.concatenate((y, y)) f.line( @@ -333,12 +333,13 @@ def show_configs(self, ignore=None, diff=True): config_df = df.copy() self.config_df = config_df - name2color = {k: v for k, v in zip(self.run_ids, self.colors_uint8)} + name2color = dict(zip(self.run_ids, self.colors_uint8)) def f_row(data): rgb = (np.array(name2color[data.name]) * 255).astype(np.uint8) return [ - f"background-color: rgba({rgb[0]},{rgb[1]},{rgb[2]},1.0)" for _ in range(len(data)) + f"background-color: rgba({rgb[0]},{rgb[1]},{rgb[2]},1.0)" + for _ in range(len(data)) ] if "possible_roots" in df.columns: diff --git a/happypose/toolbox/utils/models_compat.py b/happypose/toolbox/utils/models_compat.py index b0c39b91..aa9207b8 100644 --- a/happypose/toolbox/utils/models_compat.py +++ b/happypose/toolbox/utils/models_compat.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,9 +13,10 @@ limitations under the License. 
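ElapsedFormatter in the logging.py hunk above prefixes every record with the time elapsed since the formatter was created. A minimal sketch of wiring it into a standard handler (illustrative; get_logger's exact setup is not shown in this hunk):

```python
import logging
import time
from datetime import timedelta

class ElapsedFormatter:
    def __init__(self):
        self.start_time = time.time()

    def format(self, record):
        elapsed = timedelta(seconds=record.created - self.start_time)
        return f"{elapsed} - {record.getMessage()}"

handler = logging.StreamHandler()
handler.setFormatter(ElapsedFormatter())   # duck-typed: only .format is needed
logger = logging.getLogger("happypose-demo")
logger.addHandler(handler)
logger.setLevel(logging.INFO)
logger.info("hello")  # e.g. "0:00:00.000123 - hello"
```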
""" + def change_keys_of_older_models(state_dict): - new_state_dict = dict() - for k, v in state_dict.items(): + new_state_dict = {} + for k, _v in state_dict.items(): if k.startswith("backbone.backbone"): new_k = "backbone." + k[len("backbone.backbone.") :] elif k.startswith("backbone.head.0."): diff --git a/happypose/toolbox/utils/omegaconf.py b/happypose/toolbox/utils/omegaconf.py index 016d8bfe..e1f75266 100644 --- a/happypose/toolbox/utils/omegaconf.py +++ b/happypose/toolbox/utils/omegaconf.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,7 +14,6 @@ """ - # Third Party from omegaconf import OmegaConf diff --git a/happypose/toolbox/utils/random.py b/happypose/toolbox/utils/random.py index 831ddab1..6424c323 100644 --- a/happypose/toolbox/utils/random.py +++ b/happypose/toolbox/utils/random.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,22 +14,22 @@ """ - # Standard Library import contextlib import os -import time import random -import webdataset as wds +import time -# Third Party -import torch import numpy as np import pinocchio as pin +# Third Party +import torch +import webdataset as wds + def make_seed(*args): - """Copied from webdataset""" + """Copied from webdataset.""" seed = 0 for arg in args: seed = (seed * 31 + hash(arg)) & 0x7FFFFFFF diff --git a/happypose/toolbox/utils/resources.py b/happypose/toolbox/utils/resources.py index 17d6c7a5..9fa1526a 100644 --- a/happypose/toolbox/utils/resources.py +++ b/happypose/toolbox/utils/resources.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,7 +14,6 @@ """ - # Standard Library import os import subprocess @@ -52,12 +50,13 @@ def get_cuda_memory(): def get_gpu_memory(): - devices = os.environ.get( "CUDA_VISIBLE_DEVICES", ).split(",") assert len(devices) == 1 - out = subprocess.check_output(["nvidia-smi", "--id=" + str(devices[0]), "-q", "--xml-format"]) + out = subprocess.check_output( + ["nvidia-smi", "--id=" + str(devices[0]), "-q", "--xml-format"], + ) tree = ET.fromstring(out) gpu = tree.findall("gpu")[0] memory = float(gpu.find("fb_memory_usage").find("used").text.split(" ")[0]) / 1024 diff --git a/happypose/toolbox/utils/tensor_collection.py b/happypose/toolbox/utils/tensor_collection.py index e019a3c1..63874097 100644 --- a/happypose/toolbox/utils/tensor_collection.py +++ b/happypose/toolbox/utils/tensor_collection.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -15,7 +14,6 @@ """ - # Standard Library from pathlib import Path @@ -32,11 +30,13 @@ def concatenate(datas): if len(datas) == 0: return PandasTensorCollection(infos=pd.DataFrame()) classes = [data.__class__ for data in datas] - assert all([class_n == classes[0] for class_n in classes]) + assert all(class_n == classes[0] for class_n in classes) - infos = pd.concat([data.infos for data in datas], axis=0, sort=False).reset_index(drop=True) + infos = pd.concat([data.infos for data in datas], axis=0, sort=False).reset_index( + drop=True, + ) tensor_keys = datas[0].tensors.keys() - tensors = dict() + tensors = {} for k in tensor_keys: tensors[k] = torch.cat([getattr(data, k) for data in datas], dim=0) return PandasTensorCollection(infos=infos, **tensors) @@ -44,7 +44,7 @@ def concatenate(datas): class TensorCollection: def __init__(self, **kwargs): - self.__dict__["_tensors"] = dict() + self.__dict__["_tensors"] = {} for k, v in kwargs.items(): self.register_tensor(k, v) @@ -62,8 +62,8 @@ def __repr__(self): return s def __getitem__(self, ids): - tensors = dict() - for k, v in self._tensors.items(): + tensors = {} + for k, _v in self._tensors.items(): tensors[k] = getattr(self, k)[ids] return TensorCollection(**tensors) @@ -92,7 +92,8 @@ def __setstate__(self, state): def __setattr__(self, name, value): if "_tensors" not in self.__dict__: - raise ValueError("Please call __init__") + msg = "Please call __init__" + raise ValueError(msg) if name in self._tensors: self._tensors[name] = value else: @@ -119,8 +120,8 @@ def half(self): return self.to(torch.half) def clone(self): - tensors = dict() - for k, v in self.tensors.items(): + tensors = {} + for k, _v in self.tensors.items(): tensors[k] = getattr(self, k).clone() return TensorCollection(**tensors) @@ -129,7 +130,7 @@ class PandasTensorCollection(TensorCollection): def __init__(self, infos, **tensors): super().__init__(**tensors) self.infos = infos.reset_index(drop=True) - self.meta = dict() + self.meta = {} def register_buffer(self, k, v): assert len(v) == len(self) diff --git a/happypose/toolbox/utils/timer.py b/happypose/toolbox/utils/timer.py index b9845092..cf646d15 100644 --- a/happypose/toolbox/utils/timer.py +++ b/happypose/toolbox/utils/timer.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,7 +14,6 @@ """ - # Standard Library import datetime diff --git a/happypose/toolbox/utils/transform_utils.py b/happypose/toolbox/utils/transform_utils.py index c1741716..605483b9 100644 --- a/happypose/toolbox/utils/transform_utils.py +++ b/happypose/toolbox/utils/transform_utils.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
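The concatenate helper in the tensor_collection.py hunk above merges collections by concatenating the infos DataFrames with pandas and each named tensor with torch.cat. A minimal sketch of that pattern on plain dicts (illustrative, not the PandasTensorCollection API):

```python
import pandas as pd
import torch

batches = [
    {"infos": pd.DataFrame({"label": ["obj1"]}), "poses": torch.rand(1, 4, 4)},
    {"infos": pd.DataFrame({"label": ["obj2"]}), "poses": torch.rand(1, 4, 4)},
]
infos = pd.concat(
    [b["infos"] for b in batches], axis=0, sort=False,
).reset_index(drop=True)
poses = torch.cat([b["poses"] for b in batches], dim=0)
print(len(infos), poses.shape)  # 2 torch.Size([2, 4, 4])
```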
@@ -20,18 +19,17 @@ import torch # MegaPose -import happypose.pose_estimators.megapose.src.megapose as megapose from happypose.pose_estimators.megapose.src.megapose.config import PROJECT_DIR def load_SO3_grid(resolution): - """ - The data.qua files were generated with the following code - http://lavalle.pl/software/so3/so3.html + """The data.qua files were generated with the following code + http://lavalle.pl/software/so3/so3.html. They are in (x,y,z,w) ordering - Returns: + Returns + ------- rotmats: [N,3,3] """ data_fname = PROJECT_DIR / f"src/megapose/data/data_{resolution}.qua" @@ -42,7 +40,7 @@ def load_SO3_grid(resolution): with open(data_fname) as fp: lines = fp.readlines() for line in lines: - x, y, z, w = [float(i) for i in line.split()] + x, y, z, w = (float(i) for i in line.split()) quats.append([x, y, z, w]) quats = torch.tensor(quats) @@ -51,14 +49,11 @@ def load_SO3_grid(resolution): def compute_geodesic_distance(query, target): - """ - - Computes distance, in radians from query to target + """Computes distance, in radians from query to target Args: query: [N,3,3] - target: [M,3,3] + target: [M,3,3]. """ - N = query.shape[0] M = target.shape[0] query_exp = query.unsqueeze(1).expand([-1, M, -1, -1]).flatten(0, 1) diff --git a/happypose/toolbox/utils/types.py b/happypose/toolbox/utils/types.py index a4c083c8..dade98bb 100644 --- a/happypose/toolbox/utils/types.py +++ b/happypose/toolbox/utils/types.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/happypose/toolbox/utils/webdataset.py b/happypose/toolbox/utils/webdataset.py index 6ea4ef86..908dd00d 100644 --- a/happypose/toolbox/utils/webdataset.py +++ b/happypose/toolbox/utils/webdataset.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -51,9 +50,11 @@ def group_by_keys(data, keys=base_plus_ext, lcase=True, suffixes=None, handler=N if current_sample is None or prefix != current_sample["__key__"]: if valid_sample(current_sample): yield current_sample - current_sample = dict(__key__=prefix, __url__=filesample["__url__"]) + current_sample = {"__key__": prefix, "__url__": filesample["__url__"]} if suffix in current_sample: - print(f"{fname}: duplicate file name in tar file {suffix} {current_sample.keys()}") + print( + f"{fname}: duplicate file name in tar file {suffix} {current_sample.keys()}", + ) current_sample["__bad__"] = True if suffixes is None or suffix in suffixes: current_sample[suffix] = value diff --git a/happypose/toolbox/utils/xarray.py b/happypose/toolbox/utils/xarray.py index 2bbb3c20..8bcb3a24 100644 --- a/happypose/toolbox/utils/xarray.py +++ b/happypose/toolbox/utils/xarray.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
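compute_geodesic_distance in the transform_utils.py hunk above measures how far apart two rotations are, in radians. The underlying formula is theta = arccos((trace(Ra^T Rb) - 1) / 2); a sketch for matching batch shapes (illustrative, the helper's pairwise N x M expansion is omitted):

```python
import torch

def geodesic_distance(Ra: torch.Tensor, Rb: torch.Tensor) -> torch.Tensor:
    """Ra, Rb: (..., 3, 3) rotation matrices with matching batch shapes."""
    R_rel = Ra.transpose(-1, -2) @ Rb
    cos_theta = (R_rel.diagonal(dim1=-2, dim2=-1).sum(-1) - 1.0) / 2.0
    return torch.acos(cos_theta.clamp(-1.0, 1.0))

R_id = torch.eye(3)
R_z90 = torch.tensor([[0.0, -1.0, 0.0], [1.0, 0.0, 0.0], [0.0, 0.0, 1.0]])
print(geodesic_distance(R_id, R_z90))  # ~pi/2
```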
@@ -15,7 +14,6 @@ """ - # Third Party import numpy as np diff --git a/happypose/toolbox/visualization/__init__.py b/happypose/toolbox/visualization/__init__.py index 73a7b275..09aba5e2 100644 --- a/happypose/toolbox/visualization/__init__.py +++ b/happypose/toolbox/visualization/__init__.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -13,4 +12,3 @@ See the License for the specific language governing permissions and limitations under the License. """ - diff --git a/happypose/toolbox/visualization/bokeh_plotter.py b/happypose/toolbox/visualization/bokeh_plotter.py index 176e2b76..17a22923 100644 --- a/happypose/toolbox/visualization/bokeh_plotter.py +++ b/happypose/toolbox/visualization/bokeh_plotter.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -47,7 +46,7 @@ def __init__( Contains an internal state `source_map` holding pointers to image data. This can be useful for updating images in real-time without re-creating figures. """ - self.source_map: Dict[str, bokeh.models.sources.ColumnDataSource] = dict() + self.source_map: Dict[str, bokeh.models.sources.ColumnDataSource] = {} self.dump_image_dir = dump_image_dir self.read_image_dir = read_image_dir if is_notebook: @@ -61,7 +60,9 @@ def hex_colors(self) -> Iterator[str]: def colors(self) -> Iterator[Tuple[float, float, float]]: return cycle(sns.color_palette(n_colors=40)) - def get_source(self, name: str) -> Tuple[bokeh.models.sources.ColumnDataSource, bool]: + def get_source( + self, name: str, + ) -> Tuple[bokeh.models.sources.ColumnDataSource, bool]: if name in self.source_map: source = self.source_map[name] new = False @@ -77,7 +78,6 @@ def plot_image( figure: Optional[bokeh.plotting.figure] = None, name: str = "image", ) -> bokeh.plotting.figure: - im_np = image_to_np_uint8(im) h, w, _ = im_np.shape @@ -89,18 +89,20 @@ def plot_image( if self.dump_image_dir is not None: if new: - figure.image_url("url", x=0, y=0, w=w, h=h, source=source, anchor="bottom_left") + figure.image_url( + "url", x=0, y=0, w=w, h=h, source=source, anchor="bottom_left", + ) im_np.flags.writeable = False im_hash = sha1(im_np.copy().data).hexdigest() im_path = str(self.dump_image_dir / f"{im_hash}.jpg") Image.fromarray(im_np).save(im_path) im_url = str(self.read_image_dir) + str(Path(im_path).name) print(im_url) - source.data = dict(url=[im_url]) + source.data = {"url": [im_url]} else: if new: figure.image_rgba("image", x=0, y=0, dw=w, dh=h, source=source) - source.data = dict(image=[to_rgba(im_np)]) + source.data = {"image": [to_rgba(im_np)]} return figure def plot_overlay( @@ -115,6 +117,7 @@ def plot_overlay( All images are np.uint8 with values in (0, 255) Args: + ---- rgb_input: (h, w, 3) rgb_rendered: (h, w, 3) with values <15 px as background. figure: Optional figure in which the data should be plotted. 
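plot_image in the bokeh_plotter.py hunk above keeps one ColumnDataSource per name so an image can be refreshed by reassigning source.data instead of rebuilding the figure. A minimal bokeh sketch of that update pattern (illustrative; sizes and pixel values are placeholders):

```python
import numpy as np
import bokeh.plotting
from bokeh.models.sources import ColumnDataSource

fig = bokeh.plotting.figure(x_range=(0, 64), y_range=(0, 48))
source = ColumnDataSource({"image": [np.zeros((48, 64), dtype=np.uint32)]})
fig.image_rgba("image", x=0, y=0, dw=64, dh=48, source=source)

# Later, pushing a new frame only touches the source, not the figure:
source.data = {"image": [np.full((48, 64), 0xFF0000FF, dtype=np.uint32)]}
```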
@@ -140,11 +143,13 @@ def plot_detections( line_width: int = 2, source_id: str = "", ) -> bokeh.plotting.figure: - boxes = detections.bboxes.cpu().numpy() if text_auto: if "score" in detections.infos.columns: - text = [f"{row.label} {row.score:.2f}" for _, row in detections.infos.iterrows()] + text = [ + f"{row.label} {row.score:.2f}" + for _, row in detections.infos.iterrows() + ] else: text = [f"{row.label}" for _, row in detections.infos.iterrows()] @@ -196,7 +201,7 @@ def plot_detections( text_font_size=text_font_size, ) f.add_layout(labelset) - data = dict(xs=xs, ys=ys, colors=patch_colors) + data = {"xs": xs, "ys": ys, "colors": patch_colors} if text is not None: data.update(text_x=text_x, text_y=text_y, text=text) source.data = data diff --git a/happypose/toolbox/visualization/bokeh_utils.py b/happypose/toolbox/visualization/bokeh_utils.py index 7eb300c6..6be1a781 100644 --- a/happypose/toolbox/visualization/bokeh_utils.py +++ b/happypose/toolbox/visualization/bokeh_utils.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -49,7 +48,7 @@ def save_image_figure(f: bokeh.plotting.figure, im_path: Path) -> PIL.Image: def to_rgba(im: np.ndarray) -> np.ndarray: """Converts (h, w, 3) to (h, w, 4) data for bokeh. im must have values in (0, 255) - NOTE: Maybe this could be simplified only using Pillow ? + NOTE: Maybe this could be simplified only using Pillow ?. """ out_im = np.empty((im.shape[0], im.shape[1]), dtype=np.uint32) view = out_im.view(dtype=np.uint8).reshape((im.shape[0], im.shape[1], 4)) @@ -76,7 +75,7 @@ def plot_image( h, w = im.shape[:2] else: h, w = im_size - source = bokeh.models.sources.ColumnDataSource(dict(rgba=[im])) + source = bokeh.models.sources.ColumnDataSource({"rgba": [im]}) f = image_figure("rgba", source, im_size=(h, w), gray=gray, figure=figure) return f, source diff --git a/happypose/toolbox/visualization/meshcat_utils.py b/happypose/toolbox/visualization/meshcat_utils.py index 07e3ff06..5391c094 100644 --- a/happypose/toolbox/visualization/meshcat_utils.py +++ b/happypose/toolbox/visualization/meshcat_utils.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
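to_rgba in the bokeh_utils.py hunk above packs an (h, w, 3) uint8 image into the (h, w) uint32 RGBA array that bokeh's image_rgba expects; only its first two lines are visible in this hunk, so the remainder of the sketch below is a plausible completion, not the confirmed implementation:

```python
import numpy as np

def to_rgba(im: np.ndarray) -> np.ndarray:
    """im: (h, w, 3) uint8 with values in 0..255."""
    out_im = np.empty((im.shape[0], im.shape[1]), dtype=np.uint32)
    view = out_im.view(dtype=np.uint8).reshape((im.shape[0], im.shape[1], 4))
    view[..., :3] = im   # copy RGB channels into the low bytes
    view[..., 3] = 255   # opaque alpha
    return out_im

rgba = to_rgba(np.zeros((2, 2, 3), dtype=np.uint8))
print(rgba.dtype, rgba.shape)  # uint32 (2, 2)
```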
@@ -15,14 +14,12 @@ """ - # Third Party import meshcat import meshcat.geometry as g import meshcat.transformations as mtf import numpy as np import trimesh -import trimesh.transformations as tra """ Some code borrowed from https://github.com/google-research/ravens @@ -35,7 +32,9 @@ def isRotationMatrix(M, tol=1e-4): tag = False I = np.identity(M.shape[0]) - if (np.linalg.norm((np.matmul(M, M.T) - I)) < tol) and (np.abs(np.linalg.det(M) - 1) < tol): + if (np.linalg.norm(np.matmul(M, M.T) - I) < tol) and ( + np.abs(np.linalg.det(M) - 1) < tol + ): tag = True if tag is False: @@ -52,7 +51,6 @@ def trimesh_to_meshcat_geometry(mesh, use_vertex_colors=False): Args: mesh: trimesh.TriMesh object """ - if use_vertex_colors: visual = mesh.visual if isinstance(visual, trimesh.visual.TextureVisuals): @@ -62,17 +60,19 @@ def trimesh_to_meshcat_geometry(mesh, use_vertex_colors=False): vertex_colors = vertex_colors / 255.0 else: vertex_colors = None - return meshcat.geometry.TriangularMeshGeometry(mesh.vertices, mesh.faces, vertex_colors) + return meshcat.geometry.TriangularMeshGeometry( + mesh.vertices, mesh.faces, vertex_colors, + ) def rgb2hex(rgb): - """ - Converts rgb color to hex + """Converts rgb color to hex. Args: + ---- rgb: color in rgb, e.g. (255,0,0) """ - return "0x%02x%02x%02x" % (rgb) + return "0x{:02x}{:02x}{:02x}".format(*rgb) def visualize_mesh(vis, mesh, transform=None, color=None, texture_png=None): @@ -93,7 +93,7 @@ def visualize_mesh(vis, mesh, transform=None, color=None, texture_png=None): if texture_png is not None: material = g.MeshLambertMaterial( - map=g.ImageTexture(image=g.PngImage.from_file(texture_png)) + map=g.ImageTexture(image=g.PngImage.from_file(texture_png)), ) print("material") @@ -103,9 +103,7 @@ def visualize_mesh(vis, mesh, transform=None, color=None, texture_png=None): def visualize_scene(vis, object_dict, randomize_color=True): - for name, data in object_dict.items(): - # try assigning a random color if randomize_color: if "color" in data: @@ -135,7 +133,7 @@ def visualize_scene(vis, object_dict, randomize_color=True): def create_visualizer(clear=True, zmq_url="tcp://127.0.0.1:6000"): print( "Waiting for meshcat server... have you started a server? Run `meshcat-server` to start a" - f" server. Communicating on zmq_url={zmq_url}" + f" server. Communicating on zmq_url={zmq_url}", ) vis = meshcat.Visualizer(zmq_url=zmq_url) if clear: @@ -146,7 +144,14 @@ def create_visualizer(clear=True, zmq_url="tcp://127.0.0.1:6000"): def make_frame( - vis, name, h=0.15, radius=0.001, o=1.0, T=None, transform=None, ignore_invalid_transform=False + vis, + name, + h=0.15, + radius=0.001, + o=1.0, + T=None, + transform=None, + ignore_invalid_transform=False, ): """Add a red-green-blue triad to the Meschat visualizer. @@ -186,18 +191,24 @@ def make_frame( transform = T if transform is not None: - if not ignore_invalid_transform: is_valid = isRotationMatrix(transform[:3, :3]) if not is_valid: - raise ValueError("meshcat_utils:attempted to visualize invalid transform T") + msg = "meshcat_utils:attempted to visualize invalid transform T" + raise ValueError( + msg, + ) vis[name].set_transform(transform) -def draw_grasp(vis, line_name, transform, h=0.15, radius=0.001, o=1.0, color=[255, 0, 0]): +def draw_grasp( + vis, line_name, transform, h=0.15, radius=0.001, o=1.0, color=[255, 0, 0], +): """Draws line to the Meshcat visualizer. + Args: + ---- vis (Meshcat Visualizer): the visualizer line_name (string): name for the line associated with the grasp. 
transform (numpy array): 4x4 specifying transformation of grasps. @@ -215,13 +226,13 @@ def draw_grasp(vis, line_name, transform, h=0.15, radius=0.001, o=1.0, color=[25 def visualize_pointcloud(vis, name, pc, color=None, transform=None, **kwargs): - """ - Args: + """Args: + ---- vis: meshcat visualizer object name: str pc: Nx3 or HxWx3 color: (optional) same shape as pc[0 - 255] scale or just rgb tuple - transform: (optional) 4x4 homogeneous transform + transform: (optional) 4x4 homogeneous transform. """ if pc.ndim == 3: pc = pc.reshape(-1, pc.shape[-1]) @@ -241,7 +252,9 @@ def visualize_pointcloud(vis, name, pc, color=None, transform=None, **kwargs): else: color = np.ones_like(pc) - vis[name].set_object(meshcat.geometry.PointCloud(position=pc.T, color=color.T, **kwargs)) + vis[name].set_object( + meshcat.geometry.PointCloud(position=pc.T, color=color.T, **kwargs), + ) if transform is not None: vis[name].set_transform(transform) @@ -251,6 +264,7 @@ def visualize_bbox(vis, name, dims, transform=None, T=None): """Visualize a bounding box using a wireframe. Args: + ---- vis (MeshCat Visualizer): the visualizer name (string): name for this frame (should be unique) dims (array-like): shape (3,), dimensions of the bounding box @@ -276,17 +290,18 @@ def visualize_transform_manager(vis, tm, frame, **kwargs): def get_pointcloud(depth, intrinsics, flatten=False, remove_zero_depth_points=True): - """Projects depth image to pointcloud + """Projects depth image to pointcloud. Args: + ---- depth: HxW float array of perspective depth in meters. intrinsics: 3x3 float array of camera intrinsics matrix. flatten: whether to flatten pointcloud Returns: + ------- points: HxWx3 float array of 3D points in camera coordinates. """ - height, width = depth.shape xlin = np.linspace(0, width - 1, width) ylin = np.linspace(0, height - 1, height) diff --git a/happypose/toolbox/visualization/meshcat_visualizer.py b/happypose/toolbox/visualization/meshcat_visualizer.py index c6d446d0..a8bbdda3 100644 --- a/happypose/toolbox/visualization/meshcat_visualizer.py +++ b/happypose/toolbox/visualization/meshcat_visualizer.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -15,13 +14,10 @@ """ - # Standard Library -import io from pathlib import Path # Third Party -import meshcat import numpy as np import trimesh from meshcat.geometry import ( @@ -35,12 +31,13 @@ from happypose.toolbox.datasets.datasets_cfg import make_object_dataset # Local Folder -from .meshcat_utils import create_visualizer, trimesh_to_meshcat_geometry +from .meshcat_utils import create_visualizer class MeshcatSceneViewer: - def __init__(self, obj_ds_name, use_textures=True, zmq_url="tcp://127.0.0.1:6000", clear=True): - + def __init__( + self, obj_ds_name, use_textures=True, zmq_url="tcp://127.0.0.1:6000", clear=True, + ): self.obj_ds = make_object_dataset(obj_ds_name) self.label_to_object = {} self.visualizer = create_visualizer(zmq_url=zmq_url, clear=clear) @@ -67,11 +64,14 @@ def get_meshcat_object(self, label): # Needed to deal with the fact that some objects might # be saved as trimesh.Scene instead of trimesh.Trimesh if hasattr(mesh, "visual"): - if isinstance(mesh.visual, trimesh.visual.TextureVisuals) and self.use_textures: + if ( + isinstance(mesh.visual, trimesh.visual.TextureVisuals) + and self.use_textures + ): texture_path = f"/dev/shm/{label}_texture.png" mesh.visual.material.image.save(texture_path) material = MeshLambertMaterial( - map=ImageTexture(image=PngImage.from_file(texture_path)) + map=ImageTexture(image=PngImage.from_file(texture_path)), ) self.label_to_object[label] = (geometry, material) return self.label_to_object[label] diff --git a/happypose/toolbox/visualization/utils.py b/happypose/toolbox/visualization/utils.py index 35a26f36..d93aa0e9 100644 --- a/happypose/toolbox/visualization/utils.py +++ b/happypose/toolbox/visualization/utils.py @@ -1,5 +1,4 @@ -""" -Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Copyright (c) 2022 Inria & NVIDIA CORPORATION & AFFILIATES. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -26,7 +25,7 @@ def image_to_np_uint8(im: Union[torch.Tensor, np.ndarray]) -> np.ndarray: - """Returns a np.uint8 image""" + """Returns a np.uint8 image.""" if isinstance(im, torch.Tensor): im_np = im.detach().cpu().numpy() else: @@ -59,7 +58,6 @@ def make_contour_overlay( color: Optional[Tuple[int, int, int]] = None, dilate_iterations: int = 1, ) -> Dict[str, Any]: - if color is None: color = (0, 255, 0) @@ -120,7 +118,8 @@ def get_ds_info(ds_name): urdf_ds_name = None # not sure if this exists obj_ds_name = "custom.panda3d" else: - raise ValueError("Unknown dataset") + msg = "Unknown dataset" + raise ValueError(msg) return urdf_ds_name, obj_ds_name @@ -134,9 +133,9 @@ def draw_bounding_box( """Draw a bounding box onto a numpy array image. 
Args: + ---- bbox: [xmin, ymin, xmax, ymax] """ - if color is None: color = [255, 0, 0] diff --git a/pyproject.toml b/pyproject.toml index 492251af..be802ed3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,8 +26,9 @@ version = "0.1.0" [tool.poetry.dependencies] addict = "^2.4.0" bokeh = "^3.1.1" -#bop-toolkit-lib = {path = "bop_toolkit"} +# bop-toolkit-lib = {path = "bop_toolkit"} imageio = "^2.31.1" +ipython = "^8.12.0" joblib = "^1.2.0" omegaconf = "^2.3.0" # cosypose = {path = "happypose/pose_estimators/cosypose"} TODO: pep517 @@ -36,6 +37,7 @@ opencv-contrib-python = "^4.7.0.72" opencv-python = "^4.7.0.72" panda3d = "=1.10.13" pin = "^2.6.17" +pybind11 = "^2.10.4" pybullet = "^3.2.5" pypng = "^0.20220715.0" python = ">=3.8,<3.11" @@ -45,6 +47,7 @@ scikit-image = "^0.21.0" scikit-learn = "^1.2.2" scipy = "^1.10.1" seaborn = "^0.12.2" +selenium = "^4.10.0" simplejson = "^3.19.1" torch = {source = "cpu", version = "<2"} torchvision = {source = "cpu", version = "<0.15.2"} @@ -53,9 +56,6 @@ transforms3d = "^0.4.1" trimesh = "^3.22.1" webdataset = "^0.2.48" wget = "^3.2" -selenium = "^4.10.0" -ipython = "^8.12.0" -pybind11 = "^2.10.4" [tool.poetry.group.dev] optional = true diff --git a/rclone.conf b/rclone.conf index b914a7bd..5fd5c5f7 100644 --- a/rclone.conf +++ b/rclone.conf @@ -1,4 +1,3 @@ [happypose] type = http url = https://www.paris.inria.fr/archive_ylabbeprojectsdata/ -