From 3b2c9bc63f8da7c3f1543441117bda8993692ac0 Mon Sep 17 00:00:00 2001 From: Samuel Felton Date: Mon, 27 Nov 2023 17:48:45 +0100 Subject: [PATCH] Model-based tracker with realsense camera --- modules/python/bindings/include/blob.hpp | 1 + modules/python/docs/todos.rst | 9 +- modules/python/examples/realsense_mbt.py | 129 ++++++++--------------- 3 files changed, 46 insertions(+), 93 deletions(-) diff --git a/modules/python/bindings/include/blob.hpp b/modules/python/bindings/include/blob.hpp index 091b976510..d11a3c4dbd 100644 --- a/modules/python/bindings/include/blob.hpp +++ b/modules/python/bindings/include/blob.hpp @@ -20,6 +20,7 @@ void bindings_vpDot2(py::class_ &pyDot2) }, R"doc( Wrapper for the defineDots method, see the C++ ViSP documentation. )doc", py::arg("dots"), py::arg("dotFile"), py::arg("I"), py::arg("color"), py::arg("trackDot") = true); + pyDot2.def_static("trackAndDisplay", [](std::vector &dots, vpImage &I, std::vector &cogs, diff --git a/modules/python/docs/todos.rst b/modules/python/docs/todos.rst index 7f29f8a017..8a579269af 100644 --- a/modules/python/docs/todos.rst +++ b/modules/python/docs/todos.rst @@ -12,7 +12,7 @@ Changes to ViSP Code generation ------------------- -* There is an issue with vpFeatureMomentDatabse::get and vpMomentDatabase::get, ignored for now +* There is an issue with vpFeatureMomentDatabse::get and vpMomentDatabase::get, ignored for now => a tester * n ary operators * Exclude get operators for vpArray2D ? * Parse subnamespaces @@ -67,13 +67,6 @@ To be written: -Packaging ------------------- - -* Root CMake - - * Build after doc if doc can be generated - Python side ----------------- diff --git a/modules/python/examples/realsense_mbt.py b/modules/python/examples/realsense_mbt.py index 53b6013aa3..7da5380ef6 100644 --- a/modules/python/examples/realsense_mbt.py +++ b/modules/python/examples/realsense_mbt.py @@ -1,7 +1,7 @@ import argparse from dataclasses import dataclass from pathlib import Path -from typing import List, Optional +from typing import List, Optional, Tuple import numpy as np import time import faulthandler @@ -9,7 +9,7 @@ from visp.core import XmlParserCamera, CameraParameters, ColVector, HomogeneousMatrix, Display, ImageConvert -from visp.core import ImageGray, ImageUInt16 +from visp.core import ImageGray, ImageUInt16, ImageRGBA from visp.io import ImageIo from visp.mbt import MbGenericTracker, MbTracker from visp.gui import DisplayOpenCV @@ -30,32 +30,19 @@ class MBTModelData: - def __init__(self, data_root: Path): - model_path = data_root / 'model' / 'teabox' + def __init__(self, data_root: Path, object_name: str): + model_path = data_root / 'model' / object_name assert model_path.exists() - self.config_color = model_path / 'teabox_color.xml' - self.config_depth = model_path / 'teabox_depth.xml' - self.cad_file = model_path / 'teabox.cao' - self.init_file = model_path / 'teabox.init' + self.config_color = model_path / f'{object_name}.xml' + self.config_depth = model_path / f'{object_name}_depth.xml' + self.cad_file = model_path / f'{object_name}.cao' + self.init_file = model_path / f'{object_name}.init' class MBTConfig: def __init__(self, data_root: Path): - data_path = data_root / 'data' / 'teabox' + data_path = data_root / 'data' assert data_path.exists() - self.color_camera_name = 'Camera_L' - self.depth_camera_name = 'Camera_R' - - self.color_intrinsics_file = data_path / f'{self.color_camera_name}.xml' - self.depth_intrinsics_file = data_path / f'{self.depth_camera_name}.xml' - - self.color_images_dir = data_path / 'color' - self.depth_images_dir = data_path / 'depth' - self.ground_truth_dir = data_path / 'ground-truth' - - - self.depth_intrinsics_file = data_path / f'{self.depth_camera_name}.xml' - self.extrinsic_file = str(data_path / 'depth_M_color.txt') # self.ground_truth = str(data_root / 'data' / 'depth_M_color.txt') @@ -64,76 +51,42 @@ class FrameData: I: ImageGray I_depth: Optional[ImageUInt16] point_cloud: Optional[np.ndarray] - cMo_ground_truth: HomogeneousMatrix -def read_data(exp_config: MBTConfig, cam_depth: CameraParameters | None, I: ImageGray): - pipe = rs.pipeline() - profile = pipe.start() - +def read_data(cam_depth: CameraParameters | None, I: ImageGray, pipe: rs.pipeline): use_depth = cam_depth is not None iteration = 1 + point_cloud_computer = rs.pointcloud() while True: frames = pipe.wait_for_frames() - print(frames) - I_depth_raw = np.asanyarray(frames.get_depth_frame().as_frame().get_data()) - - print(I_depth_raw.shape) - I = np.asanyarray(frames.get_color_frame().as_frame().get_data()) - print(I.shape) + I_np = np.asanyarray(frames.get_color_frame().as_frame().get_data()) + I_np = np.concatenate((I_np, np.ones_like(I_np[..., 0:1], dtype=np.uint8)), axis=-1) + I_rgba = ImageRGBA(I_np) + ImageConvert.convert(I_rgba, I, 0) + I_depth_raw = None point_cloud = None if use_depth: - t = time.time() - depth_filepath = exp_config.depth_images_dir / depth_format.format(iteration) - if not depth_filepath.exists(): - print(f'Could not find image {depth_filepath}') - return - I_depth_np = cv2.imread(str(depth_filepath), cv2.IMREAD_ANYCOLOR | cv2.IMREAD_ANYDEPTH) - I_depth_np = I_depth_np[..., 0] - print(f'\tDepth load took {(time.time() - t) * 1000}ms') - I_depth_raw = ImageUInt16(I_depth_np * 32767.5) - if I_depth_np.size == 0: - print('Could not successfully read the depth image') - return - t = time.time() - # point_cloud = np.empty((*I_depth_np.shape, 3), dtype=np.float64) - Z = I_depth_np.copy() - Z[Z > 2] = 0.0 # Clamping values that are too high - - vs, us = np.meshgrid(range(I_depth_np.shape[0]), range(I_depth_np.shape[1]), indexing='ij') - xs, ys = PixelMeterConversion.convertPoints(cam_depth, us, vs) - point_cloud = np.stack((xs * Z, ys * Z, Z), axis=-1) - - print(f'\tPoint_cloud took {(time.time() - t) * 1000}ms') - - cMo_ground_truth = HomogeneousMatrix() - ground_truth_file = exp_config.ground_truth_dir / (exp_config.color_camera_name + '_{:04d}.txt'.format(iteration)) - cMo_ground_truth.load(str(ground_truth_file)) + depth_proc_start = time.time() + I_depth_raw = np.asanyarray(frames.get_depth_frame().as_frame().get_data()) + point_cloud = np.asanyarray(point_cloud_computer.calculate(frames.get_depth_frame()).get_vertices()).view((np.float32, 3)) + print(f'Depth processing took {(time.time() - depth_proc_start) * 1000}ms') iteration += 1 - end_parse_time = time.time() - print(f'Data parsing took: {(end_parse_time - start_parse_time) * 1000}ms') - yield FrameData(I, I_depth_raw, point_cloud, cMo_ground_truth) + yield FrameData(I, ImageUInt16(I_depth_raw), point_cloud) -def parse_camera_file(exp_config: MBTConfig, is_color: bool) -> CameraParameters: - cam = CameraParameters() - xml_parser = XmlParserCamera() - if is_color: - camera_name, file_path = exp_config.color_camera_name, exp_config.color_intrinsics_file - else: - camera_name, file_path = exp_config.depth_camera_name, exp_config.depth_intrinsics_file - parse_res = xml_parser.parse(cam, str(file_path), camera_name, - CameraParameters.perspectiveProjWithoutDistortion, 0, 0, True) - assert parse_res == XmlParserCamera.SEQUENCE_OK # Check result - return cam +def cam_from_rs_profile(profile) -> Tuple[CameraParameters, int, int]: + intr = profile.as_video_stream_profile().get_intrinsics() # Downcast to video_stream_profile and fetch intrinsics + return CameraParameters(intr.fx, intr.fy, intr.ppx, intr.ppy), intr.height, intr.width if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--data-root', type=str, required=True, - help='Path to the folder containing all the data for the synthetic MBT example') + help='Path to the folder containing all the data for the MBT example') + parser.add_argument('--object-name', type=str, required=True, + help='Name of the object to track') parser.add_argument('--display-ground-truth', action='store_true') parser.add_argument('--disable-klt', action='store_true') parser.add_argument('--disable-depth', action='store_true') @@ -144,11 +97,23 @@ def parse_camera_file(exp_config: MBTConfig, is_color: bool) -> CameraParameters args = parser.parse_args() data_root = Path(args.data_root) - mbt_model = MBTModelData(data_root) - exp_config = MBTConfig(data_root) + + # Initialize realsense2 + pipe = rs.pipeline() + cfg = pipe.start() + + for i in range(10): + pipe.wait_for_frames() assert data_root.exists() and data_root.is_dir() + mbt_model = MBTModelData(data_root, args.object_name) + exp_config = MBTConfig(data_root) + + cam_color, color_height, color_width = cam_from_rs_profile(cfg.get_stream(rs.stream.color)) + cam_depth, depth_height, depth_width = cam_from_rs_profile(cfg.get_stream(rs.stream.depth)) + + rgb_tracker: int = MbGenericTracker.EDGE_TRACKER | (MbGenericTracker.KLT_TRACKER if not args.disable_klt else 0) tracker_types: List[int] = [rgb_tracker] if not args.disable_depth: @@ -164,8 +129,6 @@ def parse_camera_file(exp_config: MBTConfig, is_color: bool) -> CameraParameters tracker.loadModel(str(mbt_model.cad_file)) # Camera intrinsics - cam_color = parse_camera_file(exp_config, True) - cam_depth = parse_camera_file(exp_config, False) if not args.disable_depth else None tracker.setCameraParameters(*((cam_color,) if args.disable_depth else (cam_color, cam_depth))) tracker.setDisplayFeatures(True) @@ -173,7 +136,7 @@ def parse_camera_file(exp_config: MBTConfig, is_color: bool) -> CameraParameters print('Color intrinsics:', cam_color) print('Depth intrinsics:', cam_depth) I = ImageGray() - data_generator = read_data(exp_config, cam_depth, I) + data_generator = read_data(cam_depth, I, pipe) frame_data = next(data_generator) # Get first frame for init depth_M_color = HomogeneousMatrix() @@ -190,11 +153,7 @@ def parse_camera_file(exp_config: MBTConfig, is_color: bool) -> CameraParameters if not args.disable_depth: ImageConvert.createDepthHistogram(frame_data.I_depth, I_depth) dDepth.init(I_depth, I.getWidth(), 0, 'Depth') - - if args.init_ground_truth: - tracker.initFromPose(I, frame_data.cMo_ground_truth) - else: - tracker.initClick(I, str(mbt_model.init_file)) + tracker.initClick(I, str(mbt_model.init_file)) start_time = time.time() for frame_data in data_generator: if frame_data.I_depth is not None: @@ -212,7 +171,7 @@ def parse_camera_file(exp_config: MBTConfig, is_color: bool) -> CameraParameters 'Camera1': I } t = time.time() - tracker.track(image_dict, {'Camera2': pc}) + tracker.track(image_dict, {'Camera2': pc.reshape(depth_height, depth_width, 3)}) print(f'Tracking took {(time.time() - t) * 1000}ms') cMo = HomogeneousMatrix() tracker.getPose(cMo)