From 3b2c9bc63f8da7c3f1543441117bda8993692ac0 Mon Sep 17 00:00:00 2001
From: Samuel Felton <samuel.felton@irisa.fr>
Date: Mon, 27 Nov 2023 17:48:45 +0100
Subject: [PATCH] Model-based tracker with realsense camera

---
 modules/python/bindings/include/blob.hpp |   1 +
 modules/python/docs/todos.rst            |   9 +-
 modules/python/examples/realsense_mbt.py | 129 ++++++++---------------
 3 files changed, 46 insertions(+), 93 deletions(-)
diff --git a/modules/python/bindings/include/blob.hpp b/modules/python/bindings/include/blob.hpp
index 091b976510..d11a3c4dbd 100644
--- a/modules/python/bindings/include/blob.hpp
+++ b/modules/python/bindings/include/blob.hpp
@@ -20,6 +20,7 @@ void bindings_vpDot2(py::class_<vpDot2, vpTracker> &pyDot2)
   }, R"doc(
 Wrapper for the defineDots method, see the C++ ViSP documentation.
 )doc", py::arg("dots"), py::arg("dotFile"), py::arg("I"), py::arg("color"), py::arg("trackDot") = true);
+
   pyDot2.def_static("trackAndDisplay", [](std::vector<vpDot2> &dots,
                                           vpImage<unsigned char> &I,
                                           std::vector<vpImagePoint> &cogs,
diff --git a/modules/python/docs/todos.rst b/modules/python/docs/todos.rst
index 7f29f8a017..8a579269af 100644
--- a/modules/python/docs/todos.rst
+++ b/modules/python/docs/todos.rst
@@ -12,7 +12,7 @@ Changes to ViSP
 Code generation
 -------------------
 
-* There is an issue with vpFeatureMomentDatabse::get and vpMomentDatabase::get, ignored for now
+* There is an issue with vpFeatureMomentDatabse::get and vpMomentDatabase::get, ignored for now => a tester
 * n ary operators
 * Exclude get operators for vpArray2D ?
 * Parse subnamespaces
@@ -67,13 +67,6 @@ To be written:
 
 
 
-Packaging
-------------------
-
-* Root CMake
-
-  * Build after doc if doc can be generated
-
 
 Python side
 -----------------
diff --git a/modules/python/examples/realsense_mbt.py b/modules/python/examples/realsense_mbt.py
index 53b6013aa3..7da5380ef6 100644
--- a/modules/python/examples/realsense_mbt.py
+++ b/modules/python/examples/realsense_mbt.py
@@ -1,7 +1,7 @@
 import argparse
 from dataclasses import dataclass
 from pathlib import Path
-from typing import List, Optional
+from typing import List, Optional, Tuple
 import numpy as np
 import time
 import faulthandler
@@ -9,7 +9,7 @@
 
 
 from visp.core import XmlParserCamera, CameraParameters, ColVector, HomogeneousMatrix, Display, ImageConvert
-from visp.core import ImageGray, ImageUInt16
+from visp.core import ImageGray, ImageUInt16, ImageRGBA
 from visp.io import ImageIo
 from visp.mbt import MbGenericTracker, MbTracker
 from visp.gui import DisplayOpenCV
@@ -30,32 +30,19 @@
 
 
 class MBTModelData:
-  def __init__(self, data_root: Path):
-    model_path = data_root / 'model' / 'teabox'
+  def __init__(self, data_root: Path, object_name: str):
+    model_path = data_root / 'model' / object_name
     assert model_path.exists()
-    self.config_color = model_path / 'teabox_color.xml'
-    self.config_depth = model_path / 'teabox_depth.xml'
-    self.cad_file = model_path / 'teabox.cao'
-    self.init_file = model_path / 'teabox.init'
+    self.config_color = model_path / f'{object_name}.xml'
+    self.config_depth = model_path / f'{object_name}_depth.xml'
+    self.cad_file = model_path / f'{object_name}.cao'
+    self.init_file = model_path / f'{object_name}.init'
 
 
 class MBTConfig:
   def __init__(self, data_root: Path):
-    data_path = data_root / 'data' / 'teabox'
+    data_path = data_root / 'data'
     assert data_path.exists()
-    self.color_camera_name = 'Camera_L'
-    self.depth_camera_name = 'Camera_R'
-
-    self.color_intrinsics_file = data_path / f'{self.color_camera_name}.xml'
-    self.depth_intrinsics_file = data_path / f'{self.depth_camera_name}.xml'
-
-    self.color_images_dir = data_path / 'color'
-    self.depth_images_dir = data_path / 'depth'
-    self.ground_truth_dir = data_path / 'ground-truth'
-
-
-    self.depth_intrinsics_file = data_path / f'{self.depth_camera_name}.xml'
-
     self.extrinsic_file = str(data_path / 'depth_M_color.txt')
     # self.ground_truth = str(data_root / 'data' / 'depth_M_color.txt')
 
@@ -64,76 +51,42 @@ class FrameData:
   I: ImageGray
   I_depth: Optional[ImageUInt16]
   point_cloud: Optional[np.ndarray]
-  cMo_ground_truth: HomogeneousMatrix
 
 
 
 
-def read_data(exp_config: MBTConfig, cam_depth: CameraParameters | None, I: ImageGray):
-  pipe = rs.pipeline()
-  profile = pipe.start()
-
+def read_data(cam_depth: CameraParameters | None, I: ImageGray, pipe: rs.pipeline):
   use_depth = cam_depth is not None
   iteration = 1
+  point_cloud_computer = rs.pointcloud()
   while True:
     frames = pipe.wait_for_frames()
-    print(frames)
-    I_depth_raw = np.asanyarray(frames.get_depth_frame().as_frame().get_data())
-
-    print(I_depth_raw.shape)
-    I = np.asanyarray(frames.get_color_frame().as_frame().get_data())
-    print(I.shape)
+    I_np = np.asanyarray(frames.get_color_frame().as_frame().get_data())
+    I_np = np.concatenate((I_np, np.ones_like(I_np[..., 0:1], dtype=np.uint8)), axis=-1)
+    I_rgba = ImageRGBA(I_np)
+    ImageConvert.convert(I_rgba, I, 0)
+    I_depth_raw = None
     point_cloud = None
     if use_depth:
-      t = time.time()
-      depth_filepath = exp_config.depth_images_dir / depth_format.format(iteration)
-      if not depth_filepath.exists():
-        print(f'Could not find image {depth_filepath}')
-        return
-      I_depth_np = cv2.imread(str(depth_filepath), cv2.IMREAD_ANYCOLOR | cv2.IMREAD_ANYDEPTH)
-      I_depth_np = I_depth_np[..., 0]
-      print(f'\tDepth load took {(time.time() - t) * 1000}ms')
-      I_depth_raw = ImageUInt16(I_depth_np * 32767.5)
-      if I_depth_np.size == 0:
-        print('Could not successfully read the depth image')
-        return
-      t = time.time()
-      # point_cloud = np.empty((*I_depth_np.shape, 3), dtype=np.float64)
-      Z = I_depth_np.copy()
-      Z[Z > 2] = 0.0 # Clamping values that are too high
-
-      vs, us = np.meshgrid(range(I_depth_np.shape[0]), range(I_depth_np.shape[1]), indexing='ij')
-      xs, ys = PixelMeterConversion.convertPoints(cam_depth, us, vs)
-      point_cloud = np.stack((xs * Z, ys * Z, Z), axis=-1)
-
-      print(f'\tPoint_cloud took {(time.time() - t) * 1000}ms')
-
-    cMo_ground_truth = HomogeneousMatrix()
-    ground_truth_file = exp_config.ground_truth_dir / (exp_config.color_camera_name + '_{:04d}.txt'.format(iteration))
-    cMo_ground_truth.load(str(ground_truth_file))
+      depth_proc_start = time.time()
+      I_depth_raw = np.asanyarray(frames.get_depth_frame().as_frame().get_data())
+      point_cloud = np.asanyarray(point_cloud_computer.calculate(frames.get_depth_frame()).get_vertices()).view((np.float32, 3))
+      print(f'Depth processing took {(time.time() - depth_proc_start) * 1000}ms')
     iteration += 1
-    end_parse_time = time.time()
-    print(f'Data parsing took: {(end_parse_time - start_parse_time) * 1000}ms')
-    yield FrameData(I, I_depth_raw, point_cloud, cMo_ground_truth)
+    yield FrameData(I, ImageUInt16(I_depth_raw), point_cloud)
 
 
 
-def parse_camera_file(exp_config: MBTConfig, is_color: bool) -> CameraParameters:
-  cam = CameraParameters()
-  xml_parser = XmlParserCamera()
-  if is_color:
-    camera_name, file_path = exp_config.color_camera_name, exp_config.color_intrinsics_file
-  else:
-    camera_name, file_path = exp_config.depth_camera_name, exp_config.depth_intrinsics_file
-  parse_res = xml_parser.parse(cam, str(file_path), camera_name,
-                               CameraParameters.perspectiveProjWithoutDistortion, 0, 0, True)
-  assert parse_res == XmlParserCamera.SEQUENCE_OK # Check result
-  return cam
+def cam_from_rs_profile(profile) -> Tuple[CameraParameters, int, int]:
+  intr = profile.as_video_stream_profile().get_intrinsics() # Downcast to video_stream_profile and fetch intrinsics
+  return CameraParameters(intr.fx, intr.fy, intr.ppx, intr.ppy), intr.height, intr.width
 
 if __name__ == '__main__':
   parser = argparse.ArgumentParser()
   parser.add_argument('--data-root', type=str, required=True,
-                      help='Path to the folder containing all the data for the synthetic MBT example')
+                      help='Path to the folder containing all the data for the MBT example')
+  parser.add_argument('--object-name', type=str, required=True,
+                      help='Name of the object to track')
   parser.add_argument('--display-ground-truth', action='store_true')
   parser.add_argument('--disable-klt', action='store_true')
   parser.add_argument('--disable-depth', action='store_true')
@@ -144,11 +97,23 @@ def parse_camera_file(exp_config: MBTConfig, is_color: bool) -> CameraParameters
   args = parser.parse_args()
   data_root = Path(args.data_root)
 
-  mbt_model = MBTModelData(data_root)
-  exp_config = MBTConfig(data_root)
+
+  # Initialize realsense2
+  pipe = rs.pipeline()
+  cfg = pipe.start()
+
+  for i in range(10):
+    pipe.wait_for_frames()
 
   assert data_root.exists() and data_root.is_dir()
 
+  mbt_model = MBTModelData(data_root, args.object_name)
+  exp_config = MBTConfig(data_root)
+
+  cam_color, color_height, color_width = cam_from_rs_profile(cfg.get_stream(rs.stream.color))
+  cam_depth, depth_height, depth_width = cam_from_rs_profile(cfg.get_stream(rs.stream.depth))
+
+
   rgb_tracker: int = MbGenericTracker.EDGE_TRACKER | (MbGenericTracker.KLT_TRACKER if not args.disable_klt else 0)
   tracker_types: List[int] = [rgb_tracker]
   if not args.disable_depth:
@@ -164,8 +129,6 @@ def parse_camera_file(exp_config: MBTConfig, is_color: bool) -> CameraParameters
   tracker.loadModel(str(mbt_model.cad_file))
 
   # Camera intrinsics
-  cam_color = parse_camera_file(exp_config, True)
-  cam_depth = parse_camera_file(exp_config, False) if not args.disable_depth else None
 
   tracker.setCameraParameters(*((cam_color,) if args.disable_depth else (cam_color, cam_depth)))
   tracker.setDisplayFeatures(True)
@@ -173,7 +136,7 @@ def parse_camera_file(exp_config: MBTConfig, is_color: bool) -> CameraParameters
   print('Color intrinsics:', cam_color)
   print('Depth intrinsics:', cam_depth)
   I = ImageGray()
-  data_generator = read_data(exp_config, cam_depth, I)
+  data_generator = read_data(cam_depth, I, pipe)
   frame_data = next(data_generator) # Get first frame for init
 
   depth_M_color = HomogeneousMatrix()
@@ -190,11 +153,7 @@ def parse_camera_file(exp_config: MBTConfig, is_color: bool) -> CameraParameters
   if not args.disable_depth:
     ImageConvert.createDepthHistogram(frame_data.I_depth, I_depth)
     dDepth.init(I_depth,  I.getWidth(), 0, 'Depth')
-
-  if args.init_ground_truth:
-    tracker.initFromPose(I, frame_data.cMo_ground_truth)
-  else:
-    tracker.initClick(I, str(mbt_model.init_file))
+  tracker.initClick(I, str(mbt_model.init_file))
   start_time =  time.time()
   for frame_data in data_generator:
     if frame_data.I_depth is not None:
@@ -212,7 +171,7 @@ def parse_camera_file(exp_config: MBTConfig, is_color: bool) -> CameraParameters
         'Camera1': I
       }
       t = time.time()
-      tracker.track(image_dict, {'Camera2': pc})
+      tracker.track(image_dict, {'Camera2': pc.reshape(depth_height, depth_width, 3)})
       print(f'Tracking took {(time.time() - t) * 1000}ms')
     cMo = HomogeneousMatrix()
     tracker.getPose(cMo)