diff --git a/script/Blender/Keypoints_extraction/README.md b/script/Blender/Keypoints_extraction/README.md
new file mode 100644
index 0000000000..f99c95cb91
--- /dev/null
+++ b/script/Blender/Keypoints_extraction/README.md
@@ -0,0 +1,107 @@
+# Blender keypoints extraction: an experiment
+
+Some experiments using Blender to automatically sample camera viewpoints around an object and extract SIFT descriptors.
+
+## Install Python OpenCV
+
+Here, Blender is installed directly from a Linux [archive](https://www.blender.org/download/).
+
+This makes it possible to use the latest Blender version without depending on the package manager.
+To install OpenCV (`opencv-python`), the following [script](https://github.com/luckychris/install_blender_python_modules) (credit to "luckychris") is used:
+- launch Blender from the terminal
+- switch to the `Scripting` tab
+- open and run the script (use `opencv-python` as the module to be installed)
+- information about the installation should be displayed in the terminal
+
+With this workflow, Python OpenCV is installed locally, directly inside the Blender folder.
+
+## Blender scene
+
+For these tests, a simple object is used:
+- add the Suzanne model (`Add` > `Mesh` > `Monkey`)
+- update the camera focal length and the output resolution
+
+[Triangulate](https://docs.blender.org/manual/en/4.3/modeling/meshes/editing/face/triangulate_faces.html) the mesh:
+- `Edit` mode > `Face` > `Triangulate Faces` (`Ctrl T`)
+
+For the texturing:
+- `Material` > `New` > `Base Color` > `Image Texture`
+- here [`Unwrap`](https://docs.blender.org/manual/en/4.3/modeling/meshes/editing/uv.html) is used
+- `UV Editing` window > open the image
+- move the UV layout to center the mesh on the image
+
+## Run the keypoints extraction script
+
+Switch to the `Scripting` window, then open and run the [`extract_sift_sampling_Suzanne.py`](extract_sift_sampling_Suzanne.py) script.
+
+The main principles are:
+- sample equidistant camera poses around the object
+- run the SIFT detector and, for each feature:
+  - perform [back-face culling](https://en.wikipedia.org/wiki/Back-face_culling) as a visibility test
+  - compute the [ray-triangle intersection](https://docs.blender.org/api/4.3/mathutils.geometry.html#mathutils.geometry.intersect_ray_tri) to get the 3D coordinates and transform them to the object frame
+  - save the list of detected features (SIFT descriptors + 3D object coordinates) to a `.npz` file (see the loading sketch below)
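+
+The snippet below is a minimal sketch (not part of the provided scripts) showing how the generated archive can be inspected with NumPy alone; it assumes the default `/tmp/blender_render_keypoints_sampling.npz` output path and the key names used in `extract_sift_sampling_Suzanne.py`:
+
+```python
+import numpy as np
+
+# Archive written by extract_sift_sampling_Suzanne.py (adjust the path if you changed it)
+data = np.load("/tmp/blender_render_keypoints_sampling.npz")
+
+print("Camera matrix K:\n", data["K"])
+print("Number of sampled viewpoints:", data["nb_data"])
+
+# Per-viewpoint entries are indexed with a zero-padded keyframe number
+idx = 0
+descriptors = data[f"descriptors_pcl_{idx:04d}"]  # SIFT descriptors that have a 3D correspondence
+object_pts = data[f"object_pts_{idx:04d}"]        # matching 3D points, expressed in the object frame
+c_T_o = data[f"c_T_o_{idx:04d}"]                  # object-to-camera transform for this viewpoint
+print(descriptors.shape, object_pts.shape)
+print("c_T_o:\n", c_T_o)
+```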
+
+## Render a test image
+
+Move the camera and the light to the desired positions, then use the [`render_still_image_Suzanne.py`](render_still_image_Suzanne.py) script to conveniently render the image and save the corresponding camera pose.
+
+## Keypoints matching and pose computation
+
+Use the [`keypoints_matching_Suzanne.py`](keypoints_matching_Suzanne.py) script.
+
+## Results
+
+### Keypoints matching
+
+A brief overview of the keypoints matching process is shown below:
+
+![keypoints matching mosaic](img_mosaic.png)
+
+The matching lines come from the raw descriptor matching; RANSAC inlier results are not displayed.
+
+### Pose computation
+
+Running the script should give results similar to:
+
+```bash
+rvec=[[ 1.83592335 -0.06509804 -0.60995061]]
+rvec_gt=[[ 1.83833405 -0.06378919 -0.60991766]]
+
+c_T_o_est:
+[[ 0.86373993  0.25108214 -0.43693374 -0.4630725 ]
+ [-0.33764126 -0.35531447 -0.8716364  -0.11690215]
+ [-0.37410121  0.90039402 -0.2221236   5.08996118]
+ [ 0.          0.          0.          1.        ]]
+c_T_o_gt:
+[[ 0.86392033  0.25132278 -0.4364382  -0.46306577]
+ [-0.33618754 -0.35746148 -0.87132031 -0.11758688]
+ [-0.37499252  0.89947647 -0.22432667  5.09212732]
+ [ 0.          0.          0.          1.        ]]
+```
+
+Projecting a rough, simplified wireframe model of Suzanne with the estimated pose gives:
+
+![pose computation](img_result.png)
+
+### Illumination change test
+
+Same camera viewpoint, but with a different illumination condition:
+
+```bash
+rvec=[[ 2.3786033  -0.60075198 -0.0076497 ]]
+rvec_gt=[[ 1.83833405 -0.06378919 -0.60991766]]
+
+c_T_o_est:
+[[ 0.89370737 -0.41880363 -0.16090572 -1.16669042]
+ [-0.42276497 -0.66606072 -0.61451842 -0.50213294]
+ [ 0.15018957  0.61722495 -0.77231888  5.45509487]
+ [ 0.          0.          0.          1.        ]]
+c_T_o_gt:
+[[ 0.86392033  0.25132278 -0.4364382  -0.46306577]
+ [-0.33618754 -0.35746148 -0.87132031 -0.11758688]
+ [-0.37499252  0.89947647 -0.22432667  5.09212732]
+ [ 0.          0.          0.          1.        ]]
+```
+
+Note that the estimated pose degrades noticeably compared to the previous test.
+
+![Illumination change](img_result_compare.png)
diff --git a/script/Blender/Keypoints_extraction/extract_sift_sampling_Suzanne.py b/script/Blender/Keypoints_extraction/extract_sift_sampling_Suzanne.py
new file mode 100755
index 0000000000..4f67ab41cc
--- /dev/null
+++ b/script/Blender/Keypoints_extraction/extract_sift_sampling_Suzanne.py
@@ -0,0 +1,432 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+# Copyright 2024 ViSP contributor
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import bpy
+import os
+import cv2 as cv
+import numpy as np
+from mathutils import geometry, Vector
+
+# https://www.rojtberg.net/1601/from-blender-to-opencv-camera-and-back/
+def get_calibration_matrix_K_from_blender(camera_name):
+    # get the relevant data
+    cam = bpy.data.objects[camera_name].data
+    scene = bpy.context.scene
+    # assume image is not scaled
+    assert scene.render.resolution_percentage == 100
+    # assume angles describe the horizontal field of view
+    assert cam.sensor_fit != 'VERTICAL'
+
+    f_in_mm = cam.lens
+    sensor_width_in_mm = cam.sensor_width
+
+    w = scene.render.resolution_x
+    h = scene.render.resolution_y
+
+    pixel_aspect = scene.render.pixel_aspect_y / scene.render.pixel_aspect_x
+
+    f_x = f_in_mm / sensor_width_in_mm * w
+    f_y = f_x * pixel_aspect
+
+    # yes, shift_x is inverted. WTF blender?
+    c_x = w * (0.5 - cam.shift_x)
+    # and shift_y is still a percentage of width..
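+    # c_x, c_y: principal point in pixels, following the OpenCV image convention (origin at the top-left corner)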
+ c_y = h * 0.5 + w * cam.shift_y + + K = np.array([ + [f_x, 0, c_x], + [0, f_y, c_y], + [0, 0, 1] + ]) + + return K + +def inv_transfo(w_T_o): + R = w_T_o[:3,:3] + t = w_T_o[:3,3] + + o_T_w = np.eye(4) + o_T_w[:3,:3] = R.T + o_T_w[:3,3] = -R.T @ t + + return o_T_w + +def get_camera_pose(cameraName, objectName): + M = np.eye(4) + M[1][1] = -1 + M[2][2] = -1 + + cam = bpy.data.objects[cameraName] + object_pose = bpy.data.objects[objectName].matrix_world + + # Normalize orientation with respect to the scale + object_pose_normalized_blender = object_pose.copy() + object_orientation_normalized_blender = object_pose_normalized_blender.to_3x3().normalized() + for i in range(3): + for j in range(3): + object_pose_normalized_blender[i][j] = object_orientation_normalized_blender[i][j] + + w_T_o = np.array(object_pose_normalized_blender) + print(f"object_pose_normalized:\n{object_pose_normalized_blender}") + + w_T_c = np.array(cam.matrix_world) + print(f"w_T_c:\n{w_T_c}") + + c_T_w = np.array(cam.matrix_world.inverted()) + print(f"c_T_w:\n{c_T_w}") + + c_T_o = M @ c_T_w @ w_T_o + print(f"c_T_o:\n{c_T_o}") + + return c_T_o + +def get_object_vertices(objectName): + # https://blender.stackexchange.com/questions/3637/get-indices-of-vertices-of-triangulated-faces-in-python/3657#3657 + obj = bpy.data.objects[objectName] + mesh = obj.data + + model_faces = [] + model_normals = [] + for face in mesh.polygons: + face_vert = [] + for i in range(len(face.vertices)): + vert = mesh.vertices[face.vertices[i]] + vert_xyz = vert.co.xyz + face_vert.append(vert_xyz) + model_normals.append(vert.normal) + + model_faces.append(face_vert) + + return [model_faces, model_normals] + +def vec_augment(pt_3d): + return np.array([pt_3d[0], pt_3d[1], pt_3d[2], 1]) + +def convert_pt(pt_3d, w_T_o): + o_pt = vec_augment(pt_3d) + w_pt = w_T_o @ o_pt + + return w_pt[:,3] + +def compute_ray(K, im_pt): + fx = K[0,0] + fy = K[1,1] + cx = K[0,2] + cy = K[1,2] + + x = (im_pt[0] - cx) / fx + y = (im_pt[1] - cy) / fy + return np.array([x, y, 1]) + +def is_face_visible(ray_, vertex0_, normal_): + # back-face culling + # https://en.wikipedia.org/wiki/Back-face_culling + ray = np.array(ray_) + vertex0 = np.array(vertex0_) + normal = np.array(normal_) + eps = 1e-9 + return np.dot((vertex0 - ray), normal) > eps + +def ray_vertex_intersection(ray, vertex, c_T_o): + c_vert0 = c_T_o @ vec_augment(vertex[0]) + c_vert1 = c_T_o @ vec_augment(vertex[1]) + c_vert2 = c_T_o @ vec_augment(vertex[2]) + + # https://docs.blender.org/api/4.3/mathutils.geometry.html#mathutils.geometry.intersect_ray_tri + clip = True + intersect = geometry.intersect_ray_tri(c_vert0, c_vert1, c_vert2, ray, [0,0,0], clip) + return intersect + +def getNED(lon_, lat_, r, in_radian=False): + """ + Get the homogeneous transformation matrix corresponding to the local tangent plane transformation at the specified + longitude/latitude and radius coordinates, using the NED and ECEF conventions and a perfect sphere. + See also: + - https://en.wikipedia.org/wiki/Earth-centered,_Earth-fixed_coordinate_system + - https://en.wikipedia.org/wiki/Local_tangent_plane_coordinates + + Parameters + ---------- + lon_: float + The longitude coordinate. + lat_: float + The latitude coordinate. + r: float + The sphere radius. + in_radian: boolean + If true coordinates are in radian, otherwise in degree. + + Returns: + ------- + numpy matrix + The homogeneous matrix allowing converting a 3D point expressed in the NED frame to the ECEF frame. 
+ """ + if not in_radian: + # lambda + lon = np.radians(lon_) + # phi + lat = np.radians(lat_) + else: + lon = lon_ + lat = lat_ + + Tdata = [ [-np.sin(lat)*np.cos(lon), -np.sin(lon), -np.cos(lat)*np.cos(lon), r*np.cos(lon)*np.cos(lat)], \ + [-np.sin(lat)*np.sin(lon), np.cos(lon), -np.cos(lat)*np.sin(lon), r*np.sin(lon)*np.cos(lat)], \ + [ np.cos(lat), 0, -np.sin(lat), r*np.sin(lat)], \ + [ 0, 0, 0, 1] \ + ] + T = np.matrix(Tdata) + + return T + +def getENU(lon_, lat_, r, in_radian=False): + """ + Get the homogeneous transformation matrix corresponding to the local tangent plane transformation at the specified + longitude/latitude and radius coordinates, using the ENU and ECEF conventions and a perfect sphere. + See also: + - https://en.wikipedia.org/wiki/Earth-centered,_Earth-fixed_coordinate_system + - https://en.wikipedia.org/wiki/Local_tangent_plane_coordinates + + Parameters + ---------- + lon_: float + The longitude coordinate. + lat_: float + The latitude coordinate. + r: float + The sphere radius. + in_radian: boolean + If true coordinates are in radian, otherwise in degree. + + Returns: + ------- + numpy matrix + The homogeneous matrix allowing converting a 3D point expressed in the ENU frame to the ECEF frame. + """ + if not in_radian: + # lambda + lon = np.radians(lon_) + # phi + lat = np.radians(lat_) + else: + lon = lon_ + lat = lat_ + + Tdata = [ [-np.sin(lon), -np.sin(lat)*np.cos(lon), np.cos(lat)*np.cos(lon), r*np.cos(lon)*np.cos(lat)], \ + [ np.cos(lon), -np.sin(lat)*np.sin(lon), np.cos(lat)*np.sin(lon), r*np.sin(lon)*np.cos(lat)], \ + [ 0, np.cos(lat), np.sin(lat), r*np.sin(lat)], \ + [ 0, 0, 0, 1] \ + ] + T = np.matrix(Tdata) + + return T + +def regular_on_sphere_points(num, full_sphere=False): + """ + Generate equidistributed points on the surface of a sphere. + From: + - "How to generate equidistributed points on the surface of a sphere", Markus Deserno + - https://www.cmu.edu/biolphys/deserno/pdf/sphere_equi.pdf + - https://gist.github.com/dinob0t/9597525 + + Parameters + ---------- + num: int + The desired number of points on the surface of a sphere. + + Returns: + ------- + list + The list of equidistributed points on the surface of a sphere in the lon-lat coordinates. + + Note: + ------- + This method does not return exactly the specified number of points. 
+ """ + r = 1 + points = [] + # Break out if zero points + if num == 0: + return points + + a = 4.0 * np.pi*(r**2.0 / num) + d = np.sqrt(a) + m_theta = int(round(np.pi / d)) + d_theta = np.pi / m_theta + d_phi = a / d_theta + pi_2 = np.pi/2 + + if full_sphere: + m_upper_bound = m_theta + else: + m_upper_bound = int(m_theta/2) + + for m in range(m_upper_bound): + theta = np.pi * (m + 0.5) / m_theta + m_phi = int(round(2.0 * np.pi * np.sin(theta) / d_phi)) + + for n in range(m_phi): + phi = 2.0 * np.pi * n / m_phi + lon = phi + lat = pi_2-theta + points.append([lon,lat]) + + return points + +def look_at(ecef_T_cv, point): + # https://blender.stackexchange.com/questions/5210/pointing-the-camera-in-a-particular-direction-programmatically/5220#5220 + # https://docs.blender.org/api/current/mathutils.html#mathutils.Vector + direction = Vector((point[0], point[1], point[2])) - Vector((ecef_T_cv[0,3], ecef_T_cv[1,3], ecef_T_cv[2,3])) + # point the cameras '-Z' and use its 'Y' as up + rot_quat = direction.to_track_quat('-Z', 'Y') + + ecef_T_cv_look_at = np.eye(4) + # https://docs.blender.org/api/current/mathutils.html#mathutils.Quaternion.to_matrix + ecef_T_cv_look_at[:3,:3] = rot_quat.to_matrix() + ecef_T_cv_look_at[:3,3] = ecef_T_cv[:3,3].ravel() + + return ecef_T_cv_look_at + +def set_camera_pose(obj_camera, pose): + # gl2cv = Matrix().to_4x4() + # gl2cv[1][1] = -1 + # gl2cv[2][2] = -1 + # obj_camera.matrix_world = (gl2cv * Matrix(pose)).inverted() + + # cv_T_gl = np.eye(4) + # cv_T_gl[1,1] = -1 + # cv_T_gl[1,2] = -1 + + # w_T_cv = pose + # w_T_gl = w_T_cv @ cv_T_gl + # # obj_camera.matrix_world = w_T_gl.T + # obj_camera.matrix_world = w_T_gl + + # Column-major? + obj_camera.matrix_world = pose.T + +if __name__ == "__main__": + output_dir = "/tmp" + output_file_pattern_string = "blender_render_%04d.png" + + K = get_calibration_matrix_K_from_blender("Camera") + print(f"Camera Matrix:\n", K) + + data_dict = dict() + data_dict["K"] = K + + debug_print = False + + npoints = 60 + full_sphere = False + regular_surf_points = regular_on_sphere_points(npoints, full_sphere) + print(f"regular_surf_points={len(regular_surf_points)}\n{regular_surf_points}") + + # Transformation from CV frame to NED frame + ned_T_cv = np.eye(4) + ned_T_cv[0,0] = 0 + ned_T_cv[0,1] = -1 + ned_T_cv[1,0] = 1 + ned_T_cv[1,1] = 0 + print(f"ned_T_cv:\n{ned_T_cv}") + + radius = 5.0 + camera_name = "Camera" + object_name = "Suzanne" + camera = bpy.data.objects[camera_name] + for keyframe, point in enumerate(regular_surf_points): + print() + print(f"{keyframe+1}/{len(regular_surf_points)}") + lon = point[0] + lat = point[1] + + in_radian = True + ecef_T_ned = getNED(lon, lat, radius, in_radian) + ecef_T_cv = ecef_T_ned @ ned_T_cv + print(f"ecef_T_cv:\n{ecef_T_cv}") + + ecef_T_cv_look_at = look_at(ecef_T_cv, np.zeros((3,1))) + print(f"ecef_T_cv_look_at:\n{ecef_T_cv_look_at}") + set_camera_pose(camera, ecef_T_cv_look_at) + + output_filepath = os.path.join(output_dir, (output_file_pattern_string % keyframe)) + bpy.context.scene.render.filepath = output_filepath + bpy.ops.render.render(write_still = True) + + img = cv.imread(output_filepath) + print(f"img: {img.shape}") + + sift_detector = cv.SIFT.create() + keypoints, descriptors = sift_detector.detectAndCompute(img, None) + if keypoints is not None and descriptors is not None: + print(f"keypoints: {len(keypoints)} ; type={type(keypoints)}") + print(f"descriptors: {len(descriptors)} ; type={type(descriptors)} ; shape={descriptors.shape}") + if keypoints is None: + keypoints = [] + if 
descriptors is None: + descriptors = [] + + c_T_o = get_camera_pose(camera_name, object_name) + + obj_faces_vert, obj_faces_normal = get_object_vertices(object_name) + print(f"obj_faces_vert={len(obj_faces_vert)}") + + if debug_print: + for face_id, face in enumerate(obj_faces_vert): + print(f"\nFace {face_id}") + print(f" normal: {obj_faces_normal[face_id]}") + for vertex_id, vertex in enumerate(face): + print(f" vertex {vertex_id}: {face}") + + pointcloud = [] + image_pts = [] + object_pts = [] + descriptors_pcl = [] + img_results = np.copy(img) + for idx, kpt in enumerate(keypoints): + kpt_pt_normalized = compute_ray(K, kpt.pt) + # print(f"kpt: {kpt_pt_normalized}") + + kpt_pt = np.array(kpt.pt, dtype=np.int32) + cv.drawMarker(img_results, kpt_pt, (0,0,255),cv.MARKER_CROSS, 6, 1) + + for face_id, face in enumerate(obj_faces_vert): + is_visible = is_face_visible(kpt_pt_normalized, face[0], obj_faces_normal[face_id]) + if is_visible: + intersect_pt = ray_vertex_intersection(kpt_pt_normalized, face, c_T_o) + if intersect_pt is not None: + if debug_print: + print(f"Face {face_id}, is visible={is_visible}, intersect_pt={intersect_pt}") + + pointcloud.append([intersect_pt[0], intersect_pt[1], intersect_pt[2]]) + image_pts.append(kpt_pt) + c_object_pt = vec_augment([intersect_pt[0], intersect_pt[1], intersect_pt[2]]) + o_object_pt = inv_transfo(c_T_o) @ c_object_pt + object_pts.append(o_object_pt[:3]) + descriptors_pcl.append(descriptors[idx]) + + cv.imwrite("/tmp/blender_render_{:04d}_results.png".format(keyframe), img_results) + + data_dict["image_{:04d}".format(keyframe)] = img + data_dict["pointcloud_{:04d}".format(keyframe)] = pointcloud + data_dict["image_pts_{:04d}".format(keyframe)] = image_pts + data_dict["object_pts_{:04d}".format(keyframe)] = object_pts + data_dict["descriptors_pcl_{:04d}".format(keyframe)] = descriptors_pcl + data_dict["descriptors_{:04d}".format(keyframe)] = descriptors + data_dict["c_T_o_{:04d}".format(keyframe)] = c_T_o + + data_dict["nb_data"] = len(regular_surf_points) + np.savez("/tmp/blender_render_keypoints_sampling.npz", **data_dict) diff --git a/script/Blender/Keypoints_extraction/img_mosaic.png b/script/Blender/Keypoints_extraction/img_mosaic.png new file mode 100644 index 0000000000..69891147e9 Binary files /dev/null and b/script/Blender/Keypoints_extraction/img_mosaic.png differ diff --git a/script/Blender/Keypoints_extraction/img_result.png b/script/Blender/Keypoints_extraction/img_result.png new file mode 100644 index 0000000000..64e6af4d59 Binary files /dev/null and b/script/Blender/Keypoints_extraction/img_result.png differ diff --git a/script/Blender/Keypoints_extraction/img_result_2.png b/script/Blender/Keypoints_extraction/img_result_2.png new file mode 100644 index 0000000000..8b21975b00 Binary files /dev/null and b/script/Blender/Keypoints_extraction/img_result_2.png differ diff --git a/script/Blender/Keypoints_extraction/img_result_compare.png b/script/Blender/Keypoints_extraction/img_result_compare.png new file mode 100644 index 0000000000..591627a3b4 Binary files /dev/null and b/script/Blender/Keypoints_extraction/img_result_compare.png differ diff --git a/script/Blender/Keypoints_extraction/keypoints_matching_Suzanne.py b/script/Blender/Keypoints_extraction/keypoints_matching_Suzanne.py new file mode 100755 index 0000000000..14b656a865 --- /dev/null +++ b/script/Blender/Keypoints_extraction/keypoints_matching_Suzanne.py @@ -0,0 +1,340 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# Copyright 2024 ViSP contributor +# +# Licensed 
under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse +import cv2 as cv +import numpy as np +import math +import random + +def vec_augment(pt_3d): + return np.array([pt_3d[0], pt_3d[1], pt_3d[2], 1]) + +def project(pt_3d, K, c_T_o): + pt_4d = vec_augment(pt_3d) + perspective_proj = np.zeros((3,4)) + perspective_proj[0:3,0:3] = np.eye(3) + + pt_2d = K @ perspective_proj @ c_T_o @ pt_4d + pt_2d[0] /= pt_2d[2] + pt_2d[1] /= pt_2d[2] + + return pt_2d[:2] + +def draw_keypoints(img, pointcloud, K, c_T_o): + for idx in range(pointcloud.shape[0]): + pt_3d = pointcloud[idx,] + # print(f"pt_3d={pt_3d}") + # pt_2d = project(pt_3d, K, np.eye(4)) + pt_2d = project(pt_3d, K, c_T_o) + # print(f"pt_2d={pt_2d}") + pos = pt_2d.astype(np.int32) + # print(f"pos={pos}") + cv.drawMarker(img, pos, (0,0,255), cv.MARKER_CROSS, 6, 1) + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Keypoints matching.') + parser.add_argument("--input", type=str, default="/tmp/blender_render.png", help="Input image") + parser.add_argument("--npz", type=str, default="/tmp/blender_render_keypoints_sampling.npz", help="NPZ filepath") + parser.add_argument("--gt", type=str, default="/tmp/c_T_o.txt", help="Ground truth pose") + parser.add_argument("--display-ransac", action="store_true", help="Display matching after RANSAC") + parser.add_argument("--random-colors", action="store_true", help="Random color") + parser.add_argument("--flann", action="store_true", help="Use FLANN-based matching, other BF with cross-check") + parser.add_argument('--ransac-iter', type=int, default=1000, help='RANSAC iterations count.') + parser.add_argument('--ransac-reproj-error', type=float, default=6.0, help='RANSAC reprojection error.') + args = parser.parse_args() + + # print(cv.getBuildInformation()) + + input_img = args.input + print(f"Input image filepath: {input_img}") + img = cv.imread(input_img) + print(f"img: {img.shape}") + + input_npz = args.npz + print(f"Input NPZ: {input_npz}") + + npz_data = np.load(input_npz) + nb_data = npz_data["nb_data"] + print(f"nb_data: {nb_data}") + + input_gt = args.gt + c_T_o_gt = np.loadtxt(input_gt) + print(f"c_T_o_gt:\n{c_T_o_gt}") + + display_after_ransac = args.display_ransac + print(f"Display keypoints matching after RANSAC? {display_after_ransac}") + + random_colors = args.random_colors + print(f"random_colors={random_colors}") + + flann_matching = args.flann + print(f"Use FLANN-based matching? 
{flann_matching}") + + ransac_iter = args.ransac_iter + print(f"RANSAC iterations: {ransac_iter}") + ransac_reproj_error = args.ransac_reproj_error + print(f"RANSAC reprojection error: {ransac_reproj_error}") + + cam_K = npz_data["K"] + print(f"cam_K:\n{cam_K}") + + nb_data = npz_data["nb_data"] + train_descriptors = npz_data["descriptors_pcl_{:04d}".format(0)] + train_object_pts = npz_data["object_pts_{:04d}".format(0)] + train_image_pts = npz_data["image_pts_{:04d}".format(0)] + + keypoints_img_dict = dict() + for idx in range(train_image_pts.shape[0]): + keypoints_img_dict[idx] = 0 + + idx_start = train_image_pts.shape[0] + for idx1 in range(1, nb_data): + train_descriptors = np.concatenate((train_descriptors, npz_data["descriptors_pcl_{:04d}".format(idx1)])) + train_object_pts = np.concatenate((train_object_pts, npz_data["object_pts_{:04d}".format(idx1)])) + train_image_pts_cur = npz_data["image_pts_{:04d}".format(idx1)] + train_image_pts = np.concatenate((train_image_pts, train_image_pts_cur)) + + for _ in range(train_image_pts_cur.shape[0]): + keypoints_img_dict[idx_start] = idx1 + idx_start += 1 + + print(f"train_descriptors: {train_descriptors.shape}") + print(f"train_object_pts: {train_object_pts.shape}") + print(f"train_image_pts: {train_image_pts.shape}") + print(f"keypoints_img_dict: {len(keypoints_img_dict)}") + + + sift_detector = cv.SIFT.create() + keypoints, descriptors = sift_detector.detectAndCompute(img, None) + print(f"Current keypoints={len(keypoints)}") + print(f"Current descriptors={descriptors.shape}") + + crossCheck = True + if flann_matching: + # https://docs.opencv.org/4.x/dc/de2/classcv_1_1FlannBasedMatcher.html + matcher = cv.FlannBasedMatcher.create() + else: + # https://docs.opencv.org/4.x/d3/da1/classcv_1_1BFMatcher.html + matcher = cv.BFMatcher.create(crossCheck=crossCheck) + + # https://docs.opencv.org/4.x/db/d39/classcv_1_1DescriptorMatcher.html#a695c0aceafc907c024c24a0b5cdff758 + matches = matcher.match(descriptors, train_descriptors) + print(f"matches: {len(matches)}") + + pts_3d = [] + pts_2d = [] + pts_2d_train = [] + img_idx = [] + for match in matches: + pts_3d.append(train_object_pts[match.trainIdx]) + pts_2d.append(keypoints[match.queryIdx].pt) + pts_2d_train.append(train_image_pts[match.trainIdx]) + img_idx.append(keypoints_img_dict[match.trainIdx]) + + pts_3d_np = np.array(pts_3d, dtype=np.float64) + pts_2d_np = np.array(pts_2d, dtype=np.float64) + print(f"pts_3d_np={pts_3d_np.shape}") + print(f"pts_2d_np={pts_2d_np.shape}") + + rvec = None + tvec = None + # https://docs.opencv.org/4.x/d9/d0c/group__calib3d.html#ga50620f0e26e02caa2e9adc07b5fbf24e + retval, rvec, tvec, inliers = cv.solvePnPRansac(pts_3d_np, pts_2d_np, cam_K, None, iterationsCount=ransac_iter, reprojectionError=ransac_reproj_error) + print(f"retval={retval}") + if retval == 0: + print(f"Running solvePnPRansac() returns 0") + exit() + rot_3x3, _ = cv.Rodrigues(rvec) + print(f"retval={retval} ; inliers={len(inliers)}") + c_T_o_est = np.eye(4) + c_T_o_est[:3,:3] = rot_3x3 + c_T_o_est[:3,3] = tvec.ravel() + + rvec_gt, _ = cv.Rodrigues(c_T_o_gt[:3,:3]) + tvec_gt = c_T_o_gt[:3,3] + + for pt_2d in pts_2d: + cv.drawMarker(img, np.array(pt_2d).astype(np.int32), (0,0,255), cv.MARKER_CROSS, 6, 1) + + frame_length = 0.1 + cv.drawFrameAxes(img, cam_K, None, rvec, tvec, frame_length, 2) + # cv.drawFrameAxes(img, cam_K, None, rvec_gt, tvec_gt, frame_length, 1) + + model = [] + model.append([ [0.367188, -0.53125, -0.890625], [0.351562, -0.570312, -0.695312] ]) + model.append([ [0.351562, 
-0.570312, -0.695312], [0.3125, -0.570312, -0.4375] ]) + model.append([ [0.3125, -0.570312, -0.4375], [0.257812, -0.554688, -0.3125] ]) + model.append([ [0.257812, -0.554688, -0.3125], [0.21875, -0.429688, -0.28125] ]) + model.append([ [0.21875, -0.429688, -0.28125], [0.40625, -0.148438, -0.171875] ]) + model.append([ [0.40625, -0.148438, -0.171875], [0.59375, 0.164062, -0.125] ]) + model.append([ [0.59375, 0.164062, -0.125], [0.789062, 0.328125, -0.125] ]) + model.append([ [0.789062, 0.328125, -0.125], [1.03906, 0.492188, -0.085938] ]) + model.append([ [1.03906, 0.492188, -0.085938], [1.3125, 0.53125, 0.054688] ]) + model.append([ [1.3125, 0.53125, 0.054688], [1.36719, 0.5, 0.296875] ]) + model.append([ [1.36719, 0.5, 0.296875], [1.25, 0.546875, 0.46875] ]) + model.append([ [1.25, 0.546875, 0.46875], [0.859375, 0.382812, 0.382812] ]) + model.append([ [0.859375, 0.382812, 0.382812], [0.796875, 0.359375, 0.539062] ]) + model.append([ [0.796875, 0.359375, 0.539062], [0.640625, 0.195312, 0.75] ]) + model.append([ [0.640625, 0.195312, 0.75], [0.453125, 0.070312, 0.929688] ]) + model.append([ [0.453125, 0.070312, 0.929688], [0, 0.078125, 0.984375] ]) + model.append([ [0, 0.078125, 0.984375], [-0.453125, 0.070312, 0.929688] ]) + model.append([ [-0.453125, 0.070312, 0.929688], [-0.640625, 0.195312, 0.75] ]) + model.append([ [-0.640625, 0.195312, 0.75], [-0.796875, 0.359375, 0.539062] ]) + model.append([ [-0.796875, 0.359375, 0.539062], [-1.02344, 0.3125, 0.476562] ]) + model.append([ [-1.02344, 0.3125, 0.476562], [-1.23438, 0.421875, 0.507812] ]) + model.append([ [-1.23438, 0.421875, 0.507812], [-1.35156, 0.421875, 0.320312] ]) + model.append([ [-1.35156, 0.421875, 0.320312], [-1.28125, 0.429688, 0.054688] ]) + model.append([ [-1.28125, 0.429688, 0.054688], [-1.03906, 0.328125, -0.101562] ]) + model.append([ [-1.03906, 0.328125, -0.101562], [-0.773438, 0.125, -0.140625] ]) + model.append([ [-0.773438, 0.125, -0.140625], [-0.59375, 0.164062, -0.125] ]) + model.append([ [-0.59375, 0.164062, -0.125], [-0.40625, -0.148438, -0.171875] ]) + model.append([ [-0.40625, -0.148438, -0.171875], [-0.234375, -0.40625, -0.351562] ]) + model.append([ [-0.234375, -0.40625, -0.351562], [-0.25, -0.390625, -0.5] ]) + model.append([ [-0.25, -0.390625, -0.5], [-0.289062, -0.382812, -0.710938] ]) + model.append([ [-0.289062, -0.382812, -0.710938], [-0.328125, -0.398438, -0.914062] ]) + model.append([ [-0.328125, -0.398438, -0.914062], [-0.164062, -0.4375, -0.945312] ]) + model.append([ [-0.164062, -0.4375, -0.945312], [0, -0.460938, -0.976562] ]) + + # Left eye + model.append([ [-0.375, -0.742188, 0.0625], [-0.226562, -0.78125, 0.109375] ]) + model.append([ [-0.226562, -0.78125, 0.109375], [-0.1875, -0.773438, 0.15625] ]) + model.append([ [-0.1875, -0.773438, 0.15625], [-0.171875, -0.78125, 0.21875] ]) + model.append([ [-0.171875, -0.78125, 0.21875], [-0.179688, -0.78125, 0.296875] ]) + model.append([ [-0.179688, -0.78125, 0.296875], [-0.210938, -0.78125, 0.375] ]) + model.append([ [-0.210938, -0.78125, 0.375], [-0.335938, -0.75, 0.40625] ]) + model.append([ [-0.335938, -0.75, 0.40625], [-0.414062, -0.75, 0.390625] ]) + model.append([ [-0.414062, -0.75, 0.390625], [-0.53125, -0.679688, 0.335938] ]) + model.append([ [-0.53125, -0.679688, 0.335938], [-0.554688, -0.671875, 0.28125] ]) + model.append([ [-0.554688, -0.671875, 0.28125], [-0.476562, -0.71875, 0.101562] ]) + model.append([ [-0.476562, -0.71875, 0.101562], [-0.375, -0.742188, 0.0625] ]) + + # Right eye + model.append([ [0.375, -0.742188, 0.0625], [0.476562, 
-0.71875, 0.101562] ]) + model.append([ [0.476562, -0.71875, 0.101562], [0.578125, -0.679688, 0.195312] ]) + model.append([ [0.578125, -0.679688, 0.195312], [0.585938, -0.6875, 0.289062] ]) + model.append([ [0.585938, -0.6875, 0.289062], [0.5625, -0.695312, 0.351562] ]) + model.append([ [0.5625, -0.695312, 0.351562], [0.421875, -0.773438, 0.398438] ]) + model.append([ [0.421875, -0.773438, 0.398438], [0.335938, -0.75, 0.40625] ]) + model.append([ [0.335938, -0.75, 0.40625], [0.28125, -0.765625, 0.398438] ]) + model.append([ [0.28125, -0.765625, 0.398438], [0.210938, -0.78125, 0.375] ]) + model.append([ [0.210938, -0.78125, 0.375], [0.179688, -0.78125, 0.296875] ]) + model.append([ [0.179688, -0.78125, 0.296875], [0.171875, -0.78125, 0.21875] ]) + model.append([ [0.171875, -0.78125, 0.21875], [0.1875, -0.773438, 0.15625] ]) + model.append([ [0.1875, -0.773438, 0.15625], [0.226562, -0.78125, 0.109375] ]) + model.append([ [0.226562, -0.78125, 0.109375], [0.375, -0.742188, 0.0625] ]) + + for line_model in model: + start_line = vec_augment(line_model[0]) + end_line = vec_augment(line_model[1]) + + start_line_px = project(start_line, cam_K, c_T_o_est).astype(np.int32) + end_line_px = project(end_line, cam_K, c_T_o_est).astype(np.int32) + + cv.line(img, start_line_px, end_line_px, (0,0,255), 2) + + cv.imwrite("/tmp/img_result.png", img) + + + # Mosaic + nb_data_all = nb_data + 1 + center_idx = nb_data_all // 2 + mosaic_root = math.ceil(math.sqrt(nb_data_all)) + print(f"center_idx={center_idx} ; mosaic_root={mosaic_root}") + + img0 = npz_data["image_{:04d}".format(0)] + mosaic_img = np.zeros((mosaic_root*img0.shape[0], mosaic_root*img0.shape[1], 3), dtype=np.uint8) + print(f"mosaic_img={mosaic_img.shape}") + idx_shift = 0 + for idx1 in range(mosaic_root): + for idx2 in range(mosaic_root): + idx = idx1*mosaic_root + idx2 + idx_shift + + if idx == center_idx: + mosaic_img[idx1*img.shape[0]:(idx1+1)*img.shape[0], idx2*img.shape[1]:(idx2+1)*img.shape[1],] = img + idx_shift = 1 + idx = idx1*mosaic_root + idx2 + idx_shift + + img_load = npz_data["image_{:04d}".format(idx)] + idx1_ = idx // mosaic_root + idx2_ = idx % mosaic_root + mosaic_img[idx1_*img.shape[0]:(idx1_+1)*img.shape[0], idx2_*img.shape[1]:(idx2_+1)*img.shape[1],] = img_load + + elif idx < nb_data_all: + idx1_ = idx // mosaic_root + idx2_ = idx % mosaic_root + img_load = npz_data["image_{:04d}".format(idx-idx_shift)] + mosaic_img[idx1_*img.shape[0]:(idx1_+1)*img.shape[0], idx2_*img.shape[1]:(idx2_+1)*img.shape[1],] = img_load + + if display_after_ransac: + for idx_inlier in range(0, len(inliers), 1): + idx = inliers[idx_inlier,0] + idx_img = img_idx[idx] + if idx_img >= center_idx: + idx_img += 1 + + shift_w = idx_img % mosaic_root + shift_h = idx_img // mosaic_root + + pt_2d_1 = np.array(pts_2d_train[idx]).astype(np.int32) + pt_2d_1[0] += img0.shape[1]*shift_w + pt_2d_1[1] += img0.shape[0]*shift_h + cv.drawMarker(mosaic_img, pt_2d_1.astype(np.int32), (255,0,0), cv.MARKER_CROSS, 6, 1) + + pt_2d_2 = np.array(pts_2d[idx]).astype(np.int32) + pt_2d_2[0] += img0.shape[1] * (center_idx % mosaic_root) + pt_2d_2[1] += img0.shape[0] * (center_idx // mosaic_root) + + color = (0,255,0) + if random_colors: + color = (random.randrange(256), random.randrange(256), random.randrange(256)) + cv.line(mosaic_img, pt_2d_1, pt_2d_2, color, 1) + else: + for idx in range(0, len(pts_2d_train), 1): + idx_img = img_idx[idx] + if idx_img >= center_idx: + idx_img += 1 + + shift_w = idx_img % mosaic_root + shift_h = idx_img // mosaic_root + + pt_2d_1 = 
np.array(pts_2d_train[idx]).astype(np.int32) + pt_2d_1[0] += img0.shape[1]*shift_w + pt_2d_1[1] += img0.shape[0]*shift_h + cv.drawMarker(mosaic_img, pt_2d_1.astype(np.int32), (255,0,0), cv.MARKER_CROSS, 6, 1) + + pt_2d_2 = np.array(pts_2d[idx]).astype(np.int32) + pt_2d_2[0] += img0.shape[1] * (center_idx % mosaic_root) + pt_2d_2[1] += img0.shape[0] * (center_idx // mosaic_root) + cv.line(mosaic_img, pt_2d_1, pt_2d_2, (0,255,0), 1) + + cv.imwrite("/tmp/img_mosaic.png", mosaic_img) + + print() + print(f"rvec={rvec.T}") + print(f"rvec_gt={rvec_gt.T}") + + print() + print(f"rot_3x3=\n{rot_3x3}") + print(f"rot_3x3_gt=\n{c_T_o_gt[:3,:3]}") + + print() + print(f"tvec={tvec.T}") + print(f"tvec_gt={tvec_gt}") + + print() + print(f"c_T_o_est:\n{c_T_o_est}") + print(f"c_T_o_gt:\n{c_T_o_gt}") diff --git a/script/Blender/Keypoints_extraction/render_still_image_Suzanne.py b/script/Blender/Keypoints_extraction/render_still_image_Suzanne.py new file mode 100755 index 0000000000..14ae6fc3b1 --- /dev/null +++ b/script/Blender/Keypoints_extraction/render_still_image_Suzanne.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# Copyright 2024 ViSP contributor +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import bpy +import os +import numpy as np + +# https://www.rojtberg.net/1601/from-blender-to-opencv-camera-and-back/ +def get_calibration_matrix_K_from_blender(camera_name): + # get the relevant data + cam = bpy.data.objects[camera_name].data + scene = bpy.context.scene + # assume image is not scaled + assert scene.render.resolution_percentage == 100 + # assume angles describe the horizontal field of view + assert cam.sensor_fit != 'VERTICAL' + + f_in_mm = cam.lens + sensor_width_in_mm = cam.sensor_width + + w = scene.render.resolution_x + h = scene.render.resolution_y + + pixel_aspect = scene.render.pixel_aspect_y / scene.render.pixel_aspect_x + + f_x = f_in_mm / sensor_width_in_mm * w + f_y = f_x * pixel_aspect + + # yes, shift_x is inverted. WTF blender? + c_x = w * (0.5 - cam.shift_x) + # and shift_y is still a percentage of width.. 
+ c_y = h * 0.5 + w * cam.shift_y + + K = np.array([ + [f_x, 0, c_x], + [0, f_y, c_y], + [0, 0, 1] + ]) + + return K + +def inv_transfo(w_T_o): + R = w_T_o[:3,:3] + t = w_T_o[:3,3] + + o_T_w = np.eye(4) + o_T_w[:3,:3] = R.T + o_T_w[:3,3] = -R.T @ t + + return o_T_w + +def get_camera_pose(cameraName, objectName): + M = np.eye(4) + M[1][1] = -1 + M[2][2] = -1 + + cam = bpy.data.objects[cameraName] + object_pose = bpy.data.objects[objectName].matrix_world + + # Normalize orientation with respect to the scale + object_pose_normalized_blender = object_pose.copy() + object_orientation_normalized_blender = object_pose_normalized_blender.to_3x3().normalized() + for i in range(3): + for j in range(3): + object_pose_normalized_blender[i][j] = object_orientation_normalized_blender[i][j] + + w_T_o = np.array(object_pose_normalized_blender) + print(f"object_pose_normalized:\n{object_pose_normalized_blender}") + + w_T_c = np.array(cam.matrix_world) + print(f"w_T_c:\n{w_T_c}") + + c_T_w = np.array(cam.matrix_world.inverted()) + print(f"c_T_w:\n{c_T_w}") + + c_T_w2 = inv_transfo(w_T_c) + print(f"c_T_w2:\n{c_T_w2}") + + c_T_o = M @ c_T_w @ w_T_o + print(f"c_T_o:\n{c_T_o}") + + return c_T_o + +if __name__ == "__main__": + K = get_calibration_matrix_K_from_blender("Camera") + print(f"Camera Matrix:\n", K) + + output_dir = "/tmp" + output_image_name = "blender_render.png" + + output_image_filepath = os.path.join(output_dir, output_image_name) + bpy.context.scene.render.filepath = output_image_filepath + bpy.ops.render.render(write_still = True) + + camera_name = "Camera" + object_name = "Suzanne" + c_T_o = get_camera_pose(camera_name, object_name) + + output_pose_name = "c_T_o.txt" + output_pose_filepath = os.path.join(output_dir, output_pose_name) + np.savetxt(output_pose_filepath, c_T_o) \ No newline at end of file