diff --git a/launch/tier4_localization_launch/launch/pose_twist_estimator/nerf_based_localizer.launch.xml b/launch/tier4_localization_launch/launch/pose_twist_estimator/nerf_based_localizer.launch.xml
new file mode 100644
index 0000000000000..a557b8b47db10
--- /dev/null
+++ b/launch/tier4_localization_launch/launch/pose_twist_estimator/nerf_based_localizer.launch.xml
@@ -0,0 +1,16 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/launch/tier4_localization_launch/launch/pose_twist_estimator/pose_twist_estimator.launch.xml b/launch/tier4_localization_launch/launch/pose_twist_estimator/pose_twist_estimator.launch.xml
index fa6bce0e38e55..05d0d1beae8e1
--- a/launch/tier4_localization_launch/launch/pose_twist_estimator/pose_twist_estimator.launch.xml
+++ b/launch/tier4_localization_launch/launch/pose_twist_estimator/pose_twist_estimator.launch.xml
@@ -9,7 +9,7 @@
-
+
@@ -18,6 +18,7 @@
+
@@ -91,6 +92,12 @@
+
+
+
+
+
+
@@ -116,6 +123,7 @@
+
diff --git a/launch/tier4_localization_launch/package.xml b/launch/tier4_localization_launch/package.xml
index b3dc75bbf79cc..69c951db79b58 100644
--- a/launch/tier4_localization_launch/package.xml
+++ b/launch/tier4_localization_launch/package.xml
@@ -27,6 +27,7 @@
 geo_pose_projector
 gyro_odometer
 ndt_scan_matcher
+ nerf_based_localizer
 pose_estimator_arbiter
 pose_initializer
 pose_instability_detector
diff --git a/localization/nerf_based_localizer/CMakeLists.txt b/localization/nerf_based_localizer/CMakeLists.txt
new file mode 100644
index 0000000000000..5cabbb500575f
--- /dev/null
+++ b/localization/nerf_based_localizer/CMakeLists.txt
@@ -0,0 +1,51 @@
+cmake_minimum_required(VERSION 3.14)
+project(nerf_based_localizer)
+
+find_package(ament_cmake_auto REQUIRED)
+ament_auto_find_build_dependencies()
+
+# skip if CUDA is not found
+find_package(CUDA)
+
+if(NOT ${CUDA_FOUND})
+  message(WARNING "cuda is not found, so the nerf_based_localizer package won't be built.")
+  return()
+endif()
+
+find_package(CUDAToolkit REQUIRED)
+
+# libtorch
+set(CMAKE_PREFIX_PATH ${CMAKE_PREFIX_PATH} ${CMAKE_SOURCE_DIR}/external/libtorch/)
+find_package(Torch PATHS ${CMAKE_SOURCE_DIR}/external/libtorch NO_DEFAULT_PATH)
+
+# skip if libtorch is not found
+if(NOT Torch_FOUND)
+  message(WARNING "libtorch is not found, so the nerf_based_localizer package won't be built.")
+  return()
+endif()
+
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS} -w")
+link_directories(${CMAKE_SOURCE_DIR}/external/libtorch/lib/)
+include_directories(${TORCH_INCLUDE_DIRS})
+
+# OpenCV
+find_package(OpenCV REQUIRED)
+include_directories(${OpenCV_INCLUDE_DIRS})
+
+file(GLOB_RECURSE SRC_ALL
+  src/*.cpp
+  src/*.cu)
+
+ament_auto_add_executable(nerf_based_localizer ${SRC_ALL})
+ament_export_dependencies(CUDA)
+target_link_libraries(nerf_based_localizer
+  ${TORCH_LIBRARIES}
+  ${TORCH_CUDA_LIBRARIES}
+  ${OpenCV_LIBS}
+  stdc++fs
+)
+
+ament_auto_package(
+  INSTALL_TO_SHARE
+  launch
+  config
+)
diff --git a/localization/nerf_based_localizer/ORIGINAL_LICENSE b/localization/nerf_based_localizer/ORIGINAL_LICENSE
new file mode 100644
index 0000000000000..f2473c3b78569
--- /dev/null
+++ b/localization/nerf_based_localizer/ORIGINAL_LICENSE
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [2023] [Peng Wang] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/localization/nerf_based_localizer/README.md b/localization/nerf_based_localizer/README.md
new file mode 100644
index 0000000000000..1d47df61ffc92
--- /dev/null
+++ b/localization/nerf_based_localizer/README.md
@@ -0,0 +1,131 @@
+# NeRF Based Localizer
+
+NeRFBasedLocalizer is a vision-based localization package. It estimates the ego-vehicle pose by comparing the live camera image against images rendered from a NeRF model trained in advance on the target environment.
+
+![example_of_result](./doc_image/example_of_result.png)
+
+## Node diagram
+
+![node diagram](./doc_image/node_diagram.drawio.svg)
+
+## Inputs / Outputs
+
+### Input
+
+| Name            | Type                                            | Description                      |
+| :-------------- | :---------------------------------------------- | :------------------------------- |
+| `~/input/pose`  | `geometry_msgs::msg::PoseWithCovarianceStamped` | EKF Pose without IMU correction. |
+| `~/input/image` | `sensor_msgs::msg::Image`                       | Camera Image                     |
+
+### Output
+
+| Name                            | Type                                            | Description                                  |
+| :------------------------------ | :---------------------------------------------- | :------------------------------------------- |
+| `~/output/pose`                 | `geometry_msgs::msg::PoseStamped`               | estimated pose                               |
+| `~/output/pose_with_covariance` | `geometry_msgs::msg::PoseWithCovarianceStamped` | estimated pose with covariance               |
+| `~/output/score`                | `std_msgs::msg::Float32`                        | score of the pose estimate computed by NeRF  |
+| `~/output/image`                | `sensor_msgs::msg::Image`                       | image rendered by NeRF at the estimated pose |
+
+## How to build
+
+Download libtorch and extract it to the `nerf_based_localizer/external` directory.
+
+For example, either of the following versions works:
+
+```bash
+cd nerf_based_localizer/external
+
+# v1.13.1
+wget https://download.pytorch.org/libtorch/cu117/libtorch-cxx11-abi-shared-with-deps-1.13.1%2Bcu117.zip
+unzip ./libtorch-cxx11-abi-shared-with-deps-1.13.1+cu117.zip
+
+# v2.1.2
+wget https://download.pytorch.org/libtorch/cu121/libtorch-cxx11-abi-shared-with-deps-2.1.2%2Bcu121.zip
+unzip ./libtorch-cxx11-abi-shared-with-deps-2.1.2+cu121.zip
+```
+
+If libtorch is prepared, `nerf_based_localizer` can be built as a normal package.
+Otherwise, building `nerf_based_localizer` will be skipped.
+
+## How to launch
+
+Set the train result directory to the parameter `train_result_dir` in `nerf_based_localizer/config/nerf_based_localizer.param.yaml`.
+
+When launching Autoware, set `nerf` for `pose_source`.
+
+```bash
+ros2 launch autoware_launch ... \
+    pose_source:=nerf \
+    ...
+```
+
+For example, to run `logging_simulator`, the command is as follows.
+
+```bash
+ros2 launch autoware_launch logging_simulator.launch.xml \
+    map_path:=/path/to/map \
+    pose_source:=nerf \
+    vehicle_model:=sample_vehicle \
+    sensor_model:=awsim_sensor_kit \
+    perception:=false \
+    planning:=false \
+    control:=false
+```
+
+[These trained weights](https://drive.google.com/file/d/1w4hLw7aJ_o6OM8XCCXyNPZTGIy4ah9aZ/view?usp=sharing) and [this rosbag](https://drive.google.com/file/d/1uMVwQQFcfs8JOqfoA1FqfH_fLPwQ71jK/view) can be used as sample data.
+
+## How to train
+
+### Prepare training data
+
+Use `prepare_data.py`.
+
+```bash
+python3 prepare_data.py /path/to/rosbag /path/to/prepared_data/
+```
+
+The rosbag must contain the following topics.
+| Topic name                                                            | Message type                                    | Description                     |
+| :-------------------------------------------------------------------- | :---------------------------------------------- | :------------------------------ |
+| `/tf_static`                                                          | `tf2_msgs::msg::TFMessage`                      | tf_static                       |
+| `/localization/pose_twist_fusion_filter/biased_pose_with_covariance` | `geometry_msgs::msg::PoseWithCovarianceStamped` | EKF Pose without IMU correction |
+| `/sensing/camera/traffic_light/image_raw`                            | `sensor_msgs::msg::Image`                       | Camera Image                    |
+| `/sensing/camera/traffic_light/camera_info`                          | `sensor_msgs::msg::CameraInfo`                  | Camera Info                     |
+
+For example, the following rosbag obtained from AWSIM can be used.
+
+### Execute training
+
+Use `training_tool`.
+
+```bash
+cd nerf_based_localizer/training_tool/script
+./build_and_exec_training.sh /path/to/result_dir/ /path/to/prepared_data/
+```
+
+After training, set the train result directory to the parameter `train_result_dir` in `nerf_based_localizer/config/nerf_based_localizer.param.yaml`.
+
+## Principle
+
+[NeRF](https://www.matthewtancik.com/nerf) (Neural Radiance Fields) synthesizes novel views of a scene by leveraging the power of neural networks. It was introduced to address the challenge of view synthesis: creating novel, previously unseen views of a 3D scene from a sparse set of input photographs.
+
+Training phase: the model is trained with a set of 2D images of a 3D scene and their corresponding camera parameters (position and orientation). The neural network learns to predict the color and transparency of rays cast through the scene, effectively learning a representation of the 3D scene. The objective is to minimize the difference between the rendered images and the input images.
+
+Inference phase: after training, NeRF synthesizes novel views of the scene by sampling and compositing the colors of volumetric points along rays cast from a new camera viewpoint. The synthesized images remain high quality even under significant viewpoint changes.
+
+### Application in Localization
+
+Using NeRF for localization means exploiting the learned 3D scene representation to estimate the position and orientation of the camera (or observer) in the scene. By comparing the synthesized view with the actual camera view, the algorithm iteratively refines the estimated camera pose so as to minimize the difference between the rendered and actual images (a minimal code sketch of this loop is given in the appendix at the end of this README).
+
+This enables accurate and robust self-localization in various environments, allowing vehicles and robots to determine their position and orientation within a previously learned 3D space.
+
+## Acknowledgement
+
+The code for this package is based on [F2-NeRF](https://github.com/Totoro97/f2-nerf) with significant code changes.
+
+F2-NeRF is licensed under the Apache License 2.0, and its license file is copied to this directory as [ORIGINAL_LICENSE](./ORIGINAL_LICENSE).
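+
+## Appendix: Pose optimization sketch
+
+The following is a minimal, self-contained libtorch sketch of the differential mode (`optimization_mode: 1`), not the actual node code: `render_rgb` is a toy stand-in for the NeRF renderer, and `iteration_num` / `learning_rate` mirror the parameters in `nerf_based_localizer.param.yaml`. It only illustrates the idea of gradient descent on the camera pose through a photometric loss.
+
+```cpp
+#include <torch/torch.h>
+
+#include <iostream>
+
+// Toy stand-in for the NeRF renderer: maps a 6-DoF pose
+// (x, y, z, roll, pitch, yaw) to an 8x8 RGB image and is
+// differentiable w.r.t. the pose, like the real renderer.
+torch::Tensor render_rgb(const torch::Tensor & pose)
+{
+  return torch::sigmoid(pose.sum()) * torch::ones({8, 8, 3});
+}
+
+int main()
+{
+  const int iteration_num = 10;       // cf. iteration_num in the param file
+  const double learning_rate = 1e-4;  // cf. learning_rate in the param file
+
+  torch::Tensor observed = torch::rand({8, 8, 3});              // camera image
+  torch::Tensor pose = torch::zeros({6}).requires_grad_(true);  // EKF prior
+
+  for (int i = 0; i < iteration_num; i++) {
+    // Photometric loss between the rendered and the observed image
+    torch::Tensor loss = torch::mse_loss(render_rgb(pose), observed);
+    loss.backward();
+    {
+      torch::NoGradGuard no_grad;  // take a gradient step on the pose itself
+      pose -= learning_rate * pose.grad();
+      pose.grad().zero_();
+    }
+    std::cout << "iter " << i << ": loss = " << loss.item<float>() << std::endl;
+  }
+  return 0;
+}
+```
+
+The random-search mode (`optimization_mode: 0`) replaces the gradient step with `particle_num` candidate poses sampled around the prior using the `noise_position_*` / `noise_rotation_*` widths, scoring each candidate by the same photometric comparison.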
diff --git a/localization/nerf_based_localizer/config/nerf_based_localizer.param.yaml b/localization/nerf_based_localizer/config/nerf_based_localizer.param.yaml new file mode 100644 index 0000000000000..eedf165cac55a --- /dev/null +++ b/localization/nerf_based_localizer/config/nerf_based_localizer.param.yaml @@ -0,0 +1,23 @@ +/**: + ros__parameters: + train_result_dir: "/path/to/train_result_dir" + optimization_mode: 1 # 0: random_search, 1: differential + + # if optimization_mode is 0, the following parameters are used + particle_num: 50 + render_pixel_num: 128 + noise_position_x: 0.10 # [m] + noise_position_y: 0.10 # [m] + noise_position_z: 0.05 # [m] + noise_rotation_x: 0.05 # [deg] + noise_rotation_y: 0.05 # [deg] + noise_rotation_z: 0.10 # [deg] + + # if optimization_mode is 1, the following parameters are used + iteration_num: 1 + learning_rate: 0.0001 + + # The following parameters are used in both optimization_mode 0 and 1 + output_covariance: 0.000225 + resize_factor: 20 + sample_num_per_ray: 1024 diff --git a/localization/nerf_based_localizer/doc_image/example_of_result.png b/localization/nerf_based_localizer/doc_image/example_of_result.png new file mode 100644 index 0000000000000..0a29719e937d1 Binary files /dev/null and b/localization/nerf_based_localizer/doc_image/example_of_result.png differ diff --git a/localization/nerf_based_localizer/doc_image/node_diagram.drawio.svg b/localization/nerf_based_localizer/doc_image/node_diagram.drawio.svg new file mode 100644 index 0000000000000..e10d00335b71d --- /dev/null +++ b/localization/nerf_based_localizer/doc_image/node_diagram.drawio.svg @@ -0,0 +1,63 @@ + + + + + + + NeRF Based + Localizer + + + + Other Autoware + packages + + + + + + /twist_with_covariance + + + + + + /image + + + + + + /pose_with_covariance + + + + /nerf_based_localizer + + + + /ekf_localizer + + + + + + /tf_static + + + + + + /ekf_pose_with_covariance + + + diff --git a/localization/nerf_based_localizer/external/.gitignore b/localization/nerf_based_localizer/external/.gitignore new file mode 100644 index 0000000000000..d6b7ef32c8478 --- /dev/null +++ b/localization/nerf_based_localizer/external/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore diff --git a/localization/nerf_based_localizer/launch/nerf_based_localizer.launch.xml b/localization/nerf_based_localizer/launch/nerf_based_localizer.launch.xml new file mode 100644 index 0000000000000..77551a5bbe7b0 --- /dev/null +++ b/localization/nerf_based_localizer/launch/nerf_based_localizer.launch.xml @@ -0,0 +1,29 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/localization/nerf_based_localizer/package.xml b/localization/nerf_based_localizer/package.xml new file mode 100644 index 0000000000000..bf68bd690df8b --- /dev/null +++ b/localization/nerf_based_localizer/package.xml @@ -0,0 +1,40 @@ + + + + nerf_based_localizer + 0.1.0 + The nerf_based_localizer package + Shintaro Sakoda + Apache License 2.0 + Shintaro Sakoda + + ament_cmake_auto + autoware_cmake + + autoware_map_msgs + autoware_universe_utils + diagnostic_msgs + fmt + geometry_msgs + libpcl-all-dev + nav_msgs + rclcpp + sensor_msgs + std_msgs + std_srvs + tf2 + tf2_eigen + tf2_geometry_msgs + tf2_ros + tf2_sensor_msgs + tier4_debug_msgs + tier4_localization_msgs + visualization_msgs + + ament_cmake_cppcheck + ament_lint_auto + + + ament_cmake + + diff --git a/localization/nerf_based_localizer/src/main.cpp b/localization/nerf_based_localizer/src/main.cpp new file mode 100644 index 0000000000000..ca9b12a47c0e5 --- /dev/null +++ 
b/localization/nerf_based_localizer/src/main.cpp
@@ -0,0 +1,24 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "nerf_based_localizer.hpp"
+
+#include <rclcpp/rclcpp.hpp>
+
+int main(int argc, char * argv[])
+{
+  rclcpp::init(argc, argv);
+  std::shared_ptr<NerfBasedLocalizer> node = std::make_shared<NerfBasedLocalizer>();
+  rclcpp::spin(node);
+  rclcpp::shutdown();
+}
diff --git a/localization/nerf_based_localizer/src/nerf/CustomOps/CustomOps.cpp b/localization/nerf_based_localizer/src/nerf/CustomOps/CustomOps.cpp
new file mode 100644
index 0000000000000..a856542b6be3d
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/CustomOps/CustomOps.cpp
@@ -0,0 +1,38 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Utils/CustomOps/CustomOps.cpp
+//
+// Created by ppwang on 2022/10/5.
+//
+
+#include "CustomOps.hpp"
+
+namespace torch::autograd
+{
+
+variable_list TruncExp::forward(AutogradContext * ctx, Tensor input)
+{
+  ctx->save_for_backward({input});
+  return {torch::exp(input)};
+}
+
+variable_list TruncExp::backward(AutogradContext * ctx, variable_list grad_output)
+{
+  Tensor x = ctx->get_saved_variables()[0];
+  return {grad_output[0] * torch::exp(x.clamp(-100.f, 5.f))};
+}
+
+} // namespace torch::autograd
diff --git a/localization/nerf_based_localizer/src/nerf/CustomOps/CustomOps.cu b/localization/nerf_based_localizer/src/nerf/CustomOps/CustomOps.cu
new file mode 100644
index 0000000000000..87e6b246b0686
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/CustomOps/CustomOps.cu
@@ -0,0 +1,135 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Utils/CustomOps/CustomOps.cu
+//
+// Created by ppwang on 2023/3/17.
+//
+
+#include "../common.hpp"
+#include "../common_cuda.hpp"
+#include "CustomOps.hpp"
+
+#define SCALE (16.f)
+
+using Tensor = torch::Tensor;
+
+__global__ void WeightVarLossForwardKernel(
+  int n_outs, float * weights, int * idx_start_end, float * out_vars)
+{
+  int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (idx >= n_outs) return;
+  int idx_start = idx_start_end[idx * 2];
+  int idx_end = idx_start_end[idx * 2 + 1];
+  if (idx_start >= idx_end) {
+    out_vars[idx] = 0.f;
+    return;
+  }
+  float mean = 0.f;
+  float weight_sum = 1e-6f;
+  float len = SCALE;
+  for (int i = 0; i + idx_start < idx_end; i++) {
+    mean += weights[i + idx_start] * (float(i) / len);
+    weight_sum += weights[i + idx_start];
+  }
+  mean /= weight_sum;
+  float variance = 0.f;
+  for (int i = 0; i + idx_start < idx_end; i++) {
+    float bias = float(i) / len - mean;
+    variance += weights[i + idx_start] * bias * bias;
+  }
+  out_vars[idx] = variance;
+}
+
+__global__ void WeightVarLossBackwardKernel(
+  int n_outs, float * weights, int * idx_start_end, float * dl_dvars, float * dl_dw)
+{
+  int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (idx >= n_outs) return;
+  int idx_start = idx_start_end[idx * 2];
+  int idx_end = idx_start_end[idx * 2 + 1];
+  if (idx_start >= idx_end) {
+    return;
+  }
+  float mean = 0.f;
+  float weight_sum = 1e-6f;
+  float len = SCALE;
+  for (int i = 0; i + idx_start < idx_end; i++) {
+    mean += weights[i + idx_start] * (float(i) / len);
+    weight_sum += weights[i + idx_start];
+  }
+  mean /= weight_sum;
+  float variance = 0.f;
+  float tmp = 0.f;
+  for (int i = 0; i + idx_start < idx_end; i++) {
+    float bias = float(i) / len - mean;
+    variance += weights[i + idx_start] * bias * bias;
+    tmp += weights[i + idx_start] * 2.f * bias;
+  }
+  for (int i = 0; i + idx_start < idx_end; i++) {
+    float bias = float(i) / len - mean;
+    float grad = (bias * bias + tmp * -(float(i) / len) / weight_sum);
+    dl_dw[i + idx_start] = dl_dvars[idx] * grad;
+  }
+}
+
+namespace torch::autograd
+{
+
+class WeightVarLoss : public Function<WeightVarLoss>
+{
+public:
+  static variable_list forward(AutogradContext * ctx, Tensor weights, Tensor idx_start_end)
+  {
+    CHECK(weights.is_contiguous());
+    CHECK(idx_start_end.is_contiguous());
+    int n_outs = idx_start_end.size(0);
+    Tensor out_vars = torch::empty({n_outs}, CUDAFloat);
+    dim3 grid_dim = LIN_GRID_DIM(n_outs);
+    dim3 block_dim = LIN_BLOCK_DIM;
+    WeightVarLossForwardKernel<<<grid_dim, block_dim>>>(
+      n_outs, weights.data_ptr<float>(), idx_start_end.data_ptr<int>(),
+      out_vars.data_ptr<float>());
+    ctx->save_for_backward({weights, idx_start_end});
+    return {out_vars};
+  }
+
+  static variable_list backward(AutogradContext * ctx, variable_list grad_output)
+  {
+    Tensor dl_dvar = grad_output[0].contiguous();
+    auto saved_tensors = ctx->get_saved_variables();
+    Tensor & weights = saved_tensors[0];
+    Tensor & idx_start_end = saved_tensors[1];
+
+    int n_outs = idx_start_end.size(0);
+    int n_all = weights.size(0);
+
+    Tensor dl_dw = torch::empty({n_all}, CUDAFloat);
+    dim3 grid_dim = LIN_GRID_DIM(n_outs);
+    dim3 block_dim = LIN_BLOCK_DIM;
+
+    WeightVarLossBackwardKernel<<<grid_dim, block_dim>>>(
+      n_outs, weights.data_ptr<float>(), idx_start_end.data_ptr<int>(),
+      dl_dvar.data_ptr<float>(), dl_dw.data_ptr<float>());
+
+    return {dl_dw, Tensor()};
+  }
+};
+
+} // namespace torch::autograd
+
+Tensor CustomOps::WeightVar(Tensor weights, Tensor idx_start_end)
+{
+  return torch::autograd::WeightVarLoss::apply(weights.contiguous(), idx_start_end.contiguous())[0];
+}
diff --git a/localization/nerf_based_localizer/src/nerf/CustomOps/CustomOps.hpp b/localization/nerf_based_localizer/src/nerf/CustomOps/CustomOps.hpp
new file mode 100644
index 0000000000000..e62aa9fd84bba
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/CustomOps/CustomOps.hpp
@@ -0,0 +1,46 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Utils/CustomOps/CustomOps.h
+//
+// Created by ppwang on 2022/10/5.
+//
+
+#ifndef NERF__CUSTOMOPS__CUSTOMOPS_HPP_
+#define NERF__CUSTOMOPS__CUSTOMOPS_HPP_
+
+#include <torch/torch.h>
+
+namespace torch::autograd
+{
+
+class TruncExp : public Function<TruncExp>
+{
+public:
+  static variable_list forward(AutogradContext * ctx, Tensor input);
+
+  static variable_list backward(AutogradContext * ctx, variable_list grad_output);
+};
+
+} // namespace torch::autograd
+
+namespace CustomOps
+{
+
+torch::Tensor WeightVar(torch::Tensor weights, torch::Tensor idx_start_end);
+
+}
+
+#endif // NERF__CUSTOMOPS__CUSTOMOPS_HPP_
diff --git a/localization/nerf_based_localizer/src/nerf/CustomOps/FlexOps.cpp b/localization/nerf_based_localizer/src/nerf/CustomOps/FlexOps.cpp
new file mode 100644
index 0000000000000..8eaf748bc13ea
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/CustomOps/FlexOps.cpp
@@ -0,0 +1,21 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Utils/CustomOps/FlexOps.cpp
+//
+// Created by ppwang on 2023/2/11.
+//
+
+#include "FlexOps.hpp"
diff --git a/localization/nerf_based_localizer/src/nerf/CustomOps/FlexOps.cu b/localization/nerf_based_localizer/src/nerf/CustomOps/FlexOps.cu
new file mode 100644
index 0000000000000..2aee27d4b74aa
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/CustomOps/FlexOps.cu
@@ -0,0 +1,245 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Utils/CustomOps/FlexOps.cu
+
+#include "../common_cuda.hpp"
+#include "FlexOps.hpp"
+
+using Tensor = torch::Tensor;
+
+__global__ void FlexSumForwardKernel(int n_outs, float * val, int * idx_start_end, float * sum)
+{
+  int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (idx >= n_outs) return;
+  int idx_start = idx_start_end[idx * 2];
+  int idx_end = idx_start_end[idx * 2 + 1];
+  float out_val = 0.f;
+  for (int i = idx_start; i < idx_end; i++) {
+    out_val += val[i];
+  }
+  sum[idx] = out_val;
+}
+
+__global__ void FlexSumBackwardKernel(
+  int n_outs, float * dl_dsum, int * idx_start_end, float * dl_dval)
+{
+  int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (idx >= n_outs) return;
+  int idx_start = idx_start_end[idx * 2];
+  int idx_end = idx_start_end[idx * 2 + 1];
+  float fill_val = dl_dsum[idx];
+  for (int i = idx_start; i < idx_end; i++) {
+    dl_dval[i] = fill_val;
+  }
+}
+
+__global__ void FlexSumVecForwardKernel(
+  int n_outs, int vec_size, float * val, int * idx_start_end, float * sum)
+{
+  int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (idx >= n_outs) return;
+  int idx_start = idx_start_end[idx * 2];
+  int idx_end = idx_start_end[idx * 2 + 1];
+  for (int j = 0; j < vec_size; j++) {
+    float out_val = 0.f;
+    for (int i = idx_start; i < idx_end; i++) {
+      out_val += val[i * vec_size + j];
+    }
+    sum[idx * vec_size + j] = out_val;
+  }
+}
+
+__global__ void FlexSumVecBackwardKernel(
+  int n_outs, int vec_size, float * dl_dsum, int * idx_start_end, float * dl_dval)
+{
+  int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (idx >= n_outs) return;
+  int idx_start = idx_start_end[idx * 2];
+  int idx_end = idx_start_end[idx * 2 + 1];
+  for (int j = 0; j < vec_size; j++) {
+    float fill_val = dl_dsum[idx * vec_size + j];
+    for (int i = idx_start; i < idx_end; i++) {
+      dl_dval[i * vec_size + j] = fill_val;
+    }
+  }
+}
+
+__global__ void FlexAccumulateSumForwardKernel(
+  int n_outs, bool include_this, float * val, int * idx_start_end, float * sum)
+{
+  int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (idx >= n_outs) return;
+  int idx_start = idx_start_end[idx * 2];
+  int idx_end = idx_start_end[idx * 2 + 1];
+  float out_val = 0.f;
+  if (include_this) {
+    for (int i = idx_start; i < idx_end; i++) {
+      out_val += val[i];
+      sum[i] = out_val;
+    }
+  } else {
+    for (int i = idx_start; i < idx_end; i++) {
+      sum[i] = out_val;
+      out_val += val[i];
+    }
+  }
+}
+
+__global__ void FlexAccumulateSumBackwardKernel(
+  int n_outs, bool include_this, float * dl_dsum, int * idx_start_end, float * dl_dval)
+{
+  int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (idx >= n_outs) return;
+  int idx_start = idx_start_end[idx * 2];
+  int idx_end = idx_start_end[idx * 2 + 1];
+  float wp = 0.f;
+  if (include_this) {
+    for (int i = idx_end - 1; i >= idx_start; i--) {
+      wp += dl_dsum[i];
+      dl_dval[i] = wp;
+    }
+  } else {
+    for (int i = idx_end - 1; i >= idx_start; i--) {
+      dl_dval[i] = wp;
+      wp += dl_dsum[i];
+    }
+  }
+}
+
+namespace torch::autograd
+{
+
+class FlexSum : public Function<FlexSum>
+{
+public:
+  static variable_list forward(AutogradContext * ctx, Tensor val, Tensor idx_start_end)
+  {
+    CHECK(val.is_contiguous());
+    CHECK(idx_start_end.is_contiguous());
+    int n_outs = idx_start_end.size(0);
+    Tensor sum;
+    dim3 grid_dim = LIN_GRID_DIM(n_outs);
+    dim3 block_dim = LIN_BLOCK_DIM;
+
+    if (val.sizes().size() == 1) {
+      sum = torch::empty({n_outs}, CUDAFloat);
+      FlexSumForwardKernel<<<grid_dim, block_dim>>>(
+        n_outs, val.data_ptr<float>(), idx_start_end.data_ptr<int>(), sum.data_ptr<float>());
+    } else {
+      int vec_size = val.size(1);
+      sum = torch::empty({n_outs, vec_size}, CUDAFloat);
+      FlexSumVecForwardKernel<<<grid_dim, block_dim>>>(
+        n_outs, vec_size, val.data_ptr<float>(), idx_start_end.data_ptr<int>(),
+        sum.data_ptr<float>());
+    }
+    ctx->save_for_backward({val, idx_start_end});
+    return {sum};
+  }
+
+  static variable_list backward(AutogradContext * ctx, variable_list grad_output)
+  {
+    Tensor dl_dsum = grad_output[0].contiguous();
+    auto saved_tensors = ctx->get_saved_variables();
+    Tensor & val = saved_tensors[0];
+    Tensor & idx_start_end = saved_tensors[1];
+    int n_outs = idx_start_end.size(0);
+    int n_all = val.size(0);
+
+    Tensor dl_dval;
+    dim3 grid_dim = LIN_GRID_DIM(n_outs);
+    dim3 block_dim = LIN_BLOCK_DIM;
+
+    if (val.sizes().size() == 1) {
+      dl_dval = torch::empty({n_all}, CUDAFloat);
+      FlexSumBackwardKernel<<<grid_dim, block_dim>>>(
+        n_outs, dl_dsum.data_ptr<float>(), idx_start_end.data_ptr<int>(),
+        dl_dval.data_ptr<float>());
+
+    } else {
+      int vec_size = val.size(1);
+      dl_dval = torch::empty({n_all, vec_size}, CUDAFloat);
+      FlexSumVecBackwardKernel<<<grid_dim, block_dim>>>(
+        n_outs, vec_size, dl_dsum.data_ptr<float>(), idx_start_end.data_ptr<int>(),
+        dl_dval.data_ptr<float>());
+    }
+    return {dl_dval, Tensor()};
+  }
+};
+
+class FlexAccumulateSum : public Function<FlexAccumulateSum>
+{
+public:
+  static variable_list forward(
+    AutogradContext * ctx, Tensor val, Tensor idx_start_end, torch::IValue include_this_ivalue)
+  {
+    CHECK(val.is_contiguous());
+    CHECK(idx_start_end.is_contiguous());
+    bool include_this = include_this_ivalue.toBool();
+    int n_all = val.size(0);
+    int n_outs = idx_start_end.size(0);
+    Tensor sum = torch::empty({n_all}, CUDAFloat);
+    dim3 grid_dim = LIN_GRID_DIM(n_outs);
+    dim3 block_dim = LIN_BLOCK_DIM;
+
+    FlexAccumulateSumForwardKernel<<<grid_dim, block_dim>>>(
+      n_outs, include_this, val.data_ptr<float>(), idx_start_end.data_ptr<int>(),
+      sum.data_ptr<float>());
+
+    ctx->save_for_backward({val, idx_start_end});
+    ctx->saved_data["include_this"] = include_this_ivalue;
+    return {sum};
+  }
+
+  static variable_list backward(AutogradContext * ctx, variable_list grad_output)
+  {
+    Tensor dl_dsum = grad_output[0].contiguous();
+
+    auto saved_tensors = ctx->get_saved_variables();
+    bool include_this = ctx->saved_data["include_this"].toBool();
+    Tensor & val = saved_tensors[0];
+    Tensor & idx_start_end = saved_tensors[1];
+    int n_outs = idx_start_end.size(0);
+    int n_all = val.size(0);
+
+    Tensor dl_dval = torch::empty({n_all}, CUDAFloat);
+    dim3 grid_dim = LIN_GRID_DIM(n_outs);
+    dim3 block_dim = LIN_BLOCK_DIM;
+
+    FlexAccumulateSumBackwardKernel<<<grid_dim, block_dim>>>(
+      n_outs, include_this, dl_dsum.data_ptr<float>(), idx_start_end.data_ptr<int>(),
+      dl_dval.data_ptr<float>());
+
+    return {dl_dval, Tensor(), Tensor()};
+  }
+};
+
+} // namespace torch::autograd
+
+namespace FlexOps
+{
+
+Tensor Sum(Tensor val, Tensor idx_start_end)
+{
+  return torch::autograd::FlexSum::apply(val.contiguous(), idx_start_end.contiguous())[0];
+}
+
+Tensor AccumulateSum(Tensor val, Tensor idx_start_end, bool include_this)
+{
+  return torch::autograd::FlexAccumulateSum::apply(
+    val.contiguous(), idx_start_end.contiguous(), torch::IValue(include_this))[0];
+}
+
+} // namespace FlexOps
diff --git a/localization/nerf_based_localizer/src/nerf/CustomOps/FlexOps.hpp b/localization/nerf_based_localizer/src/nerf/CustomOps/FlexOps.hpp
new file mode 100644
index 0000000000000..d8171d06d7798
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/CustomOps/FlexOps.hpp
@@ -0,0 +1,36 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Utils/CustomOps/FlexOps.h
+//
+// Created by ppwang on 2023/2/11.
+//
+
+#ifndef NERF__CUSTOMOPS__FLEXOPS_HPP_
+#define NERF__CUSTOMOPS__FLEXOPS_HPP_
+
+#include "../common.hpp"
+
+#include <torch/torch.h>
+
+namespace FlexOps
+{
+
+torch::Tensor Sum(torch::Tensor val, torch::Tensor idx_start_end);
+torch::Tensor AccumulateSum(torch::Tensor val, torch::Tensor idx_start_end, bool include_this);
+
+} // namespace FlexOps
+
+#endif // NERF__CUSTOMOPS__FLEXOPS_HPP_
diff --git a/localization/nerf_based_localizer/src/nerf/CustomOps/Scatter.cpp b/localization/nerf_based_localizer/src/nerf/CustomOps/Scatter.cpp
new file mode 100644
index 0000000000000..5af970d6b98f9
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/CustomOps/Scatter.cpp
@@ -0,0 +1,21 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Utils/CustomOps/Scatter.cpp
+//
+// Created by ppwang on 2023/3/27.
+//
+
+#include "Scatter.hpp"
diff --git a/localization/nerf_based_localizer/src/nerf/CustomOps/Scatter.cu b/localization/nerf_based_localizer/src/nerf/CustomOps/Scatter.cu
new file mode 100644
index 0000000000000..05eba608c858d
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/CustomOps/Scatter.cu
@@ -0,0 +1,149 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Utils/CustomOps/Scatter.cu
+//
+// Created by ppwang on 2023/3/27.
+//
+
+#include "../common_cuda.hpp"
+#include "Scatter.hpp"
+
+using Tensor = torch::Tensor;
+
+__global__ void ScatterAddFuncForward(
+  int n_all, int n_channels, float * emb, int * scatter_idx, float * to_add)
+{
+  int pts_idx = blockIdx.x * blockDim.x + threadIdx.x;
+  int c_idx = blockIdx.y;
+  if (pts_idx >= n_all || c_idx >= n_channels) return;
+  int emb_idx = scatter_idx[pts_idx];
+  to_add = to_add + pts_idx * n_channels + c_idx;
+  emb = emb + emb_idx * n_channels + c_idx;
+  to_add[0] += emb[0];
+}
+
+__global__ void ScatterAddFuncBackwardBlock(
+  int n_emb, int n_blocks, int n_all, int block_size, int n_channels, float * dl_demb_pool,
+  int * scatter_idx, float * dl_dsum)
+{
+  int block_idx = blockIdx.x * blockDim.x + threadIdx.x;
+  int emb_idx = blockIdx.y;
+  if (emb_idx >= n_emb || block_idx >= n_blocks || block_idx * block_size >= n_all) return;
+  scatter_idx = scatter_idx + block_idx * block_size;
+  dl_dsum = dl_dsum + (block_idx * block_size) * n_channels;
+  dl_demb_pool = dl_demb_pool + emb_idx * n_blocks * n_channels + block_idx * n_channels;
+  int block_min = block_size;
+  if (block_idx * block_size + block_size > n_all) {
+    block_min = n_all - (block_idx * block_size);
+  }
+  for (int i = 0; i < block_min; i++) {
+    if (scatter_idx[i] == emb_idx) {
+      for (int c = 0; c < n_channels; c++) {
+        dl_demb_pool[c] += dl_dsum[i * n_channels + c];
+      }
+    }
+  }
+}
+
+namespace torch::autograd
+{
+
+class ScatterAddFunc : public Function<ScatterAddFunc>
+{
+public:
+  static variable_list forward(AutogradContext * ctx, Tensor emb, Tensor idx, Tensor to_add)
+  {
+    emb = emb.contiguous();
+    idx = idx.contiguous();
+    int n_all = idx.size(0);
+    int n_channels = emb.size(1);
+    CHECK(n_all == to_add.size(0));
+    CHECK(n_channels == to_add.size(1));
+
+    const unsigned thread_cap = 512;
+    dim3 grid_dim = {unsigned(n_all + thread_cap - 1) / thread_cap, unsigned(n_channels), 1};
+    dim3 block_dim = {unsigned(thread_cap), 1, 1};
+
+    Tensor sum = to_add.clone().contiguous();
+    ScatterAddFuncForward<<<grid_dim, block_dim>>>(
+      n_all, n_channels, emb.data_ptr<float>(), idx.data_ptr<int>(), sum.data_ptr<float>());
+    ctx->save_for_backward({emb, idx});
+    return {sum};
+  }
+
+  static variable_list backward(AutogradContext * ctx, variable_list grad_output)
+  {
+    Tensor dl_dsum = grad_output[0].contiguous();
+    auto saved_tensors = ctx->get_saved_variables();
+    Tensor & emb = saved_tensors[0];
+    Tensor & idx = saved_tensors[1];
+
+    int n_all = idx.size(0);
+    int n_channels = dl_dsum.size(1);
+
+    int n_emb = emb.size(0);
+    int block_size = (int(std::sqrt(n_all + 1024)) >> 5) << 5;
+    int n_blocks = (n_all + block_size - 1) / block_size;
+
+    const unsigned thread_cap = 512;
+    dim3 grid_dim = {unsigned(n_blocks + thread_cap - 1) / thread_cap, unsigned(n_emb), 1};
+    dim3 block_dim = {unsigned(thread_cap), 1, 1};
+
+    Tensor dl_demb_pool = torch::zeros({n_emb, n_blocks, n_channels}, CUDAFloat);
+    ScatterAddFuncBackwardBlock<<<grid_dim, block_dim>>>(
+      n_emb, n_blocks, n_all, block_size, n_channels, dl_demb_pool.data_ptr<float>(),
+      idx.data_ptr<int>(), dl_dsum.data_ptr<float>());
+
+    Tensor dl_demb = torch::sum(dl_demb_pool, 1, false);
+    Tensor dl_dto_add = dl_dsum.clone();
+
+    return {dl_demb, Tensor(), dl_dto_add};
+  }
+};
+
+} // namespace torch::autograd
+
+Tensor CustomOps::ScatterAdd(torch::Tensor emb, torch::Tensor idx, torch::Tensor to_add)
+{
+  return torch::autograd::ScatterAddFunc::apply(emb, idx, to_add)[0];
+}
+
+__global__ void ScatterIdxKernel(int n_rays, int * idx_start_end, int * emb_idx, int * all_emb_idx)
+{
+  int ray_idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (ray_idx >= n_rays) return;
+  int idx_start = idx_start_end[ray_idx * 2];
+  int idx_end = idx_start_end[ray_idx * 2 + 1];
+
+  int fill = emb_idx[ray_idx];
+  for (int i = idx_start; i < idx_end; i++) {
+    all_emb_idx[i] = fill;
+  }
+}
+
+Tensor CustomOps::ScatterIdx(int n_all_pts, Tensor idx_start_end, Tensor emb_idx)
+{
+  Tensor ret =
+    torch::empty({n_all_pts}, torch::TensorOptions().dtype(torch::kInt).device(torch::kCUDA));
+  int n_rays = idx_start_end.size(0);
+  dim3 grid_dim = LIN_GRID_DIM(n_rays);
+  dim3 block_dim = LIN_BLOCK_DIM;
+
+  ScatterIdxKernel<<<grid_dim, block_dim>>>(
+    n_rays, idx_start_end.data_ptr<int>(), emb_idx.data_ptr<int>(), ret.data_ptr<int>());
+
+  return ret;
+}
diff --git a/localization/nerf_based_localizer/src/nerf/CustomOps/Scatter.hpp b/localization/nerf_based_localizer/src/nerf/CustomOps/Scatter.hpp
new file mode 100644
index 0000000000000..fafc79b616db9
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/CustomOps/Scatter.hpp
@@ -0,0 +1,39 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Utils/CustomOps/Scatter.h
+//
+// Created by ppwang on 2023/3/27.
+//
+
+#ifndef NERF__CUSTOMOPS__SCATTER_HPP_
+#define NERF__CUSTOMOPS__SCATTER_HPP_
+
+#include "../common.hpp"
+
+#include <torch/torch.h>
+
+class Scatter
+{
+};
+
+namespace CustomOps
+{
+
+torch::Tensor ScatterAdd(torch::Tensor emb, torch::Tensor idx, torch::Tensor to_add);
+torch::Tensor ScatterIdx(int n_all_pts, torch::Tensor idx_start_end, torch::Tensor emb_idx);
+} // namespace CustomOps
+
+#endif // NERF__CUSTOMOPS__SCATTER_HPP_
diff --git a/localization/nerf_based_localizer/src/nerf/common.hpp b/localization/nerf_based_localizer/src/nerf/common.hpp
new file mode 100644
index 0000000000000..107be3081dac5
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/common.hpp
@@ -0,0 +1,29 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Common.h
+//
+// Created by ppwang on 2022/5/8.
+//
+
+#ifndef NERF__COMMON_HPP_
+#define NERF__COMMON_HPP_
+
+#include <torch/torch.h>
+
+using Slc = torch::indexing::Slice;
+const auto CUDAFloat = torch::TensorOptions().dtype(torch::kFloat32).device(torch::kCUDA);
+
+#endif // NERF__COMMON_HPP_
diff --git a/localization/nerf_based_localizer/src/nerf/common_cuda.hpp b/localization/nerf_based_localizer/src/nerf/common_cuda.hpp
new file mode 100644
index 0000000000000..c80d4c458c39e
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/common_cuda.hpp
@@ -0,0 +1,39 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Common.h
+//
+// Created by ppwang on 2022/5/8.
+//
+
+#ifndef NERF__COMMON_CUDA_HPP_
+#define NERF__COMMON_CUDA_HPP_
+
+#include <cuda_runtime.h>
+
+inline unsigned int DivUp(const unsigned int x, const unsigned int y)
+{
+  return (x + y - 1) / y;
+}
+
+constexpr unsigned int THREAD_CAP = 512;
+constexpr dim3 LIN_BLOCK_DIM = {THREAD_CAP, 1, 1};
+
+inline dim3 LIN_GRID_DIM(const int x)
+{
+  return dim3{DivUp(x, THREAD_CAP), 1, 1};
+}
+
+#endif // NERF__COMMON_CUDA_HPP_
diff --git a/localization/nerf_based_localizer/src/nerf/dataset.cpp b/localization/nerf_based_localizer/src/nerf/dataset.cpp
new file mode 100644
index 0000000000000..267426ec3d6c1
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/dataset.cpp
@@ -0,0 +1,195 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Dataset/Dataset.cpp
+//
+// Created by ppwang on 2022/5/7.
+// +#include "dataset.hpp" + +#include "stop_watch.hpp" +#include "utils.hpp" + +#include + +#include +#include + +#include +#include + +using Tensor = torch::Tensor; + +namespace fs = std::experimental::filesystem::v1; + +Dataset::Dataset(const std::string & data_path) +{ + ScopeWatch dataset_watch("Dataset::Dataset"); + + std::cout << "data_path = " << data_path << std::endl; + + // Load camera pose + CHECK(fs::exists(data_path + "/cams_meta.tsv")); + { + std::ifstream ifs(data_path + "/cams_meta.tsv"); + std::string line; + std::getline(ifs, line); // header + std::vector poses_vec, intrinsics_vec, dist_params_vec; + while (std::getline(ifs, line)) { + std::istringstream iss(line); + std::vector tokens; + std::string token; + while (std::getline(iss, token, '\t')) { + tokens.push_back(token); + } + const int POSE_NUM = 12; //(3, 4) + const int INTRINSIC_NUM = 9; //(3, 3) + const int DISTORTION_NUM = 4; //(k1, k2, p1, p2) + CHECK(tokens.size() == POSE_NUM + INTRINSIC_NUM + DISTORTION_NUM); + Tensor pose = torch::zeros({3, 4}, torch::kFloat32); + for (int i = 0; i < POSE_NUM; i++) { + pose.index_put_({i / 4, i % 4}, std::stof(tokens[i])); + } + pose = pose.reshape({3, 4}); + poses_vec.push_back(pose); + + Tensor intrinsic = torch::zeros({3, 3}, torch::kFloat32); + for (int i = 0; i < INTRINSIC_NUM; i++) { + intrinsic.index_put_({i / 3, i % 3}, std::stof(tokens[POSE_NUM + i])); + } + intrinsic = intrinsic.reshape({3, 3}); + intrinsics_vec.push_back(intrinsic); + + Tensor dist_param = torch::zeros({4}, torch::kFloat32); + for (int i = 0; i < DISTORTION_NUM; i++) { + dist_param.index_put_({i}, std::stof(tokens[POSE_NUM + INTRINSIC_NUM + i])); + } + dist_params_vec.push_back(dist_param); + } + + n_images = poses_vec.size(); + poses = torch::stack(poses_vec, 0).contiguous().to(torch::kCUDA); + intrinsics = torch::stack(intrinsics_vec, 0).contiguous().to(torch::kCUDA); + dist_params = torch::stack(dist_params_vec, 0).contiguous().to(torch::kCUDA); + } + + // Normalize scene + { + Tensor cam_pos = poses.index({Slc(), Slc(0, 3), 3}).clone(); + center = cam_pos.mean(0, false); + Tensor bias = cam_pos - center.unsqueeze(0); + radius = torch::linalg_norm(bias, 2, -1, false).max().item(); + cam_pos = (cam_pos - center.unsqueeze(0)) / radius; + poses.index_put_({Slc(), Slc(0, 3), 3}, cam_pos); + poses = poses.contiguous(); + } + + std::vector images_vec; + // Load images + { + ScopeWatch watch("LoadImages"); + const std::vector image_paths = glob_image_paths(data_path + "/images/"); + for (int i = 0; i < n_images; i++) { + const std::string image_path = image_paths[i]; + images_vec.push_back(utils::read_image_tensor(image_path).to(torch::kCPU)); + } + } + + std::cout << "Number of images: " << n_images << std::endl; + + height = images_vec[0].size(0); + width = images_vec[0].size(1); + images = torch::stack(images_vec, 0).contiguous(); +} + +void Dataset::save_inference_params(const std::string & train_result_dir) const +{ + std::ofstream ofs(train_result_dir + "/inference_params.yaml"); + ofs << std::fixed; + ofs << "%YAML 1.2" << std::endl; + ofs << "---" << std::endl; + ofs << "n_images: " << n_images << std::endl; + ofs << "height: " << height << std::endl; + ofs << "width: " << width << std::endl; + + ofs << "intrinsic: ["; + ofs << intrinsics[0][0][0].item() << ", "; + ofs << intrinsics[0][0][1].item() << ", "; + ofs << intrinsics[0][0][2].item() << "," << std::endl; + ofs << " "; + ofs << intrinsics[0][1][0].item() << ", "; + ofs << intrinsics[0][1][1].item() << ", "; + ofs << 
intrinsics[0][1][2].item() << "," << std::endl; + ofs << " "; + ofs << intrinsics[0][2][0].item() << ", "; + ofs << intrinsics[0][2][1].item() << ", "; + ofs << intrinsics[0][2][2].item() << "]" << std::endl; + + ofs << "normalizing_center: [" << center[0].item(); + ofs << ", " << center[1].item(); + ofs << ", " << center[2].item() << "]" << std::endl; + ofs << "normalizing_radius: " << radius << std::endl; +} + +Rays Dataset::get_all_rays_of_camera(int idx) +{ + int H = height; + int W = width; + Tensor ii = torch::linspace(0.f, H - 1.f, H, CUDAFloat); + Tensor jj = torch::linspace(0.f, W - 1.f, W, CUDAFloat); + auto ij = torch::meshgrid({ii, jj}, "ij"); + Tensor i = ij[0].reshape({-1}); + Tensor j = ij[1].reshape({-1}); + + auto [rays_o, rays_d] = get_rays_from_pose( + poses[idx].unsqueeze(0), intrinsics[idx].unsqueeze(0), torch::stack({i, j}, -1)); + return {rays_o, rays_d}; +} + +std::tuple Dataset::sample_random_rays(int batch_size) +{ + const auto CPULong = torch::TensorOptions().dtype(torch::kLong).device(torch::kCPU); + Tensor cam_indices = torch::randint(n_images, {batch_size}, CPULong); + Tensor i = torch::randint(0, height, batch_size, CPULong); + Tensor j = torch::randint(0, width, batch_size, CPULong); + Tensor ij = torch::stack({i, j}, -1).to(torch::kCUDA).contiguous(); + + Tensor gt_colors = images.view({-1, 3}) + .index({(cam_indices * height * width + i * width + j).to(torch::kLong)}) + .to(torch::kCUDA) + .contiguous(); + cam_indices = cam_indices.to(torch::kCUDA); + cam_indices = cam_indices.to(torch::kInt32); + ij = ij.to(torch::kInt32); + + Tensor selected_poses = torch::index_select(poses, 0, cam_indices); + Tensor selected_intrinsics = torch::index_select(intrinsics, 0, cam_indices); + auto [rays_o, rays_d] = get_rays_from_pose(selected_poses, selected_intrinsics, ij); + + return {{rays_o, rays_d}, gt_colors, cam_indices.to(torch::kInt32).contiguous()}; +} + +std::vector Dataset::glob_image_paths(const std::string & input_dir) +{ + glob_t buffer; + std::vector files; + glob((input_dir + "*.png").c_str(), 0, NULL, &buffer); + for (size_t i = 0; i < buffer.gl_pathc; i++) { + files.push_back(buffer.gl_pathv[i]); + } + globfree(&buffer); + std::sort(files.begin(), files.end()); + return files; +} diff --git a/localization/nerf_based_localizer/src/nerf/dataset.hpp b/localization/nerf_based_localizer/src/nerf/dataset.hpp new file mode 100644 index 0000000000000..7842a82befe4a --- /dev/null +++ b/localization/nerf_based_localizer/src/nerf/dataset.hpp @@ -0,0 +1,55 @@ +// Copyright 2023 Autoware Foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// This file is derived from the following file. +// https://github.com/Totoro97/f2-nerf/blob/main/src/Dataset/Dataset.h +// +// Created by ppwang on 2022/5/7. 
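For orientation, Dataset::sample_random_rays above fetches ground-truth pixels through a single flattened index into images.view({-1, 3}). A small sketch, not part of the patch, of that row-major flattening, assuming images are laid out as (n_images, height, width, 3):

```cpp
#include <cassert>

// Flat index used to look up pixel (i, j) of camera `cam`:
// camera block first, then row, then column.
long flat_pixel_index(long cam, long i, long j, long height, long width)
{
  return cam * height * width + i * width + j;
}

int main()
{
  const long H = 4, W = 6;
  // The last pixel of camera 1 sits immediately before the first pixel of camera 2.
  assert(flat_pixel_index(1, H - 1, W - 1, H, W) + 1 == flat_pixel_index(2, 0, 0, H, W));
  return 0;
}
```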
+// + +#ifndef NERF__DATASET_HPP_ +#define NERF__DATASET_HPP_ + +#include "common.hpp" +#include "rays.hpp" + +#include + +#include +#include +#include + +struct Dataset +{ + using Tensor = torch::Tensor; + +public: + Dataset(const std::string & data_path); + + void save_inference_params(const std::string & train_result_dir) const; + + Rays get_all_rays_of_camera(int idx); + + std::tuple sample_random_rays(int batch_size); + + static std::vector glob_image_paths(const std::string & input_dir); + + int n_images; + Tensor poses, images, intrinsics, dist_params; + Tensor center; + float radius; + int height, width; +}; + +#endif // NERF__DATASET_HPP_ diff --git a/localization/nerf_based_localizer/src/nerf/hash_3d_anchored.cpp b/localization/nerf_based_localizer/src/nerf/hash_3d_anchored.cpp new file mode 100644 index 0000000000000..64ed1a69cc647 --- /dev/null +++ b/localization/nerf_based_localizer/src/nerf/hash_3d_anchored.cpp @@ -0,0 +1,130 @@ +// Copyright 2023 Autoware Foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// This file is derived from the following file. +// https://github.com/Totoro97/f2-nerf/blob/main/src/Field/Hash3DAnchored.cpp +// +// Created by ppwang on 2022/7/17. +// + +#include "hash_3d_anchored.hpp" + +#include "common.hpp" +#include "stop_watch.hpp" + +#include + +using Tensor = torch::Tensor; + +TORCH_LIBRARY(dec_hash3d_anchored, m) +{ + m.class_("Hash3DAnchoredInfo").def(torch::init()); +} + +Hash3DAnchored::Hash3DAnchored() +{ + pool_size_ = (1 << 19) * N_LEVELS; + + // Feat pool + feat_pool_ = (torch::rand({pool_size_, N_CHANNELS}, CUDAFloat) * .2f - 1.f) * 1e-4f; + feat_pool_.requires_grad_(true); + CHECK(feat_pool_.is_contiguous()); + + // Get prime numbers + auto is_prim = [](int x) { + for (int i = 2; i * i <= x; i++) { + if (x % i == 0) return false; + } + return true; + }; + + std::vector prim_selected; + int min_local_prim = 1 << 28; + int max_local_prim = 1 << 30; + + const auto CPUInt = torch::TensorOptions().dtype(torch::kInt).device(torch::kCPU); + + for (int i = 0; i < 3 * N_LEVELS; i++) { + int val; + do { + val = torch::randint(min_local_prim, max_local_prim, {1}, CPUInt).item(); + } while (!is_prim(val)); + prim_selected.push_back(val); + } + + CHECK(prim_selected.size() == 3 * N_LEVELS); + + prim_pool_ = torch::from_blob(prim_selected.data(), 3 * N_LEVELS, CPUInt).to(torch::kCUDA); + prim_pool_ = prim_pool_.reshape({N_LEVELS, 3}).contiguous(); + + bias_pool_ = (torch::rand({N_LEVELS, 3}, CUDAFloat) * 1000.f + 100.f).contiguous(); + + local_size_ = pool_size_ / N_LEVELS; + local_size_ = (local_size_ >> 4) << 4; + + // MLP + const int mlp_out_dim = 16; + mlp_ = torch::nn::Linear(N_LEVELS * N_CHANNELS, mlp_out_dim); + + register_parameter("feat_pool", feat_pool_); + register_parameter("prim_pool", prim_pool_, false); + register_parameter("bias_pool", bias_pool_); + register_module("mlp", mlp_); +} + +Tensor Hash3DAnchored::query(const Tensor & points) +{ +#ifdef PROFILE + ScopeWatch watch(__func__); +#endif + + auto info = 
torch::make_intrusive(); + info->hash3d_ = this; + + const float radius = 1.0f; + Tensor norm = points.norm(2, {1}, true); + Tensor mask = (norm <= radius); + Tensor x = points * mask + ~mask * (1 + radius - radius / norm) * points / norm; + + Tensor feat = + torch::autograd::Hash3DAnchoredFunction::apply(x, feat_pool_, torch::IValue(info))[0]; + Tensor output = mlp_->forward(feat); + return output; +} + +std::vector Hash3DAnchored::optim_param_groups(float lr) +{ + std::vector ret; + + { + auto opt = std::make_unique(lr); + opt->betas() = {0.9, 0.99}; + opt->eps() = 1e-15; + + std::vector params = {feat_pool_}; + ret.emplace_back(std::move(params), std::move(opt)); + } + + { + auto opt = std::make_unique(lr); + opt->betas() = {0.9, 0.99}; + opt->eps() = 1e-15; + opt->weight_decay() = 1e-6; + + std::vector params = mlp_->parameters(); + ret.emplace_back(std::move(params), std::move(opt)); + } + + return ret; +} diff --git a/localization/nerf_based_localizer/src/nerf/hash_3d_anchored.cu b/localization/nerf_based_localizer/src/nerf/hash_3d_anchored.cu new file mode 100644 index 0000000000000..00ef8c5619416 --- /dev/null +++ b/localization/nerf_based_localizer/src/nerf/hash_3d_anchored.cu @@ -0,0 +1,231 @@ +// Copyright 2023 Autoware Foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// This file is derived from the following file. +// https://github.com/Totoro97/f2-nerf/blob/main/src/Field/Hash3DAnchored.cu +// +// Created by ppwang on 2022/7/17. 
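Hash3DAnchored::query above warps points outside the unit sphere before hashing. A scalar sketch, not part of the patch, of that contraction: with radius 1, a point at distance n is moved to distance 2 - 1/n, so all of space lands inside a ball of radius 2 that the multi-level hash grid can cover, while points inside the sphere are left untouched.

```cpp
#include <cstdio>

// Contraction from Hash3DAnchored::query, reduced to the norm:
// n <= r stays put, n > r maps to 1 + r - r/n (in (r, 1 + r)).
double contracted_norm(double n, double radius = 1.0)
{
  return (n <= radius) ? n : (1.0 + radius - radius / n);
}

int main()
{
  for (double n : {0.5, 1.0, 2.0, 10.0, 1000.0}) {
    std::printf("|p| = %8.1f -> %.4f\n", n, contracted_norm(n));
  }
  return 0;
}
```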
+// +#include "common.hpp" +#include "common_cuda.hpp" +#include "hash_3d_anchored.hpp" + +#include + +#include + +using Tensor = torch::Tensor; + +constexpr float RES_FINE_POW_2 = 10.f; +constexpr float RES_BASE_POW_2 = 3.f; + +const auto CUDAFlex = torch::TensorOptions().dtype(torch::kFloat16).device(torch::kCUDA); +using FlexType = __half; + +__device__ inline void calculate_pos_and_w( + const Eigen::Vector3f & pt, const int local_size, const int * const prim_pool, + const int level_idx, unsigned pos[8], float w[8]) +{ + const int offset = level_idx * 3; + const unsigned pa = prim_pool[offset + 0]; + const unsigned pb = prim_pool[offset + 1]; + const unsigned pc = prim_pool[offset + 2]; + + const unsigned pos_x = static_cast(floorf(pt[0])); + const unsigned pos_y = static_cast(floorf(pt[1])); + const unsigned pos_z = static_cast(floorf(pt[2])); + pos[0] = ((pos_x * pa) ^ (pos_y * pb) ^ (pos_z * pc)) % local_size; + pos[1] = ((pos_x * pa) ^ (pos_y * pb) ^ ((pos_z + 1u) * pc)) % local_size; + pos[2] = ((pos_x * pa) ^ ((pos_y + 1u) * pb) ^ (pos_z * pc)) % local_size; + pos[3] = ((pos_x * pa) ^ ((pos_y + 1u) * pb) ^ ((pos_z + 1u) * pc)) % local_size; + pos[4] = (((pos_x + 1u) * pa) ^ (pos_y * pb) ^ (pos_z * pc)) % local_size; + pos[5] = (((pos_x + 1u) * pa) ^ (pos_y * pb) ^ ((pos_z + 1u) * pc)) % local_size; + pos[6] = (((pos_x + 1u) * pa) ^ ((pos_y + 1u) * pb) ^ (pos_z * pc)) % local_size; + pos[7] = (((pos_x + 1u) * pa) ^ ((pos_y + 1u) * pb) ^ ((pos_z + 1u) * pc)) % local_size; + + const float a = pt[0] - floorf(pt[0]); + const float b = pt[1] - floorf(pt[1]); + const float c = pt[2] - floorf(pt[2]); + + w[0] = (1.f - a) * (1.f - b) * (1.f - c); + w[1] = (1.f - a) * (1.f - b) * c; + w[2] = (1.f - a) * b * (1.f - c); + w[3] = (1.f - a) * b * c; + w[4] = a * (1.f - b) * (1.f - c); + w[5] = a * (1.f - b) * c; + w[6] = a * b * (1.f - c); + w[7] = a * b * c; +} + +template +__global__ void Hash3DAnchoredForwardKernel( + int n_points, int local_size, T * feat_pool, int * prim_pool, Eigen::Vector3f * bias_pool, + Eigen::Vector3f * points_ptr, T * out_feat) +{ + const int pts_idx = blockIdx.x * blockDim.x + threadIdx.x; + const int level_idx = blockIdx.y; + if (pts_idx >= n_points) { + return; + } + feat_pool = feat_pool + local_size * level_idx; + + const float mul = exp2f( + (RES_FINE_POW_2 - RES_BASE_POW_2) * float(level_idx) / float(N_LEVELS - 1) + RES_BASE_POW_2); + const Eigen::Vector3f pt = (points_ptr[pts_idx] * mul + bias_pool[level_idx]); + + float ws[8] = {}; + unsigned pos[8] = {}; + calculate_pos_and_w(pt, local_size, prim_pool, level_idx, pos, ws); + + out_feat = out_feat + pts_idx * (N_LEVELS * N_CHANNELS); + +#pragma unroll + for (int k = 0; k < N_CHANNELS; k++) { + out_feat[level_idx * N_CHANNELS + k] = (T)(ws[0] * float(feat_pool[pos[0] * N_CHANNELS + k]) + + ws[1] * float(feat_pool[pos[1] * N_CHANNELS + k]) + + ws[2] * float(feat_pool[pos[2] * N_CHANNELS + k]) + + ws[3] * float(feat_pool[pos[3] * N_CHANNELS + k]) + + ws[4] * float(feat_pool[pos[4] * N_CHANNELS + k]) + + ws[5] * float(feat_pool[pos[5] * N_CHANNELS + k]) + + ws[6] * float(feat_pool[pos[6] * N_CHANNELS + k]) + + ws[7] * float(feat_pool[pos[7] * N_CHANNELS + k])); + } +} + +template +__global__ void Hash3DAnchoredBackwardKernel( + int n_points, int local_size, T * feat_pool, int * prim_pool, Eigen::Vector3f * bias_pool, + Eigen::Vector3f * points_ptr, + T * grad_in, // [ n_points, n_levels, n_channels ] + T * grad_points, // [ n_points, 3 ] + T * grad_embeds // [ pool_size, n_channels ] +) +{ + const int pts_idx = 
blockIdx.x * blockDim.x + threadIdx.x; + const int level_idx = blockIdx.y; + if (pts_idx >= n_points) { + return; + } + feat_pool = feat_pool + local_size * level_idx; + + const float mul = exp2f( + (RES_FINE_POW_2 - RES_BASE_POW_2) * float(level_idx) / float(N_LEVELS - 1) + RES_BASE_POW_2); + const Eigen::Vector3f pt = (points_ptr[pts_idx] * mul + bias_pool[level_idx]); + + float ws[8] = {}; + unsigned pos[8] = {}; + calculate_pos_and_w(pt, local_size, prim_pool, level_idx, pos, ws); + + const float sign_x[8] = {-1.0f, -1.0f, -1.0f, -1.0f, 1.0f, 1.0f, 1.0f, 1.0f}; + const float sign_y[8] = {-1.0f, -1.0f, 1.0f, 1.0f, -1.0f, -1.0f, 1.0f, 1.0f}; + const float sign_z[8] = {-1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f}; + + grad_in = grad_in + (N_LEVELS * N_CHANNELS) * pts_idx + level_idx * N_CHANNELS; + + grad_points = grad_points + pts_idx * 3; + grad_embeds = grad_embeds + local_size * level_idx; + +#pragma unroll + for (int d = 0; d < 8; d++) { + for (int k = 0; k < N_CHANNELS; k += 2) { + float w0 = (float)grad_in[k]; + float w1 = (float)grad_in[k + 1]; + if (w0 != 0.f || w1 != 0.f) { + __half2 cur_w = {(__half)(float(w0) * ws[d]), (__half)(float(w1) * ws[d])}; + atomicAdd((__half2 *)(grad_embeds + pos[d] * N_CHANNELS + k), cur_w); + } + } + for (int k = 0; k < N_CHANNELS; k++) { + const float norm = (float)(feat_pool[pos[d] * N_CHANNELS + k]) * mul * (float)grad_in[k]; + atomicAdd(grad_points + 0, (T)(sign_x[d] * norm)); + atomicAdd(grad_points + 1, (T)(sign_y[d] * norm)); + atomicAdd(grad_points + 2, (T)(sign_z[d] * norm)); + } + } +} + +namespace torch::autograd +{ + +variable_list Hash3DAnchoredFunction::forward( + AutogradContext * ctx, Tensor points, Tensor feat_pool, IValue hash3d_info) +{ + auto info_ptr = hash3d_info.toCustomClass(); + ctx->saved_data["hash3d_info"] = hash3d_info; + ctx->saved_data["points"] = points; + ctx->saved_data["feat_pool"] = feat_pool; + Tensor & prim_pool = info_ptr->hash3d_->prim_pool_; + Tensor & bias_pool = info_ptr->hash3d_->bias_pool_; + CHECK(points.device().is_cuda()); + + int n_points = points.sizes()[0]; + + dim3 block_dim = LIN_BLOCK_DIM; + dim3 grid_dim = {DivUp(n_points, THREAD_CAP), unsigned(N_LEVELS), 1}; + + Tensor out_feat = torch::zeros({n_points, N_LEVELS * N_CHANNELS}, CUDAFlex); + CHECK(out_feat.is_contiguous()); + + Tensor feat_pool_true = feat_pool.to(torch::kFloat16).contiguous(); + + Hash3DAnchoredForwardKernel<<>>( + n_points, info_ptr->hash3d_->local_size_, + reinterpret_cast(feat_pool_true.data_ptr()), prim_pool.data_ptr(), + reinterpret_cast(bias_pool.data_ptr()), + reinterpret_cast(points.data_ptr()), + reinterpret_cast(out_feat.data_ptr())); + + return {out_feat.to(torch::kFloat32)}; +} + +variable_list Hash3DAnchoredFunction::backward(AutogradContext * ctx, variable_list grad_output) +{ + auto info_ptr = ctx->saved_data["hash3d_info"].toCustomClass(); + Tensor & points = ctx->saved_data["points"].toTensor(); // [ n_points, 3 ] + Tensor & feat_pool = ctx->saved_data["feat_pool"].toTensor(); + Tensor & prim_pool = info_ptr->hash3d_->prim_pool_; + Tensor & bias_pool = info_ptr->hash3d_->bias_pool_; + CHECK(points.device().is_cuda()); + + const float grad_scale = 128.f; + int n_points = points.sizes()[0]; + + int pool_size = info_ptr->hash3d_->pool_size_; + + dim3 block_dim = LIN_BLOCK_DIM; + dim3 grid_dim = {DivUp(n_points, THREAD_CAP), unsigned(N_LEVELS), 1}; + + Tensor feat_pool_true = feat_pool.to(torch::kFloat16).contiguous(); + + Tensor grad_in = (grad_output[0] * grad_scale).to(torch::kFloat16).contiguous(); + + 
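+  // Note: grad_output was scaled by grad_scale (128x) above so that small
+  // gradients survive the float16 atomicAdd accumulation in the backward
+  // kernel; the division by grad_scale afterwards restores true magnitudes.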
Tensor points_grad = torch::zeros({n_points, 3}, CUDAFlex); + Tensor embeds_grad = torch::zeros({pool_size, N_CHANNELS}, CUDAFlex); + + Hash3DAnchoredBackwardKernel<<>>( + n_points, info_ptr->hash3d_->local_size_, + reinterpret_cast(feat_pool_true.data_ptr()), prim_pool.data_ptr(), + reinterpret_cast(bias_pool.data_ptr()), + reinterpret_cast(points.data_ptr()), + reinterpret_cast(grad_in.data_ptr()), + reinterpret_cast(points_grad.data_ptr()), + reinterpret_cast(embeds_grad.data_ptr())); + + points_grad = points_grad.to(torch::kFloat32) / grad_scale; + embeds_grad = embeds_grad.to(torch::kFloat32) / grad_scale; + + return {points_grad, embeds_grad, Tensor()}; +} + +} // namespace torch::autograd diff --git a/localization/nerf_based_localizer/src/nerf/hash_3d_anchored.hpp b/localization/nerf_based_localizer/src/nerf/hash_3d_anchored.hpp new file mode 100644 index 0000000000000..2c0cce1096b51 --- /dev/null +++ b/localization/nerf_based_localizer/src/nerf/hash_3d_anchored.hpp @@ -0,0 +1,70 @@ +// Copyright 2023 Autoware Foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// This file is derived from the following file. +// https://github.com/Totoro97/f2-nerf/blob/main/src/Field/Hash3DAnchored.h +// +// Created by ppwang on 2022/7/17. +// + +#ifndef NERF__HASH_3D_ANCHORED_HPP_ +#define NERF__HASH_3D_ANCHORED_HPP_ + +#include + +static constexpr int64_t N_CHANNELS = 2; +static constexpr int64_t N_LEVELS = 16; + +class Hash3DAnchored : public torch::nn::Module +{ + using Tensor = torch::Tensor; + +public: + Hash3DAnchored(); + + Tensor query(const Tensor & points); + + std::vector optim_param_groups(float lr); + + int pool_size_; + int local_size_; + + Tensor feat_pool_; // [ pool_size_, n_channels_ ]; + Tensor prim_pool_; // [ n_levels, 3 ]; + Tensor bias_pool_; // [ n_levels, 3 ]; + + torch::nn::Linear mlp_ = nullptr; +}; + +class Hash3DAnchoredInfo : public torch::CustomClassHolder +{ +public: + Hash3DAnchored * hash3d_ = nullptr; +}; + +namespace torch::autograd +{ + +class Hash3DAnchoredFunction : public Function +{ +public: + static variable_list forward( + AutogradContext * ctx, Tensor points, Tensor feat_pool_, IValue hash3d_info); + + static variable_list backward(AutogradContext * ctx, variable_list grad_output); +}; + +} // namespace torch::autograd + +#endif // NERF__HASH_3D_ANCHORED_HPP_ diff --git a/localization/nerf_based_localizer/src/nerf/localizer.cpp b/localization/nerf_based_localizer/src/nerf/localizer.cpp new file mode 100644 index 0000000000000..b36f9fae819cd --- /dev/null +++ b/localization/nerf_based_localizer/src/nerf/localizer.cpp @@ -0,0 +1,354 @@ +// Copyright 2023 Autoware Foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
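For reference, the per-level scale in the kernels above, mul = exp2((RES_FINE_POW_2 - RES_BASE_POW_2) * l / (N_LEVELS - 1) + RES_BASE_POW_2), is a geometric resolution ladder. A sketch, not part of the patch, that prints it using the constants from this diff (N_LEVELS = 16, exponents 3 through 10, i.e. resolutions 8 up to 1024):

```cpp
#include <cmath>
#include <cstdio>

int main()
{
  const int N_LEVELS = 16;
  const float RES_BASE_POW_2 = 3.f;
  const float RES_FINE_POW_2 = 10.f;
  for (int l = 0; l < N_LEVELS; l++) {
    // Exponent interpolates linearly from 3 to 10 across the levels.
    const float mul = std::exp2(
      (RES_FINE_POW_2 - RES_BASE_POW_2) * float(l) / float(N_LEVELS - 1) + RES_BASE_POW_2);
    std::printf("level %2d: grid resolution %.1f\n", l, mul);
  }
  return 0;
}
```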
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "localizer.hpp" + +#include "dataset.hpp" +#include "stop_watch.hpp" + +#include +#include + +#include + +using Tensor = torch::Tensor; + +Localizer::Localizer(const LocalizerParam & param) : param_(param) +{ + const std::string train_result_dir = param.train_result_dir; + + cv::FileStorage inference_params( + train_result_dir + "/inference_params.yaml", cv::FileStorage::READ); + if (!inference_params.isOpened()) { + throw std::runtime_error("Failed to open " + train_result_dir + "/inference_params.yaml"); + } + + const int n_images = (int)inference_params["n_images"]; + const int train_height = (int)inference_params["height"]; + const int train_width = (int)inference_params["width"]; + + std::vector intrinsic_vector; + inference_params["intrinsic"] >> intrinsic_vector; + intrinsic_ = torch::tensor(intrinsic_vector, torch::kFloat).view({3, 3}).to(torch::kCUDA); + + std::vector normalizing_center; + inference_params["normalizing_center"] >> normalizing_center; + center_ = torch::tensor(normalizing_center, torch::kFloat).to(torch::kCUDA); + + radius_ = (float)inference_params["normalizing_radius"]; + + renderer_ = std::make_shared(n_images, param.sample_num_per_ray); + + torch::load(renderer_, train_result_dir + "/checkpoints/latest/renderer.pt"); + + // set + infer_height_ = train_height / param.resize_factor; + infer_width_ = train_width / param.resize_factor; + intrinsic_ /= param.resize_factor; + intrinsic_[2][2] = 1.0; + + /* + [[+1, 0, 0, 0 ], + [ 0, -1, 0, 0 ], + [ 0, 0, -1, 0 ], + [ 0, 0, 0, +1 ]] +*/ + axis_convert_mat_ = torch::zeros({4, 4}); + axis_convert_mat_[0][0] = +1; + axis_convert_mat_[1][1] = -1; + axis_convert_mat_[2][2] = -1; + axis_convert_mat_[3][3] = +1; + axis_convert_mat_ = axis_convert_mat_.to(torch::kCUDA); +} + +std::vector Localizer::optimize_pose_by_random_search( + Tensor initial_pose, Tensor image_tensor, int64_t particle_num, float noise_coeff) +{ + torch::NoGradGuard no_grad_guard; + + std::mt19937_64 engine(std::random_device{}()); + + // Note that the order of the axes is different + // World coordinates (x: Front, y: Left, z: Up) + // NeRF coordinates (x: Right, y: Up, z: Back) + const float pos_noise_x_in_nerf = param_.noise_position_y * noise_coeff / radius_; + const float pos_noise_y_in_nerf = param_.noise_position_z * noise_coeff / radius_; + const float pos_noise_z_in_nerf = param_.noise_position_x * noise_coeff / radius_; + const float theta_x_in_nerf = param_.noise_rotation_y * noise_coeff; + const float theta_y_in_nerf = param_.noise_rotation_z * noise_coeff; + const float theta_z_in_nerf = param_.noise_rotation_x * noise_coeff; + + std::normal_distribution dist_position_x(0.0f, pos_noise_x_in_nerf); + std::normal_distribution dist_position_y(0.0f, pos_noise_y_in_nerf); + std::normal_distribution dist_position_z(0.0f, pos_noise_z_in_nerf); + std::normal_distribution dist_rotation_x(0.0f, theta_x_in_nerf); + std::normal_distribution dist_rotation_y(0.0f, theta_y_in_nerf); + std::normal_distribution dist_rotation_z(0.0f, theta_z_in_nerf); + + std::vector poses; + for (int64_t i = 0; i < particle_num; i++) { + // Sample a 
random translation + Tensor curr_pose = initial_pose.clone(); + if (i == 0) { + poses.push_back(curr_pose); + continue; + } + curr_pose[0][3] += dist_position_x(engine); + curr_pose[1][3] += dist_position_y(engine); + curr_pose[2][3] += dist_position_z(engine); + + // orientation + const float theta_x = dist_rotation_x(engine) * M_PI / 180.0; + const float theta_y = dist_rotation_y(engine) * M_PI / 180.0; + const float theta_z = dist_rotation_z(engine) * M_PI / 180.0; + Eigen::Matrix3f rotation_matrix_x(Eigen::AngleAxisf(theta_x, Eigen::Vector3f::UnitX())); + Eigen::Matrix3f rotation_matrix_y(Eigen::AngleAxisf(theta_y, Eigen::Vector3f::UnitY())); + Eigen::Matrix3f rotation_matrix_z(Eigen::AngleAxisf(theta_z, Eigen::Vector3f::UnitZ())); + const torch::Device dev = initial_pose.device(); + Tensor rotation_tensor_x = + torch::from_blob(rotation_matrix_x.data(), {3, 3}).to(torch::kFloat32).to(dev); + Tensor rotation_tensor_y = + torch::from_blob(rotation_matrix_y.data(), {3, 3}).to(torch::kFloat32).to(dev); + Tensor rotation_tensor_z = + torch::from_blob(rotation_matrix_z.data(), {3, 3}).to(torch::kFloat32).to(dev); + Tensor rotated = rotation_tensor_z.mm( + rotation_tensor_y.mm(rotation_tensor_x.mm(curr_pose.index({Slc(0, 3), Slc(0, 3)})))); + curr_pose.index_put_({Slc(0, 3), Slc(0, 3)}, rotated); + poses.push_back(curr_pose); + } + + const std::vector weights = evaluate_poses(poses, image_tensor); + const int pose_num = poses.size(); + + std::vector result; + for (int i = 0; i < pose_num; i++) { + result.push_back({poses[i], weights[i]}); + } + return result; +} + +torch::Tensor gram_schmidt(torch::Tensor A) +{ + A = A.clone(); + for (int i = 0; i < A.size(0); ++i) { + for (int j = 0; j < i; ++j) { + A[i] -= torch::dot(A[j], A[i]) * A[j]; + } + A[i] = A[i] / A[i].norm(); + } + return A; +} + +std::vector Localizer::optimize_pose_by_differential( + Tensor initial_pose, Tensor image_tensor, int64_t iteration_num, float learning_rate) +{ + std::vector results; + initial_pose = initial_pose.requires_grad_(true); + image_tensor = image_tensor.view({infer_height_, infer_width_, 3}); + torch::optim::Adam optimizer({initial_pose}, learning_rate); + for (int64_t i = 0; i < iteration_num; i++) { + Tensor pred_img = render_image(initial_pose); + Tensor loss = torch::nn::functional::mse_loss(pred_img, image_tensor); + optimizer.zero_grad(); + // For some reason, backward may fail, so check here + try { + loss.backward(); + } catch (const std::runtime_error & e) { + return results; + } + optimizer.step(); + + Tensor curr_result = initial_pose.clone().detach(); + results.push_back(curr_result); + } + return results; +} + +Tensor Localizer::render_image(const Tensor & pose) +{ + auto [image, _] = + renderer_->render_image(pose, intrinsic_, infer_height_, infer_width_, param_.ray_batch_size); + return image; +} + +std::vector Localizer::evaluate_poses( + const std::vector & poses, const Tensor & image) +{ + torch::NoGradGuard no_grad_guard; + Timer timer; + + const int pixel_num = param_.render_pixel_num; + const auto CUDALong = torch::TensorOptions().dtype(torch::kLong).device(torch::kCUDA); + + // Pick rays by constant interval + // const int step = H * W / pixel_num; + // std::vector i_vec, j_vec; + // for (int k = 0; k < pixel_num; k++) { + // const int v = k * step; + // const int64_t i = v / W; + // const int64_t j = v % W; + // i_vec.push_back(i); + // j_vec.push_back(j); + // } + // const Tensor i = torch::tensor(i_vec, CUDALong); + // const Tensor j = torch::tensor(j_vec, CUDALong); + + // Pick 
rays by random sampling without replacement + std::vector indices(infer_height_ * infer_width_); + std::iota(indices.begin(), indices.end(), 0); + std::mt19937 engine(std::random_device{}()); + std::shuffle(indices.begin(), indices.end(), engine); + std::vector i_vec, j_vec; + for (int k = 0; k < pixel_num; k++) { + const int v = indices[k]; + const int64_t i = v / infer_width_; + const int64_t j = v % infer_width_; + i_vec.push_back(i); + j_vec.push_back(j); + } + Tensor i = torch::tensor(i_vec, CUDALong); + Tensor j = torch::tensor(j_vec, CUDALong); + + // Pick rays by random sampling with replacement + // const Tensor i = torch::randint(0, H, pixel_num, CUDALong); + // const Tensor j = torch::randint(0, infer_width_, pixel_num, CUDALong); + + const Tensor ij = torch::stack({i, j}, -1).to(torch::kFloat32); + std::vector rays_o_vec; + std::vector rays_d_vec; + for (const Tensor & pose : poses) { + auto [rays_o, rays_d] = get_rays_from_pose(pose.unsqueeze(0), intrinsic_.unsqueeze(0), ij); + rays_o_vec.push_back(rays_o); + rays_d_vec.push_back(rays_d); + } + + const int64_t pose_num = poses.size(); + const int64_t numel = pixel_num * pose_num; + + Tensor rays_o = torch::cat(rays_o_vec); // (numel, 3) + Tensor rays_d = torch::cat(rays_d_vec); // (numel, 3) + + timer.start(); + auto [pred_colors, _] = renderer_->render_all_rays(rays_o, rays_d, (1 << 16)); + + Tensor pred_pixels = pred_colors.view({pose_num, pixel_num, 3}); + pred_pixels = pred_pixels.clip(0.f, 1.f); + pred_pixels = pred_pixels.to(image.device()); // (pose_num, pixel_num, 3) + + i = i.to(image.device()); + j = j.to(image.device()); + + Tensor gt_pixels = image.index({i, j}); // (pixel_num, 3) + Tensor diff = pred_pixels - gt_pixels; // (pose_num, pixel_num, 3) + Tensor loss = (diff * diff).mean(-1).sum(-1).cpu(); // (pose_num,) + loss = pixel_num / (loss + 1e-6f); + loss = torch::pow(loss, 5); + loss /= loss.sum(); + + std::vector result(loss.data_ptr(), loss.data_ptr() + loss.numel()); + return result; +} + +Eigen::Matrix3d compute_rotation_average( + const std::vector & rotations, const std::vector & weights) +{ + // cf. 
https://stackoverflow.com/questions/12374087/average-of-multiple-quaternions + std::vector quaternions; + for (const Eigen::Matrix3d & rot : rotations) { + Eigen::Quaterniond quat(rot); + quaternions.push_back(quat); + } + + Eigen::Vector4d cumulative(0.0, 0.0, 0.0, 0.0); + const Eigen::Quaterniond & front = quaternions[0]; + + for (Eigen::Quaterniond & q : quaternions) { + if (q.dot(front) < 0.0) { + q = Eigen::Quaterniond(-q.coeffs()); + } + cumulative += q.coeffs(); + } + + cumulative /= quaternions.size(); + + Eigen::Quaterniond average_quaternion; + average_quaternion.coeffs() = cumulative; + average_quaternion.normalize(); + + return average_quaternion.toRotationMatrix(); +} + +Tensor Localizer::calc_average_pose(const std::vector & particles) +{ + torch::Device device = particles.front().pose.device(); + torch::Tensor avg_position_tensor = torch::zeros({3, 1}, device).to(torch::kFloat32); + std::vector rotations; + std::vector weights; + + for (const Particle & particle : particles) { + torch::Tensor pose = particle.pose; + torch::Tensor position = pose.index({Slc(0, 3), Slc(3, 4)}); + avg_position_tensor += position * particle.weight; + + // slice to get 3x3 rotation matrix, convert it to Eigen::Matrix3f + torch::Tensor rotation_tensor = pose.index({Slc(0, 3), Slc(0, 3)}).to(torch::kDouble).cpu(); + Eigen::Matrix3d rotation; + std::memcpy( + rotation.data(), rotation_tensor.data_ptr(), sizeof(double) * rotation_tensor.numel()); + rotations.push_back(rotation); + weights.push_back(particle.weight); + } + + Eigen::Matrix3d avg_rotation_matrix = compute_rotation_average(rotations, weights); + torch::Tensor avg_rotation_tensor = torch::from_blob( + avg_rotation_matrix.data(), {3, 3}, torch::TensorOptions().dtype(torch::kDouble)); + avg_rotation_tensor = avg_rotation_tensor.to(torch::kFloat32); + avg_rotation_tensor = avg_rotation_tensor.to(device); + + // combine average position and rotation to form average pose + torch::Tensor avg_pose = torch::zeros_like(particles.front().pose); + avg_pose.index_put_({Slc(0, 3), Slc(3, 4)}, avg_position_tensor); + avg_pose.index_put_({Slc(0, 3), Slc(0, 3)}, avg_rotation_tensor); + + return avg_pose; +} + +torch::Tensor Localizer::camera2nerf(const torch::Tensor & pose_in_world) +{ + torch::Tensor x = pose_in_world; + x = torch::mm(x, axis_convert_mat_); + x = torch::mm(axis_convert_mat_.t(), x); + + // normalize t + Tensor t = x.index({Slc(0, 3), 3}).clone(); + t = (t - center_.unsqueeze(0)) / radius_; + x.index_put_({Slc(0, 3), 3}, t); + + x = x.index({Slc(0, 3), Slc(0, 4)}); + return x; +} + +torch::Tensor Localizer::nerf2camera(const torch::Tensor & pose_in_camera) +{ + torch::Tensor x = pose_in_camera; + x = torch::cat({x, torch::tensor({0, 0, 0, 1}).view({1, 4}).to(torch::kCUDA)}); + + // denormalize t + Tensor t = x.index({Slc(0, 3), 3}).clone(); + t = t * radius_ + center_.unsqueeze(0); + x.index_put_({Slc(0, 3), 3}, t); + + x = torch::mm(x, axis_convert_mat_.t()); + x = torch::mm(axis_convert_mat_, x); + return x; +} diff --git a/localization/nerf_based_localizer/src/nerf/localizer.hpp b/localization/nerf_based_localizer/src/nerf/localizer.hpp new file mode 100644 index 0000000000000..020ffca3e5dcc --- /dev/null +++ b/localization/nerf_based_localizer/src/nerf/localizer.hpp @@ -0,0 +1,81 @@ +// Copyright 2023 Autoware Foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
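One thing worth noting: compute_rotation_average above accepts a weights argument but sums the quaternions uniformly; only the translation average in calc_average_pose is weighted. If a weighted rotation average were wanted, a sketch along these lines (not part of the patch) would keep the same hemisphere alignment trick, flipping any quaternion whose dot product with the first is negative since q and -q encode the same rotation:

```cpp
#include <Eigen/Dense>

#include <vector>

Eigen::Matrix3d weighted_rotation_average(
  const std::vector<Eigen::Matrix3d> & rotations, const std::vector<double> & weights)
{
  Eigen::Vector4d cumulative = Eigen::Vector4d::Zero();
  const Eigen::Quaterniond front(rotations.front());
  for (size_t k = 0; k < rotations.size(); k++) {
    Eigen::Quaterniond q(rotations[k]);
    if (q.dot(front) < 0.0) {
      q.coeffs() = -q.coeffs();  // align to the same hemisphere
    }
    cumulative += weights[k] * q.coeffs();
  }
  Eigen::Quaterniond average;
  average.coeffs() = cumulative;
  average.normalize();
  return average.toRotationMatrix();
}
```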
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#ifndef NERF__LOCALIZER_HPP_ +#define NERF__LOCALIZER_HPP_ + +#include "dataset.hpp" +#include "renderer.hpp" + +#include + +struct Particle +{ + torch::Tensor pose; // (3, 4) + float weight; +}; + +struct LocalizerParam +{ + std::string train_result_dir; + int32_t render_pixel_num = 256; + float noise_position_x = 0.025f; + float noise_position_y = 0.025f; + float noise_position_z = 0.025f; + float noise_rotation_x = 2.5f; + float noise_rotation_y = 2.5f; + float noise_rotation_z = 2.5f; + int32_t resize_factor = 1; + int32_t sample_num_per_ray = 1024; + int32_t ray_batch_size = (1 << 8); +}; + +class Localizer +{ + using Tensor = torch::Tensor; + +public: + Localizer() = default; + Localizer(const LocalizerParam & param); + + Tensor render_image(const Tensor & pose); + std::vector optimize_pose_by_random_search( + Tensor initial_pose, Tensor image_tensor, int64_t particle_num, float noise_coeff); + std::vector optimize_pose_by_differential( + Tensor initial_pose, Tensor image_tensor, int64_t iteration_num, float learning_rate); + + torch::Tensor camera2nerf(const torch::Tensor & pose_in_world); + torch::Tensor nerf2camera(const torch::Tensor & pose_in_camera); + + static Tensor calc_average_pose(const std::vector & particles); + + float radius() const { return radius_; } + int infer_height() const { return infer_height_; } + int infer_width() const { return infer_width_; } + +private: + std::vector evaluate_poses(const std::vector & poses, const Tensor & image); + + LocalizerParam param_; + + std::shared_ptr renderer_; + + torch::Tensor axis_convert_mat_; + + int infer_height_, infer_width_; + Tensor intrinsic_; + Tensor center_; + float radius_; +}; + +#endif // NERF__LOCALIZER_HPP_ diff --git a/localization/nerf_based_localizer/src/nerf/points_sampler.cpp b/localization/nerf_based_localizer/src/nerf/points_sampler.cpp new file mode 100644 index 0000000000000..3d302a50d6cf3 --- /dev/null +++ b/localization/nerf_based_localizer/src/nerf/points_sampler.cpp @@ -0,0 +1,79 @@ +// Copyright 2023 Autoware Foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// This file is derived from the following file. +// https://github.com/Totoro97/f2-nerf/blob/main/src/PtsSampler/PersSampler.cpp +// +// Created by ppwang on 2022/9/26. 
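To make the flow concrete, a hypothetical usage sketch of the Localizer API declared above. The include path, training-result directory, and tuning numbers are made up, error handling is omitted, and the image is assumed to already be a CUDA float tensor of shape (infer_height, infer_width, 3):

```cpp
#include "localizer.hpp"  // actual path depends on the build setup

#include <vector>

torch::Tensor refine_pose(const torch::Tensor & initial_pose_in_world, const torch::Tensor & image)
{
  LocalizerParam param;
  param.train_result_dir = "/path/to/train_result";  // hypothetical location
  Localizer localizer(param);

  // Optimization runs in the normalized NeRF frame.
  torch::Tensor pose_nerf = localizer.camera2nerf(initial_pose_in_world);
  const std::vector<Particle> particles = localizer.optimize_pose_by_random_search(
    pose_nerf, image, /*particle_num=*/64, /*noise_coeff=*/1.0f);
  const torch::Tensor averaged = Localizer::calc_average_pose(particles);
  return localizer.nerf2camera(averaged);
}
```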
+// + +#include "points_sampler.hpp" + +#include "dataset.hpp" +#include "stop_watch.hpp" +#include "utils.hpp" + +#include +#include + +using Tensor = torch::Tensor; + +PtsSampler::PtsSampler(const int sample_num_per_ray) : sample_num_per_ray_(sample_num_per_ray) +{ +} + +SampleResultFlex PtsSampler::get_samples( + const Tensor & rays_o_raw, const Tensor & rays_d_raw, RunningMode mode) +{ + Tensor rays_o = rays_o_raw.contiguous(); + Tensor rays_d = (rays_d_raw / torch::linalg_norm(rays_d_raw, 2, -1, true)).contiguous(); + + int n_rays = rays_o.sizes()[0]; + + const int n_all_pts = n_rays * sample_num_per_ray_; + + Tensor rays_noise; + if (mode == RunningMode::VALIDATE) { + rays_noise = torch::ones({n_all_pts}, CUDAFloat); + } else { + rays_noise = ((torch::rand({n_all_pts}, CUDAFloat) - .5f) + 1.f).contiguous(); + } + rays_noise = rays_noise.view({n_rays, sample_num_per_ray_}).contiguous(); + Tensor cum_noise = torch::cumsum(rays_noise, 1) * SAMPLE_L; + Tensor sampled_t = cum_noise.reshape({n_all_pts}).contiguous(); + + rays_o = rays_o.view({n_rays, 1, 3}).contiguous(); + rays_d = rays_d.view({n_rays, 1, 3}).contiguous(); + cum_noise = cum_noise.unsqueeze(-1).contiguous(); + Tensor sampled_pts = rays_o + rays_d * cum_noise; + + Tensor sampled_distances = torch::diff(sampled_pts, 1, 1).norm(2, -1).contiguous(); + sampled_distances = + torch::cat({torch::zeros({n_rays, 1}, CUDAFloat), sampled_distances}, 1).contiguous(); + sampled_pts = sampled_pts.view({n_all_pts, 3}); + sampled_distances = sampled_distances.view({n_all_pts}).contiguous(); + + Tensor pts_idx_start_end = + torch::ones({n_rays, 2}, torch::TensorOptions().dtype(torch::kInt).device(torch::kCUDA)) * + sample_num_per_ray_; + Tensor pts_num = pts_idx_start_end.index({Slc(), 0}); + Tensor cum_num = torch::cumsum(pts_num, 0); + pts_idx_start_end.index_put_({Slc(), 0}, cum_num - pts_num); + pts_idx_start_end.index_put_({Slc(), 1}, cum_num); + + Tensor sampled_dirs = + rays_d.expand({-1, sample_num_per_ray_, -1}).reshape({n_all_pts, 3}).contiguous(); + + return {sampled_pts, sampled_dirs, sampled_distances, sampled_t, pts_idx_start_end}; +} diff --git a/localization/nerf_based_localizer/src/nerf/points_sampler.hpp b/localization/nerf_based_localizer/src/nerf/points_sampler.hpp new file mode 100644 index 0000000000000..f606ae62f83e3 --- /dev/null +++ b/localization/nerf_based_localizer/src/nerf/points_sampler.hpp @@ -0,0 +1,58 @@ +// Copyright 2023 Autoware Foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// This file is derived from the following file. +// https://github.com/Totoro97/f2-nerf/blob/main/src/PtsSampler/PersSampler.h +// +// Created by ppwang on 2022/6/20. 
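PtsSampler::get_samples above jitters the marching steps instead of sampling on a fixed grid. A scalar sketch, not part of the patch: in TRAIN mode each step length is drawn from (0.5, 1.5) times SAMPLE_L and the steps are cumulatively summed, so samples stay strictly ordered along the ray with mean spacing SAMPLE_L = 1/256.

```cpp
#include <cstdio>
#include <random>

int main()
{
  constexpr float SAMPLE_L = 1.0f / 256;
  std::mt19937 engine(42);
  std::uniform_real_distribution<float> noise(-0.5f, 0.5f);

  float t = 0.f;
  for (int k = 0; k < 5; k++) {
    t += (1.f + noise(engine)) * SAMPLE_L;  // jittered step, always positive
    std::printf("sample %d: t = %.5f\n", k, t);
  }
  return 0;
}
```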
+// + +#ifndef NERF__POINTS_SAMPLER_HPP_ +#define NERF__POINTS_SAMPLER_HPP_ + +#include "Eigen/Eigen" +#include "common.hpp" + +#include + +#include + +struct SampleResultFlex +{ + using Tensor = torch::Tensor; + Tensor pts; // [ n_all_pts, 3 ] + Tensor dirs; // [ n_all_pts, 3 ] + Tensor dt; // [ n_all_pts, 1 ] + Tensor t; // [ n_all_pts, 1 ] + Tensor pts_idx_bounds; // [ n_rays, 2 ] // start, end +}; + +enum RunningMode { TRAIN, VALIDATE }; + +class PtsSampler +{ + using Tensor = torch::Tensor; + +public: + explicit PtsSampler(const int sample_num_per_ray); + + SampleResultFlex get_samples(const Tensor & rays_o, const Tensor & rays_d, RunningMode mode); + +private: + const int sample_num_per_ray_; + + static constexpr float SAMPLE_L = 1.0 / 256; +}; + +#endif // NERF__POINTS_SAMPLER_HPP_ diff --git a/localization/nerf_based_localizer/src/nerf/rays.cpp b/localization/nerf_based_localizer/src/nerf/rays.cpp new file mode 100644 index 0000000000000..289a81566ef68 --- /dev/null +++ b/localization/nerf_based_localizer/src/nerf/rays.cpp @@ -0,0 +1,42 @@ +// Copyright 2023 Autoware Foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "rays.hpp" + +#include "common.hpp" + +using Tensor = torch::Tensor; + +Rays get_rays_from_pose(const Tensor & pose, const Tensor & intrinsic, const Tensor & ij) +{ + // Shift half pixel + Tensor i = ij.index({"...", 0}).to(torch::kFloat32) + .5f; + Tensor j = ij.index({"...", 1}).to(torch::kFloat32) + .5f; + + Tensor cx = intrinsic.index({Slc(), 0, 2}); + Tensor cy = intrinsic.index({Slc(), 1, 2}); + Tensor fx = intrinsic.index({Slc(), 0, 0}); + Tensor fy = intrinsic.index({Slc(), 1, 1}); + + Tensor u_tensor = ((j - cx) / fx).unsqueeze(-1); + Tensor v_tensor = -((i - cy) / fy).unsqueeze(-1); + Tensor w_tensor = -torch::ones_like(u_tensor); + + Tensor dir_tensor = torch::cat({u_tensor, v_tensor, w_tensor}, 1).unsqueeze(-1); + Tensor ori_tensor = pose.index({Slc(), Slc(0, 3), Slc(0, 3)}); + Tensor pos_tensor = pose.index({Slc(), Slc(0, 3), 3}); + Tensor rays_d = torch::matmul(ori_tensor, dir_tensor).squeeze(); + Tensor rays_o = pos_tensor.expand({rays_d.sizes()[0], 3}).contiguous(); + + return {rays_o, rays_d}; +} diff --git a/localization/nerf_based_localizer/src/nerf/rays.hpp b/localization/nerf_based_localizer/src/nerf/rays.hpp new file mode 100644 index 0000000000000..5c5cb84558fb7 --- /dev/null +++ b/localization/nerf_based_localizer/src/nerf/rays.hpp @@ -0,0 +1,28 @@ +// Copyright 2023 Autoware Foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
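get_rays_from_pose above back-projects pixel centers through the intrinsics and flips the y and z components to match the NeRF camera convention (x right, y up, z back). A per-pixel scalar sketch, not part of the patch, with the half-pixel shift folded in:

```cpp
#include <Eigen/Dense>

// i is the pixel row, j the column; K is the 3x3 intrinsic matrix.
Eigen::Vector3f pixel_to_ray_dir(
  float i, float j, const Eigen::Matrix3f & K, const Eigen::Matrix3f & cam_to_world_rot)
{
  const float fx = K(0, 0), fy = K(1, 1), cx = K(0, 2), cy = K(1, 2);
  const float u = (j + 0.5f - cx) / fx;     // sample the pixel center
  const float v = -((i + 0.5f - cy) / fy);  // image rows grow downward, NeRF y points up
  const Eigen::Vector3f dir_cam(u, v, -1.f);  // camera looks down -z
  return cam_to_world_rot * dir_cam;
}
```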
+// See the License for the specific language governing permissions and +// limitations under the License. +#ifndef NERF__RAYS_HPP_ +#define NERF__RAYS_HPP_ + +#include + +struct alignas(32) Rays +{ + torch::Tensor origins; + torch::Tensor dirs; +}; + +Rays get_rays_from_pose( + const torch::Tensor & pose, const torch::Tensor & intrinsic, const torch::Tensor & ij); + +#endif // NERF__RAYS_HPP_ diff --git a/localization/nerf_based_localizer/src/nerf/renderer.cpp b/localization/nerf_based_localizer/src/nerf/renderer.cpp new file mode 100644 index 0000000000000..18d73f1fab16c --- /dev/null +++ b/localization/nerf_based_localizer/src/nerf/renderer.cpp @@ -0,0 +1,214 @@ +// Copyright 2023 Autoware Foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// This file is derived from the following file. +// https://github.com/Totoro97/f2-nerf/blob/main/src/Renderer/Renderer.cpp +// +// Created by ppwang on 2022/5/7. +// + +#include "renderer.hpp" + +#include "CustomOps/CustomOps.hpp" +#include "CustomOps/FlexOps.hpp" +#include "CustomOps/Scatter.hpp" +#include "common.hpp" +#include "rays.hpp" +#include "stop_watch.hpp" +#include "utils.hpp" + +using Tensor = torch::Tensor; +namespace F = torch::nn::functional; + +Renderer::Renderer(int n_images, const int sample_num_per_ray) +: sample_num_per_ray_(sample_num_per_ray) +{ + pts_sampler_ = std::make_shared(sample_num_per_ray_); + + scene_field_ = std::make_shared(); + register_module("scene_field", scene_field_); + + shader_ = std::make_shared(); + register_module("shader", shader_); + + app_emb_ = torch::randn({n_images, 16}, CUDAFloat) * .1f; + app_emb_.requires_grad_(true); + register_parameter("app_emb", app_emb_); +} + +RenderResult Renderer::render( + const Tensor & rays_o, const Tensor & rays_d, const Tensor & emb_idx, RunningMode mode) +{ + int n_rays = rays_o.sizes()[0]; + SampleResultFlex sample_result = pts_sampler_->get_samples(rays_o, rays_d, mode); + int n_all_pts = sample_result.pts.sizes()[0]; + CHECK(sample_result.pts_idx_bounds.max().item() <= n_all_pts); + CHECK(sample_result.pts_idx_bounds.min().item() >= 0); + + Tensor bg_color = + ((mode == RunningMode::TRAIN) ? 
torch::rand({n_rays, 3}, CUDAFloat) + : torch::ones({n_rays, 3}, CUDAFloat) * .5f); + + if (n_all_pts <= 0) { + return { + bg_color, torch::zeros({n_rays}, CUDAFloat), torch::full({n_rays}, 512.f, CUDAFloat), + Tensor()}; + } + CHECK(rays_o.sizes()[0] == sample_result.pts_idx_bounds.sizes()[0]); + + auto DensityAct = [](Tensor x) -> Tensor { + const float shift = 3.f; + return torch::autograd::TruncExp::apply(x - shift)[0]; + }; + + // First, inference - early stop + SampleResultFlex sample_result_early_stop; + { + Tensor scene_feat = scene_field_->query(sample_result.pts); + Tensor sampled_density = DensityAct(scene_feat.index({Slc(), Slc(0, 1)})); + Tensor sec_density = sampled_density.index({Slc(), 0}) * sample_result.dt; + Tensor alphas = 1.f - torch::exp(-sec_density); + Tensor acc_density = FlexOps::AccumulateSum(sec_density, sample_result.pts_idx_bounds, false); + Tensor trans = torch::exp(-acc_density); + Tensor weights = trans * alphas; + Tensor mask = trans > 1e-4f; + Tensor mask_idx = torch::where(mask)[0]; + + sample_result_early_stop.pts = sample_result.pts.index({mask_idx}).contiguous(); + sample_result_early_stop.dirs = sample_result.dirs.index({mask_idx}).contiguous(); + sample_result_early_stop.dt = sample_result.dt.index({mask_idx}).contiguous(); + sample_result_early_stop.t = sample_result.t.index({mask_idx}).contiguous(); + + Tensor mask_2d = mask.reshape({n_rays, sample_num_per_ray_}); + Tensor num = mask_2d.sum(1); + Tensor cum_num = torch::cumsum(num, 0); + Tensor idx_bounds = + torch::zeros({n_rays, 2}, torch::TensorOptions().dtype(torch::kInt).device(torch::kCUDA)); + idx_bounds.index_put_({Slc(), 0}, cum_num - num); + idx_bounds.index_put_({Slc(), 1}, cum_num); + sample_result_early_stop.pts_idx_bounds = idx_bounds; + + CHECK( + sample_result_early_stop.pts_idx_bounds.max().item() == + sample_result_early_stop.pts.size(0)); + } + + n_all_pts = sample_result_early_stop.pts.size(0); + + Tensor scene_feat = scene_field_->query(sample_result_early_stop.pts); + Tensor sampled_density = DensityAct(scene_feat.index({Slc(), Slc(0, 1)})); + + Tensor shading_feat = torch::cat( + {torch::ones_like(scene_feat.index({Slc(), Slc(0, 1)}), CUDAFloat), + scene_feat.index({Slc(), Slc(1, torch::indexing::None)})}, + 1); + + if (mode == RunningMode::TRAIN) { + Tensor all_emb_idx = + CustomOps::ScatterIdx(n_all_pts, sample_result_early_stop.pts_idx_bounds, emb_idx); + shading_feat = CustomOps::ScatterAdd(app_emb_, all_emb_idx, shading_feat); + } + + Tensor sampled_colors = shader_->query(shading_feat, sample_result_early_stop.dirs); + Tensor sampled_t = (sample_result_early_stop.t + 1e-2f).contiguous(); + Tensor sec_density = sampled_density.index({Slc(), 0}) * sample_result_early_stop.dt; + Tensor alphas = 1.f - torch::exp(-sec_density); + Tensor idx_start_end = sample_result_early_stop.pts_idx_bounds; + Tensor acc_density = FlexOps::AccumulateSum(sec_density, idx_start_end, false); + Tensor trans = torch::exp(-acc_density); + Tensor weights = trans * alphas; + + Tensor last_trans = torch::exp(-FlexOps::Sum(sec_density, idx_start_end)); + Tensor colors = FlexOps::Sum(weights.unsqueeze(-1) * sampled_colors, idx_start_end); + colors = colors + last_trans.unsqueeze(-1) * bg_color; + Tensor depths = FlexOps::Sum(weights * sampled_t, idx_start_end) / (1.f - last_trans + 1e-4f); + + CHECK(std::isfinite((colors).mean().item())); + + return {colors, depths, weights, idx_start_end}; +} + +std::tuple Renderer::render_all_rays( + const Tensor & rays_o, const Tensor & rays_d, const int 
batch_size) +{ + const int n_rays = rays_d.sizes()[0]; + + std::vector pred_colors; + std::vector pred_depths; + + const int ray_batch_size = (1 << 16); + for (int i = 0; i < n_rays; i += batch_size) { + int i_high = std::min(i + batch_size, n_rays); + Tensor cur_rays_o = rays_o.index({Slc(i, i_high)}).contiguous(); + Tensor cur_rays_d = rays_d.index({Slc(i, i_high)}).contiguous(); + + RenderResult render_result = render(cur_rays_o, cur_rays_d, Tensor(), RunningMode::VALIDATE); + Tensor colors = render_result.colors; + Tensor depths = render_result.depths.squeeze(); + + pred_colors.push_back(colors); + pred_depths.push_back(depths.unsqueeze(-1)); + } + + Tensor pred_colors_ts = torch::cat(pred_colors, 0); + Tensor pred_depths_ts = torch::cat(pred_depths, 0); + + return {pred_colors_ts, pred_depths_ts}; +} + +std::tuple Renderer::render_image( + const torch::Tensor & pose, const torch::Tensor & intrinsic, const int h, const int w, + const int batch_size) +{ + Tensor ii = torch::linspace(0.f, h - 1.f, h, CUDAFloat); + Tensor jj = torch::linspace(0.f, w - 1.f, w, CUDAFloat); + auto ij = torch::meshgrid({ii, jj}, "ij"); + Tensor i = ij[0].reshape({-1}); + Tensor j = ij[1].reshape({-1}); + auto [rays_o, rays_d] = + get_rays_from_pose(pose.unsqueeze(0), intrinsic.unsqueeze(0), torch::stack({i, j}, -1)); + auto [pred_colors, pred_depths] = render_all_rays(rays_o, rays_d, batch_size); + pred_colors = pred_colors.reshape({h, w, 3}); + pred_depths = pred_depths.reshape({h, w, 1}); + + pred_colors = pred_colors.clip(0.0f, 1.0f); + pred_depths = pred_depths.repeat({1, 1, 3}); + + return {pred_colors, pred_depths}; +} + +std::vector Renderer::optim_param_groups(float lr) +{ + std::vector ret; + + // scene_field_ + for (const auto & para_group : scene_field_->optim_param_groups(lr)) { + ret.emplace_back(para_group); + } + + // shader_ + for (const auto & para_group : shader_->optim_param_groups(lr)) { + ret.emplace_back(para_group); + } + + // app_emb_ + auto opt = std::make_unique(lr); + opt->betas() = {0.9, 0.99}; + opt->eps() = 1e-15; + opt->weight_decay() = 1e-6; + std::vector params{app_emb_}; + ret.emplace_back(std::move(params), std::move(opt)); + + return ret; +} diff --git a/localization/nerf_based_localizer/src/nerf/renderer.hpp b/localization/nerf_based_localizer/src/nerf/renderer.hpp new file mode 100644 index 0000000000000..4b7f06dc2f54d --- /dev/null +++ b/localization/nerf_based_localizer/src/nerf/renderer.hpp @@ -0,0 +1,69 @@ +// Copyright 2023 Autoware Foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// This file is derived from the following file. +// https://github.com/Totoro97/f2-nerf/blob/main/src/Renderer/Renderer.h +// +// Created by ppwang on 2022/5/7. 
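Renderer::render above implements standard emission-absorption volume rendering. A scalar sketch, not part of the patch, of the compositing: alpha_k = 1 - exp(-sigma_k * dt_k), transmittance decays multiplicatively along the ray, and whatever transmittance survives (last_trans) is filled with the background color.

```cpp
#include <cmath>
#include <cstdio>
#include <vector>

int main()
{
  const std::vector<float> sigma = {0.1f, 2.0f, 8.0f};  // densities per sample
  const std::vector<float> dt = {0.02f, 0.02f, 0.02f};  // segment lengths
  const std::vector<float> color = {0.2f, 0.9f, 0.4f};  // radiance (one channel)
  const float bg = 0.5f;

  float trans = 1.f, out = 0.f;
  for (size_t k = 0; k < sigma.size(); k++) {
    const float alpha = 1.f - std::exp(-sigma[k] * dt[k]);
    out += trans * alpha * color[k];  // weight = trans * alpha
    trans *= 1.f - alpha;
  }
  out += trans * bg;  // last_trans * bg_color
  std::printf("pixel = %.4f (leftover transmittance %.4f)\n", out, trans);
  return 0;
}
```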
+// + +#ifndef NERF__RENDERER_HPP_ +#define NERF__RENDERER_HPP_ + +#include "hash_3d_anchored.hpp" +#include "points_sampler.hpp" +#include "sh_shader.hpp" + +#include +#include + +struct RenderResult +{ + using Tensor = torch::Tensor; + Tensor colors; + Tensor depths; + Tensor weights; + Tensor idx_start_end; +}; + +class Renderer : public torch::nn::Module +{ + using Tensor = torch::Tensor; + +public: + Renderer(int n_images, const int sample_num_per_ray = 1024); + + RenderResult render( + const Tensor & rays_o, const Tensor & rays_d, const Tensor & emb_idx, RunningMode mode); + + std::tuple render_all_rays( + const Tensor & rays_o, const Tensor & rays_d, const int batch_size); + + std::tuple render_image( + const torch::Tensor & pose, const torch::Tensor & intrinsic, const int h, const int w, + const int batch_size); + + std::vector optim_param_groups(float lr); + +private: + const int sample_num_per_ray_; + + std::shared_ptr pts_sampler_; + std::shared_ptr scene_field_; + std::shared_ptr shader_; + + Tensor app_emb_; +}; + +#endif // NERF__RENDERER_HPP_ diff --git a/localization/nerf_based_localizer/src/nerf/sh_shader.cpp b/localization/nerf_based_localizer/src/nerf/sh_shader.cpp new file mode 100644 index 0000000000000..84ed5004a23e4 --- /dev/null +++ b/localization/nerf_based_localizer/src/nerf/sh_shader.cpp @@ -0,0 +1,56 @@ +// Copyright 2023 Autoware Foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// This file is derived from the following file. +// https://github.com/Totoro97/f2-nerf/blob/main/src/Shader/SHShader.cpp +// +// Created by ppwang on 2022/10/8. +// + +#include "sh_shader.hpp" + +#include "common.hpp" + +using Tensor = torch::Tensor; + +SHShader::SHShader() +{ + const int d_in = 32; + const int d_hidden = 64; + const int d_out = 3; + + mlp_ = torch::nn::Sequential( + torch::nn::Linear(d_in, d_hidden), torch::nn::ReLU(), torch::nn::Linear(d_hidden, d_out)); + register_module("mlp", mlp_); +} + +Tensor SHShader::query(const Tensor & feats, const Tensor & dirs) +{ + Tensor enc = encode(dirs); + Tensor input = torch::cat({feats, enc}, -1); + Tensor output = mlp_->forward(input); + float eps = 1e-3f; + return (1.f + 2.f * eps) / (1.f + torch::exp(-output)) - eps; +} + +std::vector SHShader::optim_param_groups(float lr) +{ + auto opt = std::make_unique(lr); + opt->betas() = {0.9, 0.99}; + opt->eps() = 1e-15; + opt->weight_decay() = 1e-6; + + std::vector params = mlp_->parameters(); + return {torch::optim::OptimizerParamGroup(params, std::move(opt))}; +} diff --git a/localization/nerf_based_localizer/src/nerf/sh_shader.cu b/localization/nerf_based_localizer/src/nerf/sh_shader.cu new file mode 100644 index 0000000000000..7a46b30eae60a --- /dev/null +++ b/localization/nerf_based_localizer/src/nerf/sh_shader.cu @@ -0,0 +1,224 @@ +// Copyright 2023 Autoware Foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
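SHShader::query above squashes the MLP output with a sigmoid stretched to the range (-eps, 1 + eps), so fully saturated colors (exactly 0 or 1) are reachable with finite logits before the renderer clips to [0, 1]. A scalar sketch, not part of the patch:

```cpp
#include <cmath>
#include <cstdio>

// (1 + 2*eps) * sigmoid(x) - eps, as in SHShader::query with eps = 1e-3.
float widened_sigmoid(float x, float eps = 1e-3f)
{
  return (1.f + 2.f * eps) / (1.f + std::exp(-x)) - eps;
}

int main()
{
  std::printf("x = -10 -> %.5f\n", widened_sigmoid(-10.f));  // slightly below 0
  std::printf("x =   0 -> %.5f\n", widened_sigmoid(0.f));    // 0.5
  std::printf("x = +10 -> %.5f\n", widened_sigmoid(10.f));   // slightly above 1
  return 0;
}
```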
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// This file is derived from the following file. +// https://github.com/Totoro97/f2-nerf/blob/main/src/Shader/SHShader.cu +// +// Created by ppwang on 2022/10/8. +// + +#include "common.hpp" +#include "common_cuda.hpp" +#include "sh_shader.hpp" + +using Tensor = torch::Tensor; + +__global__ void SHKernel( + const uint32_t num_elements, const uint32_t degree, float * data_in, float * data_out) +{ + const uint32_t i = threadIdx.x + blockIdx.x * blockDim.x; + if (i >= num_elements) return; + + data_out = data_out + (degree * degree) * i; + + float x = data_in[i * 3]; + float y = data_in[i * 3 + 1]; + float z = data_in[i * 3 + 2]; + + // Let compiler figure out how to sequence/reorder these calculations w.r.t. branches + float xy = x * y, xz = x * z, yz = y * z, x2 = x * x, y2 = y * y, z2 = z * z; + float x4 = x2 * x2, y4 = y2 * y2, z4 = z2 * z2; + float x6 = x4 * x2, y6 = y4 * y2, z6 = z4 * z2; + + // SH polynomials generated using scripts/gen_sh.py based on the recurrence relations in appendix + // A1 of https://www.ppsloan.org/publications/StupidSH36.pdf + + data_out[0] = 0.28209479177387814f; // 1/(2*sqrt(pi)) + if (degree <= 1) { + return; + } + data_out[1] = -0.48860251190291987f * y; // -sqrt(3)*y/(2*sqrt(pi)) + data_out[2] = 0.48860251190291987f * z; // sqrt(3)*z/(2*sqrt(pi)) + data_out[3] = -0.48860251190291987f * x; // -sqrt(3)*x/(2*sqrt(pi)) + if (degree <= 2) { + return; + } + data_out[4] = 1.0925484305920792f * xy; // sqrt(15)*xy/(2*sqrt(pi)) + data_out[5] = -1.0925484305920792f * yz; // -sqrt(15)*yz/(2*sqrt(pi)) + data_out[6] = + 0.94617469575755997f * z2 - 0.31539156525251999f; // sqrt(5)*(3*z2 - 1)/(4*sqrt(pi)) + data_out[7] = -1.0925484305920792f * xz; // -sqrt(15)*xz/(2*sqrt(pi)) + data_out[8] = + 0.54627421529603959f * x2 - 0.54627421529603959f * y2; // sqrt(15)*(x2 - y2)/(4*sqrt(pi)) + if (degree <= 3) { + return; + } + data_out[9] = + 0.59004358992664352f * y * (-3.0f * x2 + y2); // sqrt(70)*y*(-3*x2 + y2)/(8*sqrt(pi)) + data_out[10] = 2.8906114426405538f * xy * z; // sqrt(105)*xy*z/(2*sqrt(pi)) + data_out[11] = + 0.45704579946446572f * y * (1.0f - 5.0f * z2); // sqrt(42)*y*(1 - 5*z2)/(8*sqrt(pi)) + data_out[12] = 0.3731763325901154f * z * (5.0f * z2 - 3.0f); // sqrt(7)*z*(5*z2 - 3)/(4*sqrt(pi)) + data_out[13] = + 0.45704579946446572f * x * (1.0f - 5.0f * z2); // sqrt(42)*x*(1 - 5*z2)/(8*sqrt(pi)) + data_out[14] = 1.4453057213202769f * z * (x2 - y2); // sqrt(105)*z*(x2 - y2)/(4*sqrt(pi)) + data_out[15] = + 0.59004358992664352f * x * (-x2 + 3.0f * y2); // sqrt(70)*x*(-x2 + 3*y2)/(8*sqrt(pi)) + if (degree <= 4) { + return; + } + data_out[16] = 2.5033429417967046f * xy * (x2 - y2); // 3*sqrt(35)*xy*(x2 - y2)/(4*sqrt(pi)) + data_out[17] = + 1.7701307697799304f * yz * (-3.0f * x2 + y2); // 3*sqrt(70)*yz*(-3*x2 + y2)/(8*sqrt(pi)) + data_out[18] = + 0.94617469575756008f * xy * (7.0f * z2 - 1.0f); // 3*sqrt(5)*xy*(7*z2 - 1)/(4*sqrt(pi)) + data_out[19] = + 0.66904654355728921f * yz * (3.0f - 7.0f * z2); // 3*sqrt(10)*yz*(3 - 7*z2)/(8*sqrt(pi)) + data_out[20] = -3.1735664074561294f * z2 + 3.7024941420321507f * z4 + + 0.31735664074561293f; // 3*(-30*z2 + 35*z4 + 
3)/(16*sqrt(pi)) + data_out[21] = + 0.66904654355728921f * xz * (3.0f - 7.0f * z2); // 3*sqrt(10)*xz*(3 - 7*z2)/(8*sqrt(pi)) + data_out[22] = 0.47308734787878004f * (x2 - y2) * + (7.0f * z2 - 1.0f); // 3*sqrt(5)*(x2 - y2)*(7*z2 - 1)/(8*sqrt(pi)) + data_out[23] = + 1.7701307697799304f * xz * (-x2 + 3.0f * y2); // 3*sqrt(70)*xz*(-x2 + 3*y2)/(8*sqrt(pi)) + data_out[24] = -3.7550144126950569f * x2 * y2 + 0.62583573544917614f * x4 + + 0.62583573544917614f * y4; // 3*sqrt(35)*(-6*x2*y2 + x4 + y4)/(16*sqrt(pi)) + if (degree <= 5) { + return; + } + data_out[25] = + 0.65638205684017015f * y * + (10.0f * x2 * y2 - 5.0f * x4 - y4); // 3*sqrt(154)*y*(10*x2*y2 - 5*x4 - y4)/(32*sqrt(pi)) + data_out[26] = + 8.3026492595241645f * xy * z * (x2 - y2); // 3*sqrt(385)*xy*z*(x2 - y2)/(4*sqrt(pi)) + data_out[27] = -0.48923829943525038f * y * (3.0f * x2 - y2) * + (9.0f * z2 - 1.0f); // -sqrt(770)*y*(3*x2 - y2)*(9*z2 - 1)/(32*sqrt(pi)) + data_out[28] = + 4.7935367849733241f * xy * z * (3.0f * z2 - 1.0f); // sqrt(1155)*xy*z*(3*z2 - 1)/(4*sqrt(pi)) + data_out[29] = 0.45294665119569694f * y * + (14.0f * z2 - 21.0f * z4 - 1.0f); // sqrt(165)*y*(14*z2 - 21*z4 - 1)/(16*sqrt(pi)) + data_out[30] = + 0.1169503224534236f * z * + (-70.0f * z2 + 63.0f * z4 + 15.0f); // sqrt(11)*z*(-70*z2 + 63*z4 + 15)/(16*sqrt(pi)) + data_out[31] = 0.45294665119569694f * x * + (14.0f * z2 - 21.0f * z4 - 1.0f); // sqrt(165)*x*(14*z2 - 21*z4 - 1)/(16*sqrt(pi)) + data_out[32] = 2.3967683924866621f * z * (x2 - y2) * + (3.0f * z2 - 1.0f); // sqrt(1155)*z*(x2 - y2)*(3*z2 - 1)/(8*sqrt(pi)) + data_out[33] = -0.48923829943525038f * x * (x2 - 3.0f * y2) * + (9.0f * z2 - 1.0f); // -sqrt(770)*x*(x2 - 3*y2)*(9*z2 - 1)/(32*sqrt(pi)) + data_out[34] = 2.0756623148810411f * z * + (-6.0f * x2 * y2 + x4 + y4); // 3*sqrt(385)*z*(-6*x2*y2 + x4 + y4)/(16*sqrt(pi)) + data_out[35] = + 0.65638205684017015f * x * + (10.0f * x2 * y2 - x4 - 5.0f * y4); // 3*sqrt(154)*x*(10*x2*y2 - x4 - 5*y4)/(32*sqrt(pi)) + if (degree <= 6) { + return; + } + data_out[36] = 1.3663682103838286f * xy * + (-10.0f * x2 * y2 + 3.0f * x4 + + 3.0f * y4); // sqrt(6006)*xy*(-10*x2*y2 + 3*x4 + 3*y4)/(32*sqrt(pi)) + data_out[37] = + 2.3666191622317521f * yz * + (10.0f * x2 * y2 - 5.0f * x4 - y4); // 3*sqrt(2002)*yz*(10*x2*y2 - 5*x4 - y4)/(32*sqrt(pi)) + data_out[38] = 2.0182596029148963f * xy * (x2 - y2) * + (11.0f * z2 - 1.0f); // 3*sqrt(91)*xy*(x2 - y2)*(11*z2 - 1)/(8*sqrt(pi)) + data_out[39] = -0.92120525951492349f * yz * (3.0f * x2 - y2) * + (11.0f * z2 - 3.0f); // -sqrt(2730)*yz*(3*x2 - y2)*(11*z2 - 3)/(32*sqrt(pi)) + data_out[40] = + 0.92120525951492349f * xy * + (-18.0f * z2 + 33.0f * z4 + 1.0f); // sqrt(2730)*xy*(-18*z2 + 33*z4 + 1)/(32*sqrt(pi)) + data_out[41] = + 0.58262136251873131f * yz * + (30.0f * z2 - 33.0f * z4 - 5.0f); // sqrt(273)*yz*(30*z2 - 33*z4 - 5)/(16*sqrt(pi)) + data_out[42] = 6.6747662381009842f * z2 - 20.024298714302954f * z4 + 14.684485723822165f * z6 - + 0.31784601133814211f; // sqrt(13)*(105*z2 - 315*z4 + 231*z6 - 5)/(32*sqrt(pi)) + data_out[43] = + 0.58262136251873131f * xz * + (30.0f * z2 - 33.0f * z4 - 5.0f); // sqrt(273)*xz*(30*z2 - 33*z4 - 5)/(16*sqrt(pi)) + data_out[44] = 0.46060262975746175f * (x2 - y2) * + (11.0f * z2 * (3.0f * z2 - 1.0f) - 7.0f * z2 + + 1.0f); // sqrt(2730)*(x2 - y2)*(11*z2*(3*z2 - 1) - 7*z2 + 1)/(64*sqrt(pi)) + data_out[45] = -0.92120525951492349f * xz * (x2 - 3.0f * y2) * + (11.0f * z2 - 3.0f); // -sqrt(2730)*xz*(x2 - 3*y2)*(11*z2 - 3)/(32*sqrt(pi)) + data_out[46] = + 0.50456490072872406f * (11.0f * z2 - 1.0f) * + (-6.0f * x2 * y2 + x4 + 
y4); // 3*sqrt(91)*(11*z2 - 1)*(-6*x2*y2 + x4 + y4)/(32*sqrt(pi))
+  data_out[47] =
+    2.3666191622317521f * xz *
+    (10.0f * x2 * y2 - x4 - 5.0f * y4); // 3*sqrt(2002)*xz*(10*x2*y2 - x4 - 5*y4)/(32*sqrt(pi))
+  data_out[48] =
+    10.247761577878714f * x2 * y4 - 10.247761577878714f * x4 * y2 + 0.6831841051919143f * x6 -
+    0.6831841051919143f * y6; // sqrt(6006)*(15*x2*y4 - 15*x4*y2 + x6 - y6)/(64*sqrt(pi))
+  if (degree <= 7) {
+    return;
+  }
+  data_out[49] = 0.70716273252459627f * y *
+                 (-21.0f * x2 * y4 + 35.0f * x4 * y2 - 7.0f * x6 +
+                  y6); // 3*sqrt(715)*y*(-21*x2*y4 + 35*x4*y2 - 7*x6 + y6)/(64*sqrt(pi))
+  data_out[50] = 5.2919213236038001f * xy * z *
+                 (-10.0f * x2 * y2 + 3.0f * x4 +
+                  3.0f * y4); // 3*sqrt(10010)*xy*z*(-10*x2*y2 + 3*x4 + 3*y4)/(32*sqrt(pi))
+  data_out[51] = -0.51891557872026028f * y * (13.0f * z2 - 1.0f) *
+                 (-10.0f * x2 * y2 + 5.0f * x4 +
+                  y4); // -3*sqrt(385)*y*(13*z2 - 1)*(-10*x2*y2 + 5*x4 + y4)/(64*sqrt(pi))
+  data_out[52] = 4.1513246297620823f * xy * z * (x2 - y2) *
+                 (13.0f * z2 - 3.0f); // 3*sqrt(385)*xy*z*(x2 - y2)*(13*z2 - 3)/(8*sqrt(pi))
+  data_out[53] = -0.15645893386229404f * y * (3.0f * x2 - y2) *
+                 (13.0f * z2 * (11.0f * z2 - 3.0f) - 27.0f * z2 +
+                  3.0f); // -3*sqrt(35)*y*(3*x2 - y2)*(13*z2*(11*z2 - 3) - 27*z2 + 3)/(64*sqrt(pi))
+  data_out[54] =
+    0.44253269244498261f * xy * z *
+    (-110.0f * z2 + 143.0f * z4 + 15.0f); // 3*sqrt(70)*xy*z*(-110*z2 + 143*z4 + 15)/(32*sqrt(pi))
+  data_out[55] = 0.090331607582517306f * y *
+                 (-135.0f * z2 + 495.0f * z4 - 429.0f * z6 +
+                  5.0f); // sqrt(105)*y*(-135*z2 + 495*z4 - 429*z6 + 5)/(64*sqrt(pi))
+  data_out[56] = 0.068284276912004949f * z *
+                 (315.0f * z2 - 693.0f * z4 + 429.0f * z6 -
+                  35.0f); // sqrt(15)*z*(315*z2 - 693*z4 + 429*z6 - 35)/(32*sqrt(pi))
+  data_out[57] = 0.090331607582517306f * x *
+                 (-135.0f * z2 + 495.0f * z4 - 429.0f * z6 +
+                  5.0f); // sqrt(105)*x*(-135*z2 + 495*z4 - 429*z6 + 5)/(64*sqrt(pi))
+  data_out[58] = 0.07375544874083044f * z * (x2 - y2) *
+                 (143.0f * z2 * (3.0f * z2 - 1.0f) - 187.0f * z2 +
+                  45.0f); // sqrt(70)*z*(x2 - y2)*(143*z2*(3*z2 - 1) - 187*z2 + 45)/(64*sqrt(pi))
+  data_out[59] = -0.15645893386229404f * x * (x2 - 3.0f * y2) *
+                 (13.0f * z2 * (11.0f * z2 - 3.0f) - 27.0f * z2 +
+                  3.0f); // -3*sqrt(35)*x*(x2 - 3*y2)*(13*z2*(11*z2 - 3) - 27*z2 + 3)/(64*sqrt(pi))
+  data_out[60] =
+    1.0378311574405206f * z * (13.0f * z2 - 3.0f) *
+    (-6.0f * x2 * y2 + x4 + y4); // 3*sqrt(385)*z*(13*z2 - 3)*(-6*x2*y2 + x4 + y4)/(32*sqrt(pi))
+  data_out[61] = -0.51891557872026028f * x * (13.0f * z2 - 1.0f) *
+                 (-10.0f * x2 * y2 + x4 +
+                  5.0f * y4); // -3*sqrt(385)*x*(13*z2 - 1)*(-10*x2*y2 + x4 + 5*y4)/(64*sqrt(pi))
+  data_out[62] = 2.6459606618019f * z *
+                 (15.0f * x2 * y4 - 15.0f * x4 * y2 + x6 -
+                  y6); // 3*sqrt(10010)*z*(15*x2*y4 - 15*x4*y2 + x6 - y6)/(64*sqrt(pi))
+  data_out[63] = 0.70716273252459627f * x *
+                 (-35.0f * x2 * y4 + 21.0f * x4 * y2 - x6 +
+                  7.0f * y6); // 3*sqrt(715)*x*(-35*x2*y4 + 21*x4*y2 - x6 + 7*y6)/(64*sqrt(pi))
+}
+
+Tensor SHShader::encode(const Tensor & dirs)
+{
+  CHECK(dirs.is_contiguous());
+  int n_pts = dirs.size(0);
+  Tensor out = torch::empty({n_pts, DEGREE * DEGREE}, CUDAFloat);
+  dim3 grid_dim = LIN_GRID_DIM(n_pts);
+  dim3 block_dim = LIN_BLOCK_DIM;
+
+  SHKernel<<<grid_dim, block_dim>>>(n_pts, DEGREE, dirs.data_ptr<float>(), out.data_ptr<float>());
+
+  return out;
+}
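For reference, the hard-coded constants above are the normalization factors of the real spherical harmonics basis; the first two bands, with the sign convention used in the kernel's comments, are

```latex
Y_0^0 = \frac{1}{2\sqrt{\pi}} \approx 0.28209479, \qquad
Y_1^{-1} = -\sqrt{\tfrac{3}{4\pi}}\,y, \quad
Y_1^{0} = \sqrt{\tfrac{3}{4\pi}}\,z, \quad
Y_1^{1} = -\sqrt{\tfrac{3}{4\pi}}\,x,
\qquad \sqrt{\tfrac{3}{4\pi}} \approx 0.48860251
```

A degree-`d` encoding of a unit direction produces `d * d` coefficients, which is why `encode` allocates an `n_pts x DEGREE^2` tensor; with the `DEGREE = 4` declared in the header below, only indices 0 through 15 of the kernel's outputs are ever written, even though the kernel supports up to degree 7.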
diff --git a/localization/nerf_based_localizer/src/nerf/sh_shader.hpp b/localization/nerf_based_localizer/src/nerf/sh_shader.hpp
new file mode 100644
index 0000000000000..66f0b99625561
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/sh_shader.hpp
@@ -0,0 +1,45 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Shader/SHShader.h
+//
+// Created by ppwang on 2022/10/8.
+//
+
+#ifndef NERF__SH_SHADER_HPP_
+#define NERF__SH_SHADER_HPP_
+
+#include <torch/torch.h>
+
+class SHShader : public torch::nn::Module
+{
+  using Tensor = torch::Tensor;
+
+public:
+  SHShader();
+
+  Tensor query(const Tensor & feats, const Tensor & dirs);
+
+  std::vector<torch::optim::OptimizerParamGroup> optim_param_groups(float lr);
+
+private:
+  static constexpr int DEGREE = 4;
+
+  Tensor encode(const Tensor & dirs);
+
+  torch::nn::Sequential mlp_ = nullptr;
+};
+
+#endif  // NERF__SH_SHADER_HPP_
diff --git a/localization/nerf_based_localizer/src/nerf/stop_watch.cpp b/localization/nerf_based_localizer/src/nerf/stop_watch.cpp
new file mode 100644
index 0000000000000..c629b56f9bda4
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/stop_watch.cpp
@@ -0,0 +1,41 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Utils/StopWatch.cpp
+//
+// Created by ppwang on 2022/5/18.
+//
+
+#include "stop_watch.hpp"
+
+#include <torch/torch.h>
+
+#include <iostream>
+
+ScopeWatch::ScopeWatch(const std::string & scope_name) : scope_name_(scope_name)
+{
+  torch::cuda::synchronize();
+  t_point_ = std::chrono::steady_clock::now();
+  std::cout << "[" << scope_name_ << "] begin" << std::endl;
+}
+
+ScopeWatch::~ScopeWatch()
+{
+  torch::cuda::synchronize();
+  std::chrono::steady_clock::time_point new_point = std::chrono::steady_clock::now();
+  std::chrono::duration<double> time_span =
+    std::chrono::duration_cast<std::chrono::duration<double>>(new_point - t_point_);
+  std::cout << "[" << scope_name_ << "] end in " << time_span.count() << " seconds" << std::endl;
+}
diff --git a/localization/nerf_based_localizer/src/nerf/stop_watch.hpp b/localization/nerf_based_localizer/src/nerf/stop_watch.hpp
new file mode 100644
index 0000000000000..86483eee9484d
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/stop_watch.hpp
@@ -0,0 +1,53 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Utils/StopWatch.h
+//
+// Created by ppwang on 2022/5/18.
+//
+
+#ifndef NERF__STOP_WATCH_HPP_
+#define NERF__STOP_WATCH_HPP_
+
+#include <chrono>
+#include <string>
+
+class ScopeWatch
+{
+public:
+  ScopeWatch(const std::string & scope_name);
+  ~ScopeWatch();
+
+private:
+  std::chrono::steady_clock::time_point t_point_;
+  std::string scope_name_;
+};
+
+class Timer
+{
+public:
+  void start() { start_time_ = std::chrono::steady_clock::now(); }
+  int64_t elapsed_milli_seconds() const
+  {
+    auto elapsed = std::chrono::steady_clock::now() - start_time_;
+    return std::chrono::duration_cast<std::chrono::milliseconds>(elapsed).count();
+  }
+  double elapsed_seconds() const { return elapsed_milli_seconds() / 1000.0; }
+
+private:
+  std::chrono::steady_clock::time_point start_time_;
+};
+
+#endif  // NERF__STOP_WATCH_HPP_
diff --git a/localization/nerf_based_localizer/src/nerf/utils.cpp b/localization/nerf_based_localizer/src/nerf/utils.cpp
new file mode 100644
index 0000000000000..4c72509422381
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/utils.cpp
@@ -0,0 +1,80 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Utils/Utils.cpp
+//
+// Created by ppwang on 2023/4/4.
+//
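As a usage note for the two timing helpers above: `ScopeWatch` is RAII-style (it logs on construction and again on destruction, synchronizing CUDA each time so the reported wall time covers finished GPU work), while `Timer` is polled manually and does not synchronize. A minimal sketch, assuming a CUDA build of libtorch and the header above:

```cpp
#include "stop_watch.hpp"

#include <torch/torch.h>

void example()
{
  // Prints "[render] begin" now and "[render] end in N seconds" at scope exit.
  ScopeWatch watch("render");

  Timer timer;
  timer.start();
  torch::Tensor a = torch::rand({1024, 1024}, torch::kCUDA);
  torch::Tensor b = a.matmul(a);
  // Timer measures host-side time only; synchronize first if the timed
  // region launched GPU kernels, otherwise the queue may still be running.
  torch::cuda::synchronize();
  int64_t ms = timer.elapsed_milli_seconds();
  (void)b;
  (void)ms;
}
```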
+
+#include "utils.hpp"
+
+#include "common.hpp"
+
+#include <opencv2/opencv.hpp>
+
+#include <torch/torch.h>
+
+using Tensor = torch::Tensor;
+
+Tensor utils::read_image_tensor(const std::string & path)
+{
+  cv::Mat img = cv::imread(path, cv::IMREAD_UNCHANGED);
+  cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
+  img.convertTo(img, CV_32FC3, 1.0 / 255.0);
+  Tensor img_tensor =
+    torch::from_blob(img.data, {img.rows, img.cols, img.channels()}, torch::kFloat32).clone();
+  return img_tensor;
+}
+
+bool utils::write_image_tensor(const std::string & path, Tensor img)
+{
+  img = img.contiguous();
+  img = (img * 255.f).clamp(0, 255).to(torch::kUInt8).to(torch::kCPU);
+  cv::Mat img_mat(img.size(0), img.size(1), CV_8UC3, img.data_ptr());
+  cv::cvtColor(img_mat, img_mat, cv::COLOR_RGB2BGR);
+  cv::imwrite(path, img_mat);
+  return true;
+}
+
+Tensor utils::resize_image(Tensor image, const int resize_height, const int resize_width)
+{
+  const int height = image.size(0);
+  const int width = image.size(1);
+  if (height == resize_height && width == resize_width) {
+    return image;
+  }
+
+  // change HWC to CHW
+  image = image.permute({2, 0, 1});
+  image = image.unsqueeze(0);  // add batch dim
+
+  // Resize
+  std::vector<int64_t> size = {resize_height, resize_width};
+  image = torch::nn::functional::interpolate(
+    image, torch::nn::functional::InterpolateFuncOptions().size(size));
+
+  // change CHW to HWC
+  image = image.squeeze(0);  // remove batch dim
+  image = image.permute({1, 2, 0});
+  return image;
+}
+
+float utils::calc_loss(Tensor pred_image, Tensor gt_image)
+{
+  Tensor diff = pred_image - gt_image;
+  Tensor loss = (diff * diff).mean(-1);
+  Tensor score = loss.numel() / (loss.sum() + 1e-6f);
+  return score.mean().item<float>();
+}
diff --git a/localization/nerf_based_localizer/src/nerf/utils.hpp b/localization/nerf_based_localizer/src/nerf/utils.hpp
new file mode 100644
index 0000000000000..1942c3b71c2a3
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/utils.hpp
@@ -0,0 +1,39 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Utils/Utils.h
+//
+// Created by ppwang on 2022/5/11.
+//
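Despite its name, `calc_loss` returns a score where larger is better: with `loss` holding the channel-averaged squared error per pixel, the returned value is the reciprocal of the mean squared error over the pixel set P:

```latex
\mathrm{score}
= \frac{|P|}{\sum_{p \in P} \tfrac{1}{3}\lVert \mathrm{pred}_p - \mathrm{gt}_p \rVert^2 + \varepsilon}
\approx \frac{1}{\mathrm{MSE}}, \qquad \varepsilon = 10^{-6}
```

A perfect render therefore diverges toward 1/ε, and `test.cpp` later computes the same quantity (rendered height x width divided by the summed error) as its per-image score.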
+
+#ifndef NERF__UTILS_HPP_
+#define NERF__UTILS_HPP_
+
+#include <torch/torch.h>
+
+#include <string>
+
+namespace utils
+{
+using Tensor = torch::Tensor;
+
+Tensor read_image_tensor(const std::string & path);
+bool write_image_tensor(const std::string & path, Tensor img);
+Tensor resize_image(Tensor image, const int resize_height, const int resize_width);
+float calc_loss(Tensor pred_image, Tensor gt_image);
+
+}  // namespace utils
+
+#endif  // NERF__UTILS_HPP_
diff --git a/localization/nerf_based_localizer/src/nerf_based_localizer.cpp b/localization/nerf_based_localizer/src/nerf_based_localizer.cpp
new file mode 100644
index 0000000000000..5b89179fc9a81
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf_based_localizer.cpp
@@ -0,0 +1,378 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "nerf_based_localizer.hpp"
+
+#include "nerf/stop_watch.hpp"
+#include "nerf/utils.hpp"
+
+#include <Eigen/Core>
+#include <Eigen/Geometry>
+
+#include <torch/torch.h>
+
+#include <tuple>
+
+geometry_msgs::msg::Pose transform_pose(
+  const geometry_msgs::msg::Pose & pose, const geometry_msgs::msg::TransformStamped & transform)
+{
+  Eigen::Quaterniond R1(
+    transform.transform.rotation.w, transform.transform.rotation.x, transform.transform.rotation.y,
+    transform.transform.rotation.z);
+  Eigen::Vector3d t1(
+    transform.transform.translation.x, transform.transform.translation.y,
+    transform.transform.translation.z);
+
+  Eigen::Quaterniond R2(
+    pose.orientation.w, pose.orientation.x, pose.orientation.y, pose.orientation.z);
+  Eigen::Vector3d t2(pose.position.x, pose.position.y, pose.position.z);
+
+  Eigen::Quaterniond R = R2 * R1;
+  Eigen::Vector3d t = R2._transformVector(t1) + t2;
+
+  geometry_msgs::msg::Pose result_pose;
+  result_pose.orientation.x = R.x();
+  result_pose.orientation.y = R.y();
+  result_pose.orientation.z = R.z();
+  result_pose.orientation.w = R.w();
+  result_pose.position.x = t.x();
+  result_pose.position.y = t.y();
+  result_pose.position.z = t.z();
+
+  return result_pose;
+}
+
+NerfBasedLocalizer::NerfBasedLocalizer(
+  const std::string & name_space, const rclcpp::NodeOptions & options)
+: Node("nerf_based_localizer", name_space, options),
+  tf_buffer_(this->get_clock()),
+  tf_listener_(tf_buffer_),
+  tf2_broadcaster_(*this),
+  map_frame_("map"),
+  particle_num_(this->declare_parameter<int>("particle_num")),
+  output_covariance_(this->declare_parameter<double>("output_covariance")),
+  iteration_num_(this->declare_parameter<int>("iteration_num")),
+  learning_rate_(this->declare_parameter<float>("learning_rate")),
+  is_activated_(true),
+  optimization_mode_(this->declare_parameter<int>("optimization_mode"))
+{
+  LocalizerParam param;
+  param.train_result_dir = this->declare_parameter<std::string>("train_result_dir");
+  param.render_pixel_num = this->declare_parameter<int>("render_pixel_num");
+  param.noise_position_x = this->declare_parameter<float>("noise_position_x");
+  param.noise_position_y = this->declare_parameter<float>("noise_position_y");
+  param.noise_position_z = this->declare_parameter<float>("noise_position_z");
+  param.noise_rotation_x = this->declare_parameter<float>("noise_rotation_x");
+  param.noise_rotation_y = this->declare_parameter<float>("noise_rotation_y");
+  param.noise_rotation_z = this->declare_parameter<float>("noise_rotation_z");
+  param.resize_factor = this->declare_parameter<int>("resize_factor");
+  param.sample_num_per_ray = this->declare_parameter<int>("sample_num_per_ray");
+  localizer_ = Localizer(param);
+
+  initial_pose_with_covariance_subscriber_ =
+    this->create_subscription<geometry_msgs::msg::PoseWithCovarianceStamped>(
+      "~/input/pose", 100,
+      std::bind(&NerfBasedLocalizer::callback_initial_pose, this, std::placeholders::_1));
+
+  image_subscriber_ = this->create_subscription<sensor_msgs::msg::Image>(
+    "~/input/image", rclcpp::SensorDataQoS().keep_last(0),
+    std::bind(&NerfBasedLocalizer::callback_image, this, std::placeholders::_1));
+
+  // create publishers
+  nerf_pose_publisher_ =
+    this->create_publisher<geometry_msgs::msg::PoseStamped>("~/output/pose", 10);
+  nerf_pose_with_covariance_publisher_ =
+    this->create_publisher<geometry_msgs::msg::PoseWithCovarianceStamped>(
+      "~/output/pose_with_covariance", 10);
+  nerf_score_publisher_ = this->create_publisher<std_msgs::msg::Float32>("~/output/score", 10);
+  nerf_image_publisher_ = this->create_publisher<sensor_msgs::msg::Image>("~/output/image", 10);
+
+  service_ = this->create_service<tier4_localization_msgs::srv::PoseWithCovarianceStamped>(
+    "~/service/optimize_pose",
+    std::bind(&NerfBasedLocalizer::service, this, std::placeholders::_1, std::placeholders::_2),
+    rclcpp::ServicesQoS().get_rmw_qos_profile());
+
+  service_trigger_node_ = this->create_service<std_srvs::srv::SetBool>(
+    "~/service/trigger_node",
+    std::bind(
+      &NerfBasedLocalizer::service_trigger_node, this, std::placeholders::_1,
+      std::placeholders::_2),
+    rclcpp::ServicesQoS().get_rmw_qos_profile());
+
+  RCLCPP_DEBUG(this->get_logger(), "nerf_based_localizer is created.");
+}
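The `optimize_pose` service wired up above takes a rough pose and returns the NeRF-refined one. As an illustrative sketch of a caller (not part of this diff; the node name, client name, and fully-resolved service path are assumptions), a minimal one-shot client could look like this:

```cpp
#include <rclcpp/rclcpp.hpp>

#include <tier4_localization_msgs/srv/pose_with_covariance_stamped.hpp>

// Hypothetical one-shot client: sends a rough pose, waits for the refined pose.
int main(int argc, char ** argv)
{
  rclcpp::init(argc, argv);
  auto node = rclcpp::Node::make_shared("nerf_align_client");
  auto client = node->create_client<tier4_localization_msgs::srv::PoseWithCovarianceStamped>(
    "/localization/pose_estimator/nerf_based_localizer/service/optimize_pose");  // assumed path

  auto req =
    std::make_shared<tier4_localization_msgs::srv::PoseWithCovarianceStamped::Request>();
  req->pose_with_covariance.header.frame_id = "map";
  // ... fill req->pose_with_covariance.pose.pose with the rough initial pose ...

  client->wait_for_service();
  auto future = client->async_send_request(req);
  if (rclcpp::spin_until_future_complete(node, future) == rclcpp::FutureReturnCode::SUCCESS) {
    RCLCPP_INFO(node->get_logger(), "refined pose received: success=%d", future.get()->success);
  }
  rclcpp::shutdown();
  return 0;
}
```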
+
+void NerfBasedLocalizer::callback_initial_pose(
+  const geometry_msgs::msg::PoseWithCovarianceStamped::ConstSharedPtr initial_pose_msg_ptr)
+{
+  // if rosbag restart, clear buffer
+  if (!initial_pose_msg_ptr_array_.empty()) {
+    const builtin_interfaces::msg::Time & t_front =
+      initial_pose_msg_ptr_array_.front()->header.stamp;
+    const builtin_interfaces::msg::Time & t_msg = initial_pose_msg_ptr->header.stamp;
+    if (t_front.sec > t_msg.sec || (t_front.sec == t_msg.sec && t_front.nanosec > t_msg.nanosec)) {
+      initial_pose_msg_ptr_array_.clear();
+    }
+  }
+
+  if (initial_pose_msg_ptr->header.frame_id == map_frame_) {
+    initial_pose_msg_ptr_array_.push_back(initial_pose_msg_ptr);
+    if (initial_pose_msg_ptr_array_.size() > 1) {
+      initial_pose_msg_ptr_array_.pop_front();
+    }
+  } else {
+    RCLCPP_ERROR(this->get_logger(), "initial_pose_with_covariance is not in map frame.");
+    std::exit(1);
+  }
+}
+
+void NerfBasedLocalizer::callback_image(const sensor_msgs::msg::Image::ConstSharedPtr image_msg_ptr)
+{
+  target_frame_ = image_msg_ptr->header.frame_id;
+  image_msg_ptr_array_.push_back(image_msg_ptr);
+  if (image_msg_ptr_array_.size() > 1) {
+    image_msg_ptr_array_.pop_front();
+  }
+
+  if (!is_activated_) {
+    RCLCPP_ERROR(this->get_logger(), "NerfBasedLocalizer is not activated in callback_image.");
+    return;
+  }
+
+  if (initial_pose_msg_ptr_array_.empty()) {
+    RCLCPP_ERROR(this->get_logger(), "initial_pose_with_covariance is not received.");
+    return;
+  }
+
+  const geometry_msgs::msg::PoseWithCovarianceStamped::ConstSharedPtr pose_base_link =
+    initial_pose_msg_ptr_array_.back();
+  initial_pose_msg_ptr_array_.pop_back();
+
+  // Process
+  const auto [pose_msg, image_msg, score_msg] = localize(pose_base_link->pose.pose, *image_msg_ptr);
+
+  // (1) publish nerf_pose
+  geometry_msgs::msg::PoseStamped pose_stamped_msg;
+  pose_stamped_msg.header.frame_id = map_frame_;
+  pose_stamped_msg.header.stamp = image_msg_ptr->header.stamp;
+  pose_stamped_msg.pose = pose_msg;
+  nerf_pose_publisher_->publish(pose_stamped_msg);
+
+  // (2) publish nerf_pose_with_covariance
+  geometry_msgs::msg::PoseWithCovarianceStamped pose_with_cov_msg;
+  pose_with_cov_msg.header.frame_id = map_frame_;
+  pose_with_cov_msg.header.stamp = image_msg_ptr->header.stamp;
+  pose_with_cov_msg.pose.pose = pose_msg;
+  pose_with_cov_msg.pose.covariance[0] = output_covariance_;
+  pose_with_cov_msg.pose.covariance[7] = output_covariance_;
+  pose_with_cov_msg.pose.covariance[14] = output_covariance_;
+  pose_with_cov_msg.pose.covariance[21] = output_covariance_ * 10;
+  pose_with_cov_msg.pose.covariance[28] = output_covariance_ * 10;
+  pose_with_cov_msg.pose.covariance[35] = output_covariance_ * 10;
+  nerf_pose_with_covariance_publisher_->publish(pose_with_cov_msg);
+
+  // (3) publish score
+  nerf_score_publisher_->publish(score_msg);
+
+  // (4) publish image
+  nerf_image_publisher_->publish(image_msg);
+}
+
+void NerfBasedLocalizer::service(
+  const tier4_localization_msgs::srv::PoseWithCovarianceStamped::Request::SharedPtr req,
+  tier4_localization_msgs::srv::PoseWithCovarianceStamped::Response::SharedPtr res)
+{
+  RCLCPP_DEBUG(this->get_logger(), "start NerfBasedLocalizer::service");
+
+  if (image_msg_ptr_array_.empty()) {
+    RCLCPP_ERROR(this->get_logger(), "image is not received.");
+    res->success = false;
+    return;
+  }
+
+  // Get the latest image (the deque keeps only the most recent message)
+  const sensor_msgs::msg::Image::ConstSharedPtr image_msg_ptr = image_msg_ptr_array_.back();
+
+  // Process
+  const auto [pose_msg, image_msg, score_msg] =
+    localize(req->pose_with_covariance.pose.pose, *image_msg_ptr);
+
+  res->success = true;
+  res->pose_with_covariance.header.frame_id = map_frame_;
+  res->pose_with_covariance.header.stamp = image_msg_ptr->header.stamp;
+  res->pose_with_covariance.pose.pose = pose_msg;
+  res->pose_with_covariance.pose.covariance = req->pose_with_covariance.pose.covariance;
+
+  RCLCPP_DEBUG(this->get_logger(), "finish NerfBasedLocalizer::service");
+}
+
+std::tuple<geometry_msgs::msg::Pose, sensor_msgs::msg::Image, std_msgs::msg::Float32>
+NerfBasedLocalizer::localize(
+  const geometry_msgs::msg::Pose & pose_msg, const sensor_msgs::msg::Image & image_msg)
+{
+  Timer timer;
+  timer.start();
+
+  // Get data of image_ptr
+  // Accessing header information
+  const std_msgs::msg::Header header = image_msg.header;
+
+  // Accessing image properties
+  const uint32_t width = image_msg.width;
+  const uint32_t height = image_msg.height;
+
+  RCLCPP_DEBUG_STREAM(
+    this->get_logger(), "Image received. width: " << width << ", height: " << height);
width: " << width << ", height: " << height); + + // Accessing image data + torch::Tensor image_tensor = torch::tensor(image_msg.data); + image_tensor = image_tensor.view({height, width, 3}); + image_tensor = image_tensor.index({Slc(0, 850)}); + image_tensor = image_tensor.to(torch::kFloat32); + image_tensor /= 255.0; + image_tensor = image_tensor.flip(2); // BGR to RGB + image_tensor = + utils::resize_image(image_tensor, localizer_.infer_height(), localizer_.infer_width()); + image_tensor = image_tensor.to(torch::kCUDA); + + geometry_msgs::msg::PoseWithCovarianceStamped pose_camera; + try { + geometry_msgs::msg::TransformStamped transform = + tf_buffer_.lookupTransform("base_link", target_frame_, tf2::TimePointZero); + pose_camera.pose.pose = transform_pose(pose_msg, transform); + } catch (tf2::TransformException & ex) { + RCLCPP_WARN(this->get_logger(), "%s", ex.what()); + } + + const geometry_msgs::msg::Pose pose = pose_camera.pose.pose; + + Eigen::Quaternionf quat_in( + pose.orientation.w, pose.orientation.x, pose.orientation.y, pose.orientation.z); + Eigen::Matrix3f rot_in = quat_in.toRotationMatrix(); + + torch::Tensor initial_pose = torch::eye(4); + initial_pose[0][0] = rot_in(0, 0); + initial_pose[0][1] = rot_in(0, 1); + initial_pose[0][2] = rot_in(0, 2); + initial_pose[0][3] = pose.position.x; + initial_pose[1][0] = rot_in(1, 0); + initial_pose[1][1] = rot_in(1, 1); + initial_pose[1][2] = rot_in(1, 2); + initial_pose[1][3] = pose.position.y; + initial_pose[2][0] = rot_in(2, 0); + initial_pose[2][1] = rot_in(2, 1); + initial_pose[2][2] = rot_in(2, 2); + initial_pose[2][3] = pose.position.z; + initial_pose = initial_pose.to(torch::kCUDA); + initial_pose = initial_pose.to(torch::kFloat32); + RCLCPP_DEBUG_STREAM(this->get_logger(), "pose_before:\n" << initial_pose); + + initial_pose = localizer_.camera2nerf(initial_pose); + + // run NeRF + torch::Tensor optimized_pose; + std::vector particles; + + if (optimization_mode_ == 0) { + const float noise_coeff = 1.0f; + particles = localizer_.optimize_pose_by_random_search( + initial_pose, image_tensor, particle_num_, noise_coeff); + optimized_pose = Localizer::calc_average_pose(particles); + } else { + std::vector optimized_poses = localizer_.optimize_pose_by_differential( + initial_pose, image_tensor, iteration_num_, learning_rate_); + optimized_pose = optimized_poses.back(); + } + + torch::Tensor nerf_image = localizer_.render_image(optimized_pose); + const float score = utils::calc_loss(nerf_image, image_tensor); + + RCLCPP_DEBUG_STREAM(this->get_logger(), "score = " << score); + + optimized_pose = localizer_.nerf2camera(optimized_pose); + + RCLCPP_DEBUG_STREAM(this->get_logger(), "pose_after:\n" << optimized_pose); + + geometry_msgs::msg::Pose result_pose_camera; + result_pose_camera.position.x = optimized_pose[0][3].item(); + result_pose_camera.position.y = optimized_pose[1][3].item(); + result_pose_camera.position.z = optimized_pose[2][3].item(); + Eigen::Matrix3f rot_out; + rot_out << optimized_pose[0][0].item(), optimized_pose[0][1].item(), + optimized_pose[0][2].item(), optimized_pose[1][0].item(), + optimized_pose[1][1].item(), optimized_pose[1][2].item(), + optimized_pose[2][0].item(), optimized_pose[2][1].item(), + optimized_pose[2][2].item(); + Eigen::Quaternionf quat_out(rot_out); + result_pose_camera.orientation.x = quat_out.x(); + result_pose_camera.orientation.y = quat_out.y(); + result_pose_camera.orientation.z = quat_out.z(); + result_pose_camera.orientation.w = quat_out.w(); + + geometry_msgs::msg::Pose 
result_pose_base_link; + try { + geometry_msgs::msg::TransformStamped transform = + tf_buffer_.lookupTransform(target_frame_, "base_link", tf2::TimePointZero); + result_pose_base_link = transform_pose(result_pose_camera, transform); + } catch (tf2::TransformException & ex) { + RCLCPP_WARN(this->get_logger(), "%s", ex.what()); + } + + nerf_image = nerf_image * 255; + nerf_image = nerf_image.to(torch::kUInt8); + nerf_image = nerf_image.to(torch::kCPU); + nerf_image = nerf_image.contiguous(); + sensor_msgs::msg::Image nerf_image_msg; + nerf_image_msg.header = header; + nerf_image_msg.width = nerf_image.size(1); + nerf_image_msg.height = nerf_image.size(0); + nerf_image_msg.step = nerf_image.size(1) * 3; + nerf_image_msg.encoding = "rgb8"; + nerf_image_msg.data.resize(nerf_image.numel()); + std::copy( + nerf_image.data_ptr(), nerf_image.data_ptr() + nerf_image.numel(), + nerf_image_msg.data.begin()); + + std_msgs::msg::Float32 score_msg; + score_msg.data = score; + + geometry_msgs::msg::TransformStamped transform; + transform.transform.translation.x = result_pose_base_link.position.x; + transform.transform.translation.y = result_pose_base_link.position.y; + transform.transform.translation.z = result_pose_base_link.position.z; + transform.transform.rotation = result_pose_base_link.orientation; + transform.header = header; + transform.header.frame_id = map_frame_; + transform.child_frame_id = "nerf_base_link"; + tf2_broadcaster_.sendTransform(transform); + + RCLCPP_DEBUG_STREAM(get_logger(), "localize time: " << timer.elapsed_milli_seconds()); + + return std::make_tuple(result_pose_base_link, nerf_image_msg, score_msg); +} + +void NerfBasedLocalizer::service_trigger_node( + const std_srvs::srv::SetBool::Request::SharedPtr req, + std_srvs::srv::SetBool::Response::SharedPtr res) +{ + RCLCPP_DEBUG_STREAM( + this->get_logger(), "service_trigger " << req->data << " is arrived to NerfBasedLocalizer."); + + is_activated_ = req->data; + if (is_activated_) { + initial_pose_msg_ptr_array_.clear(); + image_msg_ptr_array_.clear(); + } + res->success = true; +} diff --git a/localization/nerf_based_localizer/src/nerf_based_localizer.hpp b/localization/nerf_based_localizer/src/nerf_based_localizer.hpp new file mode 100644 index 0000000000000..0dc3a52dfc70e --- /dev/null +++ b/localization/nerf_based_localizer/src/nerf_based_localizer.hpp @@ -0,0 +1,101 @@ +// Copyright 2023 Autoware Foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
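The covariance indices used in `callback_image` follow the ROS convention of a row-major 6x6 matrix over (x, y, z, roll, pitch, yaw), so the diagonal lives at offsets 0, 7, 14, 21, 28, 35. A small sketch of the equivalent loop (illustrative only, not part of the diff):

```cpp
#include <geometry_msgs/msg/pose_with_covariance_stamped.hpp>

// Fill the 6x6 row-major covariance diagonal: translational variance first,
// then a 10x larger variance for the rotational axes, as the node above does.
void fill_diagonal(geometry_msgs::msg::PoseWithCovarianceStamped & msg, double var)
{
  for (int i = 0; i < 6; ++i) {
    msg.pose.covariance[i * 6 + i] = (i < 3 ? var : var * 10);
  }
}
```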
+#ifndef NERF_BASED_LOCALIZER_HPP_
+#define NERF_BASED_LOCALIZER_HPP_
+
+#include "nerf/localizer.hpp"
+
+#include <rclcpp/rclcpp.hpp>
+
+#include <geometry_msgs/msg/pose_stamped.hpp>
+#include <geometry_msgs/msg/pose_with_covariance_stamped.hpp>
+#include <sensor_msgs/msg/image.hpp>
+#include <std_msgs/msg/float32.hpp>
+#include <std_srvs/srv/set_bool.hpp>
+#include <tier4_localization_msgs/srv/pose_with_covariance_stamped.hpp>
+
+#include <tf2_ros/buffer.h>
+#include <tf2_ros/transform_broadcaster.h>
+#include <tf2_ros/transform_listener.h>
+
+#include <deque>
+#include <string>
+#include <tuple>
+
+class NerfBasedLocalizer : public rclcpp::Node
+{
+public:
+  NerfBasedLocalizer(
+    const std::string & name_space = "",
+    const rclcpp::NodeOptions & options = rclcpp::NodeOptions());
+
+private:
+  void callback_initial_pose(
+    const geometry_msgs::msg::PoseWithCovarianceStamped::ConstSharedPtr pose_conv_msg_ptr);
+  void callback_image(const sensor_msgs::msg::Image::ConstSharedPtr image_msg_ptr);
+  void service(
+    const tier4_localization_msgs::srv::PoseWithCovarianceStamped::Request::SharedPtr req,
+    tier4_localization_msgs::srv::PoseWithCovarianceStamped::Response::SharedPtr res);
+  void service_trigger_node(
+    const std_srvs::srv::SetBool::Request::SharedPtr req,
+    std_srvs::srv::SetBool::Response::SharedPtr res);
+
+  std::tuple<geometry_msgs::msg::Pose, sensor_msgs::msg::Image, std_msgs::msg::Float32> localize(
+    const geometry_msgs::msg::Pose & pose_msg, const sensor_msgs::msg::Image & image_msg);
+
+  // NerfBasedLocalizer subscribes to the following topics:
+  // (1) initial_pose_with_covariance [geometry_msgs::msg::PoseWithCovarianceStamped]
+  rclcpp::Subscription<geometry_msgs::msg::PoseWithCovarianceStamped>::SharedPtr
+    initial_pose_with_covariance_subscriber_;
+  // (2) image [sensor_msgs::msg::Image]
+  rclcpp::Subscription<sensor_msgs::msg::Image>::SharedPtr image_subscriber_;
+
+  // NerfBasedLocalizer publishes the following topics:
+  // (1) nerf_pose [geometry_msgs::msg::PoseStamped]
+  rclcpp::Publisher<geometry_msgs::msg::PoseStamped>::SharedPtr nerf_pose_publisher_;
+  // (2) nerf_pose_with_covariance [geometry_msgs::msg::PoseWithCovarianceStamped]
+  rclcpp::Publisher<geometry_msgs::msg::PoseWithCovarianceStamped>::SharedPtr
+    nerf_pose_with_covariance_publisher_;
+  // (3) nerf_score [std_msgs::msg::Float32]
+  rclcpp::Publisher<std_msgs::msg::Float32>::SharedPtr nerf_score_publisher_;
+  // (4) nerf_image [sensor_msgs::msg::Image]
+  rclcpp::Publisher<sensor_msgs::msg::Image>::SharedPtr nerf_image_publisher_;
+
+  // tf
+  tf2_ros::Buffer tf_buffer_;
+  tf2_ros::TransformListener tf_listener_;
+  tf2_ros::TransformBroadcaster tf2_broadcaster_;
+
+  // Service
+  rclcpp::Service<tier4_localization_msgs::srv::PoseWithCovarianceStamped>::SharedPtr service_;
+  rclcpp::Service<std_srvs::srv::SetBool>::SharedPtr service_trigger_node_;
+
+  const std::string map_frame_;
+  std::string target_frame_;
+  const int particle_num_;
+  const double output_covariance_;
+  const int iteration_num_;
+  const float learning_rate_;
+
+  // data deque
+  std::deque<geometry_msgs::msg::PoseWithCovarianceStamped::ConstSharedPtr>
+    initial_pose_msg_ptr_array_;
+  std::deque<sensor_msgs::msg::Image::ConstSharedPtr> image_msg_ptr_array_;
+
+  bool is_activated_;
+
+  Localizer localizer_;
+  const int optimization_mode_;
+};
+
+#endif  // NERF_BASED_LOCALIZER_HPP_
diff --git a/localization/nerf_based_localizer/training_tool/CMakeLists.txt b/localization/nerf_based_localizer/training_tool/CMakeLists.txt
new file mode 100644
index 0000000000000..e7aae6e4cb34c
--- /dev/null
+++ b/localization/nerf_based_localizer/training_tool/CMakeLists.txt
@@ -0,0 +1,48 @@
+cmake_minimum_required(VERSION 3.18)
+project(nerf_training_tool
+  VERSION 0.1
+  LANGUAGES CUDA CXX)
+
+include(CheckLanguage)
+check_language(CUDA)
+
+set(CMAKE_CXX_STANDARD 17)
+
+# ##############################################################################
+# CUDA compiler setup
+# ##############################################################################
+set(CMAKE_CUDA_STANDARD 17)
+set(CMAKE_CUDA_STANDARD_REQUIRED ON)
+set(CMAKE_CUDA_EXTENSIONS OFF)
+set(CUDA_LINK_LIBRARIES_KEYWORD PUBLIC)
+
+# Libtorch
+set(CMAKE_PREFIX_PATH ${CMAKE_PREFIX_PATH} ${CMAKE_SOURCE_DIR}/../external/libtorch/)
+find_package(Torch REQUIRED PATHS ${CMAKE_SOURCE_DIR}/../external/libtorch
NO_DEFAULT_PATH) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}") +link_directories(${CMAKE_SOURCE_DIR}/../external/libtorch/lib/) +include_directories(${TORCH_INCLUDE_DIRS}) + +# eigen +find_package(Eigen3 REQUIRED) +include_directories(SYSTEM ${EIGEN3_INCLUDE_DIR}) + +# OpenCV +find_package(OpenCV REQUIRED) +include_directories(${OpenCV_INCLUDE_DIRS}) + +file(GLOB_RECURSE SRC_ALL + src/*.cpp + src/*.cu + ../src/nerf/*.cpp + ../src/nerf/*.cu) + +set(ALL_LIBS ${TORCH_LIBRARIES} + ${TORCH_CUDA_LIBRARIES} + ${OpenCV_LIBS} + stdc++fs) + +# ------------------------------------ Add executables --------------------------------------- +add_executable(main ${SRC_ALL}) +target_link_libraries(main ${ALL_LIBS}) +set_property(TARGET main PROPERTY CXX_STANDARD 17) diff --git a/localization/nerf_based_localizer/training_tool/config/train_config.yaml b/localization/nerf_based_localizer/training_tool/config/train_config.yaml new file mode 100644 index 0000000000000..e98d3cdce2368 --- /dev/null +++ b/localization/nerf_based_localizer/training_tool/config/train_config.yaml @@ -0,0 +1,16 @@ +%YAML 1.2 +--- +train: + pts_batch_size: 262144 + end_iter: 20000 + report_freq: 200 + vis_freq: 2500 + ray_batch_size: 4096 + save_freq: 20000 + validate_freq: 100000 + learning_rate: 1e-2 + learning_rate_alpha: 1e-1 + learning_rate_warm_up_end_iter: 1000 + var_loss_weight: 1e-2 + var_loss_start: 5000 + var_loss_end: 10000 diff --git a/localization/nerf_based_localizer/training_tool/script/build_and_exec_test.sh b/localization/nerf_based_localizer/training_tool/script/build_and_exec_test.sh new file mode 100755 index 0000000000000..3e47824e36c71 --- /dev/null +++ b/localization/nerf_based_localizer/training_tool/script/build_and_exec_test.sh @@ -0,0 +1,13 @@ +#!/bin/bash +set -eux + +ROOT_DIR=$(readlink -f $(dirname $0)/../) +TRAIN_RESULT_DIR=$(readlink -f $1) +DATASET_DIR=$(readlink -f $2) + +cd ${ROOT_DIR} +cmake . -B build +cmake --build build --config RelWithDebInfo -j8 + +cd ${ROOT_DIR}/build +./main test ${TRAIN_RESULT_DIR} ${DATASET_DIR} diff --git a/localization/nerf_based_localizer/training_tool/script/build_and_exec_training.sh b/localization/nerf_based_localizer/training_tool/script/build_and_exec_training.sh new file mode 100755 index 0000000000000..f68ab0ed151c1 --- /dev/null +++ b/localization/nerf_based_localizer/training_tool/script/build_and_exec_training.sh @@ -0,0 +1,16 @@ +#!/bin/bash +set -eux + +ROOT_DIR=$(readlink -f $(dirname $0)/../) +TRAIN_RESULT_DIR=$(readlink -f $1) +DATASET_DIR=$(readlink -f $2) + +cd ${ROOT_DIR} +cmake . 
-B build +cmake --build build --config RelWithDebInfo -j8 + +cd ${ROOT_DIR}/build +rm -rf ${TRAIN_RESULT_DIR} +mkdir ${TRAIN_RESULT_DIR} +cp ${ROOT_DIR}/config/train_config.yaml ${TRAIN_RESULT_DIR}/ +./main train ${TRAIN_RESULT_DIR} ${DATASET_DIR} diff --git a/localization/nerf_based_localizer/training_tool/script/convert_pose_tsv_to_cams_meta.py b/localization/nerf_based_localizer/training_tool/script/convert_pose_tsv_to_cams_meta.py new file mode 100644 index 0000000000000..41c68564b54ca --- /dev/null +++ b/localization/nerf_based_localizer/training_tool/script/convert_pose_tsv_to_cams_meta.py @@ -0,0 +1,97 @@ +import argparse +import os + +import numpy as np +import pandas as pd +from scipy.spatial.transform import Rotation +import yaml + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("target_dir", type=str) + return parser.parse_args() + + +def load_camera_info_from_yaml(filename): + with open(filename, "r") as input_file: + camera_info_dict = yaml.safe_load(input_file) + camera_info_dict["D"] = np.array(camera_info_dict["D"]) + camera_info_dict["K"] = np.array(camera_info_dict["K"]).reshape((3, 3)) + camera_info_dict["R"] = np.array(camera_info_dict["R"]).reshape((3, 3)) + camera_info_dict["P"] = np.array(camera_info_dict["P"]).reshape((3, 4)) + return camera_info_dict + + +AXIS_CONVERT_MAT_W2N = np.array( + [[+1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, +1]], dtype=np.float64 +) + + +def generate_cams_meta(target_dir: str) -> None: + df_pose = pd.read_csv(f"{target_dir}/pose.tsv", sep="\t", index_col=0) + n = len(df_pose) + pose_xyz = df_pose[["x", "y", "z"]].values + pose_quat = df_pose[["qx", "qy", "qz", "qw"]].values + rotation_mat = Rotation.from_quat(pose_quat).as_matrix() + mat = np.tile(np.eye(4), (n, 1, 1)) + mat[:, 0:3, 0:3] = rotation_mat + mat[:, 0:3, 3:4] = pose_xyz.reshape((n, 3, 1)) + + # convert axis + mat = AXIS_CONVERT_MAT_W2N @ mat @ AXIS_CONVERT_MAT_W2N.T + mat = mat[:, 0:3, :] + mat = mat.reshape((n, 12)) + + # save camera meta + camera_info = load_camera_info_from_yaml(f"{target_dir}/camera_info.yaml") + k = camera_info["K"] + camera_param = np.tile(k, (n, 1, 1)) + camera_param = camera_param.reshape((n, 9)) + + dist_param = camera_info["D"][0:4] + dist_param = np.tile(dist_param, (n, 1)) + + data = np.concatenate([mat, camera_param, dist_param], axis=1) + + # save as tsv + df = pd.DataFrame( + data, + columns=[ + # mat + "R00", + "R01", + "R02", + "tx", + "R10", + "R11", + "R12", + "ty", + "R20", + "R21", + "R22", + "tz", + # camera_param + "p00", + "p01", + "p02", + "p10", + "p11", + "p12", + "p20", + "p21", + "p22", + # dist_param + "k1", + "k2", + "p1", + "p2", + ], + ) + df.to_csv(os.path.join(target_dir, "cams_meta.tsv"), sep="\t", index=False) + + +if __name__ == "__main__": + args = parse_args() + target_dir = args.target_dir + generate_cams_meta(target_dir) diff --git a/localization/nerf_based_localizer/training_tool/script/interpolate.py b/localization/nerf_based_localizer/training_tool/script/interpolate.py new file mode 100644 index 0000000000000..f455c30a2b920 --- /dev/null +++ b/localization/nerf_based_localizer/training_tool/script/interpolate.py @@ -0,0 +1,69 @@ +import numpy as np +import pandas as pd +from scipy.spatial.transform import Rotation +from scipy.spatial.transform import Slerp + + +def interpolate_pose_in_time(df: pd.DataFrame, target_timestamp_list: pd.Series) -> pd.DataFrame: + """interpolate a data frame df to match the timestamp indicated by target_timestamp_list + constraint + * df and 
target_timestamp_list must be sorted by timestamp
+    * df must have a wide interval of timestamps before and after covering target_timestamp_list
+    output
+    * a data frame with the same columns as df, interpolated at target_timestamp_list
+    """
+    POSITIONS_KEY = ["x", "y", "z"]
+    ORIENTATIONS_KEY = ["qx", "qy", "qz", "qw"]  # scipy's Rotation uses (x, y, z, w) order
+    result_df = pd.DataFrame(columns=df.columns)
+    target_index = 0
+    df_index = 0
+    while df_index < len(df) - 1 and target_index < len(target_timestamp_list):
+        curr_time = df.iloc[df_index]["timestamp"]
+        next_time = df.iloc[df_index + 1]["timestamp"]
+        target_time = target_timestamp_list[target_index]
+
+        if not (curr_time <= target_time <= next_time):
+            df_index += 1
+            continue
+
+        curr_weight = (next_time - target_time) / (next_time - curr_time)
+        next_weight = 1.0 - curr_weight
+
+        curr_position = df.iloc[df_index][POSITIONS_KEY]
+        next_position = df.iloc[df_index + 1][POSITIONS_KEY]
+        target_position = curr_position * curr_weight + next_position * next_weight
+
+        curr_orientation = df.iloc[df_index][ORIENTATIONS_KEY]
+        next_orientation = df.iloc[df_index + 1][ORIENTATIONS_KEY]
+        curr_r = Rotation.from_quat(curr_orientation)
+        next_r = Rotation.from_quat(next_orientation)
+        slerp = Slerp([curr_time, next_time], Rotation.concatenate([curr_r, next_r]))
+        target_orientation = slerp([target_time]).as_quat()[0]
+
+        target_row = df.iloc[df_index].copy()
+        target_row["timestamp"] = target_timestamp_list[target_index]
+        target_row[POSITIONS_KEY] = target_position
+        target_row[ORIENTATIONS_KEY] = target_orientation
+        result_df = result_df.append(target_row)
+        target_index += 1
+    return result_df
+
+
+if __name__ == "__main__":
+    # Test interpolate
+    df = pd.DataFrame(columns=["timestamp", "x", "y", "z", "qx", "qy", "qz", "qw"])
+
+    default_rot = Rotation.from_quat(np.array([0, 0, 0, 1]))
+    quat1 = default_rot.as_quat()
+    df.loc[0] = [0, 0, 0, 0, quat1[0], quat1[1], quat1[2], quat1[3]]
+
+    r2 = Rotation.from_rotvec(np.array([0, 0, 1]) * (90 * (np.pi / 180)))
+    quat2 = (r2 * default_rot).as_quat()
+    df.loc[1] = [1, 0, 0, 0, quat2[0], quat2[1], quat2[2], quat2[3]]
+
+    target_timestamp_list = pd.Series([0.5])
+
+    result_df = interpolate_pose_in_time(df, target_timestamp_list)
+
+    print(result_df)
+    r_ans = Rotation.from_rotvec(np.array([0, 0, 1]) * (45 * (np.pi / 180)))
+    print(r_ans.as_quat())
diff --git a/localization/nerf_based_localizer/training_tool/script/prepare_data.py b/localization/nerf_based_localizer/training_tool/script/prepare_data.py
new file mode 100644
index 0000000000000..1b22ec2711eb2
--- /dev/null
+++ b/localization/nerf_based_localizer/training_tool/script/prepare_data.py
@@ -0,0 +1,203 @@
+#!/usr/bin/env python3
+
+import argparse
+import os
+import sys
+
+from convert_pose_tsv_to_cams_meta import generate_cams_meta
+import cv2
+from cv_bridge import CvBridge
+import geometry_msgs
+from geometry_msgs.msg import Pose
+from geometry_msgs.msg import PoseStamped
+from geometry_msgs.msg import PoseWithCovarianceStamped
+from interpolate import interpolate_pose_in_time
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+from rclpy.serialization import deserialize_message
+import rosbag2_py
+from scipy.spatial.transform import Rotation
+from sensor_msgs.msg import CameraInfo
+from sensor_msgs.msg import CompressedImage
+from sensor_msgs.msg import Image
+from tf2_msgs.msg import TFMessage
+from tf2_ros import Buffer
+from util_camera_info import save_camera_info_to_yaml
+
+
+def create_reader(input_bag_dir: str, storage_id: str):
+    storage_options = rosbag2_py.StorageOptions(uri=input_bag_dir, storage_id=storage_id)
converter_options = rosbag2_py.ConverterOptions( + input_serialization_format="cdr", output_serialization_format="cdr" + ) + reader = rosbag2_py.SequentialReader() + reader.open(storage_options, converter_options) + return reader, storage_options, converter_options + + +def transform_pose_base_link_2_camera(pose: Pose, target_frame: str, tf_buffer, time): + # get static transform + transform = tf_buffer.lookup_transform( + target_frame="base_link", source_frame=target_frame, time=time + ) + + # transform pose + R1: geometry_msgs.msg.Quaternion = transform.transform.rotation + R1: np.ndarray = Rotation.from_quat([R1.x, R1.y, R1.z, R1.w]).as_matrix() + t1: geometry_msgs.msg.Vector3 = transform.transform.translation + t1: np.ndarray = np.array([t1.x, t1.y, t1.z]) + + # pose + R2: geometry_msgs.msg.Quaternion = pose.orientation + R2: np.ndarray = Rotation.from_quat([R2.x, R2.y, R2.z, R2.w]).as_matrix() + t2: geometry_msgs.msg.Vector3 = pose.position + t2: np.ndarray = np.array([t2.x, t2.y, t2.z]) + + # transform + R: np.ndarray = np.dot(R2, R1) + t: np.ndarray = np.dot(R2, t1) + t2 + q: np.ndarray = Rotation.from_matrix(R).as_quat() + + # convert to geometry_msgs.msg.Pose + result_pose = Pose() + result_pose.orientation.x = q[0] + result_pose.orientation.y = q[1] + result_pose.orientation.z = q[2] + result_pose.orientation.w = q[3] + result_pose.position.x = t[0] + result_pose.position.y = t[1] + result_pose.position.z = t[2] + return result_pose + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("path_to_rosbag", type=str) + parser.add_argument("output_dir", type=str) + parser.add_argument("--skip", type=int, default=1) + parser.add_argument("--storage_id", type=str, default="sqlite3", choices=["mcap", "sqlite3"]) + parser.add_argument("--crop_height", type=int, default=850) + parser.add_argument("--use_cvt_color", action="store_true") + args = parser.parse_args() + + path_to_rosbag = args.path_to_rosbag + output_dir = args.output_dir + storage_id = args.storage_id + crop_height = args.crop_height + use_cvt_color = args.use_cvt_color + + target_frame = None + image_topic_name = "/sensing/camera/traffic_light/image_raw" + camera_info_topic_name = "/sensing/camera/traffic_light/camera_info" + pose_topic_name = "/localization/pose_twist_fusion_filter/biased_pose_with_covariance" + image_topic_type = Image() + pose_topic_type = PoseWithCovarianceStamped() + + reader, storage_options, converter_options = create_reader(path_to_rosbag, storage_id) + os.makedirs(output_dir, exist_ok=True) + + # save command + with open(f"{output_dir}/command.txt", "w") as f: + f.write(" ".join(["python3"] + sys.argv) + "\n") + + bridge = CvBridge() + tf_buffer = Buffer() + + index_images_all = 0 + skip = args.skip + prev_image = None + image_timestamp_list = list() + image_list = list() + columns = ["timestamp", "x", "y", "z", "qx", "qy", "qz", "qw"] + df_pose = pd.DataFrame(columns=columns) + while reader.has_next(): + (topic, data, t) = reader.read_next() + t /= 1e9 + if topic == image_topic_name: + image_msg = deserialize_message(data, image_topic_type) + if image_topic_type == Image(): + curr_image = bridge.imgmsg_to_cv2(image_msg, desired_encoding="passthrough") + elif image_topic_type == CompressedImage(): + curr_image = bridge.compressed_imgmsg_to_cv2( + image_msg, desired_encoding="passthrough" + ) + if use_cvt_color: + curr_image = cv2.cvtColor(curr_image, cv2.COLOR_BGR2RGB) + diff = 1 if prev_image is None else np.abs(prev_image - curr_image).sum() + prev_image = 
curr_image + index_images_all += 1 + if diff == 0 or index_images_all % skip != 0: + continue + target_frame = image_msg.header.frame_id + image_timestamp_list.append(t) + image_list.append(curr_image) + elif topic == pose_topic_name: + pose_msg = deserialize_message(data, pose_topic_type) + pose = ( + pose_msg.pose.pose + if pose_topic_type == PoseWithCovarianceStamped() + else pose_msg.pose + ) + if target_frame is None: + continue + try: + pose = transform_pose_base_link_2_camera( + pose, target_frame, tf_buffer, pose_msg.header.stamp + ) + except Exception as e: + print(e) + continue + df_pose.loc[len(df_pose)] = [ + t, + pose.position.x, + pose.position.y, + pose.position.z, + pose.orientation.x, + pose.orientation.y, + pose.orientation.z, + pose.orientation.w, + ] + elif topic == camera_info_topic_name: + camera_info = deserialize_message(data, CameraInfo()) + save_camera_info_to_yaml(camera_info, f"{output_dir}/camera_info.yaml") + elif topic == "/tf" or topic == "/tf_static": + is_static = topic == "/tf_static" + tf_msg = deserialize_message(data, TFMessage()) + for transform_stamped in tf_msg.transforms: + if is_static: + tf_buffer.set_transform_static(transform_stamped, "default_authority") + else: + tf_buffer.set_transform(transform_stamped, "default_authority") + + image_timestamp_list = np.array(image_timestamp_list) + image_list = np.array(image_list) + + min_pose_t = df_pose["timestamp"].min() + max_pose_t = df_pose["timestamp"].max() + ok_image_timestamp = (min_pose_t < image_timestamp_list) * (image_timestamp_list < max_pose_t) + image_timestamp_list = image_timestamp_list[ok_image_timestamp] + image_list = image_list[ok_image_timestamp] + + df_pose = interpolate_pose_in_time(df_pose, image_timestamp_list) + + os.makedirs(f"{output_dir}/images", exist_ok=True) + for i, image in enumerate(image_list): + save_path = f"{output_dir}/images/{i:08d}.png" + image = image[0:crop_height] + cv2.imwrite(save_path, image) + + df_pose.to_csv(f"{output_dir}/pose.tsv", index=True, sep="\t", float_format="%.12f") + + # plot all of trajectory + save_path = f"{output_dir}/plot_pose.png" + plt.plot(df_pose["x"], df_pose["y"]) + plt.xlabel("x") + plt.ylabel("y") + plt.axis("equal") + plt.savefig(save_path, bbox_inches="tight", pad_inches=0.05) + plt.close() + print(f"Saved to {save_path}") + + # generate camera meta + generate_cams_meta(output_dir) diff --git a/localization/nerf_based_localizer/training_tool/script/regression_test.sh b/localization/nerf_based_localizer/training_tool/script/regression_test.sh new file mode 100755 index 0000000000000..d36c8513d777d --- /dev/null +++ b/localization/nerf_based_localizer/training_tool/script/regression_test.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +set -eux + +TRAIN_RESULT_DIR=$(readlink -f $1) +DATASET_PATH=$(readlink -f $2) +cd $(dirname $0) + +./build_and_exec_training.sh ${TRAIN_RESULT_DIR} ${DATASET_PATH} + +./build_and_exec_test.sh ${TRAIN_RESULT_DIR} ${DATASET_PATH} diff --git a/localization/nerf_based_localizer/training_tool/script/util_camera_info.py b/localization/nerf_based_localizer/training_tool/script/util_camera_info.py new file mode 100644 index 0000000000000..2f5ab0254dbbf --- /dev/null +++ b/localization/nerf_based_localizer/training_tool/script/util_camera_info.py @@ -0,0 +1,46 @@ +import numpy as np +import yaml + + +def camera_info_to_dict(camera_info): + return { + "header": { + "stamp": { + "sec": camera_info.header.stamp.sec, + "nanosec": camera_info.header.stamp.nanosec, + }, + "frame_id": camera_info.header.frame_id, + }, + 
"height": camera_info.height, + "width": camera_info.width, + "distortion_model": camera_info.distortion_model, + "D": camera_info.d.tolist(), + "K": camera_info.k.tolist(), + "R": camera_info.r.tolist(), + "P": camera_info.p.tolist(), + "binning_x": camera_info.binning_x, + "binning_y": camera_info.binning_y, + "roi": { + "x_offset": camera_info.roi.x_offset, + "y_offset": camera_info.roi.y_offset, + "height": camera_info.roi.height, + "width": camera_info.roi.width, + "do_rectify": camera_info.roi.do_rectify, + }, + } + + +def save_camera_info_to_yaml(camera_info, filename): + camera_info_dict = camera_info_to_dict(camera_info) + with open(filename, "w") as outfile: + yaml.dump(camera_info_dict, outfile) + + +def load_camera_info_from_yaml(filename): + with open(filename, "r") as input_file: + camera_info_dict = yaml.safe_load(input_file) + camera_info_dict["D"] = np.array(camera_info_dict["D"]) + camera_info_dict["K"] = np.array(camera_info_dict["K"]).reshape((3, 3)) + camera_info_dict["R"] = np.array(camera_info_dict["R"]).reshape((3, 3)) + camera_info_dict["P"] = np.array(camera_info_dict["P"]).reshape((3, 4)) + return camera_info_dict diff --git a/localization/nerf_based_localizer/training_tool/src/infer.cpp b/localization/nerf_based_localizer/training_tool/src/infer.cpp new file mode 100644 index 0000000000000..0d3b93fa418c6 --- /dev/null +++ b/localization/nerf_based_localizer/training_tool/src/infer.cpp @@ -0,0 +1,117 @@ +// Copyright 2023 Autoware Foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#include "../../src/nerf/localizer.hpp" +#include "../../src/nerf/stop_watch.hpp" +#include "../../src/nerf/utils.hpp" +#include "main_functions.hpp" + +#include +#include + +#include + +namespace fs = std::experimental::filesystem::v1; + +enum Dir { kUp, kUpRight, kRight, kDownRight, kDown, kDownLeft, kLeft, kUpLeft, kDirNum }; +constexpr int64_t kDx[kDirNum] = {0, 1, 1, 1, 0, -1, -1, -1}; +constexpr int64_t kDz[kDirNum] = {1, 1, 0, -1, -1, -1, 0, 1}; + +void infer(const std::string & train_result_dir, const std::string & dataset_dir) +{ + LocalizerParam param{}; + param.resize_factor = 32; + param.train_result_dir = train_result_dir; + Localizer core(param); + + constexpr int32_t iteration_num = 10; + + const std::string save_dir = train_result_dir + "/inference_result/"; + fs::create_directories(save_dir); + + Dataset dataset(dataset_dir); + + Timer timer, timer_local; + timer.start(); + + std::vector optimize_times; + + const float noise = 0.5f / core.radius(); + std::cout << "noise = " << noise << std::endl; + + for (int32_t i = 0; i < dataset.n_images; i++) { + std::cout << "\rTime " << static_cast(timer.elapsed_seconds()) << " " << i << "/" + << dataset.n_images << std::flush; + const std::string curr_dir = + (std::stringstream() << save_dir << "/" << std::setfill('0') << std::setw(4) << i << "/") + .str(); + fs::create_directories(curr_dir); + + torch::Tensor initial_pose = dataset.poses[i]; + torch::Tensor image_tensor = dataset.images[i]; + + image_tensor = utils::resize_image(image_tensor, core.infer_height(), core.infer_width()); + image_tensor = image_tensor.to(torch::kCUDA); + utils::write_image_tensor(curr_dir + "image_01_gt.png", image_tensor); + + std::ofstream ofs(curr_dir + "/position.tsv"); + ofs << std::fixed; + ofs << "name\tx\ty\tz\tscore" << std::endl; + auto output = [&](const std::string & name, const torch::Tensor & pose, float score) { + const torch::Tensor out = core.nerf2camera(pose); + ofs << name << "\t"; + ofs << out[0][3].item() << "\t"; + ofs << out[1][3].item() << "\t"; + ofs << out[2][3].item() << "\t"; + ofs << score << std::endl; + }; + + // Before noise + torch::Tensor nerf_image_before = core.render_image(initial_pose); + float score_before = utils::calc_loss(nerf_image_before, image_tensor); + utils::write_image_tensor(curr_dir + "image_02_before.png", nerf_image_before); + output("original", initial_pose, score_before); + + // Added noise + for (int32_t d = 0; d < kDirNum; d++) { + torch::Tensor curr_pose = initial_pose.clone(); + curr_pose[0][3] += noise * kDx[d]; + curr_pose[2][3] += noise * kDz[d]; + torch::Tensor nerf_image_noised = core.render_image(curr_pose); + float score_noised = utils::calc_loss(nerf_image_noised, image_tensor); + utils::write_image_tensor( + curr_dir + "image_03_noised" + std::to_string(d) + ".png", nerf_image_noised); + output("noised_" + std::to_string(d), curr_pose, score_noised); + + // Optimize + timer_local.start(); + std::vector optimized_poses = + core.optimize_pose_by_differential(curr_pose, image_tensor, iteration_num, 1e-4f); + optimize_times.push_back(timer_local.elapsed_seconds()); + for (int32_t itr = 0; itr < optimized_poses.size(); itr++) { + torch::Tensor optimized_pose = optimized_poses[itr]; + torch::Tensor nerf_image_after = core.render_image(optimized_pose); + float score_after = utils::calc_loss(nerf_image_after, image_tensor); + const std::string suffix = + (std::stringstream() << d << "_" << std::setfill('0') << std::setw(2) << itr).str(); + utils::write_image_tensor(curr_dir + "image_04_after_" 
+ suffix + ".png", nerf_image_after); + output("optimized_" + suffix, optimized_pose, score_after); + } + } + } + + torch::Tensor optimize_time_tensor = torch::tensor(optimize_times, torch::kFloat); + + std::cout << "\nAverage Time = " << optimize_time_tensor.mean().item() << " sec" + << std::endl; +} diff --git a/localization/nerf_based_localizer/training_tool/src/main.cpp b/localization/nerf_based_localizer/training_tool/src/main.cpp new file mode 100644 index 0000000000000..24903f054fbe5 --- /dev/null +++ b/localization/nerf_based_localizer/training_tool/src/main.cpp @@ -0,0 +1,48 @@ +// Copyright 2023 Autoware Foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "main_functions.hpp" +#include "train_manager.hpp" + +#include + +#include +#include + +int main(int argc, char * argv[]) +{ + torch::manual_seed(2022); + + if (argc < 2) { + std::cerr << "Please specify a train_result_dir path as command line argument." << std::endl; + std::cerr << "argc = " << argc << std::endl; + return 1; + } + + const std::string command = argv[1]; + const std::string train_result_dir = argv[2]; + const std::string dataset_dir = (argc >= 4 ? argv[3] : ""); + if (command == "train") { + auto train_manager = std::make_unique(train_result_dir, dataset_dir); + train_manager->train(); + } else if (command == "infer") { + infer(train_result_dir, dataset_dir); + } else if (command == "walk") { + walk(train_result_dir); + } else if (command == "test") { + test(train_result_dir, dataset_dir); + } else { + std::cerr << "Invalid command line argument : " << command << std::endl; + return 1; + } +} diff --git a/localization/nerf_based_localizer/training_tool/src/main_functions.hpp b/localization/nerf_based_localizer/training_tool/src/main_functions.hpp new file mode 100644 index 0000000000000..4e25b8c7e550b --- /dev/null +++ b/localization/nerf_based_localizer/training_tool/src/main_functions.hpp @@ -0,0 +1,23 @@ +// Copyright 2023 Autoware Foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
diff --git a/localization/nerf_based_localizer/training_tool/src/main_functions.hpp b/localization/nerf_based_localizer/training_tool/src/main_functions.hpp
new file mode 100644
index 0000000000000..4e25b8c7e550b
--- /dev/null
+++ b/localization/nerf_based_localizer/training_tool/src/main_functions.hpp
@@ -0,0 +1,23 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef MAIN_FUNCTIONS_HPP_
+#define MAIN_FUNCTIONS_HPP_
+
+#include <string>
+
+void walk(const std::string & train_result_dir);
+void test(const std::string & train_result_dir, const std::string & dataset_dir);
+void infer(const std::string & train_result_dir, const std::string & dataset_dir);
+
+#endif  // MAIN_FUNCTIONS_HPP_
diff --git a/localization/nerf_based_localizer/training_tool/src/test.cpp b/localization/nerf_based_localizer/training_tool/src/test.cpp
new file mode 100644
index 0000000000000..48a9b1ece5001
--- /dev/null
+++ b/localization/nerf_based_localizer/training_tool/src/test.cpp
@@ -0,0 +1,73 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "../../src/nerf/localizer.hpp"
+#include "../../src/nerf/stop_watch.hpp"
+#include "../../src/nerf/utils.hpp"
+#include "main_functions.hpp"
+
+#include <experimental/filesystem>
+#include <torch/torch.h>
+
+#include <fstream>
+#include <iomanip>
+#include <iostream>
+#include <sstream>
+
+namespace fs = std::experimental::filesystem::v1;
+
+void test(const std::string & train_result_dir, const std::string & dataset_dir)
+{
+  torch::NoGradGuard no_grad_guard;
+  LocalizerParam param;
+  param.train_result_dir = train_result_dir;
+  param.resize_factor = 8;
+  Localizer localizer(param);
+
+  Dataset dataset(dataset_dir);
+  const std::string save_dir = train_result_dir + "/test_result/";
+  fs::create_directories(save_dir);
+
+  Timer timer;
+
+  float score_sum = 0.0f;
+  float time_sum = 0.0f;
+
+  for (int32_t i = 0; i < dataset.n_images; i++) {
+    torch::Tensor initial_pose = dataset.poses[i];
+    torch::Tensor image_tensor = dataset.images[i];
+
+    image_tensor =
+      utils::resize_image(image_tensor, localizer.infer_height(), localizer.infer_width());
+
+    timer.start();
+    torch::Tensor nerf_image = localizer.render_image(initial_pose).cpu();
+    time_sum += timer.elapsed_seconds();
+    torch::Tensor diff = nerf_image - image_tensor;
+    torch::Tensor loss = (diff * diff).mean(-1).sum();
+    torch::Tensor score = (localizer.infer_height() * localizer.infer_width()) / (loss + 1e-6f);
+
+    std::cout << "\rscore[" << i << "] = " << score.item<float>() << std::flush;
+    score_sum += score.item<float>();
+
+    std::stringstream ss;
+    ss << save_dir << std::setfill('0') << std::setw(8) << i << ".png";
+    utils::write_image_tensor(ss.str(), nerf_image);
+  }
+
+  const float average_time = time_sum / dataset.n_images;
+  const float average_score = score_sum / dataset.n_images;
+
+  std::ofstream summary(train_result_dir + "/summary.tsv");
+  summary << std::fixed;
+  summary << "average_time\taverage_score" << std::endl;
+  summary << average_time << "\t" << average_score << std::endl;
+  std::cout << "\ntime = " << average_time << ", score = " << average_score << std::endl;
}
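The score written by test() is essentially an inverse per-image mean-squared error. With H and W the inference resolution,

  score = H * W / ( sum over pixels of mean-over-channels (nerf - gt)^2 + 1e-6 )

so the score grows as the render approaches the ground-truth image, and the 1e-6 term only guards against division by zero. The per-image render times and scores are averaged into summary.tsv.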
diff --git a/localization/nerf_based_localizer/training_tool/src/train_manager.cpp b/localization/nerf_based_localizer/training_tool/src/train_manager.cpp
new file mode 100644
index 0000000000000..2d387dd0709aa
--- /dev/null
+++ b/localization/nerf_based_localizer/training_tool/src/train_manager.cpp
@@ -0,0 +1,195 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/ExpRunner.cpp
+//
+// Created by ppwang on 2022/5/6.
+//
+
+#include "train_manager.hpp"
+
+#include "../../src/nerf/CustomOps/CustomOps.hpp"
+#include "../../src/nerf/stop_watch.hpp"
+#include "../../src/nerf/utils.hpp"
+
+#include <experimental/filesystem>
+#include <opencv2/core.hpp>
+
+#include <torch/torch.h>
+
+#include <cmath>
+#include <fstream>
+#include <iomanip>
+#include <iostream>
+#include <sstream>
+
+namespace fs = std::experimental::filesystem::v1;
+using Tensor = torch::Tensor;
+
+TrainManager::TrainManager(const std::string & train_result_dir, const std::string & dataset_dir)
+{
+  const std::string conf_path = train_result_dir + "/train_config.yaml";
+  fs::path p(conf_path);
+  fs::path canonical_path = fs::canonical(p);
+  const std::string path = canonical_path.string();
+  cv::FileStorage fs(path, cv::FileStorage::READ);
+  if (!fs.isOpened()) {
+    throw std::runtime_error("Failed to open " + conf_path);
+  }
+
+  train_result_dir_ = train_result_dir;
+  fs::create_directories(train_result_dir_);
+
+  const cv::FileNode train_config = fs["train"];
+  pts_batch_size_ = (int)train_config["pts_batch_size"];
+  end_iter_ = (int)train_config["end_iter"];
+  vis_freq_ = (int)train_config["vis_freq"];
+  ray_batch_size_ = (int)train_config["ray_batch_size"];
+  report_freq_ = (int)train_config["report_freq"];
+  save_freq_ = (int)train_config["save_freq"];
+  learning_rate_ = (float)train_config["learning_rate"];
+  learning_rate_alpha_ = (float)train_config["learning_rate_alpha"];
+  learning_rate_warm_up_end_iter_ = (int)train_config["learning_rate_warm_up_end_iter"];
+  var_loss_weight_ = (float)train_config["var_loss_weight"];
+  var_loss_start_ = (int)train_config["var_loss_start"];
+  var_loss_end_ = (int)train_config["var_loss_end"];
+
+  // Dataset
+  dataset_ = std::make_shared<Dataset>(dataset_dir);
+  dataset_->save_inference_params(train_result_dir_);
+
+  // Renderer
+  renderer_ = std::make_shared<Renderer>(dataset_->n_images);
+  renderer_->to(torch::kCUDA);
+
+  // Optimizer
+  optimizer_ = std::make_shared<torch::optim::Adam>(renderer_->optim_param_groups(learning_rate_));
+}
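+
+// The "train" section of <train_result_dir>/train_config.yaml supplies the
+// values read above. It is parsed with cv::FileStorage, i.e. OpenCV's YAML
+// dialect, which conventionally starts with a "%YAML:1.0" directive. A sketch
+// with purely illustrative values (the real settings are not part of this diff):
+//
+//   %YAML:1.0
+//   train:
+//     pts_batch_size: 65536
+//     end_iter: 100000
+//     vis_freq: 1000
+//     ray_batch_size: 8192
+//     report_freq: 100
+//     save_freq: 10000
+//     learning_rate: 0.01
+//     learning_rate_alpha: 0.1
+//     learning_rate_warm_up_end_iter: 1000
+//     var_loss_weight: 0.01
+//     var_loss_start: 10000
+//     var_loss_end: 50000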
+
+void TrainManager::train()
+{
+  std::ofstream ofs_log(train_result_dir_ + "/train_log.txt");
+
+  Timer timer;
+  timer.start();
+
+  float psnr_smooth = -1.0;
+  update_ada_params();
+
+  while (iter_step_ < end_iter_) {
+    constexpr float sampled_pts_per_ray = 512.f;
+    // Derive the ray batch from the point budget, rounded down to a multiple of 16
+    int cur_batch_size = int(pts_batch_size_ / sampled_pts_per_ray) >> 4 << 4;
+    auto [train_rays, gt_colors, emb_idx] = dataset_->sample_random_rays(cur_batch_size);
+
+    Tensor & rays_o = train_rays.origins;
+    Tensor & rays_d = train_rays.dirs;
+
+    auto render_result = renderer_->render(rays_o, rays_d, emb_idx, RunningMode::TRAIN);
+    Tensor pred_colors = render_result.colors.index({Slc(0, cur_batch_size)});
+    Tensor color_loss = torch::sqrt((pred_colors - gt_colors).square() + 1e-4f).mean();
+
+    Tensor sampled_weights = render_result.weights;
+    Tensor idx_start_end = render_result.idx_start_end;
+    Tensor sampled_var = CustomOps::WeightVar(sampled_weights, idx_start_end);
+    Tensor var_loss = (sampled_var + 1e-2).sqrt().mean();
+
+    // Ramp the variance-loss weight in linearly between var_loss_start_ and var_loss_end_
+    float var_loss_weight = 0.f;
+    if (iter_step_ > var_loss_end_) {
+      var_loss_weight = var_loss_weight_;
+    } else if (iter_step_ > var_loss_start_) {
+      var_loss_weight = float(iter_step_ - var_loss_start_) /
+                        float(var_loss_end_ - var_loss_start_) * var_loss_weight_;
+    }
+
+    Tensor loss = color_loss + var_loss * var_loss_weight;
+
+    float mse = (pred_colors - gt_colors).square().mean().item<float>();
+    float psnr = 20.f * std::log10(1 / std::sqrt(mse));
+    psnr_smooth = psnr_smooth < 0.f ? psnr : psnr * .1f + psnr_smooth * .9f;
+    CHECK(!std::isnan(pred_colors.mean().item<float>()));
+    CHECK(!std::isnan(gt_colors.mean().item<float>()));
+    CHECK(!std::isnan(mse));
+
+    // There can be some cases where the output colors have no grad due to the occupancy grid.
+    if (loss.requires_grad()) {
+      optimizer_->zero_grad();
+      loss.backward();
+      optimizer_->step();
+    }
+
+    iter_step_++;
+
+    if (iter_step_ % vis_freq_ == 0) {
+      torch::NoGradGuard no_grad_guard;
+
+      const int idx = 0;
+      const int H = dataset_->height;
+      const int W = dataset_->width;
+
+      auto [pred_colors, pred_depths] = renderer_->render_image(
+        dataset_->poses[idx], dataset_->intrinsics[idx], H, W, ray_batch_size_);
+
+      Tensor image = dataset_->images[idx].reshape({H, W, 3}).to(torch::kCPU);
+      pred_colors = pred_colors.to(torch::kCPU);
+      pred_depths = pred_depths.to(torch::kCPU);
+
+      Tensor concat_tensor = torch::cat({image, pred_colors, pred_depths}, 1);
+      fs::create_directories(train_result_dir_ + "/images");
+      std::stringstream ss;
+      ss << std::setw(8) << std::setfill('0') << iter_step_ << "_" << idx << ".png";
+      utils::write_image_tensor(train_result_dir_ + "/images/" + ss.str(), concat_tensor);
+    }
+
+    if (iter_step_ % save_freq_ == 0) {
+      fs::remove_all(train_result_dir_ + "/checkpoints/latest");
+      fs::create_directories(train_result_dir_ + "/checkpoints/latest");
+      torch::save(renderer_, train_result_dir_ + "/checkpoints/latest/renderer.pt");
+    }
+
+    if (iter_step_ % report_freq_ == 0) {
+      const int64_t total_sec = static_cast<int64_t>(timer.elapsed_seconds());
+      const int64_t total_m = total_sec / 60;
+      const int64_t total_s = total_sec % 60;
+      std::stringstream ss;
+      ss << std::fixed;
+      ss << "Time: " << std::setw(2) << std::setfill('0') << total_m << ":" << std::setw(2)
+         << std::setfill('0') << total_s << " ";
+      ss << "Iter: " << std::setw(6) << iter_step_ << " ";
+      ss << "PSNR: " << psnr_smooth << " ";
+      ss << "LOSS: " << color_loss.item<float>() << " ";
+      ss << "LR: " << optimizer_->param_groups()[0].options().get_lr();
+      const std::string log_str = ss.str();
+      std::cout << log_str << std::endl;
+      ofs_log << log_str << std::endl;
+    }
+    update_ada_params();
+  }
+
+  std::cout << "Train done" << std::endl;
+}
+
+void TrainManager::update_ada_params()
+{
+  // Update learning rate: linear warm-up followed by cosine decay down to
+  // learning_rate_alpha_ times the base rate
+  float lr_factor;
+  if (iter_step_ >= learning_rate_warm_up_end_iter_) {
+    float progress = float(iter_step_ - learning_rate_warm_up_end_iter_) /
+                     float(end_iter_ - learning_rate_warm_up_end_iter_);
+    lr_factor = (1.f - learning_rate_alpha_) * (std::cos(progress * float(M_PI)) * .5f + .5f) +
+                learning_rate_alpha_;
+  } else {
+    lr_factor = float(iter_step_) / float(learning_rate_warm_up_end_iter_);
+  }
+  float lr = learning_rate_ * lr_factor;
+  for (auto & g : optimizer_->param_groups()) {
+    g.options().set_lr(lr);
+  }
+}
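Two schedules govern the optimization above. The variance-loss weight ramps linearly from 0 to var_loss_weight between iterations var_loss_start and var_loss_end, and update_ada_params() applies a linear warm-up followed by cosine decay to the learning rate. Written out, with i the current iteration, w = learning_rate_warm_up_end_iter, E = end_iter, and a = learning_rate_alpha:

  lr(i) = learning_rate * i / w                                      for i < w
  lr(i) = learning_rate * ((1 - a) * (cos(pi * p) + 1) / 2 + a)      for i >= w, where p = (i - w) / (E - w)

so the rate falls smoothly from the base value at the end of warm-up to a times the base value at the final iteration.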
diff --git a/localization/nerf_based_localizer/training_tool/src/train_manager.hpp b/localization/nerf_based_localizer/training_tool/src/train_manager.hpp
new file mode 100644
index 0000000000000..f6a589c91ce26
--- /dev/null
+++ b/localization/nerf_based_localizer/training_tool/src/train_manager.hpp
@@ -0,0 +1,61 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/ExpRunner.h
+//
+// Created by ppwang on 2022/5/6.
+//
+
+#ifndef TRAIN_MANAGER_HPP_
+#define TRAIN_MANAGER_HPP_
+
+#include "../../src/nerf/dataset.hpp"
+#include "../../src/nerf/renderer.hpp"
+
+#include <torch/torch.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+class TrainManager
+{
+  using Tensor = torch::Tensor;
+
+public:
+  TrainManager(const std::string & train_result_dir, const std::string & dataset_dir);
+
+  void train();
+
+  void update_ada_params();
+
+  // data
+  std::string train_result_dir_;
+
+  unsigned iter_step_ = 0;
+  unsigned end_iter_;
+  unsigned report_freq_, vis_freq_, save_freq_;
+  unsigned pts_batch_size_, ray_batch_size_;
+  unsigned learning_rate_warm_up_end_iter_;
+
+  int var_loss_start_, var_loss_end_;
+  float learning_rate_, learning_rate_alpha_;
+  float var_loss_weight_;
+
+  std::shared_ptr<Dataset> dataset_;
+  std::shared_ptr<Renderer> renderer_;
+  std::shared_ptr<torch::optim::Adam> optimizer_;
+};
+
+#endif  // TRAIN_MANAGER_HPP_
+#include "../../src/nerf/localizer.hpp" +#include "../../src/nerf/utils.hpp" +#include "main_functions.hpp" + +#include +#include +#include +#include + +// Disable checking because of too many give errors +// cspell:disable + +int kbhit(void) +{ + struct termios oldt, newt; + int ch; + int oldf; + + tcgetattr(STDIN_FILENO, &oldt); + newt = oldt; + newt.c_lflag &= ~(ICANON | ECHO); + tcsetattr(STDIN_FILENO, TCSANOW, &newt); + oldf = fcntl(STDIN_FILENO, F_GETFL, 0); + fcntl(STDIN_FILENO, F_SETFL, oldf | O_NONBLOCK); + + ch = getchar(); + + tcsetattr(STDIN_FILENO, TCSANOW, &oldt); + fcntl(STDIN_FILENO, F_SETFL, oldf); + + if (ch != EOF) { + ungetc(ch, stdin); + return 1; + } + + return 0; +} + +torch::Tensor calc_rotation_tensor(float degree, Eigen::Vector3f axis) +{ + const float theta = degree * M_PI / 180.0; + Eigen::AngleAxisf a(theta, axis); + Eigen::Matrix3f rotation_matrix(a); + torch::Tensor result = + torch::from_blob(rotation_matrix.data(), {3, 3}).to(torch::kFloat32).cuda(); + return result; +} + +void render(Localizer & localizer, torch::Tensor pose) +{ + std::cout << "pose:\n" << pose << std::endl; + torch::Tensor pose_camera = localizer.camera2nerf(pose); + torch::Tensor image = localizer.render_image(pose_camera); + utils::write_image_tensor("image.png", image); + std::cout << "Move by WASD, E:Up, Q:Down, J:YAW+, K:PITCH+, L:YAW-, I:PITCH-, O:ROll+, U:ROll-" + << std::endl; +} + +void walk(const std::string & train_result_dir) +{ + torch::NoGradGuard no_grad_guard; + LocalizerParam param; + param.train_result_dir = train_result_dir; + param.resize_factor = 8; + Localizer localizer(param); + torch::Tensor pose = torch::eye(4).cuda(); + float step = 0.2; + constexpr float degree = 10.0; + + render(localizer, pose); + + while (1) { + if (kbhit()) { + const char pushed_key = getchar(); + printf("pushed '%c'\n", pushed_key); + torch::Tensor orientation = pose.index({Slc(0, 3), Slc(0, 3)}); + if (pushed_key == 'w') { + torch::Tensor tmp = torch::tensor({step, 0.0f, 0.0f}, torch::kFloat32).view({3, 1}).cuda(); + pose.index({Slc(0, 3), Slc(3, 4)}) += orientation.matmul(tmp); + } else if (pushed_key == 'a') { + torch::Tensor tmp = torch::tensor({0.0f, step, 0.0f}, torch::kFloat32).view({3, 1}).cuda(); + pose.index({Slc(0, 3), Slc(3, 4)}) += orientation.matmul(tmp); + } else if (pushed_key == 'd') { + torch::Tensor tmp = torch::tensor({0.0f, -step, 0.0f}, torch::kFloat32).view({3, 1}).cuda(); + pose.index({Slc(0, 3), Slc(3, 4)}) += orientation.matmul(tmp); + } else if (pushed_key == 's') { + torch::Tensor tmp = torch::tensor({-step, 0.0f, 0.0f}, torch::kFloat32).view({3, 1}).cuda(); + pose.index({Slc(0, 3), Slc(3, 4)}) += orientation.matmul(tmp); + } else if (pushed_key == 'e') { + torch::Tensor tmp = torch::tensor({0.0f, 0.0f, step}, torch::kFloat32).view({3, 1}).cuda(); + pose.index({Slc(0, 3), Slc(3, 4)}) += orientation.matmul(tmp); + } else if (pushed_key == 'q') { + torch::Tensor tmp = torch::tensor({0.0f, 0.0f, -step}, torch::kFloat32).view({3, 1}).cuda(); + pose.index({Slc(0, 3), Slc(3, 4)}) += orientation.matmul(tmp); + } else if (pushed_key == 'j') { + torch::Tensor rotation_matrix = calc_rotation_tensor(-degree, Eigen::Vector3f::UnitZ()); + orientation = torch::matmul(orientation, rotation_matrix); + pose.index({Slc(0, 3), Slc(0, 3)}) = orientation; + } else if (pushed_key == 'k') { + torch::Tensor rotation_matrix = calc_rotation_tensor(-degree, Eigen::Vector3f::UnitY()); + orientation = torch::matmul(orientation, rotation_matrix); + pose.index({Slc(0, 3), Slc(0, 3)}) = 
diff --git a/localization/pose_initializer/CMakeLists.txt b/localization/pose_initializer/CMakeLists.txt
index 324488bedebfc..fe3ed6afab8df 100644
--- a/localization/pose_initializer/CMakeLists.txt
+++ b/localization/pose_initializer/CMakeLists.txt
@@ -7,6 +7,7 @@ autoware_package()
 ament_auto_add_library(${PROJECT_NAME} SHARED
   src/pose_initializer/pose_initializer_core.cpp
   src/pose_initializer/gnss_module.cpp
+  src/pose_initializer/nerf_module.cpp
   src/pose_initializer/localization_module.cpp
   src/pose_initializer/stop_check_module.cpp
   src/pose_initializer/ekf_localization_trigger_module.cpp
diff --git a/localization/pose_initializer/config/pose_initializer.param.yaml b/localization/pose_initializer/config/pose_initializer.param.yaml
index 90ec78257e6cb..62ecaa7dd16a1 100644
--- a/localization/pose_initializer/config/pose_initializer.param.yaml
+++ b/localization/pose_initializer/config/pose_initializer.param.yaml
@@ -10,6 +10,7 @@
     gnss_enabled: $(var gnss_enabled)
     yabloc_enabled: $(var yabloc_enabled)
     ndt_enabled: $(var ndt_enabled)
+    nerf_enabled: $(var nerf_enabled)
     stop_check_enabled: $(var stop_check_enabled)

     # from gnss
diff --git a/localization/pose_initializer/launch/pose_initializer.launch.xml b/localization/pose_initializer/launch/pose_initializer.launch.xml
index 2ffdebf39c474..4e8677cd0e998 100644
--- a/localization/pose_initializer/launch/pose_initializer.launch.xml
+++ b/localization/pose_initializer/launch/pose_initializer.launch.xml
@@ -13,6 +13,7 @@
+  <arg name="nerf_enabled" default="false"/>
diff --git a/localization/pose_initializer/src/pose_initializer/nerf_module.cpp b/localization/pose_initializer/src/pose_initializer/nerf_module.cpp
new file mode 100644
index 0000000000000..c5e6d37abe1f0
--- /dev/null
+++ b/localization/pose_initializer/src/pose_initializer/nerf_module.cpp
@@ -0,0 +1,50 @@
+// Copyright 2022 The Autoware Contributors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "nerf_module.hpp"
+
+#include <component_interface_specs/localization.hpp>
+#include <component_interface_utils/rclcpp/exceptions.hpp>
+
+#include <memory>
+
+using ServiceException = component_interface_utils::ServiceException;
+using Initialize = localization_interface::Initialize;
+using PoseWithCovarianceStamped = geometry_msgs::msg::PoseWithCovarianceStamped;
+
+NeRFModule::NeRFModule(rclcpp::Node * node) : logger_(node->get_logger())
+{
+  cli_align_ = node->create_client<RequestPoseAlignment>("nerf_align");
+}
+
+PoseWithCovarianceStamped NeRFModule::align_pose(const PoseWithCovarianceStamped & pose)
+{
+  const auto req = std::make_shared<RequestPoseAlignment::Request>();
+  req->pose_with_covariance = pose;
+
+  if (!cli_align_->service_is_ready()) {
+    throw component_interface_utils::ServiceUnready("NeRF align server is not ready.");
+  }
+
+  RCLCPP_INFO(logger_, "Call NeRF align server.");
+  const auto res = cli_align_->async_send_request(req).get();
+  if (!res->success) {
+    throw ServiceException(
+      Initialize::Service::Response::ERROR_ESTIMATION, "NeRF align server failed.");
+  }
+  RCLCPP_INFO(logger_, "NeRF align server succeeded.");
+
+  // The caller overwrites the covariance with output_pose_covariance_ afterwards.
+  return res->pose_with_covariance;
+}
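For manual testing, the alignment service can be exercised directly. Its type is tier4_localization_msgs/srv/PoseWithCovarianceStamped; the fully qualified service name depends on launch-time namespacing and remapping, so the name below is illustrative:

  ros2 service call /localization/pose_estimator/nerf_align \
    tier4_localization_msgs/srv/PoseWithCovarianceStamped \
    "{pose_with_covariance: {header: {frame_id: map}}}"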
diff --git a/localization/pose_initializer/src/pose_initializer/nerf_module.hpp b/localization/pose_initializer/src/pose_initializer/nerf_module.hpp
new file mode 100644
index 0000000000000..18142a0b23fab
--- /dev/null
+++ b/localization/pose_initializer/src/pose_initializer/nerf_module.hpp
@@ -0,0 +1,38 @@
+// Copyright 2022 The Autoware Contributors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef POSE_INITIALIZER__NERF_MODULE_HPP_
+#define POSE_INITIALIZER__NERF_MODULE_HPP_
+
+#include <rclcpp/rclcpp.hpp>
+
+#include <geometry_msgs/msg/pose_with_covariance_stamped.hpp>
+#include <tier4_localization_msgs/srv/pose_with_covariance_stamped.hpp>
+
+class NeRFModule
+{
+private:
+  using PoseWithCovarianceStamped = geometry_msgs::msg::PoseWithCovarianceStamped;
+  using RequestPoseAlignment = tier4_localization_msgs::srv::PoseWithCovarianceStamped;
+
+public:
+  explicit NeRFModule(rclcpp::Node * node);
+  PoseWithCovarianceStamped align_pose(const PoseWithCovarianceStamped & pose);
+
+private:
+  rclcpp::Logger logger_;
+  rclcpp::Client<RequestPoseAlignment>::SharedPtr cli_align_;
+};
+
+#endif  // POSE_INITIALIZER__NERF_MODULE_HPP_
diff --git a/localization/pose_initializer/src/pose_initializer/pose_initializer_core.cpp b/localization/pose_initializer/src/pose_initializer/pose_initializer_core.cpp
index 6e023a1309c2d..e7f8d01f2c0c3 100644
--- a/localization/pose_initializer/src/pose_initializer/pose_initializer_core.cpp
+++ b/localization/pose_initializer/src/pose_initializer/pose_initializer_core.cpp
@@ -19,6 +19,7 @@
 #include "gnss_module.hpp"
 #include "localization_module.hpp"
 #include "ndt_localization_trigger_module.hpp"
+#include "nerf_module.hpp"
 #include "stop_check_module.hpp"

 #include <memory>
@@ -50,6 +51,9 @@
     ndt_ = std::make_unique<LocalizationModule>(this, "ndt_align");
     ndt_localization_trigger_ = std::make_unique<NdtLocalizationTriggerModule>(this);
   }
+  if (declare_parameter<bool>("nerf_enabled", false)) {
+    nerf_ = std::make_unique<NeRFModule>(this);
+  }
   if (declare_parameter<bool>("stop_check_enabled")) {
     // Add 1.0 sec margin for twist buffer.
     stop_check_duration_ = declare_parameter<double>("stop_check_duration");
@@ -157,6 +161,8 @@ void PoseInitializer::on_initialize(
     // If both the NDT and YabLoc initializers are enabled, prioritize NDT as it offers a more
     // accurate pose.
     pose = yabloc_->align_pose(pose);
+  } else if (nerf_) {
+    pose = nerf_->align_pose(pose);
   }
   pose.pose.covariance = output_pose_covariance_;
   pub_reset_->publish(pose);
diff --git a/localization/pose_initializer/src/pose_initializer/pose_initializer_core.hpp b/localization/pose_initializer/src/pose_initializer/pose_initializer_core.hpp
index 0a9a0614f6d20..8acfd4b784f2a 100644
--- a/localization/pose_initializer/src/pose_initializer/pose_initializer_core.hpp
+++ b/localization/pose_initializer/src/pose_initializer/pose_initializer_core.hpp
@@ -29,6 +29,7 @@ class LocalizationModule;
 class GnssModule;
 class EkfLocalizationTriggerModule;
 class NdtLocalizationTriggerModule;
+class NeRFModule;

 class PoseInitializer : public rclcpp::Node
 {
@@ -49,6 +50,7 @@ class PoseInitializer : public rclcpp::Node
   std::array<double, 36> output_pose_covariance_{};
   std::array<double, 36> gnss_particle_covariance_{};
   std::unique_ptr<GnssModule> gnss_;
+  std::unique_ptr<NeRFModule> nerf_;
   std::unique_ptr<LocalizationModule> ndt_;
   std::unique_ptr<LocalizationModule> yabloc_;
   std::unique_ptr<StopCheckModule> stop_check_;