diff --git a/launch/tier4_localization_launch/launch/pose_twist_estimator/nerf_based_localizer.launch.xml b/launch/tier4_localization_launch/launch/pose_twist_estimator/nerf_based_localizer.launch.xml
new file mode 100644
index 0000000000000..a557b8b47db10
--- /dev/null
+++ b/launch/tier4_localization_launch/launch/pose_twist_estimator/nerf_based_localizer.launch.xml
@@ -0,0 +1,16 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/launch/tier4_localization_launch/launch/pose_twist_estimator/pose_twist_estimator.launch.xml b/launch/tier4_localization_launch/launch/pose_twist_estimator/pose_twist_estimator.launch.xml
index fa6bce0e38e55..05d0d1beae8e1 100644
--- a/launch/tier4_localization_launch/launch/pose_twist_estimator/pose_twist_estimator.launch.xml
+++ b/launch/tier4_localization_launch/launch/pose_twist_estimator/pose_twist_estimator.launch.xml
@@ -9,7 +9,7 @@
-
+
@@ -18,6 +18,7 @@
+
@@ -91,6 +92,12 @@
+
+
+
+
+
+
@@ -116,6 +123,7 @@
+
diff --git a/launch/tier4_localization_launch/package.xml b/launch/tier4_localization_launch/package.xml
index b3dc75bbf79cc..69c951db79b58 100644
--- a/launch/tier4_localization_launch/package.xml
+++ b/launch/tier4_localization_launch/package.xml
@@ -27,6 +27,7 @@
geo_pose_projector
gyro_odometer
ndt_scan_matcher
+ nerf_based_localizer
pose_estimator_arbiter
pose_initializer
pose_instability_detector
diff --git a/localization/nerf_based_localizer/CMakeLists.txt b/localization/nerf_based_localizer/CMakeLists.txt
new file mode 100644
index 0000000000000..5cabbb500575f
--- /dev/null
+++ b/localization/nerf_based_localizer/CMakeLists.txt
@@ -0,0 +1,51 @@
+cmake_minimum_required(VERSION 3.14)
+project(nerf_based_localizer)
+
+find_package(ament_cmake_auto REQUIRED)
+ament_auto_find_build_dependencies()
+
+find_package(CUDA)  # not REQUIRED: absence must fall through to the skip below
+find_package(CUDAToolkit)
+
+if(NOT CUDA_FOUND OR NOT CUDAToolkit_FOUND)
+  message(WARNING "cuda is not found, so the nerf_based_localizer package won't be built.")
+  return()
+endif()
+
+# libtorch
+set(CMAKE_PREFIX_PATH ${CMAKE_PREFIX_PATH} ${CMAKE_SOURCE_DIR}/external/libtorch/)
+find_package(Torch PATHS ${CMAKE_SOURCE_DIR}/external/libtorch NO_DEFAULT_PATH)
+
+# skip if libtorch is not found
+if(NOT Torch_FOUND)
+  message(WARNING "libtorch is not found, so the nerf_based_localizer package won't be built.")
+  return()
+endif()
+
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS} -w")
+link_directories(${CMAKE_SOURCE_DIR}/external/libtorch/lib/)
+include_directories(${TORCH_INCLUDE_DIRS})
+
+# OpenCV
+find_package(OpenCV REQUIRED)
+# (link_directories(${OpenCV_LIBRARIES}) removed: it expects directories, not library names)
+include_directories(${OpenCV_INCLUDE_DIRS})
+
+file(GLOB_RECURSE SRC_ALL
+  src/*.cpp
+  src/*.cu)
+
+ament_auto_add_executable(nerf_based_localizer ${SRC_ALL})
+ament_export_dependencies(CUDA)
+target_link_libraries(nerf_based_localizer
+  ${TORCH_LIBRARIES}
+  ${TORCH_CUDA_LIBRARIES}
+  ${OpenCV_LIBS}
+  stdc++fs
+)
+
+ament_auto_package(
+  INSTALL_TO_SHARE
+  launch
+  config
+)
diff --git a/localization/nerf_based_localizer/ORIGINAL_LICENSE b/localization/nerf_based_localizer/ORIGINAL_LICENSE
new file mode 100644
index 0000000000000..f2473c3b78569
--- /dev/null
+++ b/localization/nerf_based_localizer/ORIGINAL_LICENSE
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [2023] [Peng Wang]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/localization/nerf_based_localizer/README.md b/localization/nerf_based_localizer/README.md
new file mode 100644
index 0000000000000..1d47df61ffc92
--- /dev/null
+++ b/localization/nerf_based_localizer/README.md
@@ -0,0 +1,131 @@
+# NeRF Based Localizer
+
+NeRFBasedLocalizer is a vision-based localization package.
+
+![example_of_result](./doc_image/example_of_result.png)
+
+## Node diagram
+
+![node diagram](./doc_image/node_diagram.drawio.svg)
+
+## Inputs / Outputs
+
+### Input
+
+| Name | Type | Description |
+| :-------------- | :---------------------------------------------- | :------------------------------- |
+| `~/input/pose` | `geometry_msgs::msg::PoseWithCovarianceStamped` | EKF Pose without IMU correction. |
+| `~/input/image` | `sensor_msgs::msg::Image` | Camera Image |
+
+### Output
+
+| Name | Type | Description |
+| :------------------------------ | :---------------------------------------------- | :----------------------------- |
+| `~/output/pose` | `geometry_msgs::msg::PoseStamped` | estimated pose |
+| `~/output/pose_with_covariance` | `geometry_msgs::msg::PoseWithCovarianceStamped` | estimated pose with covariance |
+| `~/output/score` | `std_msgs::msg::Float32` | estimated score of nerf |
+| `~/output/image` | `sensor_msgs::msg::Image` | estimated image of nerf |
+
+## How to build
+
+Download libtorch and extract it to the `nerf_based_localizer/external` directory.
+
+For example,
+
+```bash
+cd nerf_based_localizer/external
+
+# v1.13.1
+wget https://download.pytorch.org/libtorch/cu117/libtorch-cxx11-abi-shared-with-deps-1.13.1%2Bcu117.zip
+unzip ./libtorch-cxx11-abi-shared-with-deps-1.13.1+cu117.zip
+
+# v2.1.2
+wget https://download.pytorch.org/libtorch/cu121/libtorch-cxx11-abi-shared-with-deps-2.1.2%2Bcu121.zip
+unzip ./libtorch-cxx11-abi-shared-with-deps-2.1.2+cu121.zip
+```
+
+If libtorch is prepared, `nerf_based_localizer` can be built as a normal package.
+Otherwise, building `nerf_based_localizer` will be skipped.
+
+## How to launch
+
+Set the train result directory to the parameter `train_result_dir` in the `nerf_based_localizer/config/nerf_based_localizer.param.yaml`.
+
+When launching Autoware, set `nerf` for `pose_source`.
+
+```bash
+ros2 launch autoware_launch ... \
+ pose_source:=nerf \
+ ...
+```
+
+For example, to run `logging_simulator`, the command is as follows.
+
+```bash
+ros2 launch autoware_launch logging_simulator.launch.xml \
+ map_path:=/path/to/map \
+ pose_source:=nerf \
+ vehicle_model:=sample_vehicle \
+ sensor_model:=awsim_sensor_kit \
+ perception:=false \
+ planning:=false \
+ control:=false
+```
+
+[This trained weights](https://drive.google.com/file/d/1w4hLw7aJ_o6OM8XCCXyNPZTGIy4ah9aZ/view?usp=sharing) and [this rosbag data](https://drive.google.com/file/d/1uMVwQQFcfs8JOqfoA1FqfH_fLPwQ71jK/view) can be used as sample data.
+
+## How to train
+
+### Prepare training data
+
+Use `prepare_data.py`.
+
+```bash
+python3 prepare_data.py /path/to/rosbag /path/to/prepared_data/
+```
+
+The rosbag must contain the following topics.
+
+| Topic name | Message type | Description |
+| :------------------------------------------------------------------- | :---------------------------------------------- | :------------------------------ |
+| `/tf_static` | `tf2_msgs::msg::TFMessage` | tf_static |
+| `/localization/pose_twist_fusion_filter/biased_pose_with_covariance` | `geometry_msgs::msg::PoseWithCovarianceStamped` | EKF Pose without IMU correction |
+| `/sensing/camera/traffic_light/image_raw` | `sensor_msgs::msg::Image` | Camera Image |
+| `/sensing/camera/traffic_light/camera_info` | `sensor_msgs::msg::CameraInfo` | Camera Info |
+
+For example, the following rosbag obtained from AWSIM can be used.
+
+
+
+### Execute training
+
+Use training_tool.
+
+```bash
+cd nerf_based_localizer/training_tool/script
+./build_and_exec_training.sh /path/to/result_dir/ /path/to/prepared_data/
+```
+
+Set the train result directory to the parameter `train_result_dir` in the `nerf_based_localizer/config/nerf_based_localizer.param.yaml`.
+
+## Principle
+
+[NeRF](https://www.matthewtancik.com/nerf), standing for Neural Radiance Fields, presents a novel approach to synthesize novel views of a scene by leveraging the power of neural networks. It was introduced with an aim to handle the challenges of view synthesis, which includes creating novel, previously unseen views of a 3D scene given a sparse set of input photographs.
+
+Training Phase: The model is trained with a set of 2D images of a 3D scene and their corresponding camera parameters (position and orientation). The neural network learns to predict the color and transparency of rays cast through the scene, effectively learning a representation of the 3D scene. The objective is to minimize the difference between the rendered images and the input images.
+
+Inference Phase: After training, NeRF synthesizes novel views of the scene by sampling and summing the colors of volumetric points along the rays cast from a new camera viewpoint. The synthesized images exhibit high-quality view synthesis even under significant viewpoint changes.
+
+### Application in Localization
+
+Implementing NeRF for localization involves utilizing the learned 3D scene representation to estimate the position and orientation of a camera (or observer) in the scene. By comparing the synthesized views and the actual camera view, the algorithm iteratively refines the estimated camera parameters to minimize the difference between the rendered and actual views.
+
+This approach unlocks the potential to achieve accurate and robust self-localization in various environments, allowing devices and robots to comprehend their position and orientation within a previously learned 3D space.
+
+## Acknowledgement
+
+The code for this package is based on [F2-NeRF](https://github.com/Totoro97/f2-nerf) with significant code changes.
+
+The license of F2-NeRF is Apache License 2.0 (see the LICENSE file in the F2-NeRF repository linked above).
+
+The license file is also copied to this directory as [ORIGINAL_LICENSE](./ORIGINAL_LICENSE).
diff --git a/localization/nerf_based_localizer/config/nerf_based_localizer.param.yaml b/localization/nerf_based_localizer/config/nerf_based_localizer.param.yaml
new file mode 100644
index 0000000000000..eedf165cac55a
--- /dev/null
+++ b/localization/nerf_based_localizer/config/nerf_based_localizer.param.yaml
@@ -0,0 +1,23 @@
+/**:
+ ros__parameters:
+ train_result_dir: "/path/to/train_result_dir"
+ optimization_mode: 1 # 0: random_search, 1: differential
+
+ # if optimization_mode is 0, the following parameters are used
+ particle_num: 50
+ render_pixel_num: 128
+ noise_position_x: 0.10 # [m]
+ noise_position_y: 0.10 # [m]
+ noise_position_z: 0.05 # [m]
+ noise_rotation_x: 0.05 # [deg]
+ noise_rotation_y: 0.05 # [deg]
+ noise_rotation_z: 0.10 # [deg]
+
+ # if optimization_mode is 1, the following parameters are used
+ iteration_num: 1
+ learning_rate: 0.0001
+
+ # The following parameters are used in both optimization_mode 0 and 1
+ output_covariance: 0.000225
+ resize_factor: 20
+ sample_num_per_ray: 1024
diff --git a/localization/nerf_based_localizer/doc_image/example_of_result.png b/localization/nerf_based_localizer/doc_image/example_of_result.png
new file mode 100644
index 0000000000000..0a29719e937d1
Binary files /dev/null and b/localization/nerf_based_localizer/doc_image/example_of_result.png differ
diff --git a/localization/nerf_based_localizer/doc_image/node_diagram.drawio.svg b/localization/nerf_based_localizer/doc_image/node_diagram.drawio.svg
new file mode 100644
index 0000000000000..e10d00335b71d
--- /dev/null
+++ b/localization/nerf_based_localizer/doc_image/node_diagram.drawio.svg
@@ -0,0 +1,63 @@
+
diff --git a/localization/nerf_based_localizer/external/.gitignore b/localization/nerf_based_localizer/external/.gitignore
new file mode 100644
index 0000000000000..d6b7ef32c8478
--- /dev/null
+++ b/localization/nerf_based_localizer/external/.gitignore
@@ -0,0 +1,2 @@
+*
+!.gitignore
diff --git a/localization/nerf_based_localizer/launch/nerf_based_localizer.launch.xml b/localization/nerf_based_localizer/launch/nerf_based_localizer.launch.xml
new file mode 100644
index 0000000000000..77551a5bbe7b0
--- /dev/null
+++ b/localization/nerf_based_localizer/launch/nerf_based_localizer.launch.xml
@@ -0,0 +1,29 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/localization/nerf_based_localizer/package.xml b/localization/nerf_based_localizer/package.xml
new file mode 100644
index 0000000000000..bf68bd690df8b
--- /dev/null
+++ b/localization/nerf_based_localizer/package.xml
@@ -0,0 +1,40 @@
+
+
+
+ nerf_based_localizer
+ 0.1.0
+ The nerf_based_localizer package
+ Shintaro Sakoda
+ Apache License 2.0
+ Shintaro Sakoda
+
+ ament_cmake_auto
+ autoware_cmake
+
+ autoware_map_msgs
+ autoware_universe_utils
+ diagnostic_msgs
+ fmt
+ geometry_msgs
+ libpcl-all-dev
+ nav_msgs
+ rclcpp
+ sensor_msgs
+ std_msgs
+ std_srvs
+ tf2
+ tf2_eigen
+ tf2_geometry_msgs
+ tf2_ros
+ tf2_sensor_msgs
+ tier4_debug_msgs
+ tier4_localization_msgs
+ visualization_msgs
+
+ ament_cmake_cppcheck
+ ament_lint_auto
+
+
+ ament_cmake
+
+
diff --git a/localization/nerf_based_localizer/src/main.cpp b/localization/nerf_based_localizer/src/main.cpp
new file mode 100644
index 0000000000000..ca9b12a47c0e5
--- /dev/null
+++ b/localization/nerf_based_localizer/src/main.cpp
@@ -0,0 +1,24 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "nerf_based_localizer.hpp"
+
+#include <rclcpp/rclcpp.hpp>
+
+int main(int argc, char * argv[])
+{
+  rclcpp::init(argc, argv);
+  std::shared_ptr<NerfBasedLocalizer> node = std::make_shared<NerfBasedLocalizer>();
+  rclcpp::spin(node);
+  rclcpp::shutdown();
+}
diff --git a/localization/nerf_based_localizer/src/nerf/CustomOps/CustomOps.cpp b/localization/nerf_based_localizer/src/nerf/CustomOps/CustomOps.cpp
new file mode 100644
index 0000000000000..a856542b6be3d
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/CustomOps/CustomOps.cpp
@@ -0,0 +1,38 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Utils/CustomOps/CustomOps.cpp
+//
+// Created by ppwang on 2022/10/5.
+//
+
+#include "CustomOps.hpp"
+
+namespace torch::autograd
+{
+
+// Forward pass of the "truncated exponential" activation: plain exp(input).
+// The raw input is saved for the backward pass.
+variable_list TruncExp::forward(AutogradContext * ctx, Tensor input)
+{
+  ctx->save_for_backward({input});
+  return {torch::exp(input)};
+}
+
+// Backward pass: d/dx exp(x) = exp(x), but x is clamped to [-100, 5] before
+// exponentiation so the returned gradient magnitude stays bounded
+// (this clamp is the "truncated" part of the op's name).
+variable_list TruncExp::backward(AutogradContext * ctx, variable_list grad_output)
+{
+  Tensor x = ctx->get_saved_variables()[0];
+  return {grad_output[0] * torch::exp(x.clamp(-100.f, 5.f))};
+}
+
+}  // namespace torch::autograd
diff --git a/localization/nerf_based_localizer/src/nerf/CustomOps/CustomOps.cu b/localization/nerf_based_localizer/src/nerf/CustomOps/CustomOps.cu
new file mode 100644
index 0000000000000..87e6b246b0686
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/CustomOps/CustomOps.cu
@@ -0,0 +1,135 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Utils/CustomOps/CustomOps.cu
+//
+// Created by ppwang on 2023/3/17.
+//
+
+#include "../common.hpp"
+#include "../common_cuda.hpp"
+#include "CustomOps.hpp"
+
+#define SCALE (16.f)
+
+using Tensor = torch::Tensor;
+
+// One thread per output entry `idx`. idx_start_end[idx*2] / [idx*2+1] delimit
+// the half-open span of `weights` belonging to that entry; the kernel writes
+// the weight-weighted variance of the normalized sample index (i / SCALE)
+// over that span into out_vars[idx].
+__global__ void WeightVarLossForwardKernel(
+  int n_outs, float * weights, int * idx_start_end, float * out_vars)
+{
+  int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (idx >= n_outs) return;  // grid may be rounded up past n_outs
+  int idx_start = idx_start_end[idx * 2];
+  int idx_end = idx_start_end[idx * 2 + 1];
+  if (idx_start >= idx_end) {
+    // Empty span: variance defined as zero.
+    out_vars[idx] = 0.f;
+    return;
+  }
+  float mean = 0.f;
+  float weight_sum = 1e-6f;  // epsilon guards the division below against zero weights
+  float len = SCALE;
+  for (int i = 0; i + idx_start < idx_end; i++) {
+    mean += weights[i + idx_start] * (float(i) / len);
+    weight_sum += weights[i + idx_start];
+  }
+  mean /= weight_sum;
+  // Second pass: accumulate weighted squared deviation from the mean.
+  // Note: this sum is intentionally NOT re-normalized by weight_sum
+  // (the backward kernel differentiates exactly this expression).
+  float variance = 0.f;
+  for (int i = 0; i + idx_start < idx_end; i++) {
+    float bias = float(i) / len - mean;
+    variance += weights[i + idx_start] * bias * bias;
+  }
+  out_vars[idx] = variance;
+}
+
+// Backward companion of WeightVarLossForwardKernel: given dL/dvariance per
+// output entry (dl_dvars), writes dL/dweight into dl_dw for every weight in
+// the entry's span. The first two loops recompute the mean exactly as in the
+// forward pass; the gradient then combines the direct bias^2 term with the
+// indirect effect each weight has on the variance through `mean` (via tmp).
+__global__ void WeightVarLossBackwardKernel(
+  int n_outs, float * weights, int * idx_start_end, float * dl_dvars, float * dl_dw)
+{
+  int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (idx >= n_outs) return;
+  int idx_start = idx_start_end[idx * 2];
+  int idx_end = idx_start_end[idx * 2 + 1];
+  if (idx_start >= idx_end) {
+    // Empty span: no weights to write gradients for.
+    return;
+  }
+  float mean = 0.f;
+  float weight_sum = 1e-6f;  // same epsilon as the forward pass
+  float len = SCALE;
+  for (int i = 0; i + idx_start < idx_end; i++) {
+    mean += weights[i + idx_start] * (float(i) / len);
+    weight_sum += weights[i + idx_start];
+  }
+  mean /= weight_sum;
+  float variance = 0.f;
+  float tmp = 0.f;  // accumulates sum of w_i * 2 * bias_i, used for the chain-rule term through mean
+  for (int i = 0; i + idx_start < idx_end; i++) {
+    float bias = float(i) / len - mean;
+    variance += weights[i + idx_start] * bias * bias;
+    tmp += weights[i + idx_start] * 2.f * bias;
+  }
+  for (int i = 0; i + idx_start < idx_end; i++) {
+    float bias = float(i) / len - mean;
+    // d(variance)/d(w_i) = bias_i^2 + (d mean/d w_i) * sum_j w_j * 2 * bias_j * (-1)
+    // where d mean/d w_i = (i/len) / weight_sum  -- NOTE(review): ignores mean's
+    // weight_sum dependence; presumably an intentional approximation, confirm upstream.
+    float grad = (bias * bias + tmp * -(float(i) / len) / weight_sum);
+    dl_dw[i + idx_start] = dl_dvars[idx] * grad;
+  }
+}
+
+namespace torch::autograd
+{
+
+// Custom autograd op wrapping the weight-variance CUDA kernels above.
+class WeightVarLoss : public Function<WeightVarLoss>
+{
+public:
+  static variable_list forward(AutogradContext * ctx, Tensor weights, Tensor idx_start_end)
+  {
+    CHECK(weights.is_contiguous());
+    CHECK(idx_start_end.is_contiguous());
+    int n_outs = idx_start_end.size(0);
+    Tensor out_vars = torch::empty({n_outs}, CUDAFloat);
+    dim3 grid_dim = LIN_GRID_DIM(n_outs);
+    dim3 block_dim = LIN_BLOCK_DIM;
+    WeightVarLossForwardKernel<<<grid_dim, block_dim>>>(
+      n_outs, weights.data_ptr<float>(), idx_start_end.data_ptr<int>(), out_vars.data_ptr<float>());
+    ctx->save_for_backward({weights, idx_start_end});
+    return {out_vars};
+  }
+
+  static variable_list backward(AutogradContext * ctx, variable_list grad_output)
+  {
+    Tensor dl_dvar = grad_output[0].contiguous();  // kernel requires contiguous memory
+    auto saved_tensors = ctx->get_saved_variables();
+    Tensor & weights = saved_tensors[0];
+    Tensor & idx_start_end = saved_tensors[1];
+
+    int n_outs = idx_start_end.size(0);
+    int n_all = weights.size(0);
+
+    Tensor dl_dw = torch::empty({n_all}, CUDAFloat);  // NOTE(review): entries outside every span stay uninitialized -- assumes spans cover all weights, confirm
+    dim3 grid_dim = LIN_GRID_DIM(n_outs);
+    dim3 block_dim = LIN_BLOCK_DIM;
+
+    WeightVarLossBackwardKernel<<<grid_dim, block_dim>>>(
+      n_outs, weights.data_ptr<float>(), idx_start_end.data_ptr<int>(), dl_dvar.data_ptr<float>(),
+      dl_dw.data_ptr<float>());
+
+    return {dl_dw, Tensor()};  // idx_start_end is an integer index tensor: no gradient
+  }
+};
+
+}  // namespace torch::autograd
+
+Tensor CustomOps::WeightVar(Tensor weights, Tensor idx_start_end)
+{
+  return torch::autograd::WeightVarLoss::apply(weights.contiguous(), idx_start_end.contiguous())[0];
+}
diff --git a/localization/nerf_based_localizer/src/nerf/CustomOps/CustomOps.hpp b/localization/nerf_based_localizer/src/nerf/CustomOps/CustomOps.hpp
new file mode 100644
index 0000000000000..e62aa9fd84bba
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/CustomOps/CustomOps.hpp
@@ -0,0 +1,46 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Utils/CustomOps/CustomOps.h
+//
+// Created by ppwang on 2022/10/5.
+//
+
+#ifndef NERF__CUSTOMOPS__CUSTOMOPS_HPP_
+#define NERF__CUSTOMOPS__CUSTOMOPS_HPP_
+
+#include <torch/torch.h>
+
+namespace torch::autograd
+{
+
+// Custom autograd function: exp(x) forward, exp(clamp(x, -100, 5)) in the
+// gradient (see CustomOps.cpp for the implementation).
+class TruncExp : public Function<TruncExp>
+{
+public:
+  static variable_list forward(AutogradContext * ctx, Tensor input);
+
+  static variable_list backward(AutogradContext * ctx, variable_list grad_output);
+};
+
+}  // namespace torch::autograd
+
+namespace CustomOps
+{
+
+// Per-span weighted variance of normalized sample indices (CUDA, differentiable).
+torch::Tensor WeightVar(torch::Tensor weights, torch::Tensor idx_start_end);
+
+}  // namespace CustomOps
+
+#endif  // NERF__CUSTOMOPS__CUSTOMOPS_HPP_
diff --git a/localization/nerf_based_localizer/src/nerf/CustomOps/FlexOps.cpp b/localization/nerf_based_localizer/src/nerf/CustomOps/FlexOps.cpp
new file mode 100644
index 0000000000000..8eaf748bc13ea
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/CustomOps/FlexOps.cpp
@@ -0,0 +1,21 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Utils/CustomOps/FlexOps.cpp
+//
+// Created by ppwang on 2023/2/11.
+//
+
+#include "FlexOps.hpp"
diff --git a/localization/nerf_based_localizer/src/nerf/CustomOps/FlexOps.cu b/localization/nerf_based_localizer/src/nerf/CustomOps/FlexOps.cu
new file mode 100644
index 0000000000000..2aee27d4b74aa
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/CustomOps/FlexOps.cu
@@ -0,0 +1,245 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Utils/CustomOps/FlexOps.cu
+
+#include "../common_cuda.hpp"
+#include "FlexOps.hpp"
+
+using Tensor = torch::Tensor;
+
+// One thread per output segment: sums val over [idx_start, idx_end), where the
+// segment bounds are read from idx_start_end (flattened [n_outs, 2] int array),
+// and writes the scalar result to sum[idx].
+__global__ void FlexSumForwardKernel(int n_outs, float * val, int * idx_start_end, float * sum)
+{
+  int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (idx >= n_outs) return;
+  int idx_start = idx_start_end[idx * 2];
+  int idx_end = idx_start_end[idx * 2 + 1];
+  float out_val = 0.f;
+  for (int i = idx_start; i < idx_end; i++) {
+    out_val += val[i];
+  }
+  sum[idx] = out_val;
+}
+
+// Backward of the segment sum: the gradient of a sum w.r.t. each summand is 1,
+// so dl_dsum[idx] is broadcast to every element of the segment.
+__global__ void FlexSumBackwardKernel(
+  int n_outs, float * dl_dsum, int * idx_start_end, float * dl_dval)
+{
+  int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (idx >= n_outs) return;
+  int idx_start = idx_start_end[idx * 2];
+  int idx_end = idx_start_end[idx * 2 + 1];
+  float fill_val = dl_dsum[idx];
+  for (int i = idx_start; i < idx_end; i++) {
+    dl_dval[i] = fill_val;
+  }
+}
+
+// Vector variant of FlexSumForwardKernel: val is [n_all, vec_size] row-major;
+// each thread sums its segment independently for every channel j.
+__global__ void FlexSumVecForwardKernel(
+  int n_outs, int vec_size, float * val, int * idx_start_end, float * sum)
+{
+  int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (idx >= n_outs) return;
+  int idx_start = idx_start_end[idx * 2];
+  int idx_end = idx_start_end[idx * 2 + 1];
+  for (int j = 0; j < vec_size; j++) {
+    float out_val = 0.f;
+    for (int i = idx_start; i < idx_end; i++) {
+      out_val += val[i * vec_size + j];
+    }
+    sum[idx * vec_size + j] = out_val;
+  }
+}
+
+// Backward of the vector segment sum: broadcast each channel's upstream gradient
+// dl_dsum[idx, j] to all elements of the segment in that channel.
+__global__ void FlexSumVecBackwardKernel(
+  int n_outs, int vec_size, float * dl_dsum, int * idx_start_end, float * dl_dval)
+{
+  int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (idx >= n_outs) return;
+  int idx_start = idx_start_end[idx * 2];
+  int idx_end = idx_start_end[idx * 2 + 1];
+  for (int j = 0; j < vec_size; j++) {
+    float fill_val = dl_dsum[idx * vec_size + j];
+    for (int i = idx_start; i < idx_end; i++) {
+      dl_dval[i * vec_size + j] = fill_val;
+    }
+  }
+}
+
+// Per-segment running (prefix) sum, written per input element: sum[i] holds the
+// cumulative sum of val within the segment. include_this selects an inclusive
+// scan (add val[i] before storing) vs. an exclusive scan (store, then add).
+__global__ void FlexAccumulateSumForwardKernel(
+  int n_outs, bool include_this, float * val, int * idx_start_end, float * sum)
+{
+  int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (idx >= n_outs) return;
+  int idx_start = idx_start_end[idx * 2];
+  int idx_end = idx_start_end[idx * 2 + 1];
+  float out_val = 0.f;
+  if (include_this) {
+    for (int i = idx_start; i < idx_end; i++) {
+      out_val += val[i];
+      sum[i] = out_val;
+    }
+  } else {
+    for (int i = idx_start; i < idx_end; i++) {
+      sum[i] = out_val;
+      out_val += val[i];
+    }
+  }
+}
+
+// Backward of the per-segment prefix sum: val[i] contributes to every later
+// (inclusive: and its own) output in the segment, so its gradient is the suffix
+// sum of dl_dsum, accumulated by walking the segment backwards.
+__global__ void FlexAccumulateSumBackwardKernel(
+  int n_outs, bool include_this, float * dl_dsum, int * idx_start_end, float * dl_dval)
+{
+  int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (idx >= n_outs) return;
+  int idx_start = idx_start_end[idx * 2];
+  int idx_end = idx_start_end[idx * 2 + 1];
+  float wp = 0.f;
+  if (include_this) {
+    for (int i = idx_end - 1; i >= idx_start; i--) {
+      wp += dl_dsum[i];
+      dl_dval[i] = wp;
+    }
+  } else {
+    for (int i = idx_end - 1; i >= idx_start; i--) {
+      dl_dval[i] = wp;
+      wp += dl_dsum[i];
+    }
+  }
+}
+
+namespace torch::autograd
+{
+
+// Autograd wrapper for the segment-sum kernels. The HTML-stripped template
+// arguments (Function<...>, data_ptr<...>, <<<...>>> launch configs) are restored.
+class FlexSum : public Function<FlexSum>
+{
+public:
+  static variable_list forward(AutogradContext * ctx, Tensor val, Tensor idx_start_end)
+  {
+    CHECK(val.is_contiguous());
+    CHECK(idx_start_end.is_contiguous());
+    int n_outs = idx_start_end.size(0);
+    Tensor sum;
+    dim3 grid_dim = LIN_GRID_DIM(n_outs);
+    dim3 block_dim = LIN_BLOCK_DIM;
+
+    if (val.sizes().size() == 1) {
+      sum = torch::empty({n_outs}, CUDAFloat);
+      FlexSumForwardKernel<<<grid_dim, block_dim>>>(
+        n_outs, val.data_ptr<float>(), idx_start_end.data_ptr<int>(), sum.data_ptr<float>());
+    } else {
+      int vec_size = val.size(1);
+      sum = torch::empty({n_outs, vec_size}, CUDAFloat);
+      FlexSumVecForwardKernel<<<grid_dim, block_dim>>>(
+        n_outs, vec_size, val.data_ptr<float>(), idx_start_end.data_ptr<int>(),
+        sum.data_ptr<float>());
+    }
+    ctx->save_for_backward({val, idx_start_end});
+    return {sum};
+  }
+
+  static variable_list backward(AutogradContext * ctx, variable_list grad_output)
+  {
+    Tensor dl_dsum = grad_output[0].contiguous();
+    auto saved_tensors = ctx->get_saved_variables();
+    Tensor & val = saved_tensors[0];
+    Tensor & idx_start_end = saved_tensors[1];
+    int n_outs = idx_start_end.size(0);
+    int n_all = val.size(0);
+
+    Tensor dl_dval;
+    dim3 grid_dim = LIN_GRID_DIM(n_outs);
+    dim3 block_dim = LIN_BLOCK_DIM;
+
+    if (val.sizes().size() == 1) {
+      dl_dval = torch::empty({n_all}, CUDAFloat);
+      FlexSumBackwardKernel<<<grid_dim, block_dim>>>(
+        n_outs, dl_dsum.data_ptr<float>(), idx_start_end.data_ptr<int>(),
+        dl_dval.data_ptr<float>());
+
+    } else {
+      int vec_size = val.size(1);
+      dl_dval = torch::empty({n_all, vec_size}, CUDAFloat);
+      FlexSumVecBackwardKernel<<<grid_dim, block_dim>>>(
+        n_outs, vec_size, dl_dsum.data_ptr<float>(), idx_start_end.data_ptr<int>(),
+        dl_dval.data_ptr<float>());
+    }
+    // idx_start_end receives no gradient.
+    return {dl_dval, Tensor()};
+  }
+};
+
+// Autograd wrapper for the per-segment prefix-sum kernels. include_this is passed
+// as an IValue because Function::apply only forwards autograd-compatible types.
+class FlexAccumulateSum : public Function<FlexAccumulateSum>
+{
+public:
+  static variable_list forward(
+    AutogradContext * ctx, Tensor val, Tensor idx_start_end, torch::IValue include_this_ivalue)
+  {
+    CHECK(val.is_contiguous());
+    CHECK(idx_start_end.is_contiguous());
+    bool include_this = include_this_ivalue.toBool();
+    int n_all = val.size(0);
+    int n_outs = idx_start_end.size(0);
+    Tensor sum = torch::empty({n_all}, CUDAFloat);
+    dim3 grid_dim = LIN_GRID_DIM(n_outs);
+    dim3 block_dim = LIN_BLOCK_DIM;
+
+    FlexAccumulateSumForwardKernel<<<grid_dim, block_dim>>>(
+      n_outs, include_this, val.data_ptr<float>(), idx_start_end.data_ptr<int>(),
+      sum.data_ptr<float>());
+
+    ctx->save_for_backward({val, idx_start_end});
+    ctx->saved_data["include_this"] = include_this_ivalue;
+    return {sum};
+  }
+
+  static variable_list backward(AutogradContext * ctx, variable_list grad_output)
+  {
+    Tensor dl_dsum = grad_output[0].contiguous();
+
+    auto saved_tensors = ctx->get_saved_variables();
+    bool include_this = ctx->saved_data["include_this"].toBool();
+    Tensor & val = saved_tensors[0];
+    Tensor & idx_start_end = saved_tensors[1];
+    int n_outs = idx_start_end.size(0);
+    int n_all = val.size(0);
+
+    Tensor dl_dval = torch::empty({n_all}, CUDAFloat);
+    dim3 grid_dim = LIN_GRID_DIM(n_outs);
+    dim3 block_dim = LIN_BLOCK_DIM;
+
+    FlexAccumulateSumBackwardKernel<<<grid_dim, block_dim>>>(
+      n_outs, include_this, dl_dsum.data_ptr<float>(), idx_start_end.data_ptr<int>(),
+      dl_dval.data_ptr<float>());
+
+    // No gradients for idx_start_end or the bool flag.
+    return {dl_dval, Tensor(), Tensor()};
+  }
+};
+
+} // namespace torch::autograd
+
+namespace FlexOps
+{
+
+// Segment-wise sum of `val` over the [start, end) ranges in `idx_start_end`.
+Tensor Sum(Tensor val, Tensor idx_start_end)
+{
+  Tensor val_c = val.contiguous();
+  Tensor ranges_c = idx_start_end.contiguous();
+  return torch::autograd::FlexSum::apply(val_c, ranges_c)[0];
+}
+
+// Segment-wise running sum; `include_this` picks inclusive vs. exclusive scan.
+Tensor AccumulateSum(Tensor val, Tensor idx_start_end, bool include_this)
+{
+  Tensor val_c = val.contiguous();
+  Tensor ranges_c = idx_start_end.contiguous();
+  const torch::IValue flag(include_this);
+  return torch::autograd::FlexAccumulateSum::apply(val_c, ranges_c, flag)[0];
+}
+
+} // namespace FlexOps
diff --git a/localization/nerf_based_localizer/src/nerf/CustomOps/FlexOps.hpp b/localization/nerf_based_localizer/src/nerf/CustomOps/FlexOps.hpp
new file mode 100644
index 0000000000000..d8171d06d7798
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/CustomOps/FlexOps.hpp
@@ -0,0 +1,36 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Utils/CustomOps/FlexOps.h
+//
+// Created by ppwang on 2023/2/11.
+//
+
+#ifndef NERF__CUSTOMOPS__FLEXOPS_HPP_
+#define NERF__CUSTOMOPS__FLEXOPS_HPP_
+
+#include "../common.hpp"
+
+#include <torch/torch.h>
+
+namespace FlexOps
+{
+
+// Segment-wise sum / running sum over [start, end) ranges given in idx_start_end.
+torch::Tensor Sum(torch::Tensor val, torch::Tensor idx_start_end);
+torch::Tensor AccumulateSum(torch::Tensor val, torch::Tensor idx_start_end, bool include_this);
+
+} // namespace FlexOps
+
+#endif  // NERF__CUSTOMOPS__FLEXOPS_HPP_
diff --git a/localization/nerf_based_localizer/src/nerf/CustomOps/Scatter.cpp b/localization/nerf_based_localizer/src/nerf/CustomOps/Scatter.cpp
new file mode 100644
index 0000000000000..5af970d6b98f9
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/CustomOps/Scatter.cpp
@@ -0,0 +1,21 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Utils/CustomOps/Scatter.cpp
+//
+// Created by ppwang on 2023/3/27.
+//
+
+#include "Scatter.hpp"
diff --git a/localization/nerf_based_localizer/src/nerf/CustomOps/Scatter.cu b/localization/nerf_based_localizer/src/nerf/CustomOps/Scatter.cu
new file mode 100644
index 0000000000000..05eba608c858d
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/CustomOps/Scatter.cu
@@ -0,0 +1,149 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Utils/CustomOps/Scatter.cu
+//
+// Created by ppwang on 2023/3/27.
+//
+
+#include "../common_cuda.hpp"
+#include "Scatter.hpp"
+
+using Tensor = torch::Tensor;
+
+// Adds the embedding row selected by scatter_idx[pts_idx] into to_add, one thread
+// per (point, channel) pair; the channel index comes from blockIdx.y.
+__global__ void ScatterAddFuncForward(
+  int n_all, int n_channels, float * emb, int * scatter_idx, float * to_add)
+{
+  int pts_idx = blockIdx.x * blockDim.x + threadIdx.x;
+  int c_idx = blockIdx.y;
+  if (pts_idx >= n_all || c_idx >= n_channels) return;
+  int emb_idx = scatter_idx[pts_idx];
+  // Advance both pointers to the (pts_idx, c_idx) / (emb_idx, c_idx) elements.
+  to_add = to_add + pts_idx * n_channels + c_idx;
+  emb = emb + emb_idx * n_channels + c_idx;
+  to_add[0] += emb[0];
+}
+
+// Backward helper: each thread scans one block of `block_size` points and, for the
+// points whose scatter index equals emb_idx (blockIdx.y), accumulates dl_dsum into
+// a per-(embedding, block) slot of dl_demb_pool. The per-block partial sums are
+// presumably reduced afterwards by the caller — confirm at the call site.
+__global__ void ScatterAddFuncBackwardBlock(
+  int n_emb, int n_blocks, int n_all, int block_size, int n_channels, float * dl_demb_pool,
+  int * scatter_idx, float * dl_dsum)
+{
+  int block_idx = blockIdx.x * blockDim.x + threadIdx.x;
+  int emb_idx = blockIdx.y;
+  if (emb_idx >= n_emb || block_idx >= n_blocks || block_idx * block_size >= n_all) return;
+  scatter_idx = scatter_idx + block_idx * block_size;
+  dl_dsum = dl_dsum + (block_idx * block_size) * n_channels;
+  dl_demb_pool = dl_demb_pool + emb_idx * n_blocks * n_channels + block_idx * n_channels;
+  // Clamp the last block so we never read past n_all points.
+  int block_min = block_size;
+  if (block_idx * block_size + block_size > n_all) {
+    block_min = n_all - (block_idx * block_size);
+  }
+  for (int i = 0; i < block_min; i++) {
+    if (scatter_idx[i] == emb_idx) {
+      for (int c = 0; c < n_channels; c++) {
+        dl_demb_pool[c] += dl_dsum[i * n_channels + c];
+      }
+    }
+  }
+}
+
+namespace torch::autograd
+{
+
+// Autograd op: sum[p, c] = to_add[p, c] + emb[idx[p], c]. Stripped template
+// arguments and kernel launch configurations are restored.
+class ScatterAddFunc : public Function<ScatterAddFunc>
+{
+public:
+  static variable_list forward(AutogradContext * ctx, Tensor emb, Tensor idx, Tensor to_add)
+  {
+    emb = emb.contiguous();
+    idx = idx.contiguous();
+    int n_all = idx.size(0);
+    int n_channels = emb.size(1);
+    CHECK(n_all == to_add.size(0));
+    CHECK(n_channels == to_add.size(1));
+
+    const unsigned thread_cap = 512;
+    dim3 grid_dim = {unsigned(n_all + thread_cap - 1) / thread_cap, unsigned(n_channels), 1};
+    dim3 block_dim = {unsigned(thread_cap), 1, 1};
+
+    // Clone so the kernel can accumulate in place without touching the input.
+    Tensor sum = to_add.clone().contiguous();
+    ScatterAddFuncForward<<<grid_dim, block_dim>>>(
+      n_all, n_channels, emb.data_ptr<float>(), idx.data_ptr<int>(), sum.data_ptr<float>());
+    ctx->save_for_backward({emb, idx});
+    return {sum};
+  }
+
+  static variable_list backward(AutogradContext * ctx, variable_list grad_output)
+  {
+    Tensor dl_dsum = grad_output[0].contiguous();
+    auto saved_tensors = ctx->get_saved_variables();
+    Tensor & emb = saved_tensors[0];
+    Tensor & idx = saved_tensors[1];
+
+    int n_all = idx.size(0);
+    int n_channels = dl_dsum.size(1);
+
+    int n_emb = emb.size(0);
+    // Block size ~ sqrt(n), rounded down to a multiple of 32 (warp size).
+    int block_size = (int(std::sqrt(n_all + 1024)) >> 5) << 5;
+    int n_blocks = (n_all + block_size - 1) / block_size;
+
+    const unsigned thread_cap = 512;
+    dim3 grid_dim = {unsigned(n_blocks + thread_cap - 1) / thread_cap, unsigned(n_emb), 1};
+    dim3 block_dim = {unsigned(thread_cap), 1, 1};
+
+    Tensor dl_demb_pool = torch::zeros({n_emb, n_blocks, n_channels}, CUDAFloat);
+    ScatterAddFuncBackwardBlock<<<grid_dim, block_dim>>>(
+      n_emb, n_blocks, n_all, block_size, n_channels, dl_demb_pool.data_ptr<float>(),
+      idx.data_ptr<int>(), dl_dsum.data_ptr<float>());
+
+    // Reduce the per-block partial gradients; to_add's gradient is the identity.
+    Tensor dl_demb = torch::sum(dl_demb_pool, 1, false);
+    Tensor dl_dto_add = dl_dsum.clone();
+
+    return {dl_demb, Tensor(), dl_dto_add};
+  }
+};
+
+} // namespace torch::autograd
+
+// Public entry point: returns to_add + emb rows gathered by idx (autograd-aware).
+Tensor CustomOps::ScatterAdd(torch::Tensor emb, torch::Tensor idx, torch::Tensor to_add)
+{
+  return torch::autograd::ScatterAddFunc::apply(emb, idx, to_add)[0];
+}
+
+// One thread per ray: fills all_emb_idx[i] = emb_idx[ray] for every sample i in the
+// ray's [idx_start, idx_end) range. NOTE(review): "Kernal" is a typo for "Kernel",
+// kept as-is because the symbol has external linkage.
+__global__ void ScatterIdxKernal(int n_rays, int * idx_start_end, int * emb_idx, int * all_emb_idx)
+{
+  int ray_idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (ray_idx >= n_rays) return;
+  int idx_start = idx_start_end[ray_idx * 2];
+  int idx_end = idx_start_end[ray_idx * 2 + 1];
+
+  int fill = emb_idx[ray_idx];
+  for (int i = idx_start; i < idx_end; i++) {
+    all_emb_idx[i] = fill;
+  }
+}
+
+// Expands the per-ray embedding index to a per-sample index tensor of length
+// n_all_pts. Stripped data_ptr<int>() calls and the launch config are restored.
+Tensor CustomOps::ScatterIdx(int n_all_pts, Tensor idx_start_end, Tensor emb_idx)
+{
+  Tensor ret =
+    torch::empty({n_all_pts}, torch::TensorOptions().dtype(torch::kInt).device(torch::kCUDA));
+  int n_rays = idx_start_end.size(0);
+  dim3 grid_dim = LIN_GRID_DIM(n_rays);
+  dim3 block_dim = LIN_BLOCK_DIM;
+
+  ScatterIdxKernal<<<grid_dim, block_dim>>>(
+    n_rays, idx_start_end.data_ptr<int>(), emb_idx.data_ptr<int>(), ret.data_ptr<int>());
+
+  return ret;
+}
diff --git a/localization/nerf_based_localizer/src/nerf/CustomOps/Scatter.hpp b/localization/nerf_based_localizer/src/nerf/CustomOps/Scatter.hpp
new file mode 100644
index 0000000000000..fafc79b616db9
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/CustomOps/Scatter.hpp
@@ -0,0 +1,39 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Utils/CustomOps/Scatter.h
+//
+// Created by ppwang on 2023/3/27.
+//
+
+#ifndef NERF__CUSTOMOPS__SCATTER_HPP_
+#define NERF__CUSTOMOPS__SCATTER_HPP_
+
+#include "../common.hpp"
+
+#include <torch/torch.h>
+
+// NOTE(review): this empty class appears unused; kept only to preserve the original
+// layout — confirm before removing.
+class Scatter
+{
+};
+
+namespace CustomOps
+{
+
+// sum = to_add + emb[idx] (row gather + add, autograd-aware).
+torch::Tensor ScatterAdd(torch::Tensor emb, torch::Tensor idx, torch::Tensor to_add);
+// Expand per-ray emb_idx to a per-sample index tensor of length n_all_pts.
+torch::Tensor ScatterIdx(int n_all_pts, torch::Tensor idx_start_end, torch::Tensor emb_idx);
+} // namespace CustomOps
+
+#endif  // NERF__CUSTOMOPS__SCATTER_HPP_
diff --git a/localization/nerf_based_localizer/src/nerf/common.hpp b/localization/nerf_based_localizer/src/nerf/common.hpp
new file mode 100644
index 0000000000000..107be3081dac5
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/common.hpp
@@ -0,0 +1,29 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Common.h
+//
+// Created by ppwang on 2022/5/8.
+//
+
+#ifndef NERF__COMMON_HPP_
+#define NERF__COMMON_HPP_
+
+#include <torch/torch.h>
+
+// Shorthand for tensor slicing and the default CUDA float32 tensor options.
+using Slc = torch::indexing::Slice;
+const auto CUDAFloat = torch::TensorOptions().dtype(torch::kFloat32).device(torch::kCUDA);
+
+#endif  // NERF__COMMON_HPP_
diff --git a/localization/nerf_based_localizer/src/nerf/common_cuda.hpp b/localization/nerf_based_localizer/src/nerf/common_cuda.hpp
new file mode 100644
index 0000000000000..c80d4c458c39e
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/common_cuda.hpp
@@ -0,0 +1,39 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Common.h
+//
+// Created by ppwang on 2022/5/8.
+//
+
+#ifndef NERF__COMMON_CUDA_HPP_
+#define NERF__COMMON_CUDA_HPP_
+
+#include <cuda_runtime.h>
+
+// Ceiling division: number of y-sized groups needed to cover x elements.
+inline unsigned int DivUp(const unsigned int x, const unsigned int y)
+{
+  return (x + y - 1) / y;
+}
+
+// Default 1-D launch configuration: 512 threads per block.
+constexpr unsigned int THREAD_CAP = 512;
+constexpr dim3 LIN_BLOCK_DIM = {THREAD_CAP, 1, 1};
+
+inline dim3 LIN_GRID_DIM(const int x)
+{
+  return dim3{DivUp(x, THREAD_CAP), 1, 1};
+}
+
+#endif  // NERF__COMMON_CUDA_HPP_
diff --git a/localization/nerf_based_localizer/src/nerf/dataset.cpp b/localization/nerf_based_localizer/src/nerf/dataset.cpp
new file mode 100644
index 0000000000000..267426ec3d6c1
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/dataset.cpp
@@ -0,0 +1,195 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Dataset/Dataset.cpp
+//
+// Created by ppwang on 2022/5/7.
+//
+#include "dataset.hpp"
+
+#include "stop_watch.hpp"
+#include "utils.hpp"
+
+#include <glob.h>
+
+#include <experimental/filesystem>
+#include <fstream>
+
+#include <iostream>
+#include <sstream>
+
+using Tensor = torch::Tensor;
+
+namespace fs = std::experimental::filesystem::v1;
+
+// Loads camera metadata (pose/intrinsics/distortion) from cams_meta.tsv, normalizes
+// the scene so camera centers fit inside a unit sphere, then loads all images.
+// Stripped std::vector<...> element types and item<float>() are restored.
+Dataset::Dataset(const std::string & data_path)
+{
+  ScopeWatch dataset_watch("Dataset::Dataset");
+
+  std::cout << "data_path = " << data_path << std::endl;
+
+  // Load camera pose
+  CHECK(fs::exists(data_path + "/cams_meta.tsv"));
+  {
+    std::ifstream ifs(data_path + "/cams_meta.tsv");
+    std::string line;
+    std::getline(ifs, line);  // header
+    std::vector<Tensor> poses_vec, intrinsics_vec, dist_params_vec;
+    while (std::getline(ifs, line)) {
+      std::istringstream iss(line);
+      std::vector<std::string> tokens;
+      std::string token;
+      while (std::getline(iss, token, '\t')) {
+        tokens.push_back(token);
+      }
+      const int POSE_NUM = 12;       //(3, 4)
+      const int INTRINSIC_NUM = 9;   //(3, 3)
+      const int DISTORTION_NUM = 4;  //(k1, k2, p1, p2)
+      CHECK(tokens.size() == POSE_NUM + INTRINSIC_NUM + DISTORTION_NUM);
+      Tensor pose = torch::zeros({3, 4}, torch::kFloat32);
+      for (int i = 0; i < POSE_NUM; i++) {
+        pose.index_put_({i / 4, i % 4}, std::stof(tokens[i]));
+      }
+      pose = pose.reshape({3, 4});
+      poses_vec.push_back(pose);
+
+      Tensor intrinsic = torch::zeros({3, 3}, torch::kFloat32);
+      for (int i = 0; i < INTRINSIC_NUM; i++) {
+        intrinsic.index_put_({i / 3, i % 3}, std::stof(tokens[POSE_NUM + i]));
+      }
+      intrinsic = intrinsic.reshape({3, 3});
+      intrinsics_vec.push_back(intrinsic);
+
+      Tensor dist_param = torch::zeros({4}, torch::kFloat32);
+      for (int i = 0; i < DISTORTION_NUM; i++) {
+        dist_param.index_put_({i}, std::stof(tokens[POSE_NUM + INTRINSIC_NUM + i]));
+      }
+      dist_params_vec.push_back(dist_param);
+    }
+
+    n_images = poses_vec.size();
+    poses = torch::stack(poses_vec, 0).contiguous().to(torch::kCUDA);
+    intrinsics = torch::stack(intrinsics_vec, 0).contiguous().to(torch::kCUDA);
+    dist_params = torch::stack(dist_params_vec, 0).contiguous().to(torch::kCUDA);
+  }
+
+  // Normalize scene: recenter camera positions and scale by the max distance.
+  {
+    Tensor cam_pos = poses.index({Slc(), Slc(0, 3), 3}).clone();
+    center = cam_pos.mean(0, false);
+    Tensor bias = cam_pos - center.unsqueeze(0);
+    radius = torch::linalg_norm(bias, 2, -1, false).max().item<float>();
+    cam_pos = (cam_pos - center.unsqueeze(0)) / radius;
+    poses.index_put_({Slc(), Slc(0, 3), 3}, cam_pos);
+    poses = poses.contiguous();
+  }
+
+  std::vector<Tensor> images_vec;
+  // Load images (kept on CPU; rows are batched to CUDA on demand elsewhere).
+  {
+    ScopeWatch watch("LoadImages");
+    const std::vector<std::string> image_paths = glob_image_paths(data_path + "/images/");
+    for (int i = 0; i < n_images; i++) {
+      const std::string image_path = image_paths[i];
+      images_vec.push_back(utils::read_image_tensor(image_path).to(torch::kCPU));
+    }
+  }
+
+  std::cout << "Number of images: " << n_images << std::endl;
+
+  height = images_vec[0].size(0);
+  width = images_vec[0].size(1);
+  images = torch::stack(images_vec, 0).contiguous();
+}
+
+// Writes the parameters needed at inference time (image size, the first camera's
+// intrinsics, and the scene normalization) as YAML. item<float>() calls restored.
+void Dataset::save_inference_params(const std::string & train_result_dir) const
+{
+  std::ofstream ofs(train_result_dir + "/inference_params.yaml");
+  ofs << std::fixed;
+  ofs << "%YAML 1.2" << std::endl;
+  ofs << "---" << std::endl;
+  ofs << "n_images: " << n_images << std::endl;
+  ofs << "height: " << height << std::endl;
+  ofs << "width: " << width << std::endl;
+
+  ofs << "intrinsic: [";
+  ofs << intrinsics[0][0][0].item<float>() << ", ";
+  ofs << intrinsics[0][0][1].item<float>() << ", ";
+  ofs << intrinsics[0][0][2].item<float>() << "," << std::endl;
+  ofs << "            ";
+  ofs << intrinsics[0][1][0].item<float>() << ", ";
+  ofs << intrinsics[0][1][1].item<float>() << ", ";
+  ofs << intrinsics[0][1][2].item<float>() << "," << std::endl;
+  ofs << "            ";
+  ofs << intrinsics[0][2][0].item<float>() << ", ";
+  ofs << intrinsics[0][2][1].item<float>() << ", ";
+  ofs << intrinsics[0][2][2].item<float>() << "]" << std::endl;
+
+  ofs << "normalizing_center: [" << center[0].item<float>();
+  ofs << ", " << center[1].item<float>();
+  ofs << ", " << center[2].item<float>() << "]" << std::endl;
+  ofs << "normalizing_radius: " << radius << std::endl;
+}
+
+// Builds one ray per pixel of camera `idx`, in row-major (i, j) pixel order.
+Rays Dataset::get_all_rays_of_camera(int idx)
+{
+  Tensor pixel_rows = torch::linspace(0.f, height - 1.f, height, CUDAFloat);
+  Tensor pixel_cols = torch::linspace(0.f, width - 1.f, width, CUDAFloat);
+  auto grid = torch::meshgrid({pixel_rows, pixel_cols}, "ij");
+  Tensor flat_i = grid[0].reshape({-1});
+  Tensor flat_j = grid[1].reshape({-1});
+
+  auto [rays_o, rays_d] = get_rays_from_pose(
+    poses[idx].unsqueeze(0), intrinsics[idx].unsqueeze(0), torch::stack({flat_i, flat_j}, -1));
+  return {rays_o, rays_d};
+}
+
+// Samples `batch_size` random (camera, pixel) pairs and returns their rays, the
+// ground-truth colors, and the camera indices (kInt32, CUDA). The stripped tuple
+// template arguments and the {batch_size} shape braces are restored.
+std::tuple<Rays, Tensor, Tensor> Dataset::sample_random_rays(int batch_size)
+{
+  const auto CPULong = torch::TensorOptions().dtype(torch::kLong).device(torch::kCPU);
+  Tensor cam_indices = torch::randint(n_images, {batch_size}, CPULong);
+  Tensor i = torch::randint(0, height, {batch_size}, CPULong);
+  Tensor j = torch::randint(0, width, {batch_size}, CPULong);
+  Tensor ij = torch::stack({i, j}, -1).to(torch::kCUDA).contiguous();
+
+  // Gather ground-truth pixels with a flat index into the (N*H*W, 3) view.
+  Tensor gt_colors = images.view({-1, 3})
+                       .index({(cam_indices * height * width + i * width + j).to(torch::kLong)})
+                       .to(torch::kCUDA)
+                       .contiguous();
+  cam_indices = cam_indices.to(torch::kCUDA);
+  cam_indices = cam_indices.to(torch::kInt32);
+  ij = ij.to(torch::kInt32);
+
+  Tensor selected_poses = torch::index_select(poses, 0, cam_indices);
+  Tensor selected_intrinsics = torch::index_select(intrinsics, 0, cam_indices);
+  auto [rays_o, rays_d] = get_rays_from_pose(selected_poses, selected_intrinsics, ij);
+
+  return {{rays_o, rays_d}, gt_colors, cam_indices.to(torch::kInt32).contiguous()};
+}
+
+// Lists all "*.png" files directly under input_dir (expected to end with '/'),
+// sorted lexicographically, using POSIX glob(3). The stripped std::vector
+// element type is restored; NULL replaced by nullptr.
+std::vector<std::string> Dataset::glob_image_paths(const std::string & input_dir)
+{
+  glob_t buffer;
+  std::vector<std::string> files;
+  glob((input_dir + "*.png").c_str(), 0, nullptr, &buffer);
+  for (size_t i = 0; i < buffer.gl_pathc; i++) {
+    files.push_back(buffer.gl_pathv[i]);
+  }
+  globfree(&buffer);
+  std::sort(files.begin(), files.end());
+  return files;
+}
diff --git a/localization/nerf_based_localizer/src/nerf/dataset.hpp b/localization/nerf_based_localizer/src/nerf/dataset.hpp
new file mode 100644
index 0000000000000..7842a82befe4a
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/dataset.hpp
@@ -0,0 +1,55 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Dataset/Dataset.h
+//
+// Created by ppwang on 2022/5/7.
+//
+
+#ifndef NERF__DATASET_HPP_
+#define NERF__DATASET_HPP_
+
+#include "common.hpp"
+#include "rays.hpp"
+
+#include <torch/torch.h>
+
+#include <string>
+#include <tuple>
+#include <vector>
+
+// Holds the training dataset: camera poses/intrinsics (CUDA) and images (CPU),
+// with the scene normalized so camera centers lie in a unit sphere.
+struct Dataset
+{
+  using Tensor = torch::Tensor;
+
+public:
+  Dataset(const std::string & data_path);
+
+  void save_inference_params(const std::string & train_result_dir) const;
+
+  Rays get_all_rays_of_camera(int idx);
+
+  // Returns (rays, ground-truth colors, camera indices) for a random batch.
+  std::tuple<Rays, Tensor, Tensor> sample_random_rays(int batch_size);
+
+  static std::vector<std::string> glob_image_paths(const std::string & input_dir);
+
+  int n_images;
+  Tensor poses, images, intrinsics, dist_params;
+  Tensor center;  // scene normalization center
+  float radius;   // scene normalization radius
+  int height, width;
+};
+
+#endif  // NERF__DATASET_HPP_
diff --git a/localization/nerf_based_localizer/src/nerf/hash_3d_anchored.cpp b/localization/nerf_based_localizer/src/nerf/hash_3d_anchored.cpp
new file mode 100644
index 0000000000000..64ed1a69cc647
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/hash_3d_anchored.cpp
@@ -0,0 +1,130 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Field/Hash3DAnchored.cpp
+//
+// Created by ppwang on 2022/7/17.
+//
+
+#include "hash_3d_anchored.hpp"
+
+#include "common.hpp"
+#include "stop_watch.hpp"
+
+#include <torch/torch.h>
+
+using Tensor = torch::Tensor;
+
+// Registers Hash3DAnchoredInfo as a TorchScript custom class so it can be passed
+// through autograd as an IValue. Stripped template arguments restored.
+TORCH_LIBRARY(dec_hash3d_anchored, m)
+{
+  m.class_<Hash3DAnchoredInfo>("Hash3DAnchoredInfo").def(torch::init<>());
+}
+
+// Initializes the multi-level hash grid: a learnable feature pool, per-level random
+// prime hash coefficients and bias offsets, and the output MLP. Stripped
+// std::vector<int> and item<int>() are restored.
+Hash3DAnchored::Hash3DAnchored()
+{
+  pool_size_ = (1 << 19) * N_LEVELS;
+
+  // Feat pool, small random init around zero.
+  feat_pool_ = (torch::rand({pool_size_, N_CHANNELS}, CUDAFloat) * .2f - 1.f) * 1e-4f;
+  feat_pool_.requires_grad_(true);
+  CHECK(feat_pool_.is_contiguous());
+
+  // Get prime numbers (trial division is fine: only 3 * N_LEVELS draws).
+  auto is_prim = [](int x) {
+    for (int i = 2; i * i <= x; i++) {
+      if (x % i == 0) return false;
+    }
+    return true;
+  };
+
+  std::vector<int> prim_selected;
+  int min_local_prim = 1 << 28;
+  int max_local_prim = 1 << 30;
+
+  const auto CPUInt = torch::TensorOptions().dtype(torch::kInt).device(torch::kCPU);
+
+  for (int i = 0; i < 3 * N_LEVELS; i++) {
+    int val;
+    do {
+      val = torch::randint(min_local_prim, max_local_prim, {1}, CPUInt).item<int>();
+    } while (!is_prim(val));
+    prim_selected.push_back(val);
+  }
+
+  CHECK(prim_selected.size() == 3 * N_LEVELS);
+
+  prim_pool_ = torch::from_blob(prim_selected.data(), 3 * N_LEVELS, CPUInt).to(torch::kCUDA);
+  prim_pool_ = prim_pool_.reshape({N_LEVELS, 3}).contiguous();
+
+  bias_pool_ = (torch::rand({N_LEVELS, 3}, CUDAFloat) * 1000.f + 100.f).contiguous();
+
+  // Per-level pool size, rounded down to a multiple of 16.
+  local_size_ = pool_size_ / N_LEVELS;
+  local_size_ = (local_size_ >> 4) << 4;
+
+  // MLP
+  const int mlp_out_dim = 16;
+  mlp_ = torch::nn::Linear(N_LEVELS * N_CHANNELS, mlp_out_dim);
+
+  register_parameter("feat_pool", feat_pool_);
+  register_parameter("prim_pool", prim_pool_, false);  // not learnable
+  register_parameter("bias_pool", bias_pool_);
+  register_module("mlp", mlp_);
+}
+
+// Queries the hash grid at `points` ([N, 3]): points outside the unit sphere are
+// contracted toward it, features are looked up via the custom autograd function,
+// and the MLP maps them to the output. make_intrusive<Hash3DAnchoredInfo> restored.
+Tensor Hash3DAnchored::query(const Tensor & points)
+{
+#ifdef PROFILE
+  ScopeWatch watch(__func__);
+#endif
+
+  auto info = torch::make_intrusive<Hash3DAnchoredInfo>();
+  info->hash3d_ = this;
+
+  // Contract points with norm > radius to the shell [radius, radius + 1).
+  const float radius = 1.0f;
+  Tensor norm = points.norm(2, {1}, true);
+  Tensor mask = (norm <= radius);
+  Tensor x = points * mask + ~mask * (1 + radius - radius / norm) * points / norm;
+
+  Tensor feat =
+    torch::autograd::Hash3DAnchoredFunction::apply(x, feat_pool_, torch::IValue(info))[0];
+  Tensor output = mlp_->forward(feat);
+  return output;
+}
+
+// Builds Adam parameter groups: the feature pool without weight decay, and the MLP
+// weights with a small weight decay. Stripped template arguments restored.
+std::vector<torch::optim::OptimizerParamGroup> Hash3DAnchored::optim_param_groups(float lr)
+{
+  std::vector<torch::optim::OptimizerParamGroup> ret;
+
+  {
+    auto opt = std::make_unique<torch::optim::AdamOptions>(lr);
+    opt->betas() = {0.9, 0.99};
+    opt->eps() = 1e-15;
+
+    std::vector<Tensor> params = {feat_pool_};
+    ret.emplace_back(std::move(params), std::move(opt));
+  }
+
+  {
+    auto opt = std::make_unique<torch::optim::AdamOptions>(lr);
+    opt->betas() = {0.9, 0.99};
+    opt->eps() = 1e-15;
+    opt->weight_decay() = 1e-6;
+
+    std::vector<Tensor> params = mlp_->parameters();
+    ret.emplace_back(std::move(params), std::move(opt));
+  }
+
+  return ret;
+}
diff --git a/localization/nerf_based_localizer/src/nerf/hash_3d_anchored.cu b/localization/nerf_based_localizer/src/nerf/hash_3d_anchored.cu
new file mode 100644
index 0000000000000..00ef8c5619416
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/hash_3d_anchored.cu
@@ -0,0 +1,231 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Field/Hash3DAnchored.cu
+//
+// Created by ppwang on 2022/7/17.
+//
+#include "common.hpp"
+#include "common_cuda.hpp"
+#include "hash_3d_anchored.hpp"
+
+#include
+
+#include
+
+using Tensor = torch::Tensor;
+
+constexpr float RES_FINE_POW_2 = 10.f;
+constexpr float RES_BASE_POW_2 = 3.f;
+
+const auto CUDAFlex = torch::TensorOptions().dtype(torch::kFloat16).device(torch::kCUDA);
+using FlexType = __half;
+
+// For one hash-grid level, computes the 8 hashed pool indices of the corners
+// of the unit voxel containing `pt`, together with the trilinear
+// interpolation weights for those corners.
+// `prim_pool` holds three hashing multipliers per level; `local_size` is the
+// number of feature entries in this level's slice of the pool.
+__device__ inline void calculate_pos_and_w(
+  const Eigen::Vector3f & pt, const int local_size, const int * const prim_pool,
+  const int level_idx, unsigned pos[8], float w[8])
+{
+  const int offset = level_idx * 3;
+  const unsigned pa = prim_pool[offset + 0];
+  const unsigned pb = prim_pool[offset + 1];
+  const unsigned pc = prim_pool[offset + 2];
+
+  // Hash of an integer corner (x, y, z) is ((x*pa) ^ (y*pb) ^ (z*pc)) % local_size.
+  // Corners are enumerated with z varying fastest, then y, then x.
+  const unsigned pos_x = static_cast(floorf(pt[0]));
+  const unsigned pos_y = static_cast(floorf(pt[1]));
+  const unsigned pos_z = static_cast(floorf(pt[2]));
+  pos[0] = ((pos_x * pa) ^ (pos_y * pb) ^ (pos_z * pc)) % local_size;
+  pos[1] = ((pos_x * pa) ^ (pos_y * pb) ^ ((pos_z + 1u) * pc)) % local_size;
+  pos[2] = ((pos_x * pa) ^ ((pos_y + 1u) * pb) ^ (pos_z * pc)) % local_size;
+  pos[3] = ((pos_x * pa) ^ ((pos_y + 1u) * pb) ^ ((pos_z + 1u) * pc)) % local_size;
+  pos[4] = (((pos_x + 1u) * pa) ^ (pos_y * pb) ^ (pos_z * pc)) % local_size;
+  pos[5] = (((pos_x + 1u) * pa) ^ (pos_y * pb) ^ ((pos_z + 1u) * pc)) % local_size;
+  pos[6] = (((pos_x + 1u) * pa) ^ ((pos_y + 1u) * pb) ^ (pos_z * pc)) % local_size;
+  pos[7] = (((pos_x + 1u) * pa) ^ ((pos_y + 1u) * pb) ^ ((pos_z + 1u) * pc)) % local_size;
+
+  // Fractional position inside the voxel; standard trilinear weights, ordered
+  // to match the corner enumeration above.
+  const float a = pt[0] - floorf(pt[0]);
+  const float b = pt[1] - floorf(pt[1]);
+  const float c = pt[2] - floorf(pt[2]);
+
+  w[0] = (1.f - a) * (1.f - b) * (1.f - c);
+  w[1] = (1.f - a) * (1.f - b) * c;
+  w[2] = (1.f - a) * b * (1.f - c);
+  w[3] = (1.f - a) * b * c;
+  w[4] = a * (1.f - b) * (1.f - c);
+  w[5] = a * (1.f - b) * c;
+  w[6] = a * b * (1.f - c);
+  w[7] = a * b * c;
+}
+
+// Forward interpolation kernel.
+// Grid layout: x covers points (one thread each), y covers the N_LEVELS hash
+// levels. Each thread trilinearly interpolates N_CHANNELS features for its
+// (point, level) pair and writes them into that level's slice of `out_feat`
+// ([n_points, N_LEVELS * N_CHANNELS]).
+template
+__global__ void Hash3DAnchoredForwardKernel(
+  int n_points, int local_size, T * feat_pool, int * prim_pool, Eigen::Vector3f * bias_pool,
+  Eigen::Vector3f * points_ptr, T * out_feat)
+{
+  const int pts_idx = blockIdx.x * blockDim.x + threadIdx.x;
+  const int level_idx = blockIdx.y;
+  if (pts_idx >= n_points) {
+    return;
+  }
+  // Advance to this level's slice of the feature pool.
+  feat_pool = feat_pool + local_size * level_idx;
+
+  // Per-level grid resolution: log2-spaced between 2^RES_BASE_POW_2 and
+  // 2^RES_FINE_POW_2. The random per-level bias decorrelates the levels.
+  const float mul = exp2f(
+    (RES_FINE_POW_2 - RES_BASE_POW_2) * float(level_idx) / float(N_LEVELS - 1) + RES_BASE_POW_2);
+  const Eigen::Vector3f pt = (points_ptr[pts_idx] * mul + bias_pool[level_idx]);
+
+  float ws[8] = {};
+  unsigned pos[8] = {};
+  calculate_pos_and_w(pt, local_size, prim_pool, level_idx, pos, ws);
+
+  out_feat = out_feat + pts_idx * (N_LEVELS * N_CHANNELS);
+
+#pragma unroll
+  for (int k = 0; k < N_CHANNELS; k++) {
+    // Weighted sum over the 8 voxel corners (trilinear interpolation).
+    out_feat[level_idx * N_CHANNELS + k] = (T)(ws[0] * float(feat_pool[pos[0] * N_CHANNELS + k]) +
+                                               ws[1] * float(feat_pool[pos[1] * N_CHANNELS + k]) +
+                                               ws[2] * float(feat_pool[pos[2] * N_CHANNELS + k]) +
+                                               ws[3] * float(feat_pool[pos[3] * N_CHANNELS + k]) +
+                                               ws[4] * float(feat_pool[pos[4] * N_CHANNELS + k]) +
+                                               ws[5] * float(feat_pool[pos[5] * N_CHANNELS + k]) +
+                                               ws[6] * float(feat_pool[pos[6] * N_CHANNELS + k]) +
+                                               ws[7] * float(feat_pool[pos[7] * N_CHANNELS + k]));
+  }
+}
+
+// Backward kernel: mirrors the forward lookup and accumulates
+//  - grad_embeds: dL/d(feature pool), via __half2 atomic adds (two channels
+//    at a time; relies on N_CHANNELS being even — it is 2 here),
+//  - grad_points: dL/d(query point), atomically accumulated across levels.
+// NOTE(review): the point gradient uses only the SIGN pattern of each
+// corner's trilinear weight (not the exact weight derivative); this looks
+// like an approximation inherited from the upstream f2-nerf code — confirm.
+template
+__global__ void Hash3DAnchoredBackwardKernel(
+  int n_points, int local_size, T * feat_pool, int * prim_pool, Eigen::Vector3f * bias_pool,
+  Eigen::Vector3f * points_ptr,
+  T * grad_in, // [ n_points, n_levels, n_channels ]
+  T * grad_points, // [ n_points, 3 ]
+  T * grad_embeds // [ pool_size, n_channels ]
+)
+{
+  const int pts_idx = blockIdx.x * blockDim.x + threadIdx.x;
+  const int level_idx = blockIdx.y;
+  if (pts_idx >= n_points) {
+    return;
+  }
+  feat_pool = feat_pool + local_size * level_idx;
+
+  // Same per-level resolution and voxel/weight computation as the forward pass.
+  const float mul = exp2f(
+    (RES_FINE_POW_2 - RES_BASE_POW_2) * float(level_idx) / float(N_LEVELS - 1) + RES_BASE_POW_2);
+  const Eigen::Vector3f pt = (points_ptr[pts_idx] * mul + bias_pool[level_idx]);
+
+  float ws[8] = {};
+  unsigned pos[8] = {};
+  calculate_pos_and_w(pt, local_size, prim_pool, level_idx, pos, ws);
+
+  // Sign of d(weight)/d(axis) per corner, matching the corner enumeration in
+  // calculate_pos_and_w (z fastest, then y, then x).
+  const float sign_x[8] = {-1.0f, -1.0f, -1.0f, -1.0f, 1.0f, 1.0f, 1.0f, 1.0f};
+  const float sign_y[8] = {-1.0f, -1.0f, 1.0f, 1.0f, -1.0f, -1.0f, 1.0f, 1.0f};
+  const float sign_z[8] = {-1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f};
+
+  grad_in = grad_in + (N_LEVELS * N_CHANNELS) * pts_idx + level_idx * N_CHANNELS;
+
+  grad_points = grad_points + pts_idx * 3;
+  grad_embeds = grad_embeds + local_size * level_idx;
+
+#pragma unroll
+  for (int d = 0; d < 8; d++) {
+    for (int k = 0; k < N_CHANNELS; k += 2) {
+      float w0 = (float)grad_in[k];
+      float w1 = (float)grad_in[k + 1];
+      // Skip the atomic when the incoming gradient is exactly zero.
+      if (w0 != 0.f || w1 != 0.f) {
+        __half2 cur_w = {(__half)(float(w0) * ws[d]), (__half)(float(w1) * ws[d])};
+        atomicAdd((__half2 *)(grad_embeds + pos[d] * N_CHANNELS + k), cur_w);
+      }
+    }
+    for (int k = 0; k < N_CHANNELS; k++) {
+      const float norm = (float)(feat_pool[pos[d] * N_CHANNELS + k]) * mul * (float)grad_in[k];
+      atomicAdd(grad_points + 0, (T)(sign_x[d] * norm));
+      atomicAdd(grad_points + 1, (T)(sign_y[d] * norm));
+      atomicAdd(grad_points + 2, (T)(sign_z[d] * norm));
+    }
+  }
+}
+
+namespace torch::autograd
+{
+
+// Forward pass of the anchored hash-grid lookup.
+// `points`: [n_points, 3] (CUDA, contracted coordinates); `feat_pool`: the
+// learnable feature table. Returns a single fp32 tensor of interpolated
+// features, [n_points, N_LEVELS * N_CHANNELS].
+variable_list Hash3DAnchoredFunction::forward(
+  AutogradContext * ctx, Tensor points, Tensor feat_pool, IValue hash3d_info)
+{
+  auto info_ptr = hash3d_info.toCustomClass();
+  // Stash inputs for backward (via saved_data rather than save_for_backward).
+  ctx->saved_data["hash3d_info"] = hash3d_info;
+  ctx->saved_data["points"] = points;
+  ctx->saved_data["feat_pool"] = feat_pool;
+  Tensor & prim_pool = info_ptr->hash3d_->prim_pool_;
+  Tensor & bias_pool = info_ptr->hash3d_->bias_pool_;
+  CHECK(points.device().is_cuda());
+
+  int n_points = points.sizes()[0];
+
+  // One thread per point in x; one block row per hash level in y.
+  dim3 block_dim = LIN_BLOCK_DIM;
+  dim3 grid_dim = {DivUp(n_points, THREAD_CAP), unsigned(N_LEVELS), 1};
+
+  Tensor out_feat = torch::zeros({n_points, N_LEVELS * N_CHANNELS}, CUDAFlex);
+  CHECK(out_feat.is_contiguous());
+
+  // The kernel interpolates in half precision; make a contiguous fp16 copy.
+  Tensor feat_pool_true = feat_pool.to(torch::kFloat16).contiguous();
+
+  Hash3DAnchoredForwardKernel<<>>(
+    n_points, info_ptr->hash3d_->local_size_,
+    reinterpret_cast(feat_pool_true.data_ptr()), prim_pool.data_ptr(),
+    reinterpret_cast(bias_pool.data_ptr()),
+    reinterpret_cast(points.data_ptr()),
+    reinterpret_cast(out_feat.data_ptr()));
+
+  return {out_feat.to(torch::kFloat32)};
+}
+
+// Backward pass: computes gradients w.r.t. the query points and the feature
+// pool. Incoming gradients are multiplied by `grad_scale` before the fp16
+// kernel to avoid half-precision underflow, then divided back afterwards.
+variable_list Hash3DAnchoredFunction::backward(AutogradContext * ctx, variable_list grad_output)
+{
+  auto info_ptr = ctx->saved_data["hash3d_info"].toCustomClass();
+  Tensor & points = ctx->saved_data["points"].toTensor(); // [ n_points, 3 ]
+  Tensor & feat_pool = ctx->saved_data["feat_pool"].toTensor();
+  Tensor & prim_pool = info_ptr->hash3d_->prim_pool_;
+  Tensor & bias_pool = info_ptr->hash3d_->bias_pool_;
+  CHECK(points.device().is_cuda());
+
+  const float grad_scale = 128.f;
+  int n_points = points.sizes()[0];
+
+  int pool_size = info_ptr->hash3d_->pool_size_;
+
+  // Same launch geometry as the forward kernel.
+  dim3 block_dim = LIN_BLOCK_DIM;
+  dim3 grid_dim = {DivUp(n_points, THREAD_CAP), unsigned(N_LEVELS), 1};
+
+  Tensor feat_pool_true = feat_pool.to(torch::kFloat16).contiguous();
+
+  Tensor grad_in = (grad_output[0] * grad_scale).to(torch::kFloat16).contiguous();
+
+  Tensor points_grad = torch::zeros({n_points, 3}, CUDAFlex);
+  Tensor embeds_grad = torch::zeros({pool_size, N_CHANNELS}, CUDAFlex);
+
+  Hash3DAnchoredBackwardKernel<<>>(
+    n_points, info_ptr->hash3d_->local_size_,
+    reinterpret_cast(feat_pool_true.data_ptr()), prim_pool.data_ptr(),
+    reinterpret_cast(bias_pool.data_ptr()),
+    reinterpret_cast(points.data_ptr()),
+    reinterpret_cast(grad_in.data_ptr()),
+    reinterpret_cast(points_grad.data_ptr()),
+    reinterpret_cast(embeds_grad.data_ptr()));
+
+  // Undo the fp16 loss-scaling.
+  points_grad = points_grad.to(torch::kFloat32) / grad_scale;
+  embeds_grad = embeds_grad.to(torch::kFloat32) / grad_scale;
+
+  // One gradient per forward argument: points, feat_pool, and an undefined
+  // tensor for the non-differentiable hash3d_info.
+  return {points_grad, embeds_grad, Tensor()};
+}
+
+} // namespace torch::autograd
diff --git a/localization/nerf_based_localizer/src/nerf/hash_3d_anchored.hpp b/localization/nerf_based_localizer/src/nerf/hash_3d_anchored.hpp
new file mode 100644
index 0000000000000..2c0cce1096b51
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/hash_3d_anchored.hpp
@@ -0,0 +1,70 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Field/Hash3DAnchored.h
+//
+// Created by ppwang on 2022/7/17.
+//
+
+#ifndef NERF__HASH_3D_ANCHORED_HPP_
+#define NERF__HASH_3D_ANCHORED_HPP_
+
+#include
+
+// Number of feature channels stored per hash-grid entry.
+static constexpr int64_t N_CHANNELS = 2;
+// Number of resolution levels in the multi-resolution hash grid.
+static constexpr int64_t N_LEVELS = 16;
+
+// Multi-resolution anchored hash grid (derived from f2-nerf): per-level
+// hashed feature pools followed by a small linear layer.
+class Hash3DAnchored : public torch::nn::Module
+{
+  using Tensor = torch::Tensor;
+
+public:
+  Hash3DAnchored();
+
+  // Returns MLP output features for 3D query points ([n_points, 3], CUDA).
+  Tensor query(const Tensor & points);
+
+  // Adam parameter groups (feature pool and MLP) sharing learning rate `lr`.
+  std::vector optim_param_groups(float lr);
+
+  int pool_size_;   // total number of feature entries across all levels
+  int local_size_;  // entries per level (pool_size_ / N_LEVELS, 16-aligned)
+
+  Tensor feat_pool_;  // [ pool_size_, n_channels_ ]; learnable
+  Tensor prim_pool_;  // [ n_levels, 3 ]; per-level hash multipliers (not trained)
+  Tensor bias_pool_;  // [ n_levels, 3 ]; random per-level offsets
+
+  torch::nn::Linear mlp_ = nullptr;
+};
+
+// Thin holder passed through torch::IValue so the custom autograd function
+// can reach the owning Hash3DAnchored instance (non-owning pointer).
+class Hash3DAnchoredInfo : public torch::CustomClassHolder
+{
+public:
+  Hash3DAnchored * hash3d_ = nullptr;
+};
+
+namespace torch::autograd
+{
+
+// Custom autograd function wrapping the CUDA forward/backward hash-grid
+// kernels (implemented in hash_3d_anchored.cu).
+class Hash3DAnchoredFunction : public Function
+{
+public:
+  static variable_list forward(
+    AutogradContext * ctx, Tensor points, Tensor feat_pool_, IValue hash3d_info);
+
+  static variable_list backward(AutogradContext * ctx, variable_list grad_output);
+};
+
+} // namespace torch::autograd
+
+#endif // NERF__HASH_3D_ANCHORED_HPP_
diff --git a/localization/nerf_based_localizer/src/nerf/localizer.cpp b/localization/nerf_based_localizer/src/nerf/localizer.cpp
new file mode 100644
index 0000000000000..b36f9fae819cd
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/localizer.cpp
@@ -0,0 +1,354 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "localizer.hpp"
+
+#include "dataset.hpp"
+#include "stop_watch.hpp"
+
+#include
+#include
+
+#include
+
+using Tensor = torch::Tensor;
+
+// Loads the trained NeRF (renderer weights plus normalization / camera meta
+// data) from `param.train_result_dir` and precomputes the inference-time
+// intrinsics and the world<->NeRF axis-conversion matrix.
+// Throws std::runtime_error if inference_params.yaml cannot be opened.
+Localizer::Localizer(const LocalizerParam & param) : param_(param)
+{
+  const std::string train_result_dir = param.train_result_dir;
+
+  // Training-time metadata written alongside the checkpoint.
+  cv::FileStorage inference_params(
+    train_result_dir + "/inference_params.yaml", cv::FileStorage::READ);
+  if (!inference_params.isOpened()) {
+    throw std::runtime_error("Failed to open " + train_result_dir + "/inference_params.yaml");
+  }
+
+  const int n_images = (int)inference_params["n_images"];
+  const int train_height = (int)inference_params["height"];
+  const int train_width = (int)inference_params["width"];
+
+  std::vector intrinsic_vector;
+  inference_params["intrinsic"] >> intrinsic_vector;
+  intrinsic_ = torch::tensor(intrinsic_vector, torch::kFloat).view({3, 3}).to(torch::kCUDA);
+
+  // Scene normalization used during training: translations are mapped to
+  // (t - center) / radius before being fed to the NeRF (see camera2nerf).
+  std::vector normalizing_center;
+  inference_params["normalizing_center"] >> normalizing_center;
+  center_ = torch::tensor(normalizing_center, torch::kFloat).to(torch::kCUDA);
+
+  radius_ = (float)inference_params["normalizing_radius"];
+
+  renderer_ = std::make_shared(n_images, param.sample_num_per_ray);
+
+  torch::load(renderer_, train_result_dir + "/checkpoints/latest/renderer.pt");
+
+  // set the (possibly reduced) inference resolution; scale the intrinsics
+  // accordingly and restore the homogeneous 1 in the bottom-right entry.
+  infer_height_ = train_height / param.resize_factor;
+  infer_width_ = train_width / param.resize_factor;
+  intrinsic_ /= param.resize_factor;
+  intrinsic_[2][2] = 1.0;
+
+  // Axis conversion between the world camera frame and the NeRF camera frame
+  // (y and z axes flipped):
+  /*
+  [[+1, 0, 0, 0 ],
+  [ 0, -1, 0, 0 ],
+  [ 0, 0, -1, 0 ],
+  [ 0, 0, 0, +1 ]]
+*/
+  axis_convert_mat_ = torch::zeros({4, 4});
+  axis_convert_mat_[0][0] = +1;
+  axis_convert_mat_[1][1] = -1;
+  axis_convert_mat_[2][2] = -1;
+  axis_convert_mat_[3][3] = +1;
+  axis_convert_mat_ = axis_convert_mat_.to(torch::kCUDA);
+}
+
+// Generates `particle_num` candidate poses by perturbing `initial_pose` with
+// Gaussian noise (standard deviations from param_, scaled by `noise_coeff`),
+// scores them photometrically against `image_tensor`, and returns each pose
+// paired with its normalized weight. The first particle is always the
+// unperturbed initial pose.
+std::vector Localizer::optimize_pose_by_random_search(
+  Tensor initial_pose, Tensor image_tensor, int64_t particle_num, float noise_coeff)
+{
+  torch::NoGradGuard no_grad_guard;
+
+  std::mt19937_64 engine(std::random_device{}());
+
+  // Note that the order of the axes is different
+  // World coordinates (x: Front, y: Left, z: Up)
+  // NeRF coordinates (x: Right, y: Up, z: Back)
+  // Positions are also rescaled into the normalized NeRF scene (/ radius_).
+  const float pos_noise_x_in_nerf = param_.noise_position_y * noise_coeff / radius_;
+  const float pos_noise_y_in_nerf = param_.noise_position_z * noise_coeff / radius_;
+  const float pos_noise_z_in_nerf = param_.noise_position_x * noise_coeff / radius_;
+  const float theta_x_in_nerf = param_.noise_rotation_y * noise_coeff;
+  const float theta_y_in_nerf = param_.noise_rotation_z * noise_coeff;
+  const float theta_z_in_nerf = param_.noise_rotation_x * noise_coeff;
+
+  std::normal_distribution dist_position_x(0.0f, pos_noise_x_in_nerf);
+  std::normal_distribution dist_position_y(0.0f, pos_noise_y_in_nerf);
+  std::normal_distribution dist_position_z(0.0f, pos_noise_z_in_nerf);
+  std::normal_distribution dist_rotation_x(0.0f, theta_x_in_nerf);
+  std::normal_distribution dist_rotation_y(0.0f, theta_y_in_nerf);
+  std::normal_distribution dist_rotation_z(0.0f, theta_z_in_nerf);
+
+  std::vector poses;
+  for (int64_t i = 0; i < particle_num; i++) {
+    // Sample a random translation
+    Tensor curr_pose = initial_pose.clone();
+    if (i == 0) {
+      // Keep one unperturbed particle so the search can never be worse than
+      // the initial guess.
+      poses.push_back(curr_pose);
+      continue;
+    }
+    curr_pose[0][3] += dist_position_x(engine);
+    curr_pose[1][3] += dist_position_y(engine);
+    curr_pose[2][3] += dist_position_z(engine);
+
+    // orientation: sampled Euler angles (degrees -> radians) applied as
+    // R_z * R_y * R_x * R.
+    const float theta_x = dist_rotation_x(engine) * M_PI / 180.0;
+    const float theta_y = dist_rotation_y(engine) * M_PI / 180.0;
+    const float theta_z = dist_rotation_z(engine) * M_PI / 180.0;
+    Eigen::Matrix3f rotation_matrix_x(Eigen::AngleAxisf(theta_x, Eigen::Vector3f::UnitX()));
+    Eigen::Matrix3f rotation_matrix_y(Eigen::AngleAxisf(theta_y, Eigen::Vector3f::UnitY()));
+    Eigen::Matrix3f rotation_matrix_z(Eigen::AngleAxisf(theta_z, Eigen::Vector3f::UnitZ()));
+    const torch::Device dev = initial_pose.device();
+    // NOTE(review): Eigen matrices are column-major by default while
+    // from_blob assumes row-major, so these tensors are the transposes
+    // (i.e. the inverse rotations). Harmless here because the sampled angles
+    // are symmetric about zero, but worth confirming.
+    Tensor rotation_tensor_x =
+      torch::from_blob(rotation_matrix_x.data(), {3, 3}).to(torch::kFloat32).to(dev);
+    Tensor rotation_tensor_y =
+      torch::from_blob(rotation_matrix_y.data(), {3, 3}).to(torch::kFloat32).to(dev);
+    Tensor rotation_tensor_z =
+      torch::from_blob(rotation_matrix_z.data(), {3, 3}).to(torch::kFloat32).to(dev);
+    Tensor rotated = rotation_tensor_z.mm(
+      rotation_tensor_y.mm(rotation_tensor_x.mm(curr_pose.index({Slc(0, 3), Slc(0, 3)}))));
+    curr_pose.index_put_({Slc(0, 3), Slc(0, 3)}, rotated);
+    poses.push_back(curr_pose);
+  }
+
+  // Photometric scoring; weights are normalized to sum to 1.
+  const std::vector weights = evaluate_poses(poses, image_tensor);
+  const int pose_num = poses.size();
+
+  std::vector result;
+  for (int i = 0; i < pose_num; i++) {
+    result.push_back({poses[i], weights[i]});
+  }
+  return result;
+}
+
+// Orthonormalizes the rows of `A` via classical Gram-Schmidt and returns the
+// result as a new tensor; the argument itself is left untouched.
+torch::Tensor gram_schmidt(torch::Tensor A)
+{
+  torch::Tensor basis = A.clone();
+  const int64_t num_rows = basis.size(0);
+  for (int64_t row = 0; row < num_rows; ++row) {
+    // Subtract the projections onto all previously orthonormalized rows.
+    for (int64_t prev = 0; prev < row; ++prev) {
+      basis[row] -= torch::dot(basis[prev], basis[row]) * basis[prev];
+    }
+    // Rescale the remainder to unit length.
+    basis[row] /= basis[row].norm();
+  }
+  return basis;
+}
+
+// Refines `initial_pose` by gradient descent on the photometric MSE between
+// the rendered image and `image_tensor`, recording the pose after every Adam
+// step. Returns the (possibly empty) sequence of intermediate poses.
+// NOTE(review): the raw pose-matrix entries are optimized directly, so the
+// rotation block may drift away from orthonormality across iterations; the
+// gram_schmidt helper above looks intended for re-orthonormalization but is
+// not applied here — confirm.
+std::vector Localizer::optimize_pose_by_differential(
+  Tensor initial_pose, Tensor image_tensor, int64_t iteration_num, float learning_rate)
+{
+  std::vector results;
+  initial_pose = initial_pose.requires_grad_(true);
+  image_tensor = image_tensor.view({infer_height_, infer_width_, 3});
+  torch::optim::Adam optimizer({initial_pose}, learning_rate);
+  for (int64_t i = 0; i < iteration_num; i++) {
+    Tensor pred_img = render_image(initial_pose);
+    Tensor loss = torch::nn::functional::mse_loss(pred_img, image_tensor);
+    optimizer.zero_grad();
+    // For some reason, backward may fail, so check here and return whatever
+    // poses were accumulated so far instead of crashing.
+    try {
+      loss.backward();
+    } catch (const std::runtime_error & e) {
+      return results;
+    }
+    optimizer.step();
+
+    // Record a detached snapshot of the pose after this step.
+    Tensor curr_result = initial_pose.clone().detach();
+    results.push_back(curr_result);
+  }
+  return results;
+}
+
+// Renders a full (infer_height_ x infer_width_) image from the given pose in
+// NeRF coordinates, batching rays by param_.ray_batch_size. The renderer's
+// secondary output is discarded.
+Tensor Localizer::render_image(const Tensor & pose)
+{
+  auto [image, _] =
+    renderer_->render_image(pose, intrinsic_, infer_height_, infer_width_, param_.ray_batch_size);
+  return image;
+}
+
+// Scores candidate poses by photometric consistency: renders a sparse random
+// subset of param_.render_pixel_num pixels for every pose and converts each
+// pose's mean squared error into a weight. Weights are sharpened by raising
+// the inverse error to the 5th power and normalized to sum to 1.
+// `image` is the reference camera image, (infer_height_, infer_width_, 3).
+std::vector Localizer::evaluate_poses(
+  const std::vector & poses, const Tensor & image)
+{
+  torch::NoGradGuard no_grad_guard;
+  Timer timer;
+
+  const int pixel_num = param_.render_pixel_num;
+  const auto CUDALong = torch::TensorOptions().dtype(torch::kLong).device(torch::kCUDA);
+
+  // Pick rays by constant interval
+  // const int step = H * W / pixel_num;
+  // std::vector i_vec, j_vec;
+  // for (int k = 0; k < pixel_num; k++) {
+  //   const int v = k * step;
+  //   const int64_t i = v / W;
+  //   const int64_t j = v % W;
+  //   i_vec.push_back(i);
+  //   j_vec.push_back(j);
+  // }
+  // const Tensor i = torch::tensor(i_vec, CUDALong);
+  // const Tensor j = torch::tensor(j_vec, CUDALong);
+
+  // Pick rays by random sampling without replacement
+  std::vector indices(infer_height_ * infer_width_);
+  std::iota(indices.begin(), indices.end(), 0);
+  std::mt19937 engine(std::random_device{}());
+  std::shuffle(indices.begin(), indices.end(), engine);
+  std::vector i_vec, j_vec;
+  for (int k = 0; k < pixel_num; k++) {
+    const int v = indices[k];
+    const int64_t i = v / infer_width_;
+    const int64_t j = v % infer_width_;
+    i_vec.push_back(i);
+    j_vec.push_back(j);
+  }
+  Tensor i = torch::tensor(i_vec, CUDALong);
+  Tensor j = torch::tensor(j_vec, CUDALong);
+
+  // Pick rays by random sampling with replacement
+  // const Tensor i = torch::randint(0, H, pixel_num, CUDALong);
+  // const Tensor j = torch::randint(0, infer_width_, pixel_num, CUDALong);
+
+  // Build the same set of pixel rays for every candidate pose.
+  const Tensor ij = torch::stack({i, j}, -1).to(torch::kFloat32);
+  std::vector rays_o_vec;
+  std::vector rays_d_vec;
+  for (const Tensor & pose : poses) {
+    auto [rays_o, rays_d] = get_rays_from_pose(pose.unsqueeze(0), intrinsic_.unsqueeze(0), ij);
+    rays_o_vec.push_back(rays_o);
+    rays_d_vec.push_back(rays_d);
+  }
+
+  const int64_t pose_num = poses.size();
+
+  Tensor rays_o = torch::cat(rays_o_vec);  // (pose_num * pixel_num, 3)
+  Tensor rays_d = torch::cat(rays_d_vec);  // (pose_num * pixel_num, 3)
+
+  timer.start();
+  auto [pred_colors, _] = renderer_->render_all_rays(rays_o, rays_d, (1 << 16));
+
+  Tensor pred_pixels = pred_colors.view({pose_num, pixel_num, 3});
+  pred_pixels = pred_pixels.clip(0.f, 1.f);
+  pred_pixels = pred_pixels.to(image.device());  // (pose_num, pixel_num, 3)
+
+  i = i.to(image.device());
+  j = j.to(image.device());
+
+  Tensor gt_pixels = image.index({i, j});              // (pixel_num, 3)
+  Tensor diff = pred_pixels - gt_pixels;               // (pose_num, pixel_num, 3)
+  Tensor loss = (diff * diff).mean(-1).sum(-1).cpu();  // (pose_num,)
+  // Convert errors to weights: smaller error -> larger weight (the 1e-6f
+  // avoids division by zero), sharpened by the 5th power, normalized to 1.
+  loss = pixel_num / (loss + 1e-6f);
+  loss = torch::pow(loss, 5);
+  loss /= loss.sum();
+
+  std::vector result(loss.data_ptr(), loss.data_ptr() + loss.numel());
+  return result;
+}
+
+// Computes the weighted average of rotation matrices via quaternion
+// averaging. Quaternions are sign-aligned against the first one before
+// accumulation so that q and -q (the same rotation) do not cancel out.
+// cf. https://stackoverflow.com/questions/12374087/average-of-multiple-quaternions
+// Returns the identity rotation for an empty input.
+Eigen::Matrix3d compute_rotation_average(
+  const std::vector & rotations, const std::vector & weights)
+{
+  if (rotations.empty()) {
+    return Eigen::Matrix3d::Identity();
+  }
+
+  std::vector quaternions;
+  for (const Eigen::Matrix3d & rot : rotations) {
+    Eigen::Quaterniond quat(rot);
+    quaternions.push_back(quat);
+  }
+
+  Eigen::Vector4d cumulative(0.0, 0.0, 0.0, 0.0);
+  const Eigen::Quaterniond & front = quaternions[0];
+
+  for (size_t k = 0; k < quaternions.size(); ++k) {
+    Eigen::Quaterniond q = quaternions[k];
+    if (q.dot(front) < 0.0) {
+      q = Eigen::Quaterniond(-q.coeffs());
+    }
+    // Weight each quaternion by its particle weight. Previously `weights`
+    // was accepted but ignored, making the rotation average inconsistent
+    // with the weighted position average computed by calc_average_pose.
+    cumulative += weights[k] * q.coeffs();
+  }
+
+  // normalize() below removes any overall scale, so dividing by the weight
+  // sum is unnecessary.
+  Eigen::Quaterniond average_quaternion;
+  average_quaternion.coeffs() = cumulative;
+  average_quaternion.normalize();
+
+  return average_quaternion.toRotationMatrix();
+}
+
+// Computes the weighted average pose of a particle set: positions are
+// averaged with the particle weights; rotations are averaged via quaternions
+// in compute_rotation_average.
+Tensor Localizer::calc_average_pose(const std::vector & particles)
+{
+  torch::Device device = particles.front().pose.device();
+  torch::Tensor avg_position_tensor = torch::zeros({3, 1}, device).to(torch::kFloat32);
+  std::vector rotations;
+  std::vector weights;
+
+  for (const Particle & particle : particles) {
+    torch::Tensor pose = particle.pose;
+    torch::Tensor position = pose.index({Slc(0, 3), Slc(3, 4)});
+    avg_position_tensor += position * particle.weight;
+
+    // slice to get 3x3 rotation matrix, convert it to Eigen::Matrix3f
+    // NOTE(review): torch is row-major while Eigen defaults to column-major,
+    // so this memcpy actually stores the transpose; the transpose is undone
+    // by the matching row/column-major mismatch in from_blob below — confirm.
+    torch::Tensor rotation_tensor = pose.index({Slc(0, 3), Slc(0, 3)}).to(torch::kDouble).cpu();
+    Eigen::Matrix3d rotation;
+    std::memcpy(
+      rotation.data(), rotation_tensor.data_ptr(), sizeof(double) * rotation_tensor.numel());
+    rotations.push_back(rotation);
+    weights.push_back(particle.weight);
+  }
+
+  Eigen::Matrix3d avg_rotation_matrix = compute_rotation_average(rotations, weights);
+  torch::Tensor avg_rotation_tensor = torch::from_blob(
+    avg_rotation_matrix.data(), {3, 3}, torch::TensorOptions().dtype(torch::kDouble));
+  avg_rotation_tensor = avg_rotation_tensor.to(torch::kFloat32);
+  avg_rotation_tensor = avg_rotation_tensor.to(device);
+
+  // combine average position and rotation to form average pose
+  torch::Tensor avg_pose = torch::zeros_like(particles.front().pose);
+  avg_pose.index_put_({Slc(0, 3), Slc(3, 4)}, avg_position_tensor);
+  avg_pose.index_put_({Slc(0, 3), Slc(0, 3)}, avg_rotation_tensor);
+
+  return avg_pose;
+}
+
+// Converts a 4x4 camera pose in world coordinates to the normalized 3x4 pose
+// expected by the NeRF: conjugation with the axis-conversion matrix, then
+// translation normalization into the training scene scale.
+torch::Tensor Localizer::camera2nerf(const torch::Tensor & pose_in_world)
+{
+  torch::Tensor x = pose_in_world;
+  // Conjugate with the axis-conversion matrix: x <- C^T * pose * C.
+  x = torch::mm(x, axis_convert_mat_);
+  x = torch::mm(axis_convert_mat_.t(), x);
+
+  // normalize t into the unit scene used during training.
+  Tensor t = x.index({Slc(0, 3), 3}).clone();
+  t = (t - center_.unsqueeze(0)) / radius_;
+  x.index_put_({Slc(0, 3), 3}, t);
+
+  // Drop the homogeneous bottom row; the renderer expects a 3x4 pose.
+  x = x.index({Slc(0, 3), Slc(0, 4)});
+  return x;
+}
+
+// Inverse of camera2nerf: converts a 3x4 NeRF pose back into a 4x4 camera
+// pose in world coordinates.
+torch::Tensor Localizer::nerf2camera(const torch::Tensor & pose_in_camera)
+{
+  torch::Tensor x = pose_in_camera;
+  // Re-append the homogeneous bottom row [0, 0, 0, 1].
+  x = torch::cat({x, torch::tensor({0, 0, 0, 1}).view({1, 4}).to(torch::kCUDA)});
+
+  // denormalize t: undo the training-time scene normalization.
+  Tensor t = x.index({Slc(0, 3), 3}).clone();
+  t = t * radius_ + center_.unsqueeze(0);
+  x.index_put_({Slc(0, 3), 3}, t);
+
+  // Conjugate back: x <- C * pose * C^T.
+  x = torch::mm(x, axis_convert_mat_.t());
+  x = torch::mm(axis_convert_mat_, x);
+  return x;
+}
diff --git a/localization/nerf_based_localizer/src/nerf/localizer.hpp b/localization/nerf_based_localizer/src/nerf/localizer.hpp
new file mode 100644
index 0000000000000..020ffca3e5dcc
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/localizer.hpp
@@ -0,0 +1,81 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef NERF__LOCALIZER_HPP_
+#define NERF__LOCALIZER_HPP_
+
+#include "dataset.hpp"
+#include "renderer.hpp"
+
+#include
+
+// One pose hypothesis and its score from photometric evaluation.
+struct Particle
+{
+  torch::Tensor pose;  // (3, 4)
+  float weight;        // normalized over a particle set (see evaluate_poses)
+};
+
+// Configuration for Localizer. Noise fields are the standard deviations used
+// by the random search, per world axis (x: front, y: left, z: up); rotation
+// noise is in degrees. Position noise appears to be in the original world
+// scale — optimize_pose_by_random_search divides by the scene radius.
+struct LocalizerParam
+{
+  std::string train_result_dir;    // directory with inference_params.yaml / checkpoints
+  int32_t render_pixel_num = 256;  // pixels sampled per pose when scoring candidates
+  float noise_position_x = 0.025f;
+  float noise_position_y = 0.025f;
+  float noise_position_z = 0.025f;
+  float noise_rotation_x = 2.5f;
+  float noise_rotation_y = 2.5f;
+  float noise_rotation_z = 2.5f;
+  int32_t resize_factor = 1;       // render at train resolution / resize_factor
+  int32_t sample_num_per_ray = 1024;
+  int32_t ray_batch_size = (1 << 8);
+};
+
+// NeRF-based pose estimator: renders candidate camera poses with a trained
+// NeRF and scores them against the live camera image.
+class Localizer
+{
+  using Tensor = torch::Tensor;
+
+public:
+  Localizer() = default;
+  Localizer(const LocalizerParam & param);
+
+  // Renders a full (infer_height_ x infer_width_) image from a NeRF-frame pose.
+  Tensor render_image(const Tensor & pose);
+  // Particle-filter style search around initial_pose; returns weighted particles.
+  std::vector optimize_pose_by_random_search(
+    Tensor initial_pose, Tensor image_tensor, int64_t particle_num, float noise_coeff);
+  // Gradient-based refinement; returns the pose after each iteration.
+  std::vector optimize_pose_by_differential(
+    Tensor initial_pose, Tensor image_tensor, int64_t iteration_num, float learning_rate);
+
+  // Conversions between world camera poses and normalized NeRF poses.
+  torch::Tensor camera2nerf(const torch::Tensor & pose_in_world);
+  torch::Tensor nerf2camera(const torch::Tensor & pose_in_camera);
+
+  // Weighted average of particle poses (weighted position + quaternion average).
+  static Tensor calc_average_pose(const std::vector & particles);
+
+  float radius() const { return radius_; }
+  int infer_height() const { return infer_height_; }
+  int infer_width() const { return infer_width_; }
+
+private:
+  // Scores poses photometrically; returns weights normalized to sum to 1.
+  std::vector evaluate_poses(const std::vector & poses, const Tensor & image);
+
+  LocalizerParam param_;
+
+  std::shared_ptr renderer_;
+
+  // 4x4 conversion between world and NeRF camera axis conventions (CUDA).
+  torch::Tensor axis_convert_mat_;
+
+  int infer_height_, infer_width_;
+  Tensor intrinsic_;  // 3x3 intrinsics, already scaled by resize_factor
+  Tensor center_;     // training-time scene normalization center
+  float radius_;      // training-time scene normalization radius
+};
+
+#endif // NERF__LOCALIZER_HPP_
diff --git a/localization/nerf_based_localizer/src/nerf/points_sampler.cpp b/localization/nerf_based_localizer/src/nerf/points_sampler.cpp
new file mode 100644
index 0000000000000..3d302a50d6cf3
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/points_sampler.cpp
@@ -0,0 +1,79 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/PtsSampler/PersSampler.cpp
+//
+// Created by ppwang on 2022/9/26.
+//
+
+#include "points_sampler.hpp"
+
+#include "dataset.hpp"
+#include "stop_watch.hpp"
+#include "utils.hpp"
+
+#include
+#include
+
+using Tensor = torch::Tensor;
+
+// `sample_num_per_ray`: number of points sampled along each ray.
+PtsSampler::PtsSampler(const int sample_num_per_ray) : sample_num_per_ray_(sample_num_per_ray)
+{
+}
+
+// Samples sample_num_per_ray_ points along every input ray.
+// In VALIDATE mode samples are spaced exactly SAMPLE_L apart; in TRAIN mode
+// each step is jittered in [0.5, 1.5) * SAMPLE_L. Returns flattened
+// per-sample tensors plus per-ray [start, end) index bounds.
+SampleResultFlex PtsSampler::get_samples(
+  const Tensor & rays_o_raw, const Tensor & rays_d_raw, RunningMode mode)
+{
+  Tensor rays_o = rays_o_raw.contiguous();
+  // Normalize directions so the cumulative noise measures metric distance.
+  Tensor rays_d = (rays_d_raw / torch::linalg_norm(rays_d_raw, 2, -1, true)).contiguous();
+
+  int n_rays = rays_o.sizes()[0];
+
+  const int n_all_pts = n_rays * sample_num_per_ray_;
+
+  // Per-step multipliers: all ones (uniform spacing) or jittered around 1.
+  Tensor rays_noise;
+  if (mode == RunningMode::VALIDATE) {
+    rays_noise = torch::ones({n_all_pts}, CUDAFloat);
+  } else {
+    rays_noise = ((torch::rand({n_all_pts}, CUDAFloat) - .5f) + 1.f).contiguous();
+  }
+  rays_noise = rays_noise.view({n_rays, sample_num_per_ray_}).contiguous();
+  // Cumulative sum turns per-step lengths into distances t along each ray.
+  Tensor cum_noise = torch::cumsum(rays_noise, 1) * SAMPLE_L;
+  Tensor sampled_t = cum_noise.reshape({n_all_pts}).contiguous();
+
+  rays_o = rays_o.view({n_rays, 1, 3}).contiguous();
+  rays_d = rays_d.view({n_rays, 1, 3}).contiguous();
+  cum_noise = cum_noise.unsqueeze(-1).contiguous();
+  Tensor sampled_pts = rays_o + rays_d * cum_noise;
+
+  // Distance from each sample to its predecessor; the first sample of every
+  // ray gets distance 0.
+  Tensor sampled_distances = torch::diff(sampled_pts, 1, 1).norm(2, -1).contiguous();
+  sampled_distances =
+    torch::cat({torch::zeros({n_rays, 1}, CUDAFloat), sampled_distances}, 1).contiguous();
+  sampled_pts = sampled_pts.view({n_all_pts, 3});
+  sampled_distances = sampled_distances.view({n_all_pts}).contiguous();
+
+  // Every ray owns exactly sample_num_per_ray_ consecutive samples; build
+  // the [start, end) index bounds from the cumulative count.
+  Tensor pts_idx_start_end =
+    torch::ones({n_rays, 2}, torch::TensorOptions().dtype(torch::kInt).device(torch::kCUDA)) *
+    sample_num_per_ray_;
+  Tensor pts_num = pts_idx_start_end.index({Slc(), 0});
+  Tensor cum_num = torch::cumsum(pts_num, 0);
+  pts_idx_start_end.index_put_({Slc(), 0}, cum_num - pts_num);
+  pts_idx_start_end.index_put_({Slc(), 1}, cum_num);
+
+  // Per-sample direction: each ray's (normalized) direction repeated.
+  Tensor sampled_dirs =
+    rays_d.expand({-1, sample_num_per_ray_, -1}).reshape({n_all_pts, 3}).contiguous();
+
+  return {sampled_pts, sampled_dirs, sampled_distances, sampled_t, pts_idx_start_end};
+}
diff --git a/localization/nerf_based_localizer/src/nerf/points_sampler.hpp b/localization/nerf_based_localizer/src/nerf/points_sampler.hpp
new file mode 100644
index 0000000000000..f606ae62f83e3
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/points_sampler.hpp
@@ -0,0 +1,58 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/PtsSampler/PersSampler.h
+//
+// Created by ppwang on 2022/6/20.
+//
+
+#ifndef NERF__POINTS_SAMPLER_HPP_
+#define NERF__POINTS_SAMPLER_HPP_
+
+#include "Eigen/Eigen"
+#include "common.hpp"
+
+#include
+
+#include
+
+// Output of PtsSampler::get_samples: flattened per-sample tensors for a batch
+// of rays, plus per-ray [start, end) index bounds into the flat arrays.
+struct SampleResultFlex
+{
+  using Tensor = torch::Tensor;
+  Tensor pts;   // [ n_all_pts, 3 ]  sampled 3D points
+  Tensor dirs;  // [ n_all_pts, 3 ]  normalized ray direction per sample
+  Tensor dt;    // [ n_all_pts, 1 ]  distance to the previous sample (0 at start)
+  Tensor t;     // [ n_all_pts, 1 ]  distance along the ray
+  Tensor pts_idx_bounds;  // [ n_rays, 2 ] // start, end
+};
+
+// TRAIN jitters the per-step sample spacing; VALIDATE uses uniform spacing.
+enum RunningMode { TRAIN, VALIDATE };
+
+// Samples points along rays at a fixed (optionally jittered) step length.
+class PtsSampler
+{
+  using Tensor = torch::Tensor;
+
+public:
+  explicit PtsSampler(const int sample_num_per_ray);
+
+  // Returns flattened samples for all rays; see points_sampler.cpp.
+  SampleResultFlex get_samples(const Tensor & rays_o, const Tensor & rays_d, RunningMode mode);
+
+private:
+  const int sample_num_per_ray_;
+
+  // Base step length between consecutive samples along a ray.
+  static constexpr float SAMPLE_L = 1.0 / 256;
+};
+
+#endif // NERF__POINTS_SAMPLER_HPP_
diff --git a/localization/nerf_based_localizer/src/nerf/rays.cpp b/localization/nerf_based_localizer/src/nerf/rays.cpp
new file mode 100644
index 0000000000000..289a81566ef68
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/rays.cpp
@@ -0,0 +1,42 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "rays.hpp"
+
+#include "common.hpp"
+
+using Tensor = torch::Tensor;
+
+Rays get_rays_from_pose(const Tensor & pose, const Tensor & intrinsic, const Tensor & ij)
+{
+ // Shift half pixel
+ Tensor i = ij.index({"...", 0}).to(torch::kFloat32) + .5f;
+ Tensor j = ij.index({"...", 1}).to(torch::kFloat32) + .5f;
+
+ Tensor cx = intrinsic.index({Slc(), 0, 2});
+ Tensor cy = intrinsic.index({Slc(), 1, 2});
+ Tensor fx = intrinsic.index({Slc(), 0, 0});
+ Tensor fy = intrinsic.index({Slc(), 1, 1});
+
+ Tensor u_tensor = ((j - cx) / fx).unsqueeze(-1);
+ Tensor v_tensor = -((i - cy) / fy).unsqueeze(-1);
+ Tensor w_tensor = -torch::ones_like(u_tensor);
+
+ Tensor dir_tensor = torch::cat({u_tensor, v_tensor, w_tensor}, 1).unsqueeze(-1);
+ Tensor ori_tensor = pose.index({Slc(), Slc(0, 3), Slc(0, 3)});
+ Tensor pos_tensor = pose.index({Slc(), Slc(0, 3), 3});
+ Tensor rays_d = torch::matmul(ori_tensor, dir_tensor).squeeze();
+ Tensor rays_o = pos_tensor.expand({rays_d.sizes()[0], 3}).contiguous();
+
+ return {rays_o, rays_d};
+}
diff --git a/localization/nerf_based_localizer/src/nerf/rays.hpp b/localization/nerf_based_localizer/src/nerf/rays.hpp
new file mode 100644
index 0000000000000..5c5cb84558fb7
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/rays.hpp
@@ -0,0 +1,28 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef NERF__RAYS_HPP_
+#define NERF__RAYS_HPP_
+
+#include <torch/torch.h>
+
+struct alignas(32) Rays
+{
+ torch::Tensor origins;
+ torch::Tensor dirs;
+};
+
+Rays get_rays_from_pose(
+ const torch::Tensor & pose, const torch::Tensor & intrinsic, const torch::Tensor & ij);
+
+#endif // NERF__RAYS_HPP_
diff --git a/localization/nerf_based_localizer/src/nerf/renderer.cpp b/localization/nerf_based_localizer/src/nerf/renderer.cpp
new file mode 100644
index 0000000000000..18d73f1fab16c
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/renderer.cpp
@@ -0,0 +1,214 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Renderer/Renderer.cpp
+//
+// Created by ppwang on 2022/5/7.
+//
+
+#include "renderer.hpp"
+
+#include "CustomOps/CustomOps.hpp"
+#include "CustomOps/FlexOps.hpp"
+#include "CustomOps/Scatter.hpp"
+#include "common.hpp"
+#include "rays.hpp"
+#include "stop_watch.hpp"
+#include "utils.hpp"
+
+using Tensor = torch::Tensor;
+namespace F = torch::nn::functional;
+
+Renderer::Renderer(int n_images, const int sample_num_per_ray)
+: sample_num_per_ray_(sample_num_per_ray)
+{
+  pts_sampler_ = std::make_shared<PtsSampler>(sample_num_per_ray_);
+
+  scene_field_ = std::make_shared<Hash3DAnchored>();
+ register_module("scene_field", scene_field_);
+
+  shader_ = std::make_shared<SHShader>();
+ register_module("shader", shader_);
+
+ app_emb_ = torch::randn({n_images, 16}, CUDAFloat) * .1f;
+ app_emb_.requires_grad_(true);
+ register_parameter("app_emb", app_emb_);
+}
+
+RenderResult Renderer::render(
+ const Tensor & rays_o, const Tensor & rays_d, const Tensor & emb_idx, RunningMode mode)
+{
+ int n_rays = rays_o.sizes()[0];
+ SampleResultFlex sample_result = pts_sampler_->get_samples(rays_o, rays_d, mode);
+ int n_all_pts = sample_result.pts.sizes()[0];
+  CHECK(sample_result.pts_idx_bounds.max().item<int>() <= n_all_pts);
+  CHECK(sample_result.pts_idx_bounds.min().item<int>() >= 0);
+
+ Tensor bg_color =
+ ((mode == RunningMode::TRAIN) ? torch::rand({n_rays, 3}, CUDAFloat)
+ : torch::ones({n_rays, 3}, CUDAFloat) * .5f);
+
+ if (n_all_pts <= 0) {
+ return {
+ bg_color, torch::zeros({n_rays}, CUDAFloat), torch::full({n_rays}, 512.f, CUDAFloat),
+ Tensor()};
+ }
+ CHECK(rays_o.sizes()[0] == sample_result.pts_idx_bounds.sizes()[0]);
+
+ auto DensityAct = [](Tensor x) -> Tensor {
+ const float shift = 3.f;
+ return torch::autograd::TruncExp::apply(x - shift)[0];
+ };
+
+ // First, inference - early stop
+ SampleResultFlex sample_result_early_stop;
+ {
+ Tensor scene_feat = scene_field_->query(sample_result.pts);
+ Tensor sampled_density = DensityAct(scene_feat.index({Slc(), Slc(0, 1)}));
+ Tensor sec_density = sampled_density.index({Slc(), 0}) * sample_result.dt;
+ Tensor alphas = 1.f - torch::exp(-sec_density);
+ Tensor acc_density = FlexOps::AccumulateSum(sec_density, sample_result.pts_idx_bounds, false);
+ Tensor trans = torch::exp(-acc_density);
+ Tensor weights = trans * alphas;
+ Tensor mask = trans > 1e-4f;
+ Tensor mask_idx = torch::where(mask)[0];
+
+ sample_result_early_stop.pts = sample_result.pts.index({mask_idx}).contiguous();
+ sample_result_early_stop.dirs = sample_result.dirs.index({mask_idx}).contiguous();
+ sample_result_early_stop.dt = sample_result.dt.index({mask_idx}).contiguous();
+ sample_result_early_stop.t = sample_result.t.index({mask_idx}).contiguous();
+
+ Tensor mask_2d = mask.reshape({n_rays, sample_num_per_ray_});
+ Tensor num = mask_2d.sum(1);
+ Tensor cum_num = torch::cumsum(num, 0);
+ Tensor idx_bounds =
+ torch::zeros({n_rays, 2}, torch::TensorOptions().dtype(torch::kInt).device(torch::kCUDA));
+ idx_bounds.index_put_({Slc(), 0}, cum_num - num);
+ idx_bounds.index_put_({Slc(), 1}, cum_num);
+ sample_result_early_stop.pts_idx_bounds = idx_bounds;
+
+    CHECK(
+      sample_result_early_stop.pts_idx_bounds.max().item<int>() ==
+      sample_result_early_stop.pts.size(0));
+ }
+
+ n_all_pts = sample_result_early_stop.pts.size(0);
+
+ Tensor scene_feat = scene_field_->query(sample_result_early_stop.pts);
+ Tensor sampled_density = DensityAct(scene_feat.index({Slc(), Slc(0, 1)}));
+
+ Tensor shading_feat = torch::cat(
+ {torch::ones_like(scene_feat.index({Slc(), Slc(0, 1)}), CUDAFloat),
+ scene_feat.index({Slc(), Slc(1, torch::indexing::None)})},
+ 1);
+
+ if (mode == RunningMode::TRAIN) {
+ Tensor all_emb_idx =
+ CustomOps::ScatterIdx(n_all_pts, sample_result_early_stop.pts_idx_bounds, emb_idx);
+ shading_feat = CustomOps::ScatterAdd(app_emb_, all_emb_idx, shading_feat);
+ }
+
+ Tensor sampled_colors = shader_->query(shading_feat, sample_result_early_stop.dirs);
+ Tensor sampled_t = (sample_result_early_stop.t + 1e-2f).contiguous();
+ Tensor sec_density = sampled_density.index({Slc(), 0}) * sample_result_early_stop.dt;
+ Tensor alphas = 1.f - torch::exp(-sec_density);
+ Tensor idx_start_end = sample_result_early_stop.pts_idx_bounds;
+ Tensor acc_density = FlexOps::AccumulateSum(sec_density, idx_start_end, false);
+ Tensor trans = torch::exp(-acc_density);
+ Tensor weights = trans * alphas;
+
+ Tensor last_trans = torch::exp(-FlexOps::Sum(sec_density, idx_start_end));
+ Tensor colors = FlexOps::Sum(weights.unsqueeze(-1) * sampled_colors, idx_start_end);
+ colors = colors + last_trans.unsqueeze(-1) * bg_color;
+ Tensor depths = FlexOps::Sum(weights * sampled_t, idx_start_end) / (1.f - last_trans + 1e-4f);
+
+  CHECK(std::isfinite((colors).mean().item<float>()));
+
+ return {colors, depths, weights, idx_start_end};
+}
+
+std::tuple<Tensor, Tensor> Renderer::render_all_rays(
+ const Tensor & rays_o, const Tensor & rays_d, const int batch_size)
+{
+ const int n_rays = rays_d.sizes()[0];
+
+  std::vector<Tensor> pred_colors;
+  std::vector<Tensor> pred_depths;
+
+ const int ray_batch_size = (1 << 16);
+ for (int i = 0; i < n_rays; i += batch_size) {
+ int i_high = std::min(i + batch_size, n_rays);
+ Tensor cur_rays_o = rays_o.index({Slc(i, i_high)}).contiguous();
+ Tensor cur_rays_d = rays_d.index({Slc(i, i_high)}).contiguous();
+
+ RenderResult render_result = render(cur_rays_o, cur_rays_d, Tensor(), RunningMode::VALIDATE);
+ Tensor colors = render_result.colors;
+ Tensor depths = render_result.depths.squeeze();
+
+ pred_colors.push_back(colors);
+ pred_depths.push_back(depths.unsqueeze(-1));
+ }
+
+ Tensor pred_colors_ts = torch::cat(pred_colors, 0);
+ Tensor pred_depths_ts = torch::cat(pred_depths, 0);
+
+ return {pred_colors_ts, pred_depths_ts};
+}
+
+std::tuple<Tensor, Tensor> Renderer::render_image(
+ const torch::Tensor & pose, const torch::Tensor & intrinsic, const int h, const int w,
+ const int batch_size)
+{
+ Tensor ii = torch::linspace(0.f, h - 1.f, h, CUDAFloat);
+ Tensor jj = torch::linspace(0.f, w - 1.f, w, CUDAFloat);
+ auto ij = torch::meshgrid({ii, jj}, "ij");
+ Tensor i = ij[0].reshape({-1});
+ Tensor j = ij[1].reshape({-1});
+ auto [rays_o, rays_d] =
+ get_rays_from_pose(pose.unsqueeze(0), intrinsic.unsqueeze(0), torch::stack({i, j}, -1));
+ auto [pred_colors, pred_depths] = render_all_rays(rays_o, rays_d, batch_size);
+ pred_colors = pred_colors.reshape({h, w, 3});
+ pred_depths = pred_depths.reshape({h, w, 1});
+
+ pred_colors = pred_colors.clip(0.0f, 1.0f);
+ pred_depths = pred_depths.repeat({1, 1, 3});
+
+ return {pred_colors, pred_depths};
+}
+
+std::vector<torch::optim::OptimizerParamGroup> Renderer::optim_param_groups(float lr)
+{
+ std::vector ret;
+
+ // scene_field_
+ for (const auto & para_group : scene_field_->optim_param_groups(lr)) {
+ ret.emplace_back(para_group);
+ }
+
+ // shader_
+ for (const auto & para_group : shader_->optim_param_groups(lr)) {
+ ret.emplace_back(para_group);
+ }
+
+ // app_emb_
+  auto opt = std::make_unique<torch::optim::AdamOptions>(lr);
+ opt->betas() = {0.9, 0.99};
+ opt->eps() = 1e-15;
+ opt->weight_decay() = 1e-6;
+  std::vector<Tensor> params{app_emb_};
+ ret.emplace_back(std::move(params), std::move(opt));
+
+ return ret;
+}
diff --git a/localization/nerf_based_localizer/src/nerf/renderer.hpp b/localization/nerf_based_localizer/src/nerf/renderer.hpp
new file mode 100644
index 0000000000000..4b7f06dc2f54d
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/renderer.hpp
@@ -0,0 +1,69 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Renderer/Renderer.h
+//
+// Created by ppwang on 2022/5/7.
+//
+
+#ifndef NERF__RENDERER_HPP_
+#define NERF__RENDERER_HPP_
+
+#include "hash_3d_anchored.hpp"
+#include "points_sampler.hpp"
+#include "sh_shader.hpp"
+
+#include <torch/torch.h>
+
+
+struct RenderResult
+{
+ using Tensor = torch::Tensor;
+ Tensor colors;
+ Tensor depths;
+ Tensor weights;
+ Tensor idx_start_end;
+};
+
+class Renderer : public torch::nn::Module
+{
+ using Tensor = torch::Tensor;
+
+public:
+ Renderer(int n_images, const int sample_num_per_ray = 1024);
+
+ RenderResult render(
+ const Tensor & rays_o, const Tensor & rays_d, const Tensor & emb_idx, RunningMode mode);
+
+  std::tuple<Tensor, Tensor> render_all_rays(
+ const Tensor & rays_o, const Tensor & rays_d, const int batch_size);
+
+  std::tuple<Tensor, Tensor> render_image(
+ const torch::Tensor & pose, const torch::Tensor & intrinsic, const int h, const int w,
+ const int batch_size);
+
+  std::vector<torch::optim::OptimizerParamGroup> optim_param_groups(float lr);
+
+private:
+ const int sample_num_per_ray_;
+
+  std::shared_ptr<PtsSampler> pts_sampler_;
+  std::shared_ptr<Hash3DAnchored> scene_field_;
+  std::shared_ptr<SHShader> shader_;
+
+ Tensor app_emb_;
+};
+
+#endif // NERF__RENDERER_HPP_
diff --git a/localization/nerf_based_localizer/src/nerf/sh_shader.cpp b/localization/nerf_based_localizer/src/nerf/sh_shader.cpp
new file mode 100644
index 0000000000000..84ed5004a23e4
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/sh_shader.cpp
@@ -0,0 +1,56 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Shader/SHShader.cpp
+//
+// Created by ppwang on 2022/10/8.
+//
+
+#include "sh_shader.hpp"
+
+#include "common.hpp"
+
+using Tensor = torch::Tensor;
+
+SHShader::SHShader()
+{
+ const int d_in = 32;
+ const int d_hidden = 64;
+ const int d_out = 3;
+
+ mlp_ = torch::nn::Sequential(
+ torch::nn::Linear(d_in, d_hidden), torch::nn::ReLU(), torch::nn::Linear(d_hidden, d_out));
+ register_module("mlp", mlp_);
+}
+
+Tensor SHShader::query(const Tensor & feats, const Tensor & dirs)
+{
+ Tensor enc = encode(dirs);
+ Tensor input = torch::cat({feats, enc}, -1);
+ Tensor output = mlp_->forward(input);
+ float eps = 1e-3f;
+ return (1.f + 2.f * eps) / (1.f + torch::exp(-output)) - eps;
+}
+
+std::vector<torch::optim::OptimizerParamGroup> SHShader::optim_param_groups(float lr)
+{
+  auto opt = std::make_unique<torch::optim::AdamOptions>(lr);
+ opt->betas() = {0.9, 0.99};
+ opt->eps() = 1e-15;
+ opt->weight_decay() = 1e-6;
+
+  std::vector<Tensor> params = mlp_->parameters();
+ return {torch::optim::OptimizerParamGroup(params, std::move(opt))};
+}
diff --git a/localization/nerf_based_localizer/src/nerf/sh_shader.cu b/localization/nerf_based_localizer/src/nerf/sh_shader.cu
new file mode 100644
index 0000000000000..7a46b30eae60a
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/sh_shader.cu
@@ -0,0 +1,224 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Shader/SHShader.cu
+//
+// Created by ppwang on 2022/10/8.
+//
+
+#include "common.hpp"
+#include "common_cuda.hpp"
+#include "sh_shader.hpp"
+
+using Tensor = torch::Tensor;
+
+__global__ void SHKernel(
+ const uint32_t num_elements, const uint32_t degree, float * data_in, float * data_out)
+{
+ const uint32_t i = threadIdx.x + blockIdx.x * blockDim.x;
+ if (i >= num_elements) return;
+
+ data_out = data_out + (degree * degree) * i;
+
+ float x = data_in[i * 3];
+ float y = data_in[i * 3 + 1];
+ float z = data_in[i * 3 + 2];
+
+ // Let compiler figure out how to sequence/reorder these calculations w.r.t. branches
+ float xy = x * y, xz = x * z, yz = y * z, x2 = x * x, y2 = y * y, z2 = z * z;
+ float x4 = x2 * x2, y4 = y2 * y2, z4 = z2 * z2;
+ float x6 = x4 * x2, y6 = y4 * y2, z6 = z4 * z2;
+
+ // SH polynomials generated using scripts/gen_sh.py based on the recurrence relations in appendix
+ // A1 of https://www.ppsloan.org/publications/StupidSH36.pdf
+
+ data_out[0] = 0.28209479177387814f; // 1/(2*sqrt(pi))
+ if (degree <= 1) {
+ return;
+ }
+ data_out[1] = -0.48860251190291987f * y; // -sqrt(3)*y/(2*sqrt(pi))
+ data_out[2] = 0.48860251190291987f * z; // sqrt(3)*z/(2*sqrt(pi))
+ data_out[3] = -0.48860251190291987f * x; // -sqrt(3)*x/(2*sqrt(pi))
+ if (degree <= 2) {
+ return;
+ }
+ data_out[4] = 1.0925484305920792f * xy; // sqrt(15)*xy/(2*sqrt(pi))
+ data_out[5] = -1.0925484305920792f * yz; // -sqrt(15)*yz/(2*sqrt(pi))
+ data_out[6] =
+ 0.94617469575755997f * z2 - 0.31539156525251999f; // sqrt(5)*(3*z2 - 1)/(4*sqrt(pi))
+ data_out[7] = -1.0925484305920792f * xz; // -sqrt(15)*xz/(2*sqrt(pi))
+ data_out[8] =
+ 0.54627421529603959f * x2 - 0.54627421529603959f * y2; // sqrt(15)*(x2 - y2)/(4*sqrt(pi))
+ if (degree <= 3) {
+ return;
+ }
+ data_out[9] =
+ 0.59004358992664352f * y * (-3.0f * x2 + y2); // sqrt(70)*y*(-3*x2 + y2)/(8*sqrt(pi))
+ data_out[10] = 2.8906114426405538f * xy * z; // sqrt(105)*xy*z/(2*sqrt(pi))
+ data_out[11] =
+ 0.45704579946446572f * y * (1.0f - 5.0f * z2); // sqrt(42)*y*(1 - 5*z2)/(8*sqrt(pi))
+ data_out[12] = 0.3731763325901154f * z * (5.0f * z2 - 3.0f); // sqrt(7)*z*(5*z2 - 3)/(4*sqrt(pi))
+ data_out[13] =
+ 0.45704579946446572f * x * (1.0f - 5.0f * z2); // sqrt(42)*x*(1 - 5*z2)/(8*sqrt(pi))
+ data_out[14] = 1.4453057213202769f * z * (x2 - y2); // sqrt(105)*z*(x2 - y2)/(4*sqrt(pi))
+ data_out[15] =
+ 0.59004358992664352f * x * (-x2 + 3.0f * y2); // sqrt(70)*x*(-x2 + 3*y2)/(8*sqrt(pi))
+ if (degree <= 4) {
+ return;
+ }
+ data_out[16] = 2.5033429417967046f * xy * (x2 - y2); // 3*sqrt(35)*xy*(x2 - y2)/(4*sqrt(pi))
+ data_out[17] =
+ 1.7701307697799304f * yz * (-3.0f * x2 + y2); // 3*sqrt(70)*yz*(-3*x2 + y2)/(8*sqrt(pi))
+ data_out[18] =
+ 0.94617469575756008f * xy * (7.0f * z2 - 1.0f); // 3*sqrt(5)*xy*(7*z2 - 1)/(4*sqrt(pi))
+ data_out[19] =
+ 0.66904654355728921f * yz * (3.0f - 7.0f * z2); // 3*sqrt(10)*yz*(3 - 7*z2)/(8*sqrt(pi))
+ data_out[20] = -3.1735664074561294f * z2 + 3.7024941420321507f * z4 +
+ 0.31735664074561293f; // 3*(-30*z2 + 35*z4 + 3)/(16*sqrt(pi))
+ data_out[21] =
+ 0.66904654355728921f * xz * (3.0f - 7.0f * z2); // 3*sqrt(10)*xz*(3 - 7*z2)/(8*sqrt(pi))
+ data_out[22] = 0.47308734787878004f * (x2 - y2) *
+ (7.0f * z2 - 1.0f); // 3*sqrt(5)*(x2 - y2)*(7*z2 - 1)/(8*sqrt(pi))
+ data_out[23] =
+ 1.7701307697799304f * xz * (-x2 + 3.0f * y2); // 3*sqrt(70)*xz*(-x2 + 3*y2)/(8*sqrt(pi))
+ data_out[24] = -3.7550144126950569f * x2 * y2 + 0.62583573544917614f * x4 +
+ 0.62583573544917614f * y4; // 3*sqrt(35)*(-6*x2*y2 + x4 + y4)/(16*sqrt(pi))
+ if (degree <= 5) {
+ return;
+ }
+ data_out[25] =
+ 0.65638205684017015f * y *
+ (10.0f * x2 * y2 - 5.0f * x4 - y4); // 3*sqrt(154)*y*(10*x2*y2 - 5*x4 - y4)/(32*sqrt(pi))
+ data_out[26] =
+ 8.3026492595241645f * xy * z * (x2 - y2); // 3*sqrt(385)*xy*z*(x2 - y2)/(4*sqrt(pi))
+ data_out[27] = -0.48923829943525038f * y * (3.0f * x2 - y2) *
+ (9.0f * z2 - 1.0f); // -sqrt(770)*y*(3*x2 - y2)*(9*z2 - 1)/(32*sqrt(pi))
+ data_out[28] =
+ 4.7935367849733241f * xy * z * (3.0f * z2 - 1.0f); // sqrt(1155)*xy*z*(3*z2 - 1)/(4*sqrt(pi))
+ data_out[29] = 0.45294665119569694f * y *
+ (14.0f * z2 - 21.0f * z4 - 1.0f); // sqrt(165)*y*(14*z2 - 21*z4 - 1)/(16*sqrt(pi))
+ data_out[30] =
+ 0.1169503224534236f * z *
+ (-70.0f * z2 + 63.0f * z4 + 15.0f); // sqrt(11)*z*(-70*z2 + 63*z4 + 15)/(16*sqrt(pi))
+ data_out[31] = 0.45294665119569694f * x *
+ (14.0f * z2 - 21.0f * z4 - 1.0f); // sqrt(165)*x*(14*z2 - 21*z4 - 1)/(16*sqrt(pi))
+ data_out[32] = 2.3967683924866621f * z * (x2 - y2) *
+ (3.0f * z2 - 1.0f); // sqrt(1155)*z*(x2 - y2)*(3*z2 - 1)/(8*sqrt(pi))
+ data_out[33] = -0.48923829943525038f * x * (x2 - 3.0f * y2) *
+ (9.0f * z2 - 1.0f); // -sqrt(770)*x*(x2 - 3*y2)*(9*z2 - 1)/(32*sqrt(pi))
+ data_out[34] = 2.0756623148810411f * z *
+ (-6.0f * x2 * y2 + x4 + y4); // 3*sqrt(385)*z*(-6*x2*y2 + x4 + y4)/(16*sqrt(pi))
+ data_out[35] =
+ 0.65638205684017015f * x *
+ (10.0f * x2 * y2 - x4 - 5.0f * y4); // 3*sqrt(154)*x*(10*x2*y2 - x4 - 5*y4)/(32*sqrt(pi))
+ if (degree <= 6) {
+ return;
+ }
+ data_out[36] = 1.3663682103838286f * xy *
+ (-10.0f * x2 * y2 + 3.0f * x4 +
+ 3.0f * y4); // sqrt(6006)*xy*(-10*x2*y2 + 3*x4 + 3*y4)/(32*sqrt(pi))
+ data_out[37] =
+ 2.3666191622317521f * yz *
+ (10.0f * x2 * y2 - 5.0f * x4 - y4); // 3*sqrt(2002)*yz*(10*x2*y2 - 5*x4 - y4)/(32*sqrt(pi))
+ data_out[38] = 2.0182596029148963f * xy * (x2 - y2) *
+ (11.0f * z2 - 1.0f); // 3*sqrt(91)*xy*(x2 - y2)*(11*z2 - 1)/(8*sqrt(pi))
+ data_out[39] = -0.92120525951492349f * yz * (3.0f * x2 - y2) *
+ (11.0f * z2 - 3.0f); // -sqrt(2730)*yz*(3*x2 - y2)*(11*z2 - 3)/(32*sqrt(pi))
+ data_out[40] =
+ 0.92120525951492349f * xy *
+ (-18.0f * z2 + 33.0f * z4 + 1.0f); // sqrt(2730)*xy*(-18*z2 + 33*z4 + 1)/(32*sqrt(pi))
+ data_out[41] =
+ 0.58262136251873131f * yz *
+ (30.0f * z2 - 33.0f * z4 - 5.0f); // sqrt(273)*yz*(30*z2 - 33*z4 - 5)/(16*sqrt(pi))
+ data_out[42] = 6.6747662381009842f * z2 - 20.024298714302954f * z4 + 14.684485723822165f * z6 -
+ 0.31784601133814211f; // sqrt(13)*(105*z2 - 315*z4 + 231*z6 - 5)/(32*sqrt(pi))
+ data_out[43] =
+ 0.58262136251873131f * xz *
+ (30.0f * z2 - 33.0f * z4 - 5.0f); // sqrt(273)*xz*(30*z2 - 33*z4 - 5)/(16*sqrt(pi))
+ data_out[44] = 0.46060262975746175f * (x2 - y2) *
+ (11.0f * z2 * (3.0f * z2 - 1.0f) - 7.0f * z2 +
+ 1.0f); // sqrt(2730)*(x2 - y2)*(11*z2*(3*z2 - 1) - 7*z2 + 1)/(64*sqrt(pi))
+ data_out[45] = -0.92120525951492349f * xz * (x2 - 3.0f * y2) *
+ (11.0f * z2 - 3.0f); // -sqrt(2730)*xz*(x2 - 3*y2)*(11*z2 - 3)/(32*sqrt(pi))
+ data_out[46] =
+ 0.50456490072872406f * (11.0f * z2 - 1.0f) *
+ (-6.0f * x2 * y2 + x4 + y4); // 3*sqrt(91)*(11*z2 - 1)*(-6*x2*y2 + x4 + y4)/(32*sqrt(pi))
+ data_out[47] =
+ 2.3666191622317521f * xz *
+ (10.0f * x2 * y2 - x4 - 5.0f * y4); // 3*sqrt(2002)*xz*(10*x2*y2 - x4 - 5*y4)/(32*sqrt(pi))
+ data_out[48] =
+ 10.247761577878714f * x2 * y4 - 10.247761577878714f * x4 * y2 + 0.6831841051919143f * x6 -
+ 0.6831841051919143f * y6; // sqrt(6006)*(15*x2*y4 - 15*x4*y2 + x6 - y6)/(64*sqrt(pi))
+ if (degree <= 7) {
+ return;
+ }
+ data_out[49] = 0.70716273252459627f * y *
+ (-21.0f * x2 * y4 + 35.0f * x4 * y2 - 7.0f * x6 +
+ y6); // 3*sqrt(715)*y*(-21*x2*y4 + 35*x4*y2 - 7*x6 + y6)/(64*sqrt(pi))
+ data_out[50] = 5.2919213236038001f * xy * z *
+ (-10.0f * x2 * y2 + 3.0f * x4 +
+ 3.0f * y4); // 3*sqrt(10010)*xy*z*(-10*x2*y2 + 3*x4 + 3*y4)/(32*sqrt(pi))
+ data_out[51] = -0.51891557872026028f * y * (13.0f * z2 - 1.0f) *
+ (-10.0f * x2 * y2 + 5.0f * x4 +
+ y4); // -3*sqrt(385)*y*(13*z2 - 1)*(-10*x2*y2 + 5*x4 + y4)/(64*sqrt(pi))
+ data_out[52] = 4.1513246297620823f * xy * z * (x2 - y2) *
+ (13.0f * z2 - 3.0f); // 3*sqrt(385)*xy*z*(x2 - y2)*(13*z2 - 3)/(8*sqrt(pi))
+ data_out[53] = -0.15645893386229404f * y * (3.0f * x2 - y2) *
+ (13.0f * z2 * (11.0f * z2 - 3.0f) - 27.0f * z2 +
+ 3.0f); // -3*sqrt(35)*y*(3*x2 - y2)*(13*z2*(11*z2 - 3) - 27*z2 + 3)/(64*sqrt(pi))
+ data_out[54] =
+ 0.44253269244498261f * xy * z *
+ (-110.0f * z2 + 143.0f * z4 + 15.0f); // 3*sqrt(70)*xy*z*(-110*z2 + 143*z4 + 15)/(32*sqrt(pi))
+ data_out[55] = 0.090331607582517306f * y *
+ (-135.0f * z2 + 495.0f * z4 - 429.0f * z6 +
+ 5.0f); // sqrt(105)*y*(-135*z2 + 495*z4 - 429*z6 + 5)/(64*sqrt(pi))
+ data_out[56] = 0.068284276912004949f * z *
+ (315.0f * z2 - 693.0f * z4 + 429.0f * z6 -
+ 35.0f); // sqrt(15)*z*(315*z2 - 693*z4 + 429*z6 - 35)/(32*sqrt(pi))
+ data_out[57] = 0.090331607582517306f * x *
+ (-135.0f * z2 + 495.0f * z4 - 429.0f * z6 +
+ 5.0f); // sqrt(105)*x*(-135*z2 + 495*z4 - 429*z6 + 5)/(64*sqrt(pi))
+ data_out[58] = 0.07375544874083044f * z * (x2 - y2) *
+ (143.0f * z2 * (3.0f * z2 - 1.0f) - 187.0f * z2 +
+ 45.0f); // sqrt(70)*z*(x2 - y2)*(143*z2*(3*z2 - 1) - 187*z2 + 45)/(64*sqrt(pi))
+ data_out[59] = -0.15645893386229404f * x * (x2 - 3.0f * y2) *
+ (13.0f * z2 * (11.0f * z2 - 3.0f) - 27.0f * z2 +
+ 3.0f); // -3*sqrt(35)*x*(x2 - 3*y2)*(13*z2*(11*z2 - 3) - 27*z2 + 3)/(64*sqrt(pi))
+ data_out[60] =
+ 1.0378311574405206f * z * (13.0f * z2 - 3.0f) *
+ (-6.0f * x2 * y2 + x4 + y4); // 3*sqrt(385)*z*(13*z2 - 3)*(-6*x2*y2 + x4 + y4)/(32*sqrt(pi))
+ data_out[61] = -0.51891557872026028f * x * (13.0f * z2 - 1.0f) *
+ (-10.0f * x2 * y2 + x4 +
+ 5.0f * y4); // -3*sqrt(385)*x*(13*z2 - 1)*(-10*x2*y2 + x4 + 5*y4)/(64*sqrt(pi))
+ data_out[62] = 2.6459606618019f * z *
+ (15.0f * x2 * y4 - 15.0f * x4 * y2 + x6 -
+ y6); // 3*sqrt(10010)*z*(15*x2*y4 - 15*x4*y2 + x6 - y6)/(64*sqrt(pi))
+ data_out[63] = 0.70716273252459627f * x *
+ (-35.0f * x2 * y4 + 21.0f * x4 * y2 - x6 +
+ 7.0f * y6); // 3*sqrt(715)*x*(-35*x2*y4 + 21*x4*y2 - x6 + 7*y6)/(64*sqrt(pi))
+}
+
+Tensor SHShader::encode(const Tensor & dirs)
+{
+ CHECK(dirs.is_contiguous());
+ int n_pts = dirs.size(0);
+ Tensor out = torch::empty({n_pts, DEGREE * DEGREE}, CUDAFloat);
+ dim3 grid_dim = LIN_GRID_DIM(n_pts);
+ dim3 block_dim = LIN_BLOCK_DIM;
+
+  SHKernel<<<grid_dim, block_dim>>>(n_pts, DEGREE, dirs.data_ptr<float>(), out.data_ptr<float>());
+
+ return out;
+}
diff --git a/localization/nerf_based_localizer/src/nerf/sh_shader.hpp b/localization/nerf_based_localizer/src/nerf/sh_shader.hpp
new file mode 100644
index 0000000000000..66f0b99625561
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/sh_shader.hpp
@@ -0,0 +1,45 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Shader/SHShader.h
+//
+// Created by ppwang on 2022/10/8.
+//
+
+#ifndef NERF__SH_SHADER_HPP_
+#define NERF__SH_SHADER_HPP_
+
+#include <torch/torch.h>
+
+class SHShader : public torch::nn::Module
+{
+ using Tensor = torch::Tensor;
+
+public:
+ SHShader();
+
+ Tensor query(const Tensor & feats, const Tensor & dirs);
+
+  std::vector<torch::optim::OptimizerParamGroup> optim_param_groups(float lr);
+
+private:
+ static constexpr int DEGREE = 4;
+
+ Tensor encode(const Tensor & dirs);
+
+ torch::nn::Sequential mlp_ = nullptr;
+};
+
+#endif // NERF__SH_SHADER_HPP_
diff --git a/localization/nerf_based_localizer/src/nerf/stop_watch.cpp b/localization/nerf_based_localizer/src/nerf/stop_watch.cpp
new file mode 100644
index 0000000000000..c629b56f9bda4
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/stop_watch.cpp
@@ -0,0 +1,41 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Utils/StopWatch.cpp
+//
+// Created by ppwang on 2022/5/18.
+//
+
+#include "stop_watch.hpp"
+
+#include <torch/torch.h>
+
+#include <iostream>
+
+ScopeWatch::ScopeWatch(const std::string & scope_name) : scope_name_(scope_name)
+{
+ torch::cuda::synchronize();
+ t_point_ = std::chrono::steady_clock::now();
+ std::cout << "[" << scope_name_ << "] begin" << std::endl;
+}
+
+ScopeWatch::~ScopeWatch()
+{
+ torch::cuda::synchronize();
+ std::chrono::steady_clock::time_point new_point = std::chrono::steady_clock::now();
+  std::chrono::duration<double> time_span =
+    std::chrono::duration_cast<std::chrono::duration<double>>(new_point - t_point_);
+ std::cout << "[" << scope_name_ << "] end in " << time_span.count() << " seconds" << std::endl;
+}
diff --git a/localization/nerf_based_localizer/src/nerf/stop_watch.hpp b/localization/nerf_based_localizer/src/nerf/stop_watch.hpp
new file mode 100644
index 0000000000000..86483eee9484d
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/stop_watch.hpp
@@ -0,0 +1,53 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Utils/StopWatch.h
+//
+// Created by ppwang on 2022/5/18.
+//
+
+#ifndef NERF__STOP_WATCH_HPP_
+#define NERF__STOP_WATCH_HPP_
+
+#include <chrono>
+#include <string>
+
+class ScopeWatch
+{
+public:
+ ScopeWatch(const std::string & scope_name);
+ ~ScopeWatch();
+
+private:
+ std::chrono::steady_clock::time_point t_point_;
+ std::string scope_name_;
+};
+
+class Timer
+{
+public:
+ void start() { start_time_ = std::chrono::steady_clock::now(); }
+ int64_t elapsed_milli_seconds() const
+ {
+ auto elapsed = std::chrono::steady_clock::now() - start_time_;
+    return std::chrono::duration_cast<std::chrono::milliseconds>(elapsed).count();
+ }
+ double elapsed_seconds() const { return elapsed_milli_seconds() / 1000.0; }
+
+private:
+ std::chrono::steady_clock::time_point start_time_;
+};
+
+#endif // NERF__STOP_WATCH_HPP_
diff --git a/localization/nerf_based_localizer/src/nerf/utils.cpp b/localization/nerf_based_localizer/src/nerf/utils.cpp
new file mode 100644
index 0000000000000..4c72509422381
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/utils.cpp
@@ -0,0 +1,80 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Utils/Utils.cpp
+//
+// Created by ppwang on 2023/4/4.
+//
+
#include "utils.hpp"

#include "common.hpp"

#include <opencv2/opencv.hpp>

#include <torch/torch.h>

#include <stdexcept>
#include <string>
#include <vector>
+
+using Tensor = torch::Tensor;
+
+Tensor utils::read_image_tensor(const std::string & path)
+{
+ cv::Mat img = cv::imread(path, cv::IMREAD_UNCHANGED);
+ cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
+ img.convertTo(img, CV_32FC3, 1.0 / 255.0);
+ Tensor img_tensor =
+ torch::from_blob(img.data, {img.rows, img.cols, img.channels()}, torch::kFloat32).clone();
+ return img_tensor;
+}
+
+bool utils::write_image_tensor(const std::string & path, Tensor img)
+{
+ img = img.contiguous();
+ img = (img * 255.f).clamp(0, 255).to(torch::kUInt8).to(torch::kCPU);
+ cv::Mat img_mat(img.size(0), img.size(1), CV_8UC3, img.data_ptr());
+ cv::cvtColor(img_mat, img_mat, cv::COLOR_RGB2BGR);
+ cv::imwrite(path, img_mat);
+ return true;
+}
+
+Tensor utils::resize_image(Tensor image, const int resize_height, const int resize_width)
+{
+ const int height = image.size(0);
+ const int width = image.size(1);
+ if (height == resize_height && width == resize_width) {
+ return image;
+ }
+
+ // change HWC to CHW
+ image = image.permute({2, 0, 1});
+ image = image.unsqueeze(0); // add batch dim
+
+ // Resize
+ std::vector size = {resize_height, resize_width};
+ image = torch::nn::functional::interpolate(
+ image, torch::nn::functional::InterpolateFuncOptions().size(size));
+
+ // change CHW to HWC
+ image = image.squeeze(0); // remove batch dim
+ image = image.permute({1, 2, 0});
+ return image;
+}
+
+float utils::calc_loss(Tensor pred_image, Tensor gt_image)
+{
+ Tensor diff = pred_image - gt_image;
+ Tensor loss = (diff * diff).mean(-1);
+ Tensor score = loss.numel() / (loss.sum() + 1e-6f);
+ return score.mean().item();
+}
diff --git a/localization/nerf_based_localizer/src/nerf/utils.hpp b/localization/nerf_based_localizer/src/nerf/utils.hpp
new file mode 100644
index 0000000000000..1942c3b71c2a3
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/utils.hpp
@@ -0,0 +1,39 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Utils/Utils.h
+//
+// Created by ppwang on 2022/5/11.
+//
+
+#ifndef NERF__UTILS_HPP_
+#define NERF__UTILS_HPP_
+
#include <torch/torch.h>

#include <string>
+
+namespace utils
+{
+using Tensor = torch::Tensor;
+
+Tensor read_image_tensor(const std::string & path);
+bool write_image_tensor(const std::string & path, Tensor img);
+Tensor resize_image(Tensor image, const int resize_height, const int resize_width);
+float calc_loss(Tensor pred_image, Tensor gt_image);
+
+} // namespace utils
+
+#endif // NERF__UTILS_HPP_
diff --git a/localization/nerf_based_localizer/src/nerf_based_localizer.cpp b/localization/nerf_based_localizer/src/nerf_based_localizer.cpp
new file mode 100644
index 0000000000000..5b89179fc9a81
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf_based_localizer.cpp
@@ -0,0 +1,378 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "nerf_based_localizer.hpp"
+
+#include "nerf/stop_watch.hpp"
+#include "nerf/utils.hpp"
+
// NOTE(review): include targets below were reconstructed; the angle-bracket
// contents were lost in extraction — confirm against the original file.
#include <Eigen/Eigen>
#include <rclcpp/rclcpp.hpp>
#include <tf2_eigen/tf2_eigen.hpp>

#include <torch/torch.h>

#include <algorithm>
+
+geometry_msgs::msg::Pose transform_pose(
+ const geometry_msgs::msg::Pose & pose, const geometry_msgs::msg::TransformStamped & transform)
+{
+ Eigen::Quaterniond R1(
+ transform.transform.rotation.w, transform.transform.rotation.x, transform.transform.rotation.y,
+ transform.transform.rotation.z);
+ Eigen::Vector3d t1(
+ transform.transform.translation.x, transform.transform.translation.y,
+ transform.transform.translation.z);
+
+ Eigen::Quaterniond R2(
+ pose.orientation.w, pose.orientation.x, pose.orientation.y, pose.orientation.z);
+ Eigen::Vector3d t2(pose.position.x, pose.position.y, pose.position.z);
+
+ Eigen::Quaterniond R = R2 * R1;
+ Eigen::Vector3d t = R2._transformVector(t1) + t2;
+
+ geometry_msgs::msg::Pose result_pose;
+ result_pose.orientation.x = R.x();
+ result_pose.orientation.y = R.y();
+ result_pose.orientation.z = R.z();
+ result_pose.orientation.w = R.w();
+ result_pose.position.x = t.x();
+ result_pose.position.y = t.y();
+ result_pose.position.z = t.z();
+
+ return result_pose;
+}
+
// Construct the NeRF-based localizer node: declare parameters, build the core
// Localizer, and wire up the subscriptions, publishers and services.
// NOTE(review): the declare_parameter / create_subscription / create_publisher /
// create_service calls below appear to have lost their template arguments
// (e.g. declare_parameter<int>(...)) during extraction — confirm the exact
// type arguments against the original file before building.
NerfBasedLocalizer::NerfBasedLocalizer(
  const std::string & name_space, const rclcpp::NodeOptions & options)
: Node("nerf_based_localizer", name_space, options),
  tf_buffer_(this->get_clock()),
  tf_listener_(tf_buffer_),
  tf2_broadcaster_(*this),
  map_frame_("map"),
  particle_num_(this->declare_parameter("particle_num")),
  output_covariance_(this->declare_parameter("output_covariance")),
  iteration_num_(this->declare_parameter("iteration_num")),
  learning_rate_(this->declare_parameter("learning_rate")),
  is_activated_(true),  // the node starts active; can be toggled via the trigger service
  optimization_mode_(this->declare_parameter("optimization_mode"))
{
  // Parameters forwarded to the core NeRF localizer.
  LocalizerParam param;
  param.train_result_dir = this->declare_parameter("train_result_dir");
  param.render_pixel_num = this->declare_parameter("render_pixel_num");
  param.noise_position_x = this->declare_parameter("noise_position_x");
  param.noise_position_y = this->declare_parameter("noise_position_y");
  param.noise_position_z = this->declare_parameter("noise_position_z");
  param.noise_rotation_x = this->declare_parameter("noise_rotation_x");
  param.noise_rotation_y = this->declare_parameter("noise_rotation_y");
  param.noise_rotation_z = this->declare_parameter("noise_rotation_z");
  param.resize_factor = this->declare_parameter("resize_factor");
  param.sample_num_per_ray = this->declare_parameter("sample_num_per_ray");
  localizer_ = Localizer(param);

  // Inputs: initial pose estimate (map frame) and camera image.
  initial_pose_with_covariance_subscriber_ =
    this->create_subscription(
      "~/input/pose", 100,
      std::bind(&NerfBasedLocalizer::callback_initial_pose, this, std::placeholders::_1));

  image_subscriber_ = this->create_subscription(
    "~/input/image", rclcpp::SensorDataQoS().keep_last(0),
    std::bind(&NerfBasedLocalizer::callback_image, this, std::placeholders::_1));

  // Outputs: optimized pose (plain and with covariance), match score, and the
  // NeRF-rendered image for visual inspection.
  nerf_pose_publisher_ =
    this->create_publisher("~/output/pose", 10);
  nerf_pose_with_covariance_publisher_ =
    this->create_publisher(
      "~/output/pose_with_covariance", 10);
  nerf_score_publisher_ = this->create_publisher("~/output/score", 10);
  nerf_image_publisher_ = this->create_publisher("~/output/image", 10);

  // On-demand pose optimization service (used e.g. by the pose initializer).
  service_ = this->create_service(
    "~/service/optimize_pose",
    std::bind(&NerfBasedLocalizer::service, this, std::placeholders::_1, std::placeholders::_2),
    rclcpp::ServicesQoS().get_rmw_qos_profile());

  // Activation toggle service.
  service_trigger_node_ = this->create_service(
    "~/service/trigger_node",
    std::bind(
      &NerfBasedLocalizer::service_trigger_node, this, std::placeholders::_1,
      std::placeholders::_2),
    rclcpp::ServicesQoS().get_rmw_qos_profile());

  RCLCPP_DEBUG(this->get_logger(), "nerf_based_localizer is created.");
}
+
+void NerfBasedLocalizer::callback_initial_pose(
+ const geometry_msgs::msg::PoseWithCovarianceStamped::ConstSharedPtr initial_pose_msg_ptr)
+{
+ // if rosbag restart, clear buffer
+ if (!initial_pose_msg_ptr_array_.empty()) {
+ const builtin_interfaces::msg::Time & t_front =
+ initial_pose_msg_ptr_array_.front()->header.stamp;
+ const builtin_interfaces::msg::Time & t_msg = initial_pose_msg_ptr->header.stamp;
+ if (t_front.sec > t_msg.sec || (t_front.sec == t_msg.sec && t_front.nanosec > t_msg.nanosec)) {
+ initial_pose_msg_ptr_array_.clear();
+ }
+ }
+
+ if (initial_pose_msg_ptr->header.frame_id == map_frame_) {
+ initial_pose_msg_ptr_array_.push_back(initial_pose_msg_ptr);
+ if (initial_pose_msg_ptr_array_.size() > 1) {
+ initial_pose_msg_ptr_array_.pop_front();
+ }
+ } else {
+ RCLCPP_ERROR(this->get_logger(), "initial_pose_with_covariance is not in map frame.");
+ std::exit(1);
+ }
+}
+
// Run one localization step per incoming image: optimize the pose against the
// latest buffered initial pose, then publish pose, pose-with-covariance,
// match score, and the NeRF-rendered image.
void NerfBasedLocalizer::callback_image(const sensor_msgs::msg::Image::ConstSharedPtr image_msg_ptr)
{
  // Remember the camera frame for later TF lookups, and keep only the most
  // recent image in the buffer.
  target_frame_ = image_msg_ptr->header.frame_id;
  image_msg_ptr_array_.push_back(image_msg_ptr);
  if (image_msg_ptr_array_.size() > 1) {
    image_msg_ptr_array_.pop_front();
  }

  if (!is_activated_) {
    RCLCPP_ERROR(this->get_logger(), "NerfBasedLocalizer is not activated in callback_image.");
    return;
  }

  // Localization needs an initial pose; skip this image if none has arrived.
  if (initial_pose_msg_ptr_array_.empty()) {
    RCLCPP_ERROR(this->get_logger(), "initial_pose_with_covariance is not received.");
    return;
  }

  // Consume the most recent initial pose (the buffer holds at most one).
  const geometry_msgs::msg::PoseWithCovarianceStamped::ConstSharedPtr pose_base_link =
    initial_pose_msg_ptr_array_.back();
  initial_pose_msg_ptr_array_.pop_back();

  // Process
  const auto [pose_msg, image_msg, score_msg] = localize(pose_base_link->pose.pose, *image_msg_ptr);

  // (1) publish nerf_pose
  geometry_msgs::msg::PoseStamped pose_stamped_msg;
  pose_stamped_msg.header.frame_id = map_frame_;
  pose_stamped_msg.header.stamp = image_msg_ptr->header.stamp;
  pose_stamped_msg.pose = pose_msg;
  nerf_pose_publisher_->publish(pose_stamped_msg);

  // (2) publish nerf_pose_with_covariance
  // Covariance is a fixed configured value, not estimated: diagonal entries
  // 0/7/14 are x/y/z position variance; 21/28/35 are roll/pitch/yaw variance
  // (10x the position value).
  geometry_msgs::msg::PoseWithCovarianceStamped pose_with_cov_msg;
  pose_with_cov_msg.header.frame_id = map_frame_;
  pose_with_cov_msg.header.stamp = image_msg_ptr->header.stamp;
  pose_with_cov_msg.pose.pose = pose_msg;
  pose_with_cov_msg.pose.covariance[0] = output_covariance_;
  pose_with_cov_msg.pose.covariance[7] = output_covariance_;
  pose_with_cov_msg.pose.covariance[14] = output_covariance_;
  pose_with_cov_msg.pose.covariance[21] = output_covariance_ * 10;
  pose_with_cov_msg.pose.covariance[28] = output_covariance_ * 10;
  pose_with_cov_msg.pose.covariance[35] = output_covariance_ * 10;
  nerf_pose_with_covariance_publisher_->publish(pose_with_cov_msg);

  // (3) publish score
  nerf_score_publisher_->publish(score_msg);

  // (4) publish image
  nerf_image_publisher_->publish(image_msg);
}
+
// Service handler: optimize the requested pose against the most recently
// received camera image and return the result.
// Fails (success = false) when no image has been received yet.
void NerfBasedLocalizer::service(
  const tier4_localization_msgs::srv::PoseWithCovarianceStamped::Request::SharedPtr req,
  tier4_localization_msgs::srv::PoseWithCovarianceStamped::Response::SharedPtr res)
{
  RCLCPP_DEBUG(this->get_logger(), "start NerfBasedLocalizer::service");

  if (image_msg_ptr_array_.empty()) {
    RCLCPP_ERROR(this->get_logger(), "image is not received.");
    res->success = false;
    return;
  }

  // Get the most recent image (the buffer holds at most one element, so
  // back() is the latest — the previous comment saying "oldest" was wrong).
  const sensor_msgs::msg::Image::ConstSharedPtr image_msg_ptr = image_msg_ptr_array_.back();

  // Process
  const auto [pose_msg, image_msg, score_msg] =
    localize(req->pose_with_covariance.pose.pose, *image_msg_ptr);

  // The response covariance is copied from the request unchanged; only the
  // pose itself is optimized.
  res->success = true;
  res->pose_with_covariance.header.frame_id = map_frame_;
  res->pose_with_covariance.header.stamp = image_msg_ptr->header.stamp;
  res->pose_with_covariance.pose.pose = pose_msg;
  res->pose_with_covariance.pose.covariance = req->pose_with_covariance.pose.covariance;

  RCLCPP_DEBUG(this->get_logger(), "finish NerfBasedLocalizer::service");
}
+
+std::tuple
+NerfBasedLocalizer::localize(
+ const geometry_msgs::msg::Pose & pose_msg, const sensor_msgs::msg::Image & image_msg)
+{
+ Timer timer;
+ timer.start();
+
+ // Get data of image_ptr
+ // Accessing header information
+ const std_msgs::msg::Header header = image_msg.header;
+
+ // Accessing image properties
+ const uint32_t width = image_msg.width;
+ const uint32_t height = image_msg.height;
+
+ RCLCPP_DEBUG_STREAM(
+ this->get_logger(), "Image received. width: " << width << ", height: " << height);
+
+ // Accessing image data
+ torch::Tensor image_tensor = torch::tensor(image_msg.data);
+ image_tensor = image_tensor.view({height, width, 3});
+ image_tensor = image_tensor.index({Slc(0, 850)});
+ image_tensor = image_tensor.to(torch::kFloat32);
+ image_tensor /= 255.0;
+ image_tensor = image_tensor.flip(2); // BGR to RGB
+ image_tensor =
+ utils::resize_image(image_tensor, localizer_.infer_height(), localizer_.infer_width());
+ image_tensor = image_tensor.to(torch::kCUDA);
+
+ geometry_msgs::msg::PoseWithCovarianceStamped pose_camera;
+ try {
+ geometry_msgs::msg::TransformStamped transform =
+ tf_buffer_.lookupTransform("base_link", target_frame_, tf2::TimePointZero);
+ pose_camera.pose.pose = transform_pose(pose_msg, transform);
+ } catch (tf2::TransformException & ex) {
+ RCLCPP_WARN(this->get_logger(), "%s", ex.what());
+ }
+
+ const geometry_msgs::msg::Pose pose = pose_camera.pose.pose;
+
+ Eigen::Quaternionf quat_in(
+ pose.orientation.w, pose.orientation.x, pose.orientation.y, pose.orientation.z);
+ Eigen::Matrix3f rot_in = quat_in.toRotationMatrix();
+
+ torch::Tensor initial_pose = torch::eye(4);
+ initial_pose[0][0] = rot_in(0, 0);
+ initial_pose[0][1] = rot_in(0, 1);
+ initial_pose[0][2] = rot_in(0, 2);
+ initial_pose[0][3] = pose.position.x;
+ initial_pose[1][0] = rot_in(1, 0);
+ initial_pose[1][1] = rot_in(1, 1);
+ initial_pose[1][2] = rot_in(1, 2);
+ initial_pose[1][3] = pose.position.y;
+ initial_pose[2][0] = rot_in(2, 0);
+ initial_pose[2][1] = rot_in(2, 1);
+ initial_pose[2][2] = rot_in(2, 2);
+ initial_pose[2][3] = pose.position.z;
+ initial_pose = initial_pose.to(torch::kCUDA);
+ initial_pose = initial_pose.to(torch::kFloat32);
+ RCLCPP_DEBUG_STREAM(this->get_logger(), "pose_before:\n" << initial_pose);
+
+ initial_pose = localizer_.camera2nerf(initial_pose);
+
+ // run NeRF
+ torch::Tensor optimized_pose;
+ std::vector particles;
+
+ if (optimization_mode_ == 0) {
+ const float noise_coeff = 1.0f;
+ particles = localizer_.optimize_pose_by_random_search(
+ initial_pose, image_tensor, particle_num_, noise_coeff);
+ optimized_pose = Localizer::calc_average_pose(particles);
+ } else {
+ std::vector optimized_poses = localizer_.optimize_pose_by_differential(
+ initial_pose, image_tensor, iteration_num_, learning_rate_);
+ optimized_pose = optimized_poses.back();
+ }
+
+ torch::Tensor nerf_image = localizer_.render_image(optimized_pose);
+ const float score = utils::calc_loss(nerf_image, image_tensor);
+
+ RCLCPP_DEBUG_STREAM(this->get_logger(), "score = " << score);
+
+ optimized_pose = localizer_.nerf2camera(optimized_pose);
+
+ RCLCPP_DEBUG_STREAM(this->get_logger(), "pose_after:\n" << optimized_pose);
+
+ geometry_msgs::msg::Pose result_pose_camera;
+ result_pose_camera.position.x = optimized_pose[0][3].item();
+ result_pose_camera.position.y = optimized_pose[1][3].item();
+ result_pose_camera.position.z = optimized_pose[2][3].item();
+ Eigen::Matrix3f rot_out;
+ rot_out << optimized_pose[0][0].item(), optimized_pose[0][1].item(),
+ optimized_pose[0][2].item(), optimized_pose[1][0].item(),
+ optimized_pose[1][1].item(), optimized_pose[1][2].item(),
+ optimized_pose[2][0].item(), optimized_pose[2][1].item(),
+ optimized_pose[2][2].item();
+ Eigen::Quaternionf quat_out(rot_out);
+ result_pose_camera.orientation.x = quat_out.x();
+ result_pose_camera.orientation.y = quat_out.y();
+ result_pose_camera.orientation.z = quat_out.z();
+ result_pose_camera.orientation.w = quat_out.w();
+
+ geometry_msgs::msg::Pose result_pose_base_link;
+ try {
+ geometry_msgs::msg::TransformStamped transform =
+ tf_buffer_.lookupTransform(target_frame_, "base_link", tf2::TimePointZero);
+ result_pose_base_link = transform_pose(result_pose_camera, transform);
+ } catch (tf2::TransformException & ex) {
+ RCLCPP_WARN(this->get_logger(), "%s", ex.what());
+ }
+
+ nerf_image = nerf_image * 255;
+ nerf_image = nerf_image.to(torch::kUInt8);
+ nerf_image = nerf_image.to(torch::kCPU);
+ nerf_image = nerf_image.contiguous();
+ sensor_msgs::msg::Image nerf_image_msg;
+ nerf_image_msg.header = header;
+ nerf_image_msg.width = nerf_image.size(1);
+ nerf_image_msg.height = nerf_image.size(0);
+ nerf_image_msg.step = nerf_image.size(1) * 3;
+ nerf_image_msg.encoding = "rgb8";
+ nerf_image_msg.data.resize(nerf_image.numel());
+ std::copy(
+ nerf_image.data_ptr(), nerf_image.data_ptr