diff --git a/launch/tier4_localization_launch/launch/pose_twist_estimator/nerf_based_localizer.launch.xml b/launch/tier4_localization_launch/launch/pose_twist_estimator/nerf_based_localizer.launch.xml
new file mode 100644
index 0000000000000..a557b8b47db10
--- /dev/null
+++ b/launch/tier4_localization_launch/launch/pose_twist_estimator/nerf_based_localizer.launch.xml
@@ -0,0 +1,16 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/launch/tier4_localization_launch/launch/pose_twist_estimator/pose_twist_estimator.launch.xml b/launch/tier4_localization_launch/launch/pose_twist_estimator/pose_twist_estimator.launch.xml
index fa6bce0e38e55..05d0d1beae8e1 100644
--- a/launch/tier4_localization_launch/launch/pose_twist_estimator/pose_twist_estimator.launch.xml
+++ b/launch/tier4_localization_launch/launch/pose_twist_estimator/pose_twist_estimator.launch.xml
@@ -9,7 +9,7 @@
-
+
@@ -18,6 +18,7 @@
+
@@ -91,6 +92,12 @@
+
+
+
+
+
+
@@ -116,6 +123,7 @@
+
diff --git a/launch/tier4_localization_launch/package.xml b/launch/tier4_localization_launch/package.xml
index b3dc75bbf79cc..69c951db79b58 100644
--- a/launch/tier4_localization_launch/package.xml
+++ b/launch/tier4_localization_launch/package.xml
@@ -27,6 +27,7 @@
geo_pose_projector
gyro_odometer
ndt_scan_matcher
+ nerf_based_localizer
pose_estimator_arbiter
pose_initializer
pose_instability_detector
diff --git a/localization/nerf_based_localizer/CMakeLists.txt b/localization/nerf_based_localizer/CMakeLists.txt
new file mode 100644
index 0000000000000..5cabbb500575f
--- /dev/null
+++ b/localization/nerf_based_localizer/CMakeLists.txt
@@ -0,0 +1,51 @@
+cmake_minimum_required(VERSION 3.14)
+project(nerf_based_localizer)
+
+find_package(ament_cmake_auto REQUIRED)
+ament_auto_find_build_dependencies()
+
+find_package(CUDA)  # not REQUIRED: absence must fall through to the skip below
+find_package(CUDAToolkit)
+
+if(NOT CUDA_FOUND OR NOT CUDAToolkit_FOUND)
+  message(WARNING "cuda is not found, so the nerf_based_localizer package won't be built.")
+  return()
+endif()
+
+# libtorch
+set(CMAKE_PREFIX_PATH ${CMAKE_PREFIX_PATH} ${CMAKE_SOURCE_DIR}/external/libtorch/)
+find_package(Torch PATHS ${CMAKE_SOURCE_DIR}/external/libtorch NO_DEFAULT_PATH)
+
+# skip if libtorch is not found
+if(NOT Torch_FOUND)
+  message(WARNING "libtorch is not found, so the nerf_based_localizer package won't be built.")
+  return()
+endif()
+
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS} -w")
+link_directories(${CMAKE_SOURCE_DIR}/external/libtorch/lib/)
+include_directories(${TORCH_INCLUDE_DIRS})
+
+# OpenCV
+find_package(OpenCV REQUIRED)
+# (link_directories(${OpenCV_LIBRARIES}) removed: it expects directories, not library names)
+include_directories(${OpenCV_INCLUDE_DIRS})
+
+file(GLOB_RECURSE SRC_ALL
+  src/*.cpp
+  src/*.cu)
+
+ament_auto_add_executable(nerf_based_localizer ${SRC_ALL})
+ament_export_dependencies(CUDA)
+target_link_libraries(nerf_based_localizer
+  ${TORCH_LIBRARIES}
+  ${TORCH_CUDA_LIBRARIES}
+  ${OpenCV_LIBS}
+  stdc++fs
+)
+
+ament_auto_package(
+  INSTALL_TO_SHARE
+  launch
+  config
+)
diff --git a/localization/nerf_based_localizer/ORIGINAL_LICENSE b/localization/nerf_based_localizer/ORIGINAL_LICENSE
new file mode 100644
index 0000000000000..f2473c3b78569
--- /dev/null
+++ b/localization/nerf_based_localizer/ORIGINAL_LICENSE
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [2023] [Peng Wang]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/localization/nerf_based_localizer/README.md b/localization/nerf_based_localizer/README.md
new file mode 100644
index 0000000000000..1d47df61ffc92
--- /dev/null
+++ b/localization/nerf_based_localizer/README.md
@@ -0,0 +1,131 @@
+# NeRF Based Localizer
+
+NeRFBasedLocalizer is a vision-based localization package.
+
+![example_of_result](./doc_image/example_of_result.png)
+
+## Node diagram
+
+![node diagram](./doc_image/node_diagram.drawio.svg)
+
+## Inputs / Outputs
+
+### Input
+
+| Name | Type | Description |
+| :-------------- | :---------------------------------------------- | :------------------------------- |
+| `~/input/pose` | `geometry_msgs::msg::PoseWithCovarianceStamped` | EKF Pose without IMU correction. |
+| `~/input/image` | `sensor_msgs::msg::Image` | Camera Image |
+
+### Output
+
+| Name | Type | Description |
+| :------------------------------ | :---------------------------------------------- | :----------------------------- |
+| `~/output/pose` | `geometry_msgs::msg::PoseStamped` | estimated pose |
+| `~/output/pose_with_covariance` | `geometry_msgs::msg::PoseWithCovarianceStamped` | estimated pose with covariance |
+| `~/output/score` | `std_msgs::msg::Float32` | estimated score of nerf |
+| `~/output/image` | `sensor_msgs::msg::Image` | estimated image of nerf |
+
+## How to build
+
+Download libtorch and extract it to the `nerf_based_localizer/external` directory.
+
+For example,
+
+```bash
+cd nerf_based_localizer/external
+
+# v1.13.1
+wget https://download.pytorch.org/libtorch/cu117/libtorch-cxx11-abi-shared-with-deps-1.13.1%2Bcu117.zip
+unzip ./libtorch-cxx11-abi-shared-with-deps-1.13.1+cu117.zip
+
+# v2.1.2
+wget https://download.pytorch.org/libtorch/cu121/libtorch-cxx11-abi-shared-with-deps-2.1.2%2Bcu121.zip
+unzip ./libtorch-cxx11-abi-shared-with-deps-2.1.2+cu121.zip
+```
+
+If libtorch is prepared, `nerf_based_localizer` can be built as a normal package.
+Otherwise, building `nerf_based_localizer` will be skipped.
+
+## How to launch
+
+Set the train result directory to the parameter `train_result_dir` in the `nerf_based_localizer/config/nerf_based_localizer.param.yaml`.
+
+When launching Autoware, set `nerf` for `pose_source`.
+
+```bash
+ros2 launch autoware_launch ... \
+ pose_source:=nerf \
+ ...
+```
+
+For example, to run `logging_simulator`, the command is as follows.
+
+```bash
+ros2 launch autoware_launch logging_simulator.launch.xml \
+ map_path:=/path/to/map \
+ pose_source:=nerf \
+ vehicle_model:=sample_vehicle \
+ sensor_model:=awsim_sensor_kit \
+ perception:=false \
+ planning:=false \
+ control:=false
+```
+
+[This trained weights](https://drive.google.com/file/d/1w4hLw7aJ_o6OM8XCCXyNPZTGIy4ah9aZ/view?usp=sharing) and [this rosbag data](https://drive.google.com/file/d/1uMVwQQFcfs8JOqfoA1FqfH_fLPwQ71jK/view) can be used as sample data.
+
+## How to train
+
+### Prepare training data
+
+Use `prepare_data.py`.
+
+```bash
+python3 prepare_data.py /path/to/rosbag /path/to/prepared_data/
+```
+
+The rosbag must contain the following topics.
+
+| Topic name | Message type | Description |
+| :------------------------------------------------------------------- | :---------------------------------------------- | :------------------------------ |
+| `/tf_static` | `tf2_msgs::msg::TFMessage` | tf_static |
+| `/localization/pose_twist_fusion_filter/biased_pose_with_covariance` | `geometry_msgs::msg::PoseWithCovarianceStamped` | EKF Pose without IMU correction |
+| `/sensing/camera/traffic_light/image_raw` | `sensor_msgs::msg::Image` | Camera Image |
+| `/sensing/camera/traffic_light/camera_info` | `sensor_msgs::msg::CameraInfo` | Camera Info |
+
+For example, the following rosbag obtained from AWSIM can be used.
+
+
+
+### Execute training
+
+Use training_tool.
+
+```bash
+cd nerf_based_localizer/training_tool/script
+./build_and_exec_training.sh /path/to/result_dir/ /path/to/prepared_data/
+```
+
+Set the train result directory to the parameter `train_result_dir` in the `nerf_based_localizer/config/nerf_based_localizer.param.yaml`.
+
+## Principle
+
+[NeRF](https://www.matthewtancik.com/nerf), standing for Neural Radiance Fields, presents a novel approach to synthesize novel views of a scene by leveraging the power of neural networks. It was introduced with an aim to handle the challenges of view synthesis, which includes creating novel, previously unseen views of a 3D scene given a sparse set of input photographs.
+
+Training Phase: The model is trained with a set of 2D images of a 3D scene and their corresponding camera parameters (position and orientation). The neural network learns to predict the color and transparency of rays cast through the scene, effectively learning a representation of the 3D scene. The objective is to minimize the difference between the rendered images and the input images.
+
+Inference Phase: After training, NeRF synthesizes novel views of the scene by sampling and summing the colors of volumetric points along the rays cast from a new camera viewpoint. The synthesized images exhibit high-quality view synthesis even under significant viewpoint changes.
+
+### Application in Localization
+
+Implementing NeRF for localization involves utilizing the learned 3D scene representation to estimate the position and orientation of a camera (or observer) in the scene. By comparing the synthesized views and the actual camera view, the algorithm iteratively refines the estimated camera parameters to minimize the difference between the rendered and actual views.
+
+This approach unlocks the potential to achieve accurate and robust self-localization in various environments, allowing devices and robots to comprehend their position and orientation within a previously learned 3D space.
+
+## Acknowledgement
+
+The code for this package is based on [F2-NeRF](https://github.com/Totoro97/f2-nerf) with significant code changes.
+
+The license of F2-NeRF is Apache License 2.0 (see the LICENSE file in the F2-NeRF repository linked above).
+
+The license file is also copied to this directory as [ORIGINAL_LICENSE](./ORIGINAL_LICENSE).
diff --git a/localization/nerf_based_localizer/config/nerf_based_localizer.param.yaml b/localization/nerf_based_localizer/config/nerf_based_localizer.param.yaml
new file mode 100644
index 0000000000000..eedf165cac55a
--- /dev/null
+++ b/localization/nerf_based_localizer/config/nerf_based_localizer.param.yaml
@@ -0,0 +1,23 @@
+/**:
+ ros__parameters:
+ train_result_dir: "/path/to/train_result_dir"
+ optimization_mode: 1 # 0: random_search, 1: differential
+
+ # if optimization_mode is 0, the following parameters are used
+ particle_num: 50
+ render_pixel_num: 128
+ noise_position_x: 0.10 # [m]
+ noise_position_y: 0.10 # [m]
+ noise_position_z: 0.05 # [m]
+ noise_rotation_x: 0.05 # [deg]
+ noise_rotation_y: 0.05 # [deg]
+ noise_rotation_z: 0.10 # [deg]
+
+ # if optimization_mode is 1, the following parameters are used
+ iteration_num: 1
+ learning_rate: 0.0001
+
+ # The following parameters are used in both optimization_mode 0 and 1
+ output_covariance: 0.000225
+ resize_factor: 20
+ sample_num_per_ray: 1024
diff --git a/localization/nerf_based_localizer/doc_image/example_of_result.png b/localization/nerf_based_localizer/doc_image/example_of_result.png
new file mode 100644
index 0000000000000..0a29719e937d1
Binary files /dev/null and b/localization/nerf_based_localizer/doc_image/example_of_result.png differ
diff --git a/localization/nerf_based_localizer/doc_image/node_diagram.drawio.svg b/localization/nerf_based_localizer/doc_image/node_diagram.drawio.svg
new file mode 100644
index 0000000000000..e10d00335b71d
--- /dev/null
+++ b/localization/nerf_based_localizer/doc_image/node_diagram.drawio.svg
@@ -0,0 +1,63 @@
+
diff --git a/localization/nerf_based_localizer/external/.gitignore b/localization/nerf_based_localizer/external/.gitignore
new file mode 100644
index 0000000000000..d6b7ef32c8478
--- /dev/null
+++ b/localization/nerf_based_localizer/external/.gitignore
@@ -0,0 +1,2 @@
+*
+!.gitignore
diff --git a/localization/nerf_based_localizer/launch/nerf_based_localizer.launch.xml b/localization/nerf_based_localizer/launch/nerf_based_localizer.launch.xml
new file mode 100644
index 0000000000000..77551a5bbe7b0
--- /dev/null
+++ b/localization/nerf_based_localizer/launch/nerf_based_localizer.launch.xml
@@ -0,0 +1,29 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/localization/nerf_based_localizer/package.xml b/localization/nerf_based_localizer/package.xml
new file mode 100644
index 0000000000000..bf68bd690df8b
--- /dev/null
+++ b/localization/nerf_based_localizer/package.xml
@@ -0,0 +1,40 @@
+
+
+
+ nerf_based_localizer
+ 0.1.0
+ The nerf_based_localizer package
+ Shintaro Sakoda
+ Apache License 2.0
+ Shintaro Sakoda
+
+ ament_cmake_auto
+ autoware_cmake
+
+ autoware_map_msgs
+ autoware_universe_utils
+ diagnostic_msgs
+ fmt
+ geometry_msgs
+ libpcl-all-dev
+ nav_msgs
+ rclcpp
+ sensor_msgs
+ std_msgs
+ std_srvs
+ tf2
+ tf2_eigen
+ tf2_geometry_msgs
+ tf2_ros
+ tf2_sensor_msgs
+ tier4_debug_msgs
+ tier4_localization_msgs
+ visualization_msgs
+
+ ament_cmake_cppcheck
+ ament_lint_auto
+
+
+ ament_cmake
+
+
diff --git a/localization/nerf_based_localizer/src/main.cpp b/localization/nerf_based_localizer/src/main.cpp
new file mode 100644
index 0000000000000..ca9b12a47c0e5
--- /dev/null
+++ b/localization/nerf_based_localizer/src/main.cpp
@@ -0,0 +1,24 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "nerf_based_localizer.hpp"
+
+#include <rclcpp/rclcpp.hpp>
+
+int main(int argc, char * argv[])
+{
+  rclcpp::init(argc, argv);
+  std::shared_ptr<NerfBasedLocalizer> node = std::make_shared<NerfBasedLocalizer>();
+  rclcpp::spin(node);
+  rclcpp::shutdown();
+}
diff --git a/localization/nerf_based_localizer/src/nerf/CustomOps/CustomOps.cpp b/localization/nerf_based_localizer/src/nerf/CustomOps/CustomOps.cpp
new file mode 100644
index 0000000000000..a856542b6be3d
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/CustomOps/CustomOps.cpp
@@ -0,0 +1,38 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Utils/CustomOps/CustomOps.cpp
+//
+// Created by ppwang on 2022/10/5.
+//
+
+#include "CustomOps.hpp"
+
+namespace torch::autograd
+{
+
+// Forward pass of the "truncated exponential" activation: plain exp(input).
+// The raw input is saved for the backward pass.
+variable_list TruncExp::forward(AutogradContext * ctx, Tensor input)
+{
+  ctx->save_for_backward({input});
+  return {torch::exp(input)};
+}
+
+// Backward pass: d/dx exp(x) = exp(x), but x is clamped to [-100, 5] before
+// exponentiation so the returned gradient magnitude stays bounded
+// (this clamp is the "truncated" part of the op's name).
+variable_list TruncExp::backward(AutogradContext * ctx, variable_list grad_output)
+{
+  Tensor x = ctx->get_saved_variables()[0];
+  return {grad_output[0] * torch::exp(x.clamp(-100.f, 5.f))};
+}
+
+}  // namespace torch::autograd
diff --git a/localization/nerf_based_localizer/src/nerf/CustomOps/CustomOps.cu b/localization/nerf_based_localizer/src/nerf/CustomOps/CustomOps.cu
new file mode 100644
index 0000000000000..87e6b246b0686
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/CustomOps/CustomOps.cu
@@ -0,0 +1,135 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Utils/CustomOps/CustomOps.cu
+//
+// Created by ppwang on 2023/3/17.
+//
+
+#include "../common.hpp"
+#include "../common_cuda.hpp"
+#include "CustomOps.hpp"
+
+#define SCALE (16.f)
+
+using Tensor = torch::Tensor;
+
+// One thread per output entry `idx`. idx_start_end[idx*2] / [idx*2+1] delimit
+// the half-open span of `weights` belonging to that entry; the kernel writes
+// the weight-weighted variance of the normalized sample index (i / SCALE)
+// over that span into out_vars[idx].
+__global__ void WeightVarLossForwardKernel(
+  int n_outs, float * weights, int * idx_start_end, float * out_vars)
+{
+  int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (idx >= n_outs) return;  // grid may be rounded up past n_outs
+  int idx_start = idx_start_end[idx * 2];
+  int idx_end = idx_start_end[idx * 2 + 1];
+  if (idx_start >= idx_end) {
+    // Empty span: variance defined as zero.
+    out_vars[idx] = 0.f;
+    return;
+  }
+  float mean = 0.f;
+  float weight_sum = 1e-6f;  // epsilon guards the division below against zero weights
+  float len = SCALE;
+  for (int i = 0; i + idx_start < idx_end; i++) {
+    mean += weights[i + idx_start] * (float(i) / len);
+    weight_sum += weights[i + idx_start];
+  }
+  mean /= weight_sum;
+  // Second pass: accumulate weighted squared deviation from the mean.
+  // Note: this sum is intentionally NOT re-normalized by weight_sum
+  // (the backward kernel differentiates exactly this expression).
+  float variance = 0.f;
+  for (int i = 0; i + idx_start < idx_end; i++) {
+    float bias = float(i) / len - mean;
+    variance += weights[i + idx_start] * bias * bias;
+  }
+  out_vars[idx] = variance;
+}
+
+// Backward companion of WeightVarLossForwardKernel: given dL/dvariance per
+// output entry (dl_dvars), writes dL/dweight into dl_dw for every weight in
+// the entry's span. The first two loops recompute the mean exactly as in the
+// forward pass; the gradient then combines the direct bias^2 term with the
+// indirect effect each weight has on the variance through `mean` (via tmp).
+__global__ void WeightVarLossBackwardKernel(
+  int n_outs, float * weights, int * idx_start_end, float * dl_dvars, float * dl_dw)
+{
+  int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (idx >= n_outs) return;
+  int idx_start = idx_start_end[idx * 2];
+  int idx_end = idx_start_end[idx * 2 + 1];
+  if (idx_start >= idx_end) {
+    // Empty span: no weights to write gradients for.
+    return;
+  }
+  float mean = 0.f;
+  float weight_sum = 1e-6f;  // same epsilon as the forward pass
+  float len = SCALE;
+  for (int i = 0; i + idx_start < idx_end; i++) {
+    mean += weights[i + idx_start] * (float(i) / len);
+    weight_sum += weights[i + idx_start];
+  }
+  mean /= weight_sum;
+  float variance = 0.f;
+  float tmp = 0.f;  // accumulates sum of w_i * 2 * bias_i, used for the chain-rule term through mean
+  for (int i = 0; i + idx_start < idx_end; i++) {
+    float bias = float(i) / len - mean;
+    variance += weights[i + idx_start] * bias * bias;
+    tmp += weights[i + idx_start] * 2.f * bias;
+  }
+  for (int i = 0; i + idx_start < idx_end; i++) {
+    float bias = float(i) / len - mean;
+    // d(variance)/d(w_i) = bias_i^2 + (d mean/d w_i) * sum_j w_j * 2 * bias_j * (-1)
+    // where d mean/d w_i = (i/len) / weight_sum  -- NOTE(review): ignores mean's
+    // weight_sum dependence; presumably an intentional approximation, confirm upstream.
+    float grad = (bias * bias + tmp * -(float(i) / len) / weight_sum);
+    dl_dw[i + idx_start] = dl_dvars[idx] * grad;
+  }
+}
+
+namespace torch::autograd
+{
+
+// Custom autograd op wrapping the weight-variance CUDA kernels above.
+class WeightVarLoss : public Function<WeightVarLoss>
+{
+public:
+  static variable_list forward(AutogradContext * ctx, Tensor weights, Tensor idx_start_end)
+  {
+    CHECK(weights.is_contiguous());
+    CHECK(idx_start_end.is_contiguous());
+    int n_outs = idx_start_end.size(0);
+    Tensor out_vars = torch::empty({n_outs}, CUDAFloat);
+    dim3 grid_dim = LIN_GRID_DIM(n_outs);
+    dim3 block_dim = LIN_BLOCK_DIM;
+    WeightVarLossForwardKernel<<<grid_dim, block_dim>>>(
+      n_outs, weights.data_ptr<float>(), idx_start_end.data_ptr<int>(), out_vars.data_ptr<float>());
+    ctx->save_for_backward({weights, idx_start_end});
+    return {out_vars};
+  }
+
+  static variable_list backward(AutogradContext * ctx, variable_list grad_output)
+  {
+    Tensor dl_dvar = grad_output[0].contiguous();  // kernel requires contiguous memory
+    auto saved_tensors = ctx->get_saved_variables();
+    Tensor & weights = saved_tensors[0];
+    Tensor & idx_start_end = saved_tensors[1];
+
+    int n_outs = idx_start_end.size(0);
+    int n_all = weights.size(0);
+
+    Tensor dl_dw = torch::empty({n_all}, CUDAFloat);  // NOTE(review): entries outside every span stay uninitialized -- assumes spans cover all weights, confirm
+    dim3 grid_dim = LIN_GRID_DIM(n_outs);
+    dim3 block_dim = LIN_BLOCK_DIM;
+
+    WeightVarLossBackwardKernel<<<grid_dim, block_dim>>>(
+      n_outs, weights.data_ptr<float>(), idx_start_end.data_ptr<int>(), dl_dvar.data_ptr<float>(),
+      dl_dw.data_ptr<float>());
+
+    return {dl_dw, Tensor()};  // idx_start_end is an integer index tensor: no gradient
+  }
+};
+
+}  // namespace torch::autograd
+
+Tensor CustomOps::WeightVar(Tensor weights, Tensor idx_start_end)
+{
+  return torch::autograd::WeightVarLoss::apply(weights.contiguous(), idx_start_end.contiguous())[0];
+}
diff --git a/localization/nerf_based_localizer/src/nerf/CustomOps/CustomOps.hpp b/localization/nerf_based_localizer/src/nerf/CustomOps/CustomOps.hpp
new file mode 100644
index 0000000000000..e62aa9fd84bba
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/CustomOps/CustomOps.hpp
@@ -0,0 +1,46 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Utils/CustomOps/CustomOps.h
+//
+// Created by ppwang on 2022/10/5.
+//
+
+#ifndef NERF__CUSTOMOPS__CUSTOMOPS_HPP_
+#define NERF__CUSTOMOPS__CUSTOMOPS_HPP_
+
+#include <torch/torch.h>
+
+namespace torch::autograd
+{
+
+// Custom autograd function: exp(x) forward, exp(clamp(x, -100, 5)) in the
+// gradient (see CustomOps.cpp for the implementation).
+class TruncExp : public Function<TruncExp>
+{
+public:
+  static variable_list forward(AutogradContext * ctx, Tensor input);
+
+  static variable_list backward(AutogradContext * ctx, variable_list grad_output);
+};
+
+}  // namespace torch::autograd
+
+namespace CustomOps
+{
+
+// Per-span weighted variance of normalized sample indices (CUDA, differentiable).
+torch::Tensor WeightVar(torch::Tensor weights, torch::Tensor idx_start_end);
+
+}  // namespace CustomOps
+
+#endif  // NERF__CUSTOMOPS__CUSTOMOPS_HPP_
diff --git a/localization/nerf_based_localizer/src/nerf/CustomOps/FlexOps.cpp b/localization/nerf_based_localizer/src/nerf/CustomOps/FlexOps.cpp
new file mode 100644
index 0000000000000..8eaf748bc13ea
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/CustomOps/FlexOps.cpp
@@ -0,0 +1,21 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Utils/CustomOps/FlexOps.cpp
+//
+// Created by ppwang on 2023/2/11.
+//
+
+#include "FlexOps.hpp"
diff --git a/localization/nerf_based_localizer/src/nerf/CustomOps/FlexOps.cu b/localization/nerf_based_localizer/src/nerf/CustomOps/FlexOps.cu
new file mode 100644
index 0000000000000..2aee27d4b74aa
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/CustomOps/FlexOps.cu
@@ -0,0 +1,245 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Utils/CustomOps/FlexOps.cu
+
+#include "../common_cuda.hpp"
+#include "FlexOps.hpp"
+
+using Tensor = torch::Tensor;
+
+// One thread per output segment: sums val over [idx_start, idx_end), where the
+// segment bounds are read from idx_start_end (flattened [n_outs, 2] int array),
+// and writes the scalar result to sum[idx].
+__global__ void FlexSumForwardKernel(int n_outs, float * val, int * idx_start_end, float * sum)
+{
+  int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (idx >= n_outs) return;
+  int idx_start = idx_start_end[idx * 2];
+  int idx_end = idx_start_end[idx * 2 + 1];
+  float out_val = 0.f;
+  for (int i = idx_start; i < idx_end; i++) {
+    out_val += val[i];
+  }
+  sum[idx] = out_val;
+}
+
+// Backward of the segment sum: the gradient of a sum w.r.t. each summand is 1,
+// so dl_dsum[idx] is broadcast to every element of the segment.
+__global__ void FlexSumBackwardKernel(
+  int n_outs, float * dl_dsum, int * idx_start_end, float * dl_dval)
+{
+  int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (idx >= n_outs) return;
+  int idx_start = idx_start_end[idx * 2];
+  int idx_end = idx_start_end[idx * 2 + 1];
+  float fill_val = dl_dsum[idx];
+  for (int i = idx_start; i < idx_end; i++) {
+    dl_dval[i] = fill_val;
+  }
+}
+
+// Vector variant of FlexSumForwardKernel: val is [n_all, vec_size] row-major;
+// each thread sums its segment independently for every channel j.
+__global__ void FlexSumVecForwardKernel(
+  int n_outs, int vec_size, float * val, int * idx_start_end, float * sum)
+{
+  int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (idx >= n_outs) return;
+  int idx_start = idx_start_end[idx * 2];
+  int idx_end = idx_start_end[idx * 2 + 1];
+  for (int j = 0; j < vec_size; j++) {
+    float out_val = 0.f;
+    for (int i = idx_start; i < idx_end; i++) {
+      out_val += val[i * vec_size + j];
+    }
+    sum[idx * vec_size + j] = out_val;
+  }
+}
+
+// Backward of the vector segment sum: broadcast each channel's upstream gradient
+// dl_dsum[idx, j] to all elements of the segment in that channel.
+__global__ void FlexSumVecBackwardKernel(
+  int n_outs, int vec_size, float * dl_dsum, int * idx_start_end, float * dl_dval)
+{
+  int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (idx >= n_outs) return;
+  int idx_start = idx_start_end[idx * 2];
+  int idx_end = idx_start_end[idx * 2 + 1];
+  for (int j = 0; j < vec_size; j++) {
+    float fill_val = dl_dsum[idx * vec_size + j];
+    for (int i = idx_start; i < idx_end; i++) {
+      dl_dval[i * vec_size + j] = fill_val;
+    }
+  }
+}
+
+// Per-segment running (prefix) sum, written per input element: sum[i] holds the
+// cumulative sum of val within the segment. include_this selects an inclusive
+// scan (add val[i] before storing) vs. an exclusive scan (store, then add).
+__global__ void FlexAccumulateSumForwardKernel(
+  int n_outs, bool include_this, float * val, int * idx_start_end, float * sum)
+{
+  int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (idx >= n_outs) return;
+  int idx_start = idx_start_end[idx * 2];
+  int idx_end = idx_start_end[idx * 2 + 1];
+  float out_val = 0.f;
+  if (include_this) {
+    for (int i = idx_start; i < idx_end; i++) {
+      out_val += val[i];
+      sum[i] = out_val;
+    }
+  } else {
+    for (int i = idx_start; i < idx_end; i++) {
+      sum[i] = out_val;
+      out_val += val[i];
+    }
+  }
+}
+
+// Backward of the per-segment prefix sum: val[i] contributes to every later
+// (inclusive: and its own) output in the segment, so its gradient is the suffix
+// sum of dl_dsum, accumulated by walking the segment backwards.
+__global__ void FlexAccumulateSumBackwardKernel(
+  int n_outs, bool include_this, float * dl_dsum, int * idx_start_end, float * dl_dval)
+{
+  int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (idx >= n_outs) return;
+  int idx_start = idx_start_end[idx * 2];
+  int idx_end = idx_start_end[idx * 2 + 1];
+  float wp = 0.f;
+  if (include_this) {
+    for (int i = idx_end - 1; i >= idx_start; i--) {
+      wp += dl_dsum[i];
+      dl_dval[i] = wp;
+    }
+  } else {
+    for (int i = idx_end - 1; i >= idx_start; i--) {
+      dl_dval[i] = wp;
+      wp += dl_dsum[i];
+    }
+  }
+}
+
+namespace torch::autograd
+{
+
+// Autograd wrapper for the segment-sum kernels. The HTML-stripped template
+// arguments (Function<...>, data_ptr<...>, <<<...>>> launch configs) are restored.
+class FlexSum : public Function<FlexSum>
+{
+public:
+  static variable_list forward(AutogradContext * ctx, Tensor val, Tensor idx_start_end)
+  {
+    CHECK(val.is_contiguous());
+    CHECK(idx_start_end.is_contiguous());
+    int n_outs = idx_start_end.size(0);
+    Tensor sum;
+    dim3 grid_dim = LIN_GRID_DIM(n_outs);
+    dim3 block_dim = LIN_BLOCK_DIM;
+
+    if (val.sizes().size() == 1) {
+      sum = torch::empty({n_outs}, CUDAFloat);
+      FlexSumForwardKernel<<<grid_dim, block_dim>>>(
+        n_outs, val.data_ptr<float>(), idx_start_end.data_ptr<int>(), sum.data_ptr<float>());
+    } else {
+      int vec_size = val.size(1);
+      sum = torch::empty({n_outs, vec_size}, CUDAFloat);
+      FlexSumVecForwardKernel<<<grid_dim, block_dim>>>(
+        n_outs, vec_size, val.data_ptr<float>(), idx_start_end.data_ptr<int>(),
+        sum.data_ptr<float>());
+    }
+    ctx->save_for_backward({val, idx_start_end});
+    return {sum};
+  }
+
+  static variable_list backward(AutogradContext * ctx, variable_list grad_output)
+  {
+    Tensor dl_dsum = grad_output[0].contiguous();
+    auto saved_tensors = ctx->get_saved_variables();
+    Tensor & val = saved_tensors[0];
+    Tensor & idx_start_end = saved_tensors[1];
+    int n_outs = idx_start_end.size(0);
+    int n_all = val.size(0);
+
+    Tensor dl_dval;
+    dim3 grid_dim = LIN_GRID_DIM(n_outs);
+    dim3 block_dim = LIN_BLOCK_DIM;
+
+    if (val.sizes().size() == 1) {
+      dl_dval = torch::empty({n_all}, CUDAFloat);
+      FlexSumBackwardKernel<<<grid_dim, block_dim>>>(
+        n_outs, dl_dsum.data_ptr<float>(), idx_start_end.data_ptr<int>(),
+        dl_dval.data_ptr<float>());
+
+    } else {
+      int vec_size = val.size(1);
+      dl_dval = torch::empty({n_all, vec_size}, CUDAFloat);
+      FlexSumVecBackwardKernel<<<grid_dim, block_dim>>>(
+        n_outs, vec_size, dl_dsum.data_ptr<float>(), idx_start_end.data_ptr<int>(),
+        dl_dval.data_ptr<float>());
+    }
+    // idx_start_end receives no gradient.
+    return {dl_dval, Tensor()};
+  }
+};
+
+// Autograd wrapper for the per-segment prefix-sum kernels. include_this is passed
+// as an IValue because Function::apply only forwards autograd-compatible types.
+class FlexAccumulateSum : public Function<FlexAccumulateSum>
+{
+public:
+  static variable_list forward(
+    AutogradContext * ctx, Tensor val, Tensor idx_start_end, torch::IValue include_this_ivalue)
+  {
+    CHECK(val.is_contiguous());
+    CHECK(idx_start_end.is_contiguous());
+    bool include_this = include_this_ivalue.toBool();
+    int n_all = val.size(0);
+    int n_outs = idx_start_end.size(0);
+    Tensor sum = torch::empty({n_all}, CUDAFloat);
+    dim3 grid_dim = LIN_GRID_DIM(n_outs);
+    dim3 block_dim = LIN_BLOCK_DIM;
+
+    FlexAccumulateSumForwardKernel<<<grid_dim, block_dim>>>(
+      n_outs, include_this, val.data_ptr<float>(), idx_start_end.data_ptr<int>(),
+      sum.data_ptr<float>());
+
+    ctx->save_for_backward({val, idx_start_end});
+    ctx->saved_data["include_this"] = include_this_ivalue;
+    return {sum};
+  }
+
+  static variable_list backward(AutogradContext * ctx, variable_list grad_output)
+  {
+    Tensor dl_dsum = grad_output[0].contiguous();
+
+    auto saved_tensors = ctx->get_saved_variables();
+    bool include_this = ctx->saved_data["include_this"].toBool();
+    Tensor & val = saved_tensors[0];
+    Tensor & idx_start_end = saved_tensors[1];
+    int n_outs = idx_start_end.size(0);
+    int n_all = val.size(0);
+
+    Tensor dl_dval = torch::empty({n_all}, CUDAFloat);
+    dim3 grid_dim = LIN_GRID_DIM(n_outs);
+    dim3 block_dim = LIN_BLOCK_DIM;
+
+    FlexAccumulateSumBackwardKernel<<<grid_dim, block_dim>>>(
+      n_outs, include_this, dl_dsum.data_ptr<float>(), idx_start_end.data_ptr<int>(),
+      dl_dval.data_ptr<float>());
+
+    // No gradients for idx_start_end or the bool flag.
+    return {dl_dval, Tensor(), Tensor()};
+  }
+};
+
+} // namespace torch::autograd
+
+namespace FlexOps
+{
+
+// Segment-wise sum of `val` over the [start, end) ranges in `idx_start_end`.
+Tensor Sum(Tensor val, Tensor idx_start_end)
+{
+  Tensor val_c = val.contiguous();
+  Tensor ranges_c = idx_start_end.contiguous();
+  return torch::autograd::FlexSum::apply(val_c, ranges_c)[0];
+}
+
+// Segment-wise running sum; `include_this` picks inclusive vs. exclusive scan.
+Tensor AccumulateSum(Tensor val, Tensor idx_start_end, bool include_this)
+{
+  Tensor val_c = val.contiguous();
+  Tensor ranges_c = idx_start_end.contiguous();
+  const torch::IValue flag(include_this);
+  return torch::autograd::FlexAccumulateSum::apply(val_c, ranges_c, flag)[0];
+}
+
+} // namespace FlexOps
diff --git a/localization/nerf_based_localizer/src/nerf/CustomOps/FlexOps.hpp b/localization/nerf_based_localizer/src/nerf/CustomOps/FlexOps.hpp
new file mode 100644
index 0000000000000..d8171d06d7798
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/CustomOps/FlexOps.hpp
@@ -0,0 +1,36 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Utils/CustomOps/FlexOps.h
+//
+// Created by ppwang on 2023/2/11.
+//
+
+#ifndef NERF__CUSTOMOPS__FLEXOPS_HPP_
+#define NERF__CUSTOMOPS__FLEXOPS_HPP_
+
+#include "../common.hpp"
+
+#include <torch/torch.h>
+
+namespace FlexOps
+{
+
+// Segment-wise sum / running sum over [start, end) ranges given in idx_start_end.
+torch::Tensor Sum(torch::Tensor val, torch::Tensor idx_start_end);
+torch::Tensor AccumulateSum(torch::Tensor val, torch::Tensor idx_start_end, bool include_this);
+
+} // namespace FlexOps
+
+#endif  // NERF__CUSTOMOPS__FLEXOPS_HPP_
diff --git a/localization/nerf_based_localizer/src/nerf/CustomOps/Scatter.cpp b/localization/nerf_based_localizer/src/nerf/CustomOps/Scatter.cpp
new file mode 100644
index 0000000000000..5af970d6b98f9
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/CustomOps/Scatter.cpp
@@ -0,0 +1,21 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Utils/CustomOps/Scatter.cpp
+//
+// Created by ppwang on 2023/3/27.
+//
+
+#include "Scatter.hpp"
diff --git a/localization/nerf_based_localizer/src/nerf/CustomOps/Scatter.cu b/localization/nerf_based_localizer/src/nerf/CustomOps/Scatter.cu
new file mode 100644
index 0000000000000..05eba608c858d
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/CustomOps/Scatter.cu
@@ -0,0 +1,149 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Utils/CustomOps/Scatter.cu
+//
+// Created by ppwang on 2023/3/27.
+//
+
+#include "../common_cuda.hpp"
+#include "Scatter.hpp"
+
+using Tensor = torch::Tensor;
+
+// Adds the embedding row selected by scatter_idx[pts_idx] into to_add, one thread
+// per (point, channel) pair; the channel index comes from blockIdx.y.
+__global__ void ScatterAddFuncForward(
+  int n_all, int n_channels, float * emb, int * scatter_idx, float * to_add)
+{
+  int pts_idx = blockIdx.x * blockDim.x + threadIdx.x;
+  int c_idx = blockIdx.y;
+  if (pts_idx >= n_all || c_idx >= n_channels) return;
+  int emb_idx = scatter_idx[pts_idx];
+  // Advance both pointers to the (pts_idx, c_idx) / (emb_idx, c_idx) elements.
+  to_add = to_add + pts_idx * n_channels + c_idx;
+  emb = emb + emb_idx * n_channels + c_idx;
+  to_add[0] += emb[0];
+}
+
+// Backward helper: each thread scans one block of `block_size` points and, for the
+// points whose scatter index equals emb_idx (blockIdx.y), accumulates dl_dsum into
+// a per-(embedding, block) slot of dl_demb_pool. The per-block partial sums are
+// presumably reduced afterwards by the caller — confirm at the call site.
+__global__ void ScatterAddFuncBackwardBlock(
+  int n_emb, int n_blocks, int n_all, int block_size, int n_channels, float * dl_demb_pool,
+  int * scatter_idx, float * dl_dsum)
+{
+  int block_idx = blockIdx.x * blockDim.x + threadIdx.x;
+  int emb_idx = blockIdx.y;
+  if (emb_idx >= n_emb || block_idx >= n_blocks || block_idx * block_size >= n_all) return;
+  scatter_idx = scatter_idx + block_idx * block_size;
+  dl_dsum = dl_dsum + (block_idx * block_size) * n_channels;
+  dl_demb_pool = dl_demb_pool + emb_idx * n_blocks * n_channels + block_idx * n_channels;
+  // Clamp the last block so we never read past n_all points.
+  int block_min = block_size;
+  if (block_idx * block_size + block_size > n_all) {
+    block_min = n_all - (block_idx * block_size);
+  }
+  for (int i = 0; i < block_min; i++) {
+    if (scatter_idx[i] == emb_idx) {
+      for (int c = 0; c < n_channels; c++) {
+        dl_demb_pool[c] += dl_dsum[i * n_channels + c];
+      }
+    }
+  }
+}
+
+namespace torch::autograd
+{
+
+// Autograd op: sum[p, c] = to_add[p, c] + emb[idx[p], c]. Stripped template
+// arguments and kernel launch configurations are restored.
+class ScatterAddFunc : public Function<ScatterAddFunc>
+{
+public:
+  static variable_list forward(AutogradContext * ctx, Tensor emb, Tensor idx, Tensor to_add)
+  {
+    emb = emb.contiguous();
+    idx = idx.contiguous();
+    int n_all = idx.size(0);
+    int n_channels = emb.size(1);
+    CHECK(n_all == to_add.size(0));
+    CHECK(n_channels == to_add.size(1));
+
+    const unsigned thread_cap = 512;
+    dim3 grid_dim = {unsigned(n_all + thread_cap - 1) / thread_cap, unsigned(n_channels), 1};
+    dim3 block_dim = {unsigned(thread_cap), 1, 1};
+
+    // Clone so the kernel can accumulate in place without touching the input.
+    Tensor sum = to_add.clone().contiguous();
+    ScatterAddFuncForward<<<grid_dim, block_dim>>>(
+      n_all, n_channels, emb.data_ptr<float>(), idx.data_ptr<int>(), sum.data_ptr<float>());
+    ctx->save_for_backward({emb, idx});
+    return {sum};
+  }
+
+  static variable_list backward(AutogradContext * ctx, variable_list grad_output)
+  {
+    Tensor dl_dsum = grad_output[0].contiguous();
+    auto saved_tensors = ctx->get_saved_variables();
+    Tensor & emb = saved_tensors[0];
+    Tensor & idx = saved_tensors[1];
+
+    int n_all = idx.size(0);
+    int n_channels = dl_dsum.size(1);
+
+    int n_emb = emb.size(0);
+    // Block size ~ sqrt(n), rounded down to a multiple of 32 (warp size).
+    int block_size = (int(std::sqrt(n_all + 1024)) >> 5) << 5;
+    int n_blocks = (n_all + block_size - 1) / block_size;
+
+    const unsigned thread_cap = 512;
+    dim3 grid_dim = {unsigned(n_blocks + thread_cap - 1) / thread_cap, unsigned(n_emb), 1};
+    dim3 block_dim = {unsigned(thread_cap), 1, 1};
+
+    Tensor dl_demb_pool = torch::zeros({n_emb, n_blocks, n_channels}, CUDAFloat);
+    ScatterAddFuncBackwardBlock<<<grid_dim, block_dim>>>(
+      n_emb, n_blocks, n_all, block_size, n_channels, dl_demb_pool.data_ptr<float>(),
+      idx.data_ptr<int>(), dl_dsum.data_ptr<float>());
+
+    // Reduce the per-block partial gradients; to_add's gradient is the identity.
+    Tensor dl_demb = torch::sum(dl_demb_pool, 1, false);
+    Tensor dl_dto_add = dl_dsum.clone();
+
+    return {dl_demb, Tensor(), dl_dto_add};
+  }
+};
+
+} // namespace torch::autograd
+
+// Public entry point: returns to_add + emb rows gathered by idx (autograd-aware).
+Tensor CustomOps::ScatterAdd(torch::Tensor emb, torch::Tensor idx, torch::Tensor to_add)
+{
+  return torch::autograd::ScatterAddFunc::apply(emb, idx, to_add)[0];
+}
+
+// One thread per ray: fills all_emb_idx[i] = emb_idx[ray] for every sample i in the
+// ray's [idx_start, idx_end) range. NOTE(review): "Kernal" is a typo for "Kernel",
+// kept as-is because the symbol has external linkage.
+__global__ void ScatterIdxKernal(int n_rays, int * idx_start_end, int * emb_idx, int * all_emb_idx)
+{
+  int ray_idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (ray_idx >= n_rays) return;
+  int idx_start = idx_start_end[ray_idx * 2];
+  int idx_end = idx_start_end[ray_idx * 2 + 1];
+
+  int fill = emb_idx[ray_idx];
+  for (int i = idx_start; i < idx_end; i++) {
+    all_emb_idx[i] = fill;
+  }
+}
+
+// Expands the per-ray embedding index to a per-sample index tensor of length
+// n_all_pts. Stripped data_ptr<int>() calls and the launch config are restored.
+Tensor CustomOps::ScatterIdx(int n_all_pts, Tensor idx_start_end, Tensor emb_idx)
+{
+  Tensor ret =
+    torch::empty({n_all_pts}, torch::TensorOptions().dtype(torch::kInt).device(torch::kCUDA));
+  int n_rays = idx_start_end.size(0);
+  dim3 grid_dim = LIN_GRID_DIM(n_rays);
+  dim3 block_dim = LIN_BLOCK_DIM;
+
+  ScatterIdxKernal<<<grid_dim, block_dim>>>(
+    n_rays, idx_start_end.data_ptr<int>(), emb_idx.data_ptr<int>(), ret.data_ptr<int>());
+
+  return ret;
+}
diff --git a/localization/nerf_based_localizer/src/nerf/CustomOps/Scatter.hpp b/localization/nerf_based_localizer/src/nerf/CustomOps/Scatter.hpp
new file mode 100644
index 0000000000000..fafc79b616db9
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/CustomOps/Scatter.hpp
@@ -0,0 +1,39 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Utils/CustomOps/Scatter.h
+//
+// Created by ppwang on 2023/3/27.
+//
+
+#ifndef NERF__CUSTOMOPS__SCATTER_HPP_
+#define NERF__CUSTOMOPS__SCATTER_HPP_
+
+#include "../common.hpp"
+
+#include <torch/torch.h>
+
+// NOTE(review): this empty class appears unused; kept only to preserve the original
+// layout — confirm before removing.
+class Scatter
+{
+};
+
+namespace CustomOps
+{
+
+// sum = to_add + emb[idx] (row gather + add, autograd-aware).
+torch::Tensor ScatterAdd(torch::Tensor emb, torch::Tensor idx, torch::Tensor to_add);
+// Expand per-ray emb_idx to a per-sample index tensor of length n_all_pts.
+torch::Tensor ScatterIdx(int n_all_pts, torch::Tensor idx_start_end, torch::Tensor emb_idx);
+} // namespace CustomOps
+
+#endif  // NERF__CUSTOMOPS__SCATTER_HPP_
diff --git a/localization/nerf_based_localizer/src/nerf/common.hpp b/localization/nerf_based_localizer/src/nerf/common.hpp
new file mode 100644
index 0000000000000..107be3081dac5
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/common.hpp
@@ -0,0 +1,29 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Common.h
+//
+// Created by ppwang on 2022/5/8.
+//
+
+#ifndef NERF__COMMON_HPP_
+#define NERF__COMMON_HPP_
+
+#include <torch/torch.h>
+
+// Shorthand for tensor slicing and the default CUDA float32 tensor options.
+using Slc = torch::indexing::Slice;
+const auto CUDAFloat = torch::TensorOptions().dtype(torch::kFloat32).device(torch::kCUDA);
+
+#endif  // NERF__COMMON_HPP_
diff --git a/localization/nerf_based_localizer/src/nerf/common_cuda.hpp b/localization/nerf_based_localizer/src/nerf/common_cuda.hpp
new file mode 100644
index 0000000000000..c80d4c458c39e
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/common_cuda.hpp
@@ -0,0 +1,39 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Common.h
+//
+// Created by ppwang on 2022/5/8.
+//
+
+#ifndef NERF__COMMON_CUDA_HPP_
+#define NERF__COMMON_CUDA_HPP_
+
+#include <cuda_runtime.h>
+
+// Ceiling division: number of y-sized groups needed to cover x elements.
+inline unsigned int DivUp(const unsigned int x, const unsigned int y)
+{
+  return (x + y - 1) / y;
+}
+
+// Default 1-D launch configuration: 512 threads per block.
+constexpr unsigned int THREAD_CAP = 512;
+constexpr dim3 LIN_BLOCK_DIM = {THREAD_CAP, 1, 1};
+
+inline dim3 LIN_GRID_DIM(const int x)
+{
+  return dim3{DivUp(x, THREAD_CAP), 1, 1};
+}
+
+#endif  // NERF__COMMON_CUDA_HPP_
diff --git a/localization/nerf_based_localizer/src/nerf/dataset.cpp b/localization/nerf_based_localizer/src/nerf/dataset.cpp
new file mode 100644
index 0000000000000..267426ec3d6c1
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/dataset.cpp
@@ -0,0 +1,195 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Dataset/Dataset.cpp
+//
+// Created by ppwang on 2022/5/7.
+//
+#include "dataset.hpp"
+
+#include "stop_watch.hpp"
+#include "utils.hpp"
+
+#include <glob.h>
+
+#include <experimental/filesystem>
+#include <fstream>
+
+#include <iostream>
+#include <sstream>
+
+using Tensor = torch::Tensor;
+
+namespace fs = std::experimental::filesystem::v1;
+
+// Loads camera metadata (pose/intrinsics/distortion) from cams_meta.tsv, normalizes
+// the scene so camera centers fit inside a unit sphere, then loads all images.
+// Stripped std::vector<...> element types and item<float>() are restored.
+Dataset::Dataset(const std::string & data_path)
+{
+  ScopeWatch dataset_watch("Dataset::Dataset");
+
+  std::cout << "data_path = " << data_path << std::endl;
+
+  // Load camera pose
+  CHECK(fs::exists(data_path + "/cams_meta.tsv"));
+  {
+    std::ifstream ifs(data_path + "/cams_meta.tsv");
+    std::string line;
+    std::getline(ifs, line);  // header
+    std::vector<Tensor> poses_vec, intrinsics_vec, dist_params_vec;
+    while (std::getline(ifs, line)) {
+      std::istringstream iss(line);
+      std::vector<std::string> tokens;
+      std::string token;
+      while (std::getline(iss, token, '\t')) {
+        tokens.push_back(token);
+      }
+      const int POSE_NUM = 12;       //(3, 4)
+      const int INTRINSIC_NUM = 9;   //(3, 3)
+      const int DISTORTION_NUM = 4;  //(k1, k2, p1, p2)
+      CHECK(tokens.size() == POSE_NUM + INTRINSIC_NUM + DISTORTION_NUM);
+      Tensor pose = torch::zeros({3, 4}, torch::kFloat32);
+      for (int i = 0; i < POSE_NUM; i++) {
+        pose.index_put_({i / 4, i % 4}, std::stof(tokens[i]));
+      }
+      pose = pose.reshape({3, 4});
+      poses_vec.push_back(pose);
+
+      Tensor intrinsic = torch::zeros({3, 3}, torch::kFloat32);
+      for (int i = 0; i < INTRINSIC_NUM; i++) {
+        intrinsic.index_put_({i / 3, i % 3}, std::stof(tokens[POSE_NUM + i]));
+      }
+      intrinsic = intrinsic.reshape({3, 3});
+      intrinsics_vec.push_back(intrinsic);
+
+      Tensor dist_param = torch::zeros({4}, torch::kFloat32);
+      for (int i = 0; i < DISTORTION_NUM; i++) {
+        dist_param.index_put_({i}, std::stof(tokens[POSE_NUM + INTRINSIC_NUM + i]));
+      }
+      dist_params_vec.push_back(dist_param);
+    }
+
+    n_images = poses_vec.size();
+    poses = torch::stack(poses_vec, 0).contiguous().to(torch::kCUDA);
+    intrinsics = torch::stack(intrinsics_vec, 0).contiguous().to(torch::kCUDA);
+    dist_params = torch::stack(dist_params_vec, 0).contiguous().to(torch::kCUDA);
+  }
+
+  // Normalize scene: recenter camera positions and scale by the max distance.
+  {
+    Tensor cam_pos = poses.index({Slc(), Slc(0, 3), 3}).clone();
+    center = cam_pos.mean(0, false);
+    Tensor bias = cam_pos - center.unsqueeze(0);
+    radius = torch::linalg_norm(bias, 2, -1, false).max().item<float>();
+    cam_pos = (cam_pos - center.unsqueeze(0)) / radius;
+    poses.index_put_({Slc(), Slc(0, 3), 3}, cam_pos);
+    poses = poses.contiguous();
+  }
+
+  std::vector<Tensor> images_vec;
+  // Load images (kept on CPU; rows are batched to CUDA on demand elsewhere).
+  {
+    ScopeWatch watch("LoadImages");
+    const std::vector<std::string> image_paths = glob_image_paths(data_path + "/images/");
+    for (int i = 0; i < n_images; i++) {
+      const std::string image_path = image_paths[i];
+      images_vec.push_back(utils::read_image_tensor(image_path).to(torch::kCPU));
+    }
+  }
+
+  std::cout << "Number of images: " << n_images << std::endl;
+
+  height = images_vec[0].size(0);
+  width = images_vec[0].size(1);
+  images = torch::stack(images_vec, 0).contiguous();
+}
+
+// Writes the parameters needed at inference time (image size, the first camera's
+// intrinsics, and the scene normalization) as YAML. item<float>() calls restored.
+void Dataset::save_inference_params(const std::string & train_result_dir) const
+{
+  std::ofstream ofs(train_result_dir + "/inference_params.yaml");
+  ofs << std::fixed;
+  ofs << "%YAML 1.2" << std::endl;
+  ofs << "---" << std::endl;
+  ofs << "n_images: " << n_images << std::endl;
+  ofs << "height: " << height << std::endl;
+  ofs << "width: " << width << std::endl;
+
+  ofs << "intrinsic: [";
+  ofs << intrinsics[0][0][0].item<float>() << ", ";
+  ofs << intrinsics[0][0][1].item<float>() << ", ";
+  ofs << intrinsics[0][0][2].item<float>() << "," << std::endl;
+  ofs << "            ";
+  ofs << intrinsics[0][1][0].item<float>() << ", ";
+  ofs << intrinsics[0][1][1].item<float>() << ", ";
+  ofs << intrinsics[0][1][2].item<float>() << "," << std::endl;
+  ofs << "            ";
+  ofs << intrinsics[0][2][0].item<float>() << ", ";
+  ofs << intrinsics[0][2][1].item<float>() << ", ";
+  ofs << intrinsics[0][2][2].item<float>() << "]" << std::endl;
+
+  ofs << "normalizing_center: [" << center[0].item<float>();
+  ofs << ", " << center[1].item<float>();
+  ofs << ", " << center[2].item<float>() << "]" << std::endl;
+  ofs << "normalizing_radius: " << radius << std::endl;
+}
+
+// Builds one ray per pixel of camera `idx`, in row-major (i, j) pixel order.
+Rays Dataset::get_all_rays_of_camera(int idx)
+{
+  Tensor pixel_rows = torch::linspace(0.f, height - 1.f, height, CUDAFloat);
+  Tensor pixel_cols = torch::linspace(0.f, width - 1.f, width, CUDAFloat);
+  auto grid = torch::meshgrid({pixel_rows, pixel_cols}, "ij");
+  Tensor flat_i = grid[0].reshape({-1});
+  Tensor flat_j = grid[1].reshape({-1});
+
+  auto [rays_o, rays_d] = get_rays_from_pose(
+    poses[idx].unsqueeze(0), intrinsics[idx].unsqueeze(0), torch::stack({flat_i, flat_j}, -1));
+  return {rays_o, rays_d};
+}
+
+// Samples `batch_size` random (camera, pixel) pairs and returns their rays, the
+// ground-truth colors, and the camera indices (kInt32, CUDA). The stripped tuple
+// template arguments and the {batch_size} shape braces are restored.
+std::tuple<Rays, Tensor, Tensor> Dataset::sample_random_rays(int batch_size)
+{
+  const auto CPULong = torch::TensorOptions().dtype(torch::kLong).device(torch::kCPU);
+  Tensor cam_indices = torch::randint(n_images, {batch_size}, CPULong);
+  Tensor i = torch::randint(0, height, {batch_size}, CPULong);
+  Tensor j = torch::randint(0, width, {batch_size}, CPULong);
+  Tensor ij = torch::stack({i, j}, -1).to(torch::kCUDA).contiguous();
+
+  // Gather ground-truth pixels with a flat index into the (N*H*W, 3) view.
+  Tensor gt_colors = images.view({-1, 3})
+                       .index({(cam_indices * height * width + i * width + j).to(torch::kLong)})
+                       .to(torch::kCUDA)
+                       .contiguous();
+  cam_indices = cam_indices.to(torch::kCUDA);
+  cam_indices = cam_indices.to(torch::kInt32);
+  ij = ij.to(torch::kInt32);
+
+  Tensor selected_poses = torch::index_select(poses, 0, cam_indices);
+  Tensor selected_intrinsics = torch::index_select(intrinsics, 0, cam_indices);
+  auto [rays_o, rays_d] = get_rays_from_pose(selected_poses, selected_intrinsics, ij);
+
+  return {{rays_o, rays_d}, gt_colors, cam_indices.to(torch::kInt32).contiguous()};
+}
+
+// Lists all "*.png" files directly under input_dir (expected to end with '/'),
+// sorted lexicographically, using POSIX glob(3). The stripped std::vector
+// element type is restored; NULL replaced by nullptr.
+std::vector<std::string> Dataset::glob_image_paths(const std::string & input_dir)
+{
+  glob_t buffer;
+  std::vector<std::string> files;
+  glob((input_dir + "*.png").c_str(), 0, nullptr, &buffer);
+  for (size_t i = 0; i < buffer.gl_pathc; i++) {
+    files.push_back(buffer.gl_pathv[i]);
+  }
+  globfree(&buffer);
+  std::sort(files.begin(), files.end());
+  return files;
+}
diff --git a/localization/nerf_based_localizer/src/nerf/dataset.hpp b/localization/nerf_based_localizer/src/nerf/dataset.hpp
new file mode 100644
index 0000000000000..7842a82befe4a
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/dataset.hpp
@@ -0,0 +1,55 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Dataset/Dataset.h
+//
+// Created by ppwang on 2022/5/7.
+//
+
+#ifndef NERF__DATASET_HPP_
+#define NERF__DATASET_HPP_
+
+#include "common.hpp"
+#include "rays.hpp"
+
+#include <torch/torch.h>
+
+#include <string>
+#include <tuple>
+#include <vector>
+
+// Holds the training dataset: camera poses/intrinsics (CUDA) and images (CPU),
+// with the scene normalized so camera centers lie in a unit sphere.
+struct Dataset
+{
+  using Tensor = torch::Tensor;
+
+public:
+  Dataset(const std::string & data_path);
+
+  void save_inference_params(const std::string & train_result_dir) const;
+
+  Rays get_all_rays_of_camera(int idx);
+
+  // Returns (rays, ground-truth colors, camera indices) for a random batch.
+  std::tuple<Rays, Tensor, Tensor> sample_random_rays(int batch_size);
+
+  static std::vector<std::string> glob_image_paths(const std::string & input_dir);
+
+  int n_images;
+  Tensor poses, images, intrinsics, dist_params;
+  Tensor center;  // scene normalization center
+  float radius;   // scene normalization radius
+  int height, width;
+};
+
+#endif  // NERF__DATASET_HPP_
diff --git a/localization/nerf_based_localizer/src/nerf/hash_3d_anchored.cpp b/localization/nerf_based_localizer/src/nerf/hash_3d_anchored.cpp
new file mode 100644
index 0000000000000..64ed1a69cc647
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/hash_3d_anchored.cpp
@@ -0,0 +1,130 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Field/Hash3DAnchored.cpp
+//
+// Created by ppwang on 2022/7/17.
+//
+
+#include "hash_3d_anchored.hpp"
+
+#include "common.hpp"
+#include "stop_watch.hpp"
+
+#include <torch/torch.h>
+
+using Tensor = torch::Tensor;
+
+// Registers Hash3DAnchoredInfo as a TorchScript custom class so it can be passed
+// through autograd as an IValue. Stripped template arguments restored.
+TORCH_LIBRARY(dec_hash3d_anchored, m)
+{
+  m.class_<Hash3DAnchoredInfo>("Hash3DAnchoredInfo").def(torch::init<>());
+}
+
+// Initializes the multi-level hash grid: a learnable feature pool, per-level random
+// prime hash coefficients and bias offsets, and the output MLP. Stripped
+// std::vector<int> and item<int>() are restored.
+Hash3DAnchored::Hash3DAnchored()
+{
+  pool_size_ = (1 << 19) * N_LEVELS;
+
+  // Feat pool, small random init around zero.
+  feat_pool_ = (torch::rand({pool_size_, N_CHANNELS}, CUDAFloat) * .2f - 1.f) * 1e-4f;
+  feat_pool_.requires_grad_(true);
+  CHECK(feat_pool_.is_contiguous());
+
+  // Get prime numbers (trial division is fine: only 3 * N_LEVELS draws).
+  auto is_prim = [](int x) {
+    for (int i = 2; i * i <= x; i++) {
+      if (x % i == 0) return false;
+    }
+    return true;
+  };
+
+  std::vector<int> prim_selected;
+  int min_local_prim = 1 << 28;
+  int max_local_prim = 1 << 30;
+
+  const auto CPUInt = torch::TensorOptions().dtype(torch::kInt).device(torch::kCPU);
+
+  for (int i = 0; i < 3 * N_LEVELS; i++) {
+    int val;
+    do {
+      val = torch::randint(min_local_prim, max_local_prim, {1}, CPUInt).item<int>();
+    } while (!is_prim(val));
+    prim_selected.push_back(val);
+  }
+
+  CHECK(prim_selected.size() == 3 * N_LEVELS);
+
+  prim_pool_ = torch::from_blob(prim_selected.data(), 3 * N_LEVELS, CPUInt).to(torch::kCUDA);
+  prim_pool_ = prim_pool_.reshape({N_LEVELS, 3}).contiguous();
+
+  bias_pool_ = (torch::rand({N_LEVELS, 3}, CUDAFloat) * 1000.f + 100.f).contiguous();
+
+  // Per-level pool size, rounded down to a multiple of 16.
+  local_size_ = pool_size_ / N_LEVELS;
+  local_size_ = (local_size_ >> 4) << 4;
+
+  // MLP
+  const int mlp_out_dim = 16;
+  mlp_ = torch::nn::Linear(N_LEVELS * N_CHANNELS, mlp_out_dim);
+
+  register_parameter("feat_pool", feat_pool_);
+  register_parameter("prim_pool", prim_pool_, false);  // not learnable
+  register_parameter("bias_pool", bias_pool_);
+  register_module("mlp", mlp_);
+}
+
+// Queries the hash grid at `points` ([N, 3]): points outside the unit sphere are
+// contracted toward it, features are looked up via the custom autograd function,
+// and the MLP maps them to the output. make_intrusive<Hash3DAnchoredInfo> restored.
+Tensor Hash3DAnchored::query(const Tensor & points)
+{
+#ifdef PROFILE
+  ScopeWatch watch(__func__);
+#endif
+
+  auto info = torch::make_intrusive<Hash3DAnchoredInfo>();
+  info->hash3d_ = this;
+
+  // Contract points with norm > radius to the shell [radius, radius + 1).
+  const float radius = 1.0f;
+  Tensor norm = points.norm(2, {1}, true);
+  Tensor mask = (norm <= radius);
+  Tensor x = points * mask + ~mask * (1 + radius - radius / norm) * points / norm;
+
+  Tensor feat =
+    torch::autograd::Hash3DAnchoredFunction::apply(x, feat_pool_, torch::IValue(info))[0];
+  Tensor output = mlp_->forward(feat);
+  return output;
+}
+
+// Builds Adam parameter groups: the feature pool without weight decay, and the MLP
+// weights with a small weight decay. Stripped template arguments restored.
+std::vector<torch::optim::OptimizerParamGroup> Hash3DAnchored::optim_param_groups(float lr)
+{
+  std::vector<torch::optim::OptimizerParamGroup> ret;
+
+  {
+    auto opt = std::make_unique<torch::optim::AdamOptions>(lr);
+    opt->betas() = {0.9, 0.99};
+    opt->eps() = 1e-15;
+
+    std::vector<Tensor> params = {feat_pool_};
+    ret.emplace_back(std::move(params), std::move(opt));
+  }
+
+  {
+    auto opt = std::make_unique<torch::optim::AdamOptions>(lr);
+    opt->betas() = {0.9, 0.99};
+    opt->eps() = 1e-15;
+    opt->weight_decay() = 1e-6;
+
+    std::vector<Tensor> params = mlp_->parameters();
+    ret.emplace_back(std::move(params), std::move(opt));
+  }
+
+  return ret;
+}
diff --git a/localization/nerf_based_localizer/src/nerf/hash_3d_anchored.cu b/localization/nerf_based_localizer/src/nerf/hash_3d_anchored.cu
new file mode 100644
index 0000000000000..00ef8c5619416
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/hash_3d_anchored.cu
@@ -0,0 +1,231 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Field/Hash3DAnchored.cu
+//
+// Created by ppwang on 2022/7/17.
+//
+#include "common.hpp"
+#include "common_cuda.hpp"
+#include "hash_3d_anchored.hpp"
+
+#include
+
+#include
+
+using Tensor = torch::Tensor;
+
+constexpr float RES_FINE_POW_2 = 10.f;
+constexpr float RES_BASE_POW_2 = 3.f;
+
+const auto CUDAFlex = torch::TensorOptions().dtype(torch::kFloat16).device(torch::kCUDA);
+using FlexType = __half;
+
+// For one hash-grid level, computes the 8 hashed pool indices of the corners
+// of the unit voxel containing `pt`, together with the trilinear
+// interpolation weights for those corners.
+// `prim_pool` holds three hashing multipliers per level; `local_size` is the
+// number of feature entries in this level's slice of the pool.
+__device__ inline void calculate_pos_and_w(
+  const Eigen::Vector3f & pt, const int local_size, const int * const prim_pool,
+  const int level_idx, unsigned pos[8], float w[8])
+{
+  const int offset = level_idx * 3;
+  const unsigned pa = prim_pool[offset + 0];
+  const unsigned pb = prim_pool[offset + 1];
+  const unsigned pc = prim_pool[offset + 2];
+
+  // Hash of an integer corner (x, y, z) is ((x*pa) ^ (y*pb) ^ (z*pc)) % local_size.
+  // Corners are enumerated with z varying fastest, then y, then x.
+  const unsigned pos_x = static_cast(floorf(pt[0]));
+  const unsigned pos_y = static_cast(floorf(pt[1]));
+  const unsigned pos_z = static_cast(floorf(pt[2]));
+  pos[0] = ((pos_x * pa) ^ (pos_y * pb) ^ (pos_z * pc)) % local_size;
+  pos[1] = ((pos_x * pa) ^ (pos_y * pb) ^ ((pos_z + 1u) * pc)) % local_size;
+  pos[2] = ((pos_x * pa) ^ ((pos_y + 1u) * pb) ^ (pos_z * pc)) % local_size;
+  pos[3] = ((pos_x * pa) ^ ((pos_y + 1u) * pb) ^ ((pos_z + 1u) * pc)) % local_size;
+  pos[4] = (((pos_x + 1u) * pa) ^ (pos_y * pb) ^ (pos_z * pc)) % local_size;
+  pos[5] = (((pos_x + 1u) * pa) ^ (pos_y * pb) ^ ((pos_z + 1u) * pc)) % local_size;
+  pos[6] = (((pos_x + 1u) * pa) ^ ((pos_y + 1u) * pb) ^ (pos_z * pc)) % local_size;
+  pos[7] = (((pos_x + 1u) * pa) ^ ((pos_y + 1u) * pb) ^ ((pos_z + 1u) * pc)) % local_size;
+
+  // Fractional position inside the voxel; standard trilinear weights, ordered
+  // to match the corner enumeration above.
+  const float a = pt[0] - floorf(pt[0]);
+  const float b = pt[1] - floorf(pt[1]);
+  const float c = pt[2] - floorf(pt[2]);
+
+  w[0] = (1.f - a) * (1.f - b) * (1.f - c);
+  w[1] = (1.f - a) * (1.f - b) * c;
+  w[2] = (1.f - a) * b * (1.f - c);
+  w[3] = (1.f - a) * b * c;
+  w[4] = a * (1.f - b) * (1.f - c);
+  w[5] = a * (1.f - b) * c;
+  w[6] = a * b * (1.f - c);
+  w[7] = a * b * c;
+}
+
+// Forward interpolation kernel.
+// Grid layout: x covers points (one thread each), y covers the N_LEVELS hash
+// levels. Each thread trilinearly interpolates N_CHANNELS features for its
+// (point, level) pair and writes them into that level's slice of `out_feat`
+// ([n_points, N_LEVELS * N_CHANNELS]).
+template
+__global__ void Hash3DAnchoredForwardKernel(
+  int n_points, int local_size, T * feat_pool, int * prim_pool, Eigen::Vector3f * bias_pool,
+  Eigen::Vector3f * points_ptr, T * out_feat)
+{
+  const int pts_idx = blockIdx.x * blockDim.x + threadIdx.x;
+  const int level_idx = blockIdx.y;
+  if (pts_idx >= n_points) {
+    return;
+  }
+  // Advance to this level's slice of the feature pool.
+  feat_pool = feat_pool + local_size * level_idx;
+
+  // Per-level grid resolution: log2-spaced between 2^RES_BASE_POW_2 and
+  // 2^RES_FINE_POW_2. The random per-level bias decorrelates the levels.
+  const float mul = exp2f(
+    (RES_FINE_POW_2 - RES_BASE_POW_2) * float(level_idx) / float(N_LEVELS - 1) + RES_BASE_POW_2);
+  const Eigen::Vector3f pt = (points_ptr[pts_idx] * mul + bias_pool[level_idx]);
+
+  float ws[8] = {};
+  unsigned pos[8] = {};
+  calculate_pos_and_w(pt, local_size, prim_pool, level_idx, pos, ws);
+
+  out_feat = out_feat + pts_idx * (N_LEVELS * N_CHANNELS);
+
+#pragma unroll
+  for (int k = 0; k < N_CHANNELS; k++) {
+    // Weighted sum over the 8 voxel corners (trilinear interpolation).
+    out_feat[level_idx * N_CHANNELS + k] = (T)(ws[0] * float(feat_pool[pos[0] * N_CHANNELS + k]) +
+                                               ws[1] * float(feat_pool[pos[1] * N_CHANNELS + k]) +
+                                               ws[2] * float(feat_pool[pos[2] * N_CHANNELS + k]) +
+                                               ws[3] * float(feat_pool[pos[3] * N_CHANNELS + k]) +
+                                               ws[4] * float(feat_pool[pos[4] * N_CHANNELS + k]) +
+                                               ws[5] * float(feat_pool[pos[5] * N_CHANNELS + k]) +
+                                               ws[6] * float(feat_pool[pos[6] * N_CHANNELS + k]) +
+                                               ws[7] * float(feat_pool[pos[7] * N_CHANNELS + k]));
+  }
+}
+
+// Backward kernel: mirrors the forward lookup and accumulates
+//  - grad_embeds: dL/d(feature pool), via __half2 atomic adds (two channels
+//    at a time; relies on N_CHANNELS being even — it is 2 here),
+//  - grad_points: dL/d(query point), atomically accumulated across levels.
+// NOTE(review): the point gradient uses only the SIGN pattern of each
+// corner's trilinear weight (not the exact weight derivative); this looks
+// like an approximation inherited from the upstream f2-nerf code — confirm.
+template
+__global__ void Hash3DAnchoredBackwardKernel(
+  int n_points, int local_size, T * feat_pool, int * prim_pool, Eigen::Vector3f * bias_pool,
+  Eigen::Vector3f * points_ptr,
+  T * grad_in, // [ n_points, n_levels, n_channels ]
+  T * grad_points, // [ n_points, 3 ]
+  T * grad_embeds // [ pool_size, n_channels ]
+)
+{
+  const int pts_idx = blockIdx.x * blockDim.x + threadIdx.x;
+  const int level_idx = blockIdx.y;
+  if (pts_idx >= n_points) {
+    return;
+  }
+  feat_pool = feat_pool + local_size * level_idx;
+
+  // Same per-level resolution and voxel/weight computation as the forward pass.
+  const float mul = exp2f(
+    (RES_FINE_POW_2 - RES_BASE_POW_2) * float(level_idx) / float(N_LEVELS - 1) + RES_BASE_POW_2);
+  const Eigen::Vector3f pt = (points_ptr[pts_idx] * mul + bias_pool[level_idx]);
+
+  float ws[8] = {};
+  unsigned pos[8] = {};
+  calculate_pos_and_w(pt, local_size, prim_pool, level_idx, pos, ws);
+
+  // Sign of d(weight)/d(axis) per corner, matching the corner enumeration in
+  // calculate_pos_and_w (z fastest, then y, then x).
+  const float sign_x[8] = {-1.0f, -1.0f, -1.0f, -1.0f, 1.0f, 1.0f, 1.0f, 1.0f};
+  const float sign_y[8] = {-1.0f, -1.0f, 1.0f, 1.0f, -1.0f, -1.0f, 1.0f, 1.0f};
+  const float sign_z[8] = {-1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f};
+
+  grad_in = grad_in + (N_LEVELS * N_CHANNELS) * pts_idx + level_idx * N_CHANNELS;
+
+  grad_points = grad_points + pts_idx * 3;
+  grad_embeds = grad_embeds + local_size * level_idx;
+
+#pragma unroll
+  for (int d = 0; d < 8; d++) {
+    for (int k = 0; k < N_CHANNELS; k += 2) {
+      float w0 = (float)grad_in[k];
+      float w1 = (float)grad_in[k + 1];
+      // Skip the atomic when the incoming gradient is exactly zero.
+      if (w0 != 0.f || w1 != 0.f) {
+        __half2 cur_w = {(__half)(float(w0) * ws[d]), (__half)(float(w1) * ws[d])};
+        atomicAdd((__half2 *)(grad_embeds + pos[d] * N_CHANNELS + k), cur_w);
+      }
+    }
+    for (int k = 0; k < N_CHANNELS; k++) {
+      const float norm = (float)(feat_pool[pos[d] * N_CHANNELS + k]) * mul * (float)grad_in[k];
+      atomicAdd(grad_points + 0, (T)(sign_x[d] * norm));
+      atomicAdd(grad_points + 1, (T)(sign_y[d] * norm));
+      atomicAdd(grad_points + 2, (T)(sign_z[d] * norm));
+    }
+  }
+}
+
+namespace torch::autograd
+{
+
+// Forward pass of the anchored hash-grid lookup.
+// `points`: [n_points, 3] (CUDA, contracted coordinates); `feat_pool`: the
+// learnable feature table. Returns a single fp32 tensor of interpolated
+// features, [n_points, N_LEVELS * N_CHANNELS].
+variable_list Hash3DAnchoredFunction::forward(
+  AutogradContext * ctx, Tensor points, Tensor feat_pool, IValue hash3d_info)
+{
+  auto info_ptr = hash3d_info.toCustomClass();
+  // Stash inputs for backward (via saved_data rather than save_for_backward).
+  ctx->saved_data["hash3d_info"] = hash3d_info;
+  ctx->saved_data["points"] = points;
+  ctx->saved_data["feat_pool"] = feat_pool;
+  Tensor & prim_pool = info_ptr->hash3d_->prim_pool_;
+  Tensor & bias_pool = info_ptr->hash3d_->bias_pool_;
+  CHECK(points.device().is_cuda());
+
+  int n_points = points.sizes()[0];
+
+  // One thread per point in x; one block row per hash level in y.
+  dim3 block_dim = LIN_BLOCK_DIM;
+  dim3 grid_dim = {DivUp(n_points, THREAD_CAP), unsigned(N_LEVELS), 1};
+
+  Tensor out_feat = torch::zeros({n_points, N_LEVELS * N_CHANNELS}, CUDAFlex);
+  CHECK(out_feat.is_contiguous());
+
+  // The kernel interpolates in half precision; make a contiguous fp16 copy.
+  Tensor feat_pool_true = feat_pool.to(torch::kFloat16).contiguous();
+
+  Hash3DAnchoredForwardKernel<<>>(
+    n_points, info_ptr->hash3d_->local_size_,
+    reinterpret_cast(feat_pool_true.data_ptr()), prim_pool.data_ptr(),
+    reinterpret_cast(bias_pool.data_ptr()),
+    reinterpret_cast(points.data_ptr()),
+    reinterpret_cast(out_feat.data_ptr()));
+
+  return {out_feat.to(torch::kFloat32)};
+}
+
+// Backward pass: computes gradients w.r.t. the query points and the feature
+// pool. Incoming gradients are multiplied by `grad_scale` before the fp16
+// kernel to avoid half-precision underflow, then divided back afterwards.
+variable_list Hash3DAnchoredFunction::backward(AutogradContext * ctx, variable_list grad_output)
+{
+  auto info_ptr = ctx->saved_data["hash3d_info"].toCustomClass();
+  Tensor & points = ctx->saved_data["points"].toTensor(); // [ n_points, 3 ]
+  Tensor & feat_pool = ctx->saved_data["feat_pool"].toTensor();
+  Tensor & prim_pool = info_ptr->hash3d_->prim_pool_;
+  Tensor & bias_pool = info_ptr->hash3d_->bias_pool_;
+  CHECK(points.device().is_cuda());
+
+  const float grad_scale = 128.f;
+  int n_points = points.sizes()[0];
+
+  int pool_size = info_ptr->hash3d_->pool_size_;
+
+  // Same launch geometry as the forward kernel.
+  dim3 block_dim = LIN_BLOCK_DIM;
+  dim3 grid_dim = {DivUp(n_points, THREAD_CAP), unsigned(N_LEVELS), 1};
+
+  Tensor feat_pool_true = feat_pool.to(torch::kFloat16).contiguous();
+
+  Tensor grad_in = (grad_output[0] * grad_scale).to(torch::kFloat16).contiguous();
+
+  Tensor points_grad = torch::zeros({n_points, 3}, CUDAFlex);
+  Tensor embeds_grad = torch::zeros({pool_size, N_CHANNELS}, CUDAFlex);
+
+  Hash3DAnchoredBackwardKernel<<>>(
+    n_points, info_ptr->hash3d_->local_size_,
+    reinterpret_cast(feat_pool_true.data_ptr()), prim_pool.data_ptr(),
+    reinterpret_cast(bias_pool.data_ptr()),
+    reinterpret_cast(points.data_ptr()),
+    reinterpret_cast(grad_in.data_ptr()),
+    reinterpret_cast(points_grad.data_ptr()),
+    reinterpret_cast(embeds_grad.data_ptr()));
+
+  // Undo the fp16 loss-scaling.
+  points_grad = points_grad.to(torch::kFloat32) / grad_scale;
+  embeds_grad = embeds_grad.to(torch::kFloat32) / grad_scale;
+
+  // One gradient per forward argument: points, feat_pool, and an undefined
+  // tensor for the non-differentiable hash3d_info.
+  return {points_grad, embeds_grad, Tensor()};
+}
+
+} // namespace torch::autograd
diff --git a/localization/nerf_based_localizer/src/nerf/hash_3d_anchored.hpp b/localization/nerf_based_localizer/src/nerf/hash_3d_anchored.hpp
new file mode 100644
index 0000000000000..2c0cce1096b51
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/hash_3d_anchored.hpp
@@ -0,0 +1,70 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Field/Hash3DAnchored.h
+//
+// Created by ppwang on 2022/7/17.
+//
+
+#ifndef NERF__HASH_3D_ANCHORED_HPP_
+#define NERF__HASH_3D_ANCHORED_HPP_
+
+#include
+
+// Number of feature channels stored per hash-grid entry.
+static constexpr int64_t N_CHANNELS = 2;
+// Number of resolution levels in the multi-resolution hash grid.
+static constexpr int64_t N_LEVELS = 16;
+
+// Multi-resolution anchored hash grid (derived from f2-nerf): per-level
+// hashed feature pools followed by a small linear layer.
+class Hash3DAnchored : public torch::nn::Module
+{
+  using Tensor = torch::Tensor;
+
+public:
+  Hash3DAnchored();
+
+  // Returns MLP output features for 3D query points ([n_points, 3], CUDA).
+  Tensor query(const Tensor & points);
+
+  // Adam parameter groups (feature pool and MLP) sharing learning rate `lr`.
+  std::vector optim_param_groups(float lr);
+
+  int pool_size_;   // total number of feature entries across all levels
+  int local_size_;  // entries per level (pool_size_ / N_LEVELS, 16-aligned)
+
+  Tensor feat_pool_;  // [ pool_size_, n_channels_ ]; learnable
+  Tensor prim_pool_;  // [ n_levels, 3 ]; per-level hash multipliers (not trained)
+  Tensor bias_pool_;  // [ n_levels, 3 ]; random per-level offsets
+
+  torch::nn::Linear mlp_ = nullptr;
+};
+
+// Thin holder passed through torch::IValue so the custom autograd function
+// can reach the owning Hash3DAnchored instance (non-owning pointer).
+class Hash3DAnchoredInfo : public torch::CustomClassHolder
+{
+public:
+  Hash3DAnchored * hash3d_ = nullptr;
+};
+
+namespace torch::autograd
+{
+
+// Custom autograd function wrapping the CUDA forward/backward hash-grid
+// kernels (implemented in hash_3d_anchored.cu).
+class Hash3DAnchoredFunction : public Function
+{
+public:
+  static variable_list forward(
+    AutogradContext * ctx, Tensor points, Tensor feat_pool_, IValue hash3d_info);
+
+  static variable_list backward(AutogradContext * ctx, variable_list grad_output);
+};
+
+} // namespace torch::autograd
+
+#endif // NERF__HASH_3D_ANCHORED_HPP_
diff --git a/localization/nerf_based_localizer/src/nerf/localizer.cpp b/localization/nerf_based_localizer/src/nerf/localizer.cpp
new file mode 100644
index 0000000000000..b36f9fae819cd
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/localizer.cpp
@@ -0,0 +1,354 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "localizer.hpp"
+
+#include "dataset.hpp"
+#include "stop_watch.hpp"
+
+#include
+#include
+
+#include
+
+using Tensor = torch::Tensor;
+
+// Loads the trained NeRF (renderer weights plus normalization / camera meta
+// data) from `param.train_result_dir` and precomputes the inference-time
+// intrinsics and the world<->NeRF axis-conversion matrix.
+// Throws std::runtime_error if inference_params.yaml cannot be opened.
+Localizer::Localizer(const LocalizerParam & param) : param_(param)
+{
+  const std::string train_result_dir = param.train_result_dir;
+
+  // Training-time metadata written alongside the checkpoint.
+  cv::FileStorage inference_params(
+    train_result_dir + "/inference_params.yaml", cv::FileStorage::READ);
+  if (!inference_params.isOpened()) {
+    throw std::runtime_error("Failed to open " + train_result_dir + "/inference_params.yaml");
+  }
+
+  const int n_images = (int)inference_params["n_images"];
+  const int train_height = (int)inference_params["height"];
+  const int train_width = (int)inference_params["width"];
+
+  std::vector intrinsic_vector;
+  inference_params["intrinsic"] >> intrinsic_vector;
+  intrinsic_ = torch::tensor(intrinsic_vector, torch::kFloat).view({3, 3}).to(torch::kCUDA);
+
+  // Scene normalization used during training: translations are mapped to
+  // (t - center) / radius before being fed to the NeRF (see camera2nerf).
+  std::vector normalizing_center;
+  inference_params["normalizing_center"] >> normalizing_center;
+  center_ = torch::tensor(normalizing_center, torch::kFloat).to(torch::kCUDA);
+
+  radius_ = (float)inference_params["normalizing_radius"];
+
+  renderer_ = std::make_shared(n_images, param.sample_num_per_ray);
+
+  torch::load(renderer_, train_result_dir + "/checkpoints/latest/renderer.pt");
+
+  // set the (possibly reduced) inference resolution; scale the intrinsics
+  // accordingly and restore the homogeneous 1 in the bottom-right entry.
+  infer_height_ = train_height / param.resize_factor;
+  infer_width_ = train_width / param.resize_factor;
+  intrinsic_ /= param.resize_factor;
+  intrinsic_[2][2] = 1.0;
+
+  // Axis conversion between the world camera frame and the NeRF camera frame
+  // (y and z axes flipped):
+  /*
+  [[+1, 0, 0, 0 ],
+  [ 0, -1, 0, 0 ],
+  [ 0, 0, -1, 0 ],
+  [ 0, 0, 0, +1 ]]
+*/
+  axis_convert_mat_ = torch::zeros({4, 4});
+  axis_convert_mat_[0][0] = +1;
+  axis_convert_mat_[1][1] = -1;
+  axis_convert_mat_[2][2] = -1;
+  axis_convert_mat_[3][3] = +1;
+  axis_convert_mat_ = axis_convert_mat_.to(torch::kCUDA);
+}
+
+// Generates `particle_num` candidate poses by perturbing `initial_pose` with
+// Gaussian noise (standard deviations from param_, scaled by `noise_coeff`),
+// scores them photometrically against `image_tensor`, and returns each pose
+// paired with its normalized weight. The first particle is always the
+// unperturbed initial pose.
+std::vector Localizer::optimize_pose_by_random_search(
+  Tensor initial_pose, Tensor image_tensor, int64_t particle_num, float noise_coeff)
+{
+  torch::NoGradGuard no_grad_guard;
+
+  std::mt19937_64 engine(std::random_device{}());
+
+  // Note that the order of the axes is different
+  // World coordinates (x: Front, y: Left, z: Up)
+  // NeRF coordinates (x: Right, y: Up, z: Back)
+  // Positions are also rescaled into the normalized NeRF scene (/ radius_).
+  const float pos_noise_x_in_nerf = param_.noise_position_y * noise_coeff / radius_;
+  const float pos_noise_y_in_nerf = param_.noise_position_z * noise_coeff / radius_;
+  const float pos_noise_z_in_nerf = param_.noise_position_x * noise_coeff / radius_;
+  const float theta_x_in_nerf = param_.noise_rotation_y * noise_coeff;
+  const float theta_y_in_nerf = param_.noise_rotation_z * noise_coeff;
+  const float theta_z_in_nerf = param_.noise_rotation_x * noise_coeff;
+
+  std::normal_distribution dist_position_x(0.0f, pos_noise_x_in_nerf);
+  std::normal_distribution dist_position_y(0.0f, pos_noise_y_in_nerf);
+  std::normal_distribution dist_position_z(0.0f, pos_noise_z_in_nerf);
+  std::normal_distribution dist_rotation_x(0.0f, theta_x_in_nerf);
+  std::normal_distribution dist_rotation_y(0.0f, theta_y_in_nerf);
+  std::normal_distribution dist_rotation_z(0.0f, theta_z_in_nerf);
+
+  std::vector poses;
+  for (int64_t i = 0; i < particle_num; i++) {
+    // Sample a random translation
+    Tensor curr_pose = initial_pose.clone();
+    if (i == 0) {
+      // Keep one unperturbed particle so the search can never be worse than
+      // the initial guess.
+      poses.push_back(curr_pose);
+      continue;
+    }
+    curr_pose[0][3] += dist_position_x(engine);
+    curr_pose[1][3] += dist_position_y(engine);
+    curr_pose[2][3] += dist_position_z(engine);
+
+    // orientation: sampled Euler angles (degrees -> radians) applied as
+    // R_z * R_y * R_x * R.
+    const float theta_x = dist_rotation_x(engine) * M_PI / 180.0;
+    const float theta_y = dist_rotation_y(engine) * M_PI / 180.0;
+    const float theta_z = dist_rotation_z(engine) * M_PI / 180.0;
+    Eigen::Matrix3f rotation_matrix_x(Eigen::AngleAxisf(theta_x, Eigen::Vector3f::UnitX()));
+    Eigen::Matrix3f rotation_matrix_y(Eigen::AngleAxisf(theta_y, Eigen::Vector3f::UnitY()));
+    Eigen::Matrix3f rotation_matrix_z(Eigen::AngleAxisf(theta_z, Eigen::Vector3f::UnitZ()));
+    const torch::Device dev = initial_pose.device();
+    // NOTE(review): Eigen matrices are column-major by default while
+    // from_blob assumes row-major, so these tensors are the transposes
+    // (i.e. the inverse rotations). Harmless here because the sampled angles
+    // are symmetric about zero, but worth confirming.
+    Tensor rotation_tensor_x =
+      torch::from_blob(rotation_matrix_x.data(), {3, 3}).to(torch::kFloat32).to(dev);
+    Tensor rotation_tensor_y =
+      torch::from_blob(rotation_matrix_y.data(), {3, 3}).to(torch::kFloat32).to(dev);
+    Tensor rotation_tensor_z =
+      torch::from_blob(rotation_matrix_z.data(), {3, 3}).to(torch::kFloat32).to(dev);
+    Tensor rotated = rotation_tensor_z.mm(
+      rotation_tensor_y.mm(rotation_tensor_x.mm(curr_pose.index({Slc(0, 3), Slc(0, 3)}))));
+    curr_pose.index_put_({Slc(0, 3), Slc(0, 3)}, rotated);
+    poses.push_back(curr_pose);
+  }
+
+  // Photometric scoring; weights are normalized to sum to 1.
+  const std::vector weights = evaluate_poses(poses, image_tensor);
+  const int pose_num = poses.size();
+
+  std::vector result;
+  for (int i = 0; i < pose_num; i++) {
+    result.push_back({poses[i], weights[i]});
+  }
+  return result;
+}
+
+// Orthonormalizes the rows of `A` via classical Gram-Schmidt and returns the
+// result as a new tensor; the argument itself is left untouched.
+torch::Tensor gram_schmidt(torch::Tensor A)
+{
+  torch::Tensor basis = A.clone();
+  const int64_t num_rows = basis.size(0);
+  for (int64_t row = 0; row < num_rows; ++row) {
+    // Subtract the projections onto all previously orthonormalized rows.
+    for (int64_t prev = 0; prev < row; ++prev) {
+      basis[row] -= torch::dot(basis[prev], basis[row]) * basis[prev];
+    }
+    // Rescale the remainder to unit length.
+    basis[row] /= basis[row].norm();
+  }
+  return basis;
+}
+
+// Refines `initial_pose` by gradient descent on the photometric MSE between
+// the rendered image and `image_tensor`, recording the pose after every Adam
+// step. Returns the (possibly empty) sequence of intermediate poses.
+// NOTE(review): the raw pose-matrix entries are optimized directly, so the
+// rotation block may drift away from orthonormality across iterations; the
+// gram_schmidt helper above looks intended for re-orthonormalization but is
+// not applied here — confirm.
+std::vector Localizer::optimize_pose_by_differential(
+  Tensor initial_pose, Tensor image_tensor, int64_t iteration_num, float learning_rate)
+{
+  std::vector results;
+  initial_pose = initial_pose.requires_grad_(true);
+  image_tensor = image_tensor.view({infer_height_, infer_width_, 3});
+  torch::optim::Adam optimizer({initial_pose}, learning_rate);
+  for (int64_t i = 0; i < iteration_num; i++) {
+    Tensor pred_img = render_image(initial_pose);
+    Tensor loss = torch::nn::functional::mse_loss(pred_img, image_tensor);
+    optimizer.zero_grad();
+    // For some reason, backward may fail, so check here and return whatever
+    // poses were accumulated so far instead of crashing.
+    try {
+      loss.backward();
+    } catch (const std::runtime_error & e) {
+      return results;
+    }
+    optimizer.step();
+
+    // Record a detached snapshot of the pose after this step.
+    Tensor curr_result = initial_pose.clone().detach();
+    results.push_back(curr_result);
+  }
+  return results;
+}
+
+// Renders a full (infer_height_ x infer_width_) image from the given pose in
+// NeRF coordinates, batching rays by param_.ray_batch_size. The renderer's
+// secondary output is discarded.
+Tensor Localizer::render_image(const Tensor & pose)
+{
+  auto [image, _] =
+    renderer_->render_image(pose, intrinsic_, infer_height_, infer_width_, param_.ray_batch_size);
+  return image;
+}
+
+// Scores candidate poses by photometric consistency: renders a sparse random
+// subset of param_.render_pixel_num pixels for every pose and converts each
+// pose's mean squared error into a weight. Weights are sharpened by raising
+// the inverse error to the 5th power and normalized to sum to 1.
+// `image` is the reference camera image, (infer_height_, infer_width_, 3).
+std::vector Localizer::evaluate_poses(
+  const std::vector & poses, const Tensor & image)
+{
+  torch::NoGradGuard no_grad_guard;
+  Timer timer;
+
+  const int pixel_num = param_.render_pixel_num;
+  const auto CUDALong = torch::TensorOptions().dtype(torch::kLong).device(torch::kCUDA);
+
+  // Pick rays by constant interval
+  // const int step = H * W / pixel_num;
+  // std::vector i_vec, j_vec;
+  // for (int k = 0; k < pixel_num; k++) {
+  //   const int v = k * step;
+  //   const int64_t i = v / W;
+  //   const int64_t j = v % W;
+  //   i_vec.push_back(i);
+  //   j_vec.push_back(j);
+  // }
+  // const Tensor i = torch::tensor(i_vec, CUDALong);
+  // const Tensor j = torch::tensor(j_vec, CUDALong);
+
+  // Pick rays by random sampling without replacement
+  std::vector indices(infer_height_ * infer_width_);
+  std::iota(indices.begin(), indices.end(), 0);
+  std::mt19937 engine(std::random_device{}());
+  std::shuffle(indices.begin(), indices.end(), engine);
+  std::vector i_vec, j_vec;
+  for (int k = 0; k < pixel_num; k++) {
+    const int v = indices[k];
+    const int64_t i = v / infer_width_;
+    const int64_t j = v % infer_width_;
+    i_vec.push_back(i);
+    j_vec.push_back(j);
+  }
+  Tensor i = torch::tensor(i_vec, CUDALong);
+  Tensor j = torch::tensor(j_vec, CUDALong);
+
+  // Pick rays by random sampling with replacement
+  // const Tensor i = torch::randint(0, H, pixel_num, CUDALong);
+  // const Tensor j = torch::randint(0, infer_width_, pixel_num, CUDALong);
+
+  // Build the same set of pixel rays for every candidate pose.
+  const Tensor ij = torch::stack({i, j}, -1).to(torch::kFloat32);
+  std::vector rays_o_vec;
+  std::vector rays_d_vec;
+  for (const Tensor & pose : poses) {
+    auto [rays_o, rays_d] = get_rays_from_pose(pose.unsqueeze(0), intrinsic_.unsqueeze(0), ij);
+    rays_o_vec.push_back(rays_o);
+    rays_d_vec.push_back(rays_d);
+  }
+
+  const int64_t pose_num = poses.size();
+
+  Tensor rays_o = torch::cat(rays_o_vec);  // (pose_num * pixel_num, 3)
+  Tensor rays_d = torch::cat(rays_d_vec);  // (pose_num * pixel_num, 3)
+
+  timer.start();
+  auto [pred_colors, _] = renderer_->render_all_rays(rays_o, rays_d, (1 << 16));
+
+  Tensor pred_pixels = pred_colors.view({pose_num, pixel_num, 3});
+  pred_pixels = pred_pixels.clip(0.f, 1.f);
+  pred_pixels = pred_pixels.to(image.device());  // (pose_num, pixel_num, 3)
+
+  i = i.to(image.device());
+  j = j.to(image.device());
+
+  Tensor gt_pixels = image.index({i, j});              // (pixel_num, 3)
+  Tensor diff = pred_pixels - gt_pixels;               // (pose_num, pixel_num, 3)
+  Tensor loss = (diff * diff).mean(-1).sum(-1).cpu();  // (pose_num,)
+  // Convert errors to weights: smaller error -> larger weight (the 1e-6f
+  // avoids division by zero), sharpened by the 5th power, normalized to 1.
+  loss = pixel_num / (loss + 1e-6f);
+  loss = torch::pow(loss, 5);
+  loss /= loss.sum();
+
+  std::vector result(loss.data_ptr(), loss.data_ptr() + loss.numel());
+  return result;
+}
+
+// Computes the weighted average of rotation matrices via quaternion
+// averaging. Quaternions are sign-aligned against the first one before
+// accumulation so that q and -q (the same rotation) do not cancel out.
+// cf. https://stackoverflow.com/questions/12374087/average-of-multiple-quaternions
+// Returns the identity rotation for an empty input.
+Eigen::Matrix3d compute_rotation_average(
+  const std::vector & rotations, const std::vector & weights)
+{
+  if (rotations.empty()) {
+    return Eigen::Matrix3d::Identity();
+  }
+
+  std::vector quaternions;
+  for (const Eigen::Matrix3d & rot : rotations) {
+    Eigen::Quaterniond quat(rot);
+    quaternions.push_back(quat);
+  }
+
+  Eigen::Vector4d cumulative(0.0, 0.0, 0.0, 0.0);
+  const Eigen::Quaterniond & front = quaternions[0];
+
+  for (size_t k = 0; k < quaternions.size(); ++k) {
+    Eigen::Quaterniond q = quaternions[k];
+    if (q.dot(front) < 0.0) {
+      q = Eigen::Quaterniond(-q.coeffs());
+    }
+    // Weight each quaternion by its particle weight. Previously `weights`
+    // was accepted but ignored, making the rotation average inconsistent
+    // with the weighted position average computed by calc_average_pose.
+    cumulative += weights[k] * q.coeffs();
+  }
+
+  // normalize() below removes any overall scale, so dividing by the weight
+  // sum is unnecessary.
+  Eigen::Quaterniond average_quaternion;
+  average_quaternion.coeffs() = cumulative;
+  average_quaternion.normalize();
+
+  return average_quaternion.toRotationMatrix();
+}
+
+// Computes the weighted average pose of a particle set: positions are
+// averaged with the particle weights; rotations are averaged via quaternions
+// in compute_rotation_average.
+Tensor Localizer::calc_average_pose(const std::vector & particles)
+{
+  torch::Device device = particles.front().pose.device();
+  torch::Tensor avg_position_tensor = torch::zeros({3, 1}, device).to(torch::kFloat32);
+  std::vector rotations;
+  std::vector weights;
+
+  for (const Particle & particle : particles) {
+    torch::Tensor pose = particle.pose;
+    torch::Tensor position = pose.index({Slc(0, 3), Slc(3, 4)});
+    avg_position_tensor += position * particle.weight;
+
+    // slice to get 3x3 rotation matrix, convert it to Eigen::Matrix3f
+    // NOTE(review): torch is row-major while Eigen defaults to column-major,
+    // so this memcpy actually stores the transpose; the transpose is undone
+    // by the matching row/column-major mismatch in from_blob below — confirm.
+    torch::Tensor rotation_tensor = pose.index({Slc(0, 3), Slc(0, 3)}).to(torch::kDouble).cpu();
+    Eigen::Matrix3d rotation;
+    std::memcpy(
+      rotation.data(), rotation_tensor.data_ptr(), sizeof(double) * rotation_tensor.numel());
+    rotations.push_back(rotation);
+    weights.push_back(particle.weight);
+  }
+
+  Eigen::Matrix3d avg_rotation_matrix = compute_rotation_average(rotations, weights);
+  torch::Tensor avg_rotation_tensor = torch::from_blob(
+    avg_rotation_matrix.data(), {3, 3}, torch::TensorOptions().dtype(torch::kDouble));
+  avg_rotation_tensor = avg_rotation_tensor.to(torch::kFloat32);
+  avg_rotation_tensor = avg_rotation_tensor.to(device);
+
+  // combine average position and rotation to form average pose
+  torch::Tensor avg_pose = torch::zeros_like(particles.front().pose);
+  avg_pose.index_put_({Slc(0, 3), Slc(3, 4)}, avg_position_tensor);
+  avg_pose.index_put_({Slc(0, 3), Slc(0, 3)}, avg_rotation_tensor);
+
+  return avg_pose;
+}
+
+// Converts a 4x4 camera pose in world coordinates to the normalized 3x4 pose
+// expected by the NeRF: conjugation with the axis-conversion matrix, then
+// translation normalization into the training scene scale.
+torch::Tensor Localizer::camera2nerf(const torch::Tensor & pose_in_world)
+{
+  torch::Tensor x = pose_in_world;
+  // Conjugate with the axis-conversion matrix: x <- C^T * pose * C.
+  x = torch::mm(x, axis_convert_mat_);
+  x = torch::mm(axis_convert_mat_.t(), x);
+
+  // normalize t into the unit scene used during training.
+  Tensor t = x.index({Slc(0, 3), 3}).clone();
+  t = (t - center_.unsqueeze(0)) / radius_;
+  x.index_put_({Slc(0, 3), 3}, t);
+
+  // Drop the homogeneous bottom row; the renderer expects a 3x4 pose.
+  x = x.index({Slc(0, 3), Slc(0, 4)});
+  return x;
+}
+
+// Inverse of camera2nerf: converts a 3x4 NeRF pose back into a 4x4 camera
+// pose in world coordinates.
+torch::Tensor Localizer::nerf2camera(const torch::Tensor & pose_in_camera)
+{
+  torch::Tensor x = pose_in_camera;
+  // Re-append the homogeneous bottom row [0, 0, 0, 1].
+  x = torch::cat({x, torch::tensor({0, 0, 0, 1}).view({1, 4}).to(torch::kCUDA)});
+
+  // denormalize t: undo the training-time scene normalization.
+  Tensor t = x.index({Slc(0, 3), 3}).clone();
+  t = t * radius_ + center_.unsqueeze(0);
+  x.index_put_({Slc(0, 3), 3}, t);
+
+  // Conjugate back: x <- C * pose * C^T.
+  x = torch::mm(x, axis_convert_mat_.t());
+  x = torch::mm(axis_convert_mat_, x);
+  return x;
+}
diff --git a/localization/nerf_based_localizer/src/nerf/localizer.hpp b/localization/nerf_based_localizer/src/nerf/localizer.hpp
new file mode 100644
index 0000000000000..020ffca3e5dcc
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/localizer.hpp
@@ -0,0 +1,81 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef NERF__LOCALIZER_HPP_
+#define NERF__LOCALIZER_HPP_
+
+#include "dataset.hpp"
+#include "renderer.hpp"
+
+#include
+
+// One pose hypothesis and its score from photometric evaluation.
+struct Particle
+{
+  torch::Tensor pose;  // (3, 4)
+  float weight;        // normalized over a particle set (see evaluate_poses)
+};
+
+// Configuration for Localizer. Noise fields are the standard deviations used
+// by the random search, per world axis (x: front, y: left, z: up); rotation
+// noise is in degrees. Position noise appears to be in the original world
+// scale — optimize_pose_by_random_search divides by the scene radius.
+struct LocalizerParam
+{
+  std::string train_result_dir;    // directory with inference_params.yaml / checkpoints
+  int32_t render_pixel_num = 256;  // pixels sampled per pose when scoring candidates
+  float noise_position_x = 0.025f;
+  float noise_position_y = 0.025f;
+  float noise_position_z = 0.025f;
+  float noise_rotation_x = 2.5f;
+  float noise_rotation_y = 2.5f;
+  float noise_rotation_z = 2.5f;
+  int32_t resize_factor = 1;       // render at train resolution / resize_factor
+  int32_t sample_num_per_ray = 1024;
+  int32_t ray_batch_size = (1 << 8);
+};
+
+// NeRF-based pose estimator: renders candidate camera poses with a trained
+// NeRF and scores them against the live camera image.
+class Localizer
+{
+  using Tensor = torch::Tensor;
+
+public:
+  Localizer() = default;
+  Localizer(const LocalizerParam & param);
+
+  // Renders a full (infer_height_ x infer_width_) image from a NeRF-frame pose.
+  Tensor render_image(const Tensor & pose);
+  // Particle-filter style search around initial_pose; returns weighted particles.
+  std::vector optimize_pose_by_random_search(
+    Tensor initial_pose, Tensor image_tensor, int64_t particle_num, float noise_coeff);
+  // Gradient-based refinement; returns the pose after each iteration.
+  std::vector optimize_pose_by_differential(
+    Tensor initial_pose, Tensor image_tensor, int64_t iteration_num, float learning_rate);
+
+  // Conversions between world camera poses and normalized NeRF poses.
+  torch::Tensor camera2nerf(const torch::Tensor & pose_in_world);
+  torch::Tensor nerf2camera(const torch::Tensor & pose_in_camera);
+
+  // Weighted average of particle poses (weighted position + quaternion average).
+  static Tensor calc_average_pose(const std::vector & particles);
+
+  float radius() const { return radius_; }
+  int infer_height() const { return infer_height_; }
+  int infer_width() const { return infer_width_; }
+
+private:
+  // Scores poses photometrically; returns weights normalized to sum to 1.
+  std::vector evaluate_poses(const std::vector & poses, const Tensor & image);
+
+  LocalizerParam param_;
+
+  std::shared_ptr renderer_;
+
+  // 4x4 conversion between world and NeRF camera axis conventions (CUDA).
+  torch::Tensor axis_convert_mat_;
+
+  int infer_height_, infer_width_;
+  Tensor intrinsic_;  // 3x3 intrinsics, already scaled by resize_factor
+  Tensor center_;     // training-time scene normalization center
+  float radius_;      // training-time scene normalization radius
+};
+
+#endif // NERF__LOCALIZER_HPP_
diff --git a/localization/nerf_based_localizer/src/nerf/points_sampler.cpp b/localization/nerf_based_localizer/src/nerf/points_sampler.cpp
new file mode 100644
index 0000000000000..3d302a50d6cf3
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/points_sampler.cpp
@@ -0,0 +1,79 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/PtsSampler/PersSampler.cpp
+//
+// Created by ppwang on 2022/9/26.
+//
+
+#include "points_sampler.hpp"
+
+#include "dataset.hpp"
+#include "stop_watch.hpp"
+#include "utils.hpp"
+
+#include
+#include
+
+using Tensor = torch::Tensor;
+
+// `sample_num_per_ray`: number of points sampled along each ray.
+PtsSampler::PtsSampler(const int sample_num_per_ray) : sample_num_per_ray_(sample_num_per_ray)
+{
+}
+
+// Samples sample_num_per_ray_ points along every input ray.
+// In VALIDATE mode samples are spaced exactly SAMPLE_L apart; in TRAIN mode
+// each step is jittered in [0.5, 1.5) * SAMPLE_L. Returns flattened
+// per-sample tensors plus per-ray [start, end) index bounds.
+SampleResultFlex PtsSampler::get_samples(
+  const Tensor & rays_o_raw, const Tensor & rays_d_raw, RunningMode mode)
+{
+  Tensor rays_o = rays_o_raw.contiguous();
+  // Normalize directions so the cumulative noise measures metric distance.
+  Tensor rays_d = (rays_d_raw / torch::linalg_norm(rays_d_raw, 2, -1, true)).contiguous();
+
+  int n_rays = rays_o.sizes()[0];
+
+  const int n_all_pts = n_rays * sample_num_per_ray_;
+
+  // Per-step multipliers: all ones (uniform spacing) or jittered around 1.
+  Tensor rays_noise;
+  if (mode == RunningMode::VALIDATE) {
+    rays_noise = torch::ones({n_all_pts}, CUDAFloat);
+  } else {
+    rays_noise = ((torch::rand({n_all_pts}, CUDAFloat) - .5f) + 1.f).contiguous();
+  }
+  rays_noise = rays_noise.view({n_rays, sample_num_per_ray_}).contiguous();
+  // Cumulative sum turns per-step lengths into distances t along each ray.
+  Tensor cum_noise = torch::cumsum(rays_noise, 1) * SAMPLE_L;
+  Tensor sampled_t = cum_noise.reshape({n_all_pts}).contiguous();
+
+  rays_o = rays_o.view({n_rays, 1, 3}).contiguous();
+  rays_d = rays_d.view({n_rays, 1, 3}).contiguous();
+  cum_noise = cum_noise.unsqueeze(-1).contiguous();
+  Tensor sampled_pts = rays_o + rays_d * cum_noise;
+
+  // Distance from each sample to its predecessor; the first sample of every
+  // ray gets distance 0.
+  Tensor sampled_distances = torch::diff(sampled_pts, 1, 1).norm(2, -1).contiguous();
+  sampled_distances =
+    torch::cat({torch::zeros({n_rays, 1}, CUDAFloat), sampled_distances}, 1).contiguous();
+  sampled_pts = sampled_pts.view({n_all_pts, 3});
+  sampled_distances = sampled_distances.view({n_all_pts}).contiguous();
+
+  // Every ray owns exactly sample_num_per_ray_ consecutive samples; build
+  // the [start, end) index bounds from the cumulative count.
+  Tensor pts_idx_start_end =
+    torch::ones({n_rays, 2}, torch::TensorOptions().dtype(torch::kInt).device(torch::kCUDA)) *
+    sample_num_per_ray_;
+  Tensor pts_num = pts_idx_start_end.index({Slc(), 0});
+  Tensor cum_num = torch::cumsum(pts_num, 0);
+  pts_idx_start_end.index_put_({Slc(), 0}, cum_num - pts_num);
+  pts_idx_start_end.index_put_({Slc(), 1}, cum_num);
+
+  // Per-sample direction: each ray's (normalized) direction repeated.
+  Tensor sampled_dirs =
+    rays_d.expand({-1, sample_num_per_ray_, -1}).reshape({n_all_pts, 3}).contiguous();
+
+  return {sampled_pts, sampled_dirs, sampled_distances, sampled_t, pts_idx_start_end};
+}
diff --git a/localization/nerf_based_localizer/src/nerf/points_sampler.hpp b/localization/nerf_based_localizer/src/nerf/points_sampler.hpp
new file mode 100644
index 0000000000000..f606ae62f83e3
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/points_sampler.hpp
@@ -0,0 +1,58 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/PtsSampler/PersSampler.h
+//
+// Created by ppwang on 2022/6/20.
+//
+
+#ifndef NERF__POINTS_SAMPLER_HPP_
+#define NERF__POINTS_SAMPLER_HPP_
+
+#include "Eigen/Eigen"
+#include "common.hpp"
+
+#include
+
+#include
+
+// Output of PtsSampler::get_samples: flattened per-sample tensors for a batch
+// of rays, plus per-ray [start, end) index bounds into the flat arrays.
+struct SampleResultFlex
+{
+  using Tensor = torch::Tensor;
+  Tensor pts;   // [ n_all_pts, 3 ]  sampled 3D points
+  Tensor dirs;  // [ n_all_pts, 3 ]  normalized ray direction per sample
+  Tensor dt;    // [ n_all_pts, 1 ]  distance to the previous sample (0 at start)
+  Tensor t;     // [ n_all_pts, 1 ]  distance along the ray
+  Tensor pts_idx_bounds;  // [ n_rays, 2 ] // start, end
+};
+
+// TRAIN jitters the per-step sample spacing; VALIDATE uses uniform spacing.
+enum RunningMode { TRAIN, VALIDATE };
+
+// Samples points along rays at a fixed (optionally jittered) step length.
+class PtsSampler
+{
+  using Tensor = torch::Tensor;
+
+public:
+  explicit PtsSampler(const int sample_num_per_ray);
+
+  // Returns flattened samples for all rays; see points_sampler.cpp.
+  SampleResultFlex get_samples(const Tensor & rays_o, const Tensor & rays_d, RunningMode mode);
+
+private:
+  const int sample_num_per_ray_;
+
+  // Base step length between consecutive samples along a ray.
+  static constexpr float SAMPLE_L = 1.0 / 256;
+};
+
+#endif // NERF__POINTS_SAMPLER_HPP_
diff --git a/localization/nerf_based_localizer/src/nerf/rays.cpp b/localization/nerf_based_localizer/src/nerf/rays.cpp
new file mode 100644
index 0000000000000..289a81566ef68
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/rays.cpp
@@ -0,0 +1,42 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "rays.hpp"
+
+#include "common.hpp"
+
+using Tensor = torch::Tensor;
+
+Rays get_rays_from_pose(const Tensor & pose, const Tensor & intrinsic, const Tensor & ij)
+{
+ // Shift half pixel
+ Tensor i = ij.index({"...", 0}).to(torch::kFloat32) + .5f;
+ Tensor j = ij.index({"...", 1}).to(torch::kFloat32) + .5f;
+
+ Tensor cx = intrinsic.index({Slc(), 0, 2});
+ Tensor cy = intrinsic.index({Slc(), 1, 2});
+ Tensor fx = intrinsic.index({Slc(), 0, 0});
+ Tensor fy = intrinsic.index({Slc(), 1, 1});
+
+ Tensor u_tensor = ((j - cx) / fx).unsqueeze(-1);
+ Tensor v_tensor = -((i - cy) / fy).unsqueeze(-1);
+ Tensor w_tensor = -torch::ones_like(u_tensor);
+
+ Tensor dir_tensor = torch::cat({u_tensor, v_tensor, w_tensor}, 1).unsqueeze(-1);
+ Tensor ori_tensor = pose.index({Slc(), Slc(0, 3), Slc(0, 3)});
+ Tensor pos_tensor = pose.index({Slc(), Slc(0, 3), 3});
+ Tensor rays_d = torch::matmul(ori_tensor, dir_tensor).squeeze();
+ Tensor rays_o = pos_tensor.expand({rays_d.sizes()[0], 3}).contiguous();
+
+ return {rays_o, rays_d};
+}
diff --git a/localization/nerf_based_localizer/src/nerf/rays.hpp b/localization/nerf_based_localizer/src/nerf/rays.hpp
new file mode 100644
index 0000000000000..5c5cb84558fb7
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/rays.hpp
@@ -0,0 +1,28 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef NERF__RAYS_HPP_
+#define NERF__RAYS_HPP_
+
+#include <torch/torch.h>
+
+struct alignas(32) Rays
+{
+ torch::Tensor origins;
+ torch::Tensor dirs;
+};
+
+Rays get_rays_from_pose(
+ const torch::Tensor & pose, const torch::Tensor & intrinsic, const torch::Tensor & ij);
+
+#endif // NERF__RAYS_HPP_
diff --git a/localization/nerf_based_localizer/src/nerf/renderer.cpp b/localization/nerf_based_localizer/src/nerf/renderer.cpp
new file mode 100644
index 0000000000000..18d73f1fab16c
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/renderer.cpp
@@ -0,0 +1,214 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Renderer/Renderer.cpp
+//
+// Created by ppwang on 2022/5/7.
+//
+
+#include "renderer.hpp"
+
+#include "CustomOps/CustomOps.hpp"
+#include "CustomOps/FlexOps.hpp"
+#include "CustomOps/Scatter.hpp"
+#include "common.hpp"
+#include "rays.hpp"
+#include "stop_watch.hpp"
+#include "utils.hpp"
+
+using Tensor = torch::Tensor;
+namespace F = torch::nn::functional;
+
+Renderer::Renderer(int n_images, const int sample_num_per_ray)
+: sample_num_per_ray_(sample_num_per_ray)
+{
+  pts_sampler_ = std::make_shared<PtsSampler>(sample_num_per_ray_);
+
+  scene_field_ = std::make_shared<Hash3DAnchored>();
+ register_module("scene_field", scene_field_);
+
+  shader_ = std::make_shared<SHShader>();
+ register_module("shader", shader_);
+
+ app_emb_ = torch::randn({n_images, 16}, CUDAFloat) * .1f;
+ app_emb_.requires_grad_(true);
+ register_parameter("app_emb", app_emb_);
+}
+
+RenderResult Renderer::render(
+ const Tensor & rays_o, const Tensor & rays_d, const Tensor & emb_idx, RunningMode mode)
+{
+ int n_rays = rays_o.sizes()[0];
+ SampleResultFlex sample_result = pts_sampler_->get_samples(rays_o, rays_d, mode);
+ int n_all_pts = sample_result.pts.sizes()[0];
+  CHECK(sample_result.pts_idx_bounds.max().item<int>() <= n_all_pts);
+  CHECK(sample_result.pts_idx_bounds.min().item<int>() >= 0);
+
+ Tensor bg_color =
+ ((mode == RunningMode::TRAIN) ? torch::rand({n_rays, 3}, CUDAFloat)
+ : torch::ones({n_rays, 3}, CUDAFloat) * .5f);
+
+ if (n_all_pts <= 0) {
+ return {
+ bg_color, torch::zeros({n_rays}, CUDAFloat), torch::full({n_rays}, 512.f, CUDAFloat),
+ Tensor()};
+ }
+ CHECK(rays_o.sizes()[0] == sample_result.pts_idx_bounds.sizes()[0]);
+
+ auto DensityAct = [](Tensor x) -> Tensor {
+ const float shift = 3.f;
+ return torch::autograd::TruncExp::apply(x - shift)[0];
+ };
+
+ // First, inference - early stop
+ SampleResultFlex sample_result_early_stop;
+ {
+ Tensor scene_feat = scene_field_->query(sample_result.pts);
+ Tensor sampled_density = DensityAct(scene_feat.index({Slc(), Slc(0, 1)}));
+ Tensor sec_density = sampled_density.index({Slc(), 0}) * sample_result.dt;
+ Tensor alphas = 1.f - torch::exp(-sec_density);
+ Tensor acc_density = FlexOps::AccumulateSum(sec_density, sample_result.pts_idx_bounds, false);
+ Tensor trans = torch::exp(-acc_density);
+ Tensor weights = trans * alphas;
+ Tensor mask = trans > 1e-4f;
+ Tensor mask_idx = torch::where(mask)[0];
+
+ sample_result_early_stop.pts = sample_result.pts.index({mask_idx}).contiguous();
+ sample_result_early_stop.dirs = sample_result.dirs.index({mask_idx}).contiguous();
+ sample_result_early_stop.dt = sample_result.dt.index({mask_idx}).contiguous();
+ sample_result_early_stop.t = sample_result.t.index({mask_idx}).contiguous();
+
+ Tensor mask_2d = mask.reshape({n_rays, sample_num_per_ray_});
+ Tensor num = mask_2d.sum(1);
+ Tensor cum_num = torch::cumsum(num, 0);
+ Tensor idx_bounds =
+ torch::zeros({n_rays, 2}, torch::TensorOptions().dtype(torch::kInt).device(torch::kCUDA));
+ idx_bounds.index_put_({Slc(), 0}, cum_num - num);
+ idx_bounds.index_put_({Slc(), 1}, cum_num);
+ sample_result_early_stop.pts_idx_bounds = idx_bounds;
+
+    CHECK(
+      sample_result_early_stop.pts_idx_bounds.max().item<int>() ==
+      sample_result_early_stop.pts.size(0));
+ }
+
+ n_all_pts = sample_result_early_stop.pts.size(0);
+
+ Tensor scene_feat = scene_field_->query(sample_result_early_stop.pts);
+ Tensor sampled_density = DensityAct(scene_feat.index({Slc(), Slc(0, 1)}));
+
+ Tensor shading_feat = torch::cat(
+ {torch::ones_like(scene_feat.index({Slc(), Slc(0, 1)}), CUDAFloat),
+ scene_feat.index({Slc(), Slc(1, torch::indexing::None)})},
+ 1);
+
+ if (mode == RunningMode::TRAIN) {
+ Tensor all_emb_idx =
+ CustomOps::ScatterIdx(n_all_pts, sample_result_early_stop.pts_idx_bounds, emb_idx);
+ shading_feat = CustomOps::ScatterAdd(app_emb_, all_emb_idx, shading_feat);
+ }
+
+ Tensor sampled_colors = shader_->query(shading_feat, sample_result_early_stop.dirs);
+ Tensor sampled_t = (sample_result_early_stop.t + 1e-2f).contiguous();
+ Tensor sec_density = sampled_density.index({Slc(), 0}) * sample_result_early_stop.dt;
+ Tensor alphas = 1.f - torch::exp(-sec_density);
+ Tensor idx_start_end = sample_result_early_stop.pts_idx_bounds;
+ Tensor acc_density = FlexOps::AccumulateSum(sec_density, idx_start_end, false);
+ Tensor trans = torch::exp(-acc_density);
+ Tensor weights = trans * alphas;
+
+ Tensor last_trans = torch::exp(-FlexOps::Sum(sec_density, idx_start_end));
+ Tensor colors = FlexOps::Sum(weights.unsqueeze(-1) * sampled_colors, idx_start_end);
+ colors = colors + last_trans.unsqueeze(-1) * bg_color;
+ Tensor depths = FlexOps::Sum(weights * sampled_t, idx_start_end) / (1.f - last_trans + 1e-4f);
+
+  CHECK(std::isfinite((colors).mean().item<float>()));
+
+ return {colors, depths, weights, idx_start_end};
+}
+
+std::tuple<Tensor, Tensor> Renderer::render_all_rays(
+ const Tensor & rays_o, const Tensor & rays_d, const int batch_size)
+{
+ const int n_rays = rays_d.sizes()[0];
+
+  std::vector<Tensor> pred_colors;
+  std::vector<Tensor> pred_depths;
+
+ const int ray_batch_size = (1 << 16);
+ for (int i = 0; i < n_rays; i += batch_size) {
+ int i_high = std::min(i + batch_size, n_rays);
+ Tensor cur_rays_o = rays_o.index({Slc(i, i_high)}).contiguous();
+ Tensor cur_rays_d = rays_d.index({Slc(i, i_high)}).contiguous();
+
+ RenderResult render_result = render(cur_rays_o, cur_rays_d, Tensor(), RunningMode::VALIDATE);
+ Tensor colors = render_result.colors;
+ Tensor depths = render_result.depths.squeeze();
+
+ pred_colors.push_back(colors);
+ pred_depths.push_back(depths.unsqueeze(-1));
+ }
+
+ Tensor pred_colors_ts = torch::cat(pred_colors, 0);
+ Tensor pred_depths_ts = torch::cat(pred_depths, 0);
+
+ return {pred_colors_ts, pred_depths_ts};
+}
+
+std::tuple<Tensor, Tensor> Renderer::render_image(
+ const torch::Tensor & pose, const torch::Tensor & intrinsic, const int h, const int w,
+ const int batch_size)
+{
+ Tensor ii = torch::linspace(0.f, h - 1.f, h, CUDAFloat);
+ Tensor jj = torch::linspace(0.f, w - 1.f, w, CUDAFloat);
+ auto ij = torch::meshgrid({ii, jj}, "ij");
+ Tensor i = ij[0].reshape({-1});
+ Tensor j = ij[1].reshape({-1});
+ auto [rays_o, rays_d] =
+ get_rays_from_pose(pose.unsqueeze(0), intrinsic.unsqueeze(0), torch::stack({i, j}, -1));
+ auto [pred_colors, pred_depths] = render_all_rays(rays_o, rays_d, batch_size);
+ pred_colors = pred_colors.reshape({h, w, 3});
+ pred_depths = pred_depths.reshape({h, w, 1});
+
+ pred_colors = pred_colors.clip(0.0f, 1.0f);
+ pred_depths = pred_depths.repeat({1, 1, 3});
+
+ return {pred_colors, pred_depths};
+}
+
+std::vector<torch::optim::OptimizerParamGroup> Renderer::optim_param_groups(float lr)
+{
+ std::vector ret;
+
+ // scene_field_
+ for (const auto & para_group : scene_field_->optim_param_groups(lr)) {
+ ret.emplace_back(para_group);
+ }
+
+ // shader_
+ for (const auto & para_group : shader_->optim_param_groups(lr)) {
+ ret.emplace_back(para_group);
+ }
+
+ // app_emb_
+  auto opt = std::make_unique<torch::optim::AdamOptions>(lr);
+ opt->betas() = {0.9, 0.99};
+ opt->eps() = 1e-15;
+ opt->weight_decay() = 1e-6;
+  std::vector<Tensor> params{app_emb_};
+ ret.emplace_back(std::move(params), std::move(opt));
+
+ return ret;
+}
diff --git a/localization/nerf_based_localizer/src/nerf/renderer.hpp b/localization/nerf_based_localizer/src/nerf/renderer.hpp
new file mode 100644
index 0000000000000..4b7f06dc2f54d
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/renderer.hpp
@@ -0,0 +1,69 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Renderer/Renderer.h
+//
+// Created by ppwang on 2022/5/7.
+//
+
+#ifndef NERF__RENDERER_HPP_
+#define NERF__RENDERER_HPP_
+
+#include "hash_3d_anchored.hpp"
+#include "points_sampler.hpp"
+#include "sh_shader.hpp"
+
+#include <torch/torch.h>
+
+
+struct RenderResult
+{
+ using Tensor = torch::Tensor;
+ Tensor colors;
+ Tensor depths;
+ Tensor weights;
+ Tensor idx_start_end;
+};
+
+class Renderer : public torch::nn::Module
+{
+ using Tensor = torch::Tensor;
+
+public:
+ Renderer(int n_images, const int sample_num_per_ray = 1024);
+
+ RenderResult render(
+ const Tensor & rays_o, const Tensor & rays_d, const Tensor & emb_idx, RunningMode mode);
+
+  std::tuple<Tensor, Tensor> render_all_rays(
+ const Tensor & rays_o, const Tensor & rays_d, const int batch_size);
+
+  std::tuple<Tensor, Tensor> render_image(
+ const torch::Tensor & pose, const torch::Tensor & intrinsic, const int h, const int w,
+ const int batch_size);
+
+  std::vector<torch::optim::OptimizerParamGroup> optim_param_groups(float lr);
+
+private:
+ const int sample_num_per_ray_;
+
+  std::shared_ptr<PtsSampler> pts_sampler_;
+  std::shared_ptr<Hash3DAnchored> scene_field_;
+  std::shared_ptr<SHShader> shader_;
+
+ Tensor app_emb_;
+};
+
+#endif // NERF__RENDERER_HPP_
diff --git a/localization/nerf_based_localizer/src/nerf/sh_shader.cpp b/localization/nerf_based_localizer/src/nerf/sh_shader.cpp
new file mode 100644
index 0000000000000..84ed5004a23e4
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/sh_shader.cpp
@@ -0,0 +1,56 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Shader/SHShader.cpp
+//
+// Created by ppwang on 2022/10/8.
+//
+
+#include "sh_shader.hpp"
+
+#include "common.hpp"
+
+using Tensor = torch::Tensor;
+
+SHShader::SHShader()
+{
+ const int d_in = 32;
+ const int d_hidden = 64;
+ const int d_out = 3;
+
+ mlp_ = torch::nn::Sequential(
+ torch::nn::Linear(d_in, d_hidden), torch::nn::ReLU(), torch::nn::Linear(d_hidden, d_out));
+ register_module("mlp", mlp_);
+}
+
+Tensor SHShader::query(const Tensor & feats, const Tensor & dirs)
+{
+ Tensor enc = encode(dirs);
+ Tensor input = torch::cat({feats, enc}, -1);
+ Tensor output = mlp_->forward(input);
+ float eps = 1e-3f;
+ return (1.f + 2.f * eps) / (1.f + torch::exp(-output)) - eps;
+}
+
+std::vector<torch::optim::OptimizerParamGroup> SHShader::optim_param_groups(float lr)
+{
+  auto opt = std::make_unique<torch::optim::AdamOptions>(lr);
+ opt->betas() = {0.9, 0.99};
+ opt->eps() = 1e-15;
+ opt->weight_decay() = 1e-6;
+
+  std::vector<Tensor> params = mlp_->parameters();
+ return {torch::optim::OptimizerParamGroup(params, std::move(opt))};
+}
diff --git a/localization/nerf_based_localizer/src/nerf/sh_shader.cu b/localization/nerf_based_localizer/src/nerf/sh_shader.cu
new file mode 100644
index 0000000000000..7a46b30eae60a
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/sh_shader.cu
@@ -0,0 +1,224 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Shader/SHShader.cu
+//
+// Created by ppwang on 2022/10/8.
+//
+
+#include "common.hpp"
+#include "common_cuda.hpp"
+#include "sh_shader.hpp"
+
+using Tensor = torch::Tensor;
+
+__global__ void SHKernel(
+ const uint32_t num_elements, const uint32_t degree, float * data_in, float * data_out)
+{
+ const uint32_t i = threadIdx.x + blockIdx.x * blockDim.x;
+ if (i >= num_elements) return;
+
+ data_out = data_out + (degree * degree) * i;
+
+ float x = data_in[i * 3];
+ float y = data_in[i * 3 + 1];
+ float z = data_in[i * 3 + 2];
+
+ // Let compiler figure out how to sequence/reorder these calculations w.r.t. branches
+ float xy = x * y, xz = x * z, yz = y * z, x2 = x * x, y2 = y * y, z2 = z * z;
+ float x4 = x2 * x2, y4 = y2 * y2, z4 = z2 * z2;
+ float x6 = x4 * x2, y6 = y4 * y2, z6 = z4 * z2;
+
+ // SH polynomials generated using scripts/gen_sh.py based on the recurrence relations in appendix
+ // A1 of https://www.ppsloan.org/publications/StupidSH36.pdf
+
+ data_out[0] = 0.28209479177387814f; // 1/(2*sqrt(pi))
+ if (degree <= 1) {
+ return;
+ }
+ data_out[1] = -0.48860251190291987f * y; // -sqrt(3)*y/(2*sqrt(pi))
+ data_out[2] = 0.48860251190291987f * z; // sqrt(3)*z/(2*sqrt(pi))
+ data_out[3] = -0.48860251190291987f * x; // -sqrt(3)*x/(2*sqrt(pi))
+ if (degree <= 2) {
+ return;
+ }
+ data_out[4] = 1.0925484305920792f * xy; // sqrt(15)*xy/(2*sqrt(pi))
+ data_out[5] = -1.0925484305920792f * yz; // -sqrt(15)*yz/(2*sqrt(pi))
+ data_out[6] =
+ 0.94617469575755997f * z2 - 0.31539156525251999f; // sqrt(5)*(3*z2 - 1)/(4*sqrt(pi))
+ data_out[7] = -1.0925484305920792f * xz; // -sqrt(15)*xz/(2*sqrt(pi))
+ data_out[8] =
+ 0.54627421529603959f * x2 - 0.54627421529603959f * y2; // sqrt(15)*(x2 - y2)/(4*sqrt(pi))
+ if (degree <= 3) {
+ return;
+ }
+ data_out[9] =
+ 0.59004358992664352f * y * (-3.0f * x2 + y2); // sqrt(70)*y*(-3*x2 + y2)/(8*sqrt(pi))
+ data_out[10] = 2.8906114426405538f * xy * z; // sqrt(105)*xy*z/(2*sqrt(pi))
+ data_out[11] =
+ 0.45704579946446572f * y * (1.0f - 5.0f * z2); // sqrt(42)*y*(1 - 5*z2)/(8*sqrt(pi))
+ data_out[12] = 0.3731763325901154f * z * (5.0f * z2 - 3.0f); // sqrt(7)*z*(5*z2 - 3)/(4*sqrt(pi))
+ data_out[13] =
+ 0.45704579946446572f * x * (1.0f - 5.0f * z2); // sqrt(42)*x*(1 - 5*z2)/(8*sqrt(pi))
+ data_out[14] = 1.4453057213202769f * z * (x2 - y2); // sqrt(105)*z*(x2 - y2)/(4*sqrt(pi))
+ data_out[15] =
+ 0.59004358992664352f * x * (-x2 + 3.0f * y2); // sqrt(70)*x*(-x2 + 3*y2)/(8*sqrt(pi))
+ if (degree <= 4) {
+ return;
+ }
+ data_out[16] = 2.5033429417967046f * xy * (x2 - y2); // 3*sqrt(35)*xy*(x2 - y2)/(4*sqrt(pi))
+ data_out[17] =
+ 1.7701307697799304f * yz * (-3.0f * x2 + y2); // 3*sqrt(70)*yz*(-3*x2 + y2)/(8*sqrt(pi))
+ data_out[18] =
+ 0.94617469575756008f * xy * (7.0f * z2 - 1.0f); // 3*sqrt(5)*xy*(7*z2 - 1)/(4*sqrt(pi))
+ data_out[19] =
+ 0.66904654355728921f * yz * (3.0f - 7.0f * z2); // 3*sqrt(10)*yz*(3 - 7*z2)/(8*sqrt(pi))
+ data_out[20] = -3.1735664074561294f * z2 + 3.7024941420321507f * z4 +
+ 0.31735664074561293f; // 3*(-30*z2 + 35*z4 + 3)/(16*sqrt(pi))
+ data_out[21] =
+ 0.66904654355728921f * xz * (3.0f - 7.0f * z2); // 3*sqrt(10)*xz*(3 - 7*z2)/(8*sqrt(pi))
+ data_out[22] = 0.47308734787878004f * (x2 - y2) *
+ (7.0f * z2 - 1.0f); // 3*sqrt(5)*(x2 - y2)*(7*z2 - 1)/(8*sqrt(pi))
+ data_out[23] =
+ 1.7701307697799304f * xz * (-x2 + 3.0f * y2); // 3*sqrt(70)*xz*(-x2 + 3*y2)/(8*sqrt(pi))
+ data_out[24] = -3.7550144126950569f * x2 * y2 + 0.62583573544917614f * x4 +
+ 0.62583573544917614f * y4; // 3*sqrt(35)*(-6*x2*y2 + x4 + y4)/(16*sqrt(pi))
+ if (degree <= 5) {
+ return;
+ }
+ data_out[25] =
+ 0.65638205684017015f * y *
+ (10.0f * x2 * y2 - 5.0f * x4 - y4); // 3*sqrt(154)*y*(10*x2*y2 - 5*x4 - y4)/(32*sqrt(pi))
+ data_out[26] =
+ 8.3026492595241645f * xy * z * (x2 - y2); // 3*sqrt(385)*xy*z*(x2 - y2)/(4*sqrt(pi))
+ data_out[27] = -0.48923829943525038f * y * (3.0f * x2 - y2) *
+ (9.0f * z2 - 1.0f); // -sqrt(770)*y*(3*x2 - y2)*(9*z2 - 1)/(32*sqrt(pi))
+ data_out[28] =
+ 4.7935367849733241f * xy * z * (3.0f * z2 - 1.0f); // sqrt(1155)*xy*z*(3*z2 - 1)/(4*sqrt(pi))
+ data_out[29] = 0.45294665119569694f * y *
+ (14.0f * z2 - 21.0f * z4 - 1.0f); // sqrt(165)*y*(14*z2 - 21*z4 - 1)/(16*sqrt(pi))
+ data_out[30] =
+ 0.1169503224534236f * z *
+ (-70.0f * z2 + 63.0f * z4 + 15.0f); // sqrt(11)*z*(-70*z2 + 63*z4 + 15)/(16*sqrt(pi))
+ data_out[31] = 0.45294665119569694f * x *
+ (14.0f * z2 - 21.0f * z4 - 1.0f); // sqrt(165)*x*(14*z2 - 21*z4 - 1)/(16*sqrt(pi))
+ data_out[32] = 2.3967683924866621f * z * (x2 - y2) *
+ (3.0f * z2 - 1.0f); // sqrt(1155)*z*(x2 - y2)*(3*z2 - 1)/(8*sqrt(pi))
+ data_out[33] = -0.48923829943525038f * x * (x2 - 3.0f * y2) *
+ (9.0f * z2 - 1.0f); // -sqrt(770)*x*(x2 - 3*y2)*(9*z2 - 1)/(32*sqrt(pi))
+ data_out[34] = 2.0756623148810411f * z *
+ (-6.0f * x2 * y2 + x4 + y4); // 3*sqrt(385)*z*(-6*x2*y2 + x4 + y4)/(16*sqrt(pi))
+ data_out[35] =
+ 0.65638205684017015f * x *
+ (10.0f * x2 * y2 - x4 - 5.0f * y4); // 3*sqrt(154)*x*(10*x2*y2 - x4 - 5*y4)/(32*sqrt(pi))
+ if (degree <= 6) {
+ return;
+ }
+ data_out[36] = 1.3663682103838286f * xy *
+ (-10.0f * x2 * y2 + 3.0f * x4 +
+ 3.0f * y4); // sqrt(6006)*xy*(-10*x2*y2 + 3*x4 + 3*y4)/(32*sqrt(pi))
+ data_out[37] =
+ 2.3666191622317521f * yz *
+ (10.0f * x2 * y2 - 5.0f * x4 - y4); // 3*sqrt(2002)*yz*(10*x2*y2 - 5*x4 - y4)/(32*sqrt(pi))
+ data_out[38] = 2.0182596029148963f * xy * (x2 - y2) *
+ (11.0f * z2 - 1.0f); // 3*sqrt(91)*xy*(x2 - y2)*(11*z2 - 1)/(8*sqrt(pi))
+ data_out[39] = -0.92120525951492349f * yz * (3.0f * x2 - y2) *
+ (11.0f * z2 - 3.0f); // -sqrt(2730)*yz*(3*x2 - y2)*(11*z2 - 3)/(32*sqrt(pi))
+ data_out[40] =
+ 0.92120525951492349f * xy *
+ (-18.0f * z2 + 33.0f * z4 + 1.0f); // sqrt(2730)*xy*(-18*z2 + 33*z4 + 1)/(32*sqrt(pi))
+ data_out[41] =
+ 0.58262136251873131f * yz *
+ (30.0f * z2 - 33.0f * z4 - 5.0f); // sqrt(273)*yz*(30*z2 - 33*z4 - 5)/(16*sqrt(pi))
+ data_out[42] = 6.6747662381009842f * z2 - 20.024298714302954f * z4 + 14.684485723822165f * z6 -
+ 0.31784601133814211f; // sqrt(13)*(105*z2 - 315*z4 + 231*z6 - 5)/(32*sqrt(pi))
+ data_out[43] =
+ 0.58262136251873131f * xz *
+ (30.0f * z2 - 33.0f * z4 - 5.0f); // sqrt(273)*xz*(30*z2 - 33*z4 - 5)/(16*sqrt(pi))
+ data_out[44] = 0.46060262975746175f * (x2 - y2) *
+ (11.0f * z2 * (3.0f * z2 - 1.0f) - 7.0f * z2 +
+ 1.0f); // sqrt(2730)*(x2 - y2)*(11*z2*(3*z2 - 1) - 7*z2 + 1)/(64*sqrt(pi))
+ data_out[45] = -0.92120525951492349f * xz * (x2 - 3.0f * y2) *
+ (11.0f * z2 - 3.0f); // -sqrt(2730)*xz*(x2 - 3*y2)*(11*z2 - 3)/(32*sqrt(pi))
+ data_out[46] =
+ 0.50456490072872406f * (11.0f * z2 - 1.0f) *
+ (-6.0f * x2 * y2 + x4 + y4); // 3*sqrt(91)*(11*z2 - 1)*(-6*x2*y2 + x4 + y4)/(32*sqrt(pi))
+ data_out[47] =
+ 2.3666191622317521f * xz *
+ (10.0f * x2 * y2 - x4 - 5.0f * y4); // 3*sqrt(2002)*xz*(10*x2*y2 - x4 - 5*y4)/(32*sqrt(pi))
+ data_out[48] =
+ 10.247761577878714f * x2 * y4 - 10.247761577878714f * x4 * y2 + 0.6831841051919143f * x6 -
+ 0.6831841051919143f * y6; // sqrt(6006)*(15*x2*y4 - 15*x4*y2 + x6 - y6)/(64*sqrt(pi))
+ if (degree <= 7) {
+ return;
+ }
+ data_out[49] = 0.70716273252459627f * y *
+ (-21.0f * x2 * y4 + 35.0f * x4 * y2 - 7.0f * x6 +
+ y6); // 3*sqrt(715)*y*(-21*x2*y4 + 35*x4*y2 - 7*x6 + y6)/(64*sqrt(pi))
+ data_out[50] = 5.2919213236038001f * xy * z *
+ (-10.0f * x2 * y2 + 3.0f * x4 +
+ 3.0f * y4); // 3*sqrt(10010)*xy*z*(-10*x2*y2 + 3*x4 + 3*y4)/(32*sqrt(pi))
+ data_out[51] = -0.51891557872026028f * y * (13.0f * z2 - 1.0f) *
+ (-10.0f * x2 * y2 + 5.0f * x4 +
+ y4); // -3*sqrt(385)*y*(13*z2 - 1)*(-10*x2*y2 + 5*x4 + y4)/(64*sqrt(pi))
+ data_out[52] = 4.1513246297620823f * xy * z * (x2 - y2) *
+ (13.0f * z2 - 3.0f); // 3*sqrt(385)*xy*z*(x2 - y2)*(13*z2 - 3)/(8*sqrt(pi))
+ data_out[53] = -0.15645893386229404f * y * (3.0f * x2 - y2) *
+ (13.0f * z2 * (11.0f * z2 - 3.0f) - 27.0f * z2 +
+ 3.0f); // -3*sqrt(35)*y*(3*x2 - y2)*(13*z2*(11*z2 - 3) - 27*z2 + 3)/(64*sqrt(pi))
+ data_out[54] =
+ 0.44253269244498261f * xy * z *
+ (-110.0f * z2 + 143.0f * z4 + 15.0f); // 3*sqrt(70)*xy*z*(-110*z2 + 143*z4 + 15)/(32*sqrt(pi))
+ data_out[55] = 0.090331607582517306f * y *
+ (-135.0f * z2 + 495.0f * z4 - 429.0f * z6 +
+ 5.0f); // sqrt(105)*y*(-135*z2 + 495*z4 - 429*z6 + 5)/(64*sqrt(pi))
+ data_out[56] = 0.068284276912004949f * z *
+ (315.0f * z2 - 693.0f * z4 + 429.0f * z6 -
+ 35.0f); // sqrt(15)*z*(315*z2 - 693*z4 + 429*z6 - 35)/(32*sqrt(pi))
+ data_out[57] = 0.090331607582517306f * x *
+ (-135.0f * z2 + 495.0f * z4 - 429.0f * z6 +
+ 5.0f); // sqrt(105)*x*(-135*z2 + 495*z4 - 429*z6 + 5)/(64*sqrt(pi))
+ data_out[58] = 0.07375544874083044f * z * (x2 - y2) *
+ (143.0f * z2 * (3.0f * z2 - 1.0f) - 187.0f * z2 +
+ 45.0f); // sqrt(70)*z*(x2 - y2)*(143*z2*(3*z2 - 1) - 187*z2 + 45)/(64*sqrt(pi))
+ data_out[59] = -0.15645893386229404f * x * (x2 - 3.0f * y2) *
+ (13.0f * z2 * (11.0f * z2 - 3.0f) - 27.0f * z2 +
+ 3.0f); // -3*sqrt(35)*x*(x2 - 3*y2)*(13*z2*(11*z2 - 3) - 27*z2 + 3)/(64*sqrt(pi))
+ data_out[60] =
+ 1.0378311574405206f * z * (13.0f * z2 - 3.0f) *
+ (-6.0f * x2 * y2 + x4 + y4); // 3*sqrt(385)*z*(13*z2 - 3)*(-6*x2*y2 + x4 + y4)/(32*sqrt(pi))
+ data_out[61] = -0.51891557872026028f * x * (13.0f * z2 - 1.0f) *
+ (-10.0f * x2 * y2 + x4 +
+ 5.0f * y4); // -3*sqrt(385)*x*(13*z2 - 1)*(-10*x2*y2 + x4 + 5*y4)/(64*sqrt(pi))
+ data_out[62] = 2.6459606618019f * z *
+ (15.0f * x2 * y4 - 15.0f * x4 * y2 + x6 -
+ y6); // 3*sqrt(10010)*z*(15*x2*y4 - 15*x4*y2 + x6 - y6)/(64*sqrt(pi))
+ data_out[63] = 0.70716273252459627f * x *
+ (-35.0f * x2 * y4 + 21.0f * x4 * y2 - x6 +
+ 7.0f * y6); // 3*sqrt(715)*x*(-35*x2*y4 + 21*x4*y2 - x6 + 7*y6)/(64*sqrt(pi))
+}
+
+Tensor SHShader::encode(const Tensor & dirs)
+{
+ CHECK(dirs.is_contiguous());
+ int n_pts = dirs.size(0);
+ Tensor out = torch::empty({n_pts, DEGREE * DEGREE}, CUDAFloat);
+ dim3 grid_dim = LIN_GRID_DIM(n_pts);
+ dim3 block_dim = LIN_BLOCK_DIM;
+
+  SHKernel<<<grid_dim, block_dim>>>(n_pts, DEGREE, dirs.data_ptr<float>(), out.data_ptr<float>());
+
+ return out;
+}
diff --git a/localization/nerf_based_localizer/src/nerf/sh_shader.hpp b/localization/nerf_based_localizer/src/nerf/sh_shader.hpp
new file mode 100644
index 0000000000000..66f0b99625561
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/sh_shader.hpp
@@ -0,0 +1,45 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Shader/SHShader.h
+//
+// Created by ppwang on 2022/10/8.
+//
+
+#ifndef NERF__SH_SHADER_HPP_
+#define NERF__SH_SHADER_HPP_
+
+#include <torch/torch.h>
+
+class SHShader : public torch::nn::Module
+{
+ using Tensor = torch::Tensor;
+
+public:
+ SHShader();
+
+ Tensor query(const Tensor & feats, const Tensor & dirs);
+
+  std::vector<torch::optim::OptimizerParamGroup> optim_param_groups(float lr);
+
+private:
+ static constexpr int DEGREE = 4;
+
+ Tensor encode(const Tensor & dirs);
+
+ torch::nn::Sequential mlp_ = nullptr;
+};
+
+#endif // NERF__SH_SHADER_HPP_
diff --git a/localization/nerf_based_localizer/src/nerf/stop_watch.cpp b/localization/nerf_based_localizer/src/nerf/stop_watch.cpp
new file mode 100644
index 0000000000000..c629b56f9bda4
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/stop_watch.cpp
@@ -0,0 +1,41 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Utils/StopWatch.cpp
+//
+// Created by ppwang on 2022/5/18.
+//
+
+#include "stop_watch.hpp"
+
+#include <torch/torch.h>
+
+#include <iostream>
+
+ScopeWatch::ScopeWatch(const std::string & scope_name) : scope_name_(scope_name)
+{
+ torch::cuda::synchronize();
+ t_point_ = std::chrono::steady_clock::now();
+ std::cout << "[" << scope_name_ << "] begin" << std::endl;
+}
+
+ScopeWatch::~ScopeWatch()
+{
+ torch::cuda::synchronize();
+ std::chrono::steady_clock::time_point new_point = std::chrono::steady_clock::now();
+  std::chrono::duration<double> time_span =
+    std::chrono::duration_cast<std::chrono::duration<double>>(new_point - t_point_);
+ std::cout << "[" << scope_name_ << "] end in " << time_span.count() << " seconds" << std::endl;
+}
diff --git a/localization/nerf_based_localizer/src/nerf/stop_watch.hpp b/localization/nerf_based_localizer/src/nerf/stop_watch.hpp
new file mode 100644
index 0000000000000..86483eee9484d
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/stop_watch.hpp
@@ -0,0 +1,53 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Utils/StopWatch.h
+//
+// Created by ppwang on 2022/5/18.
+//
+
+#ifndef NERF__STOP_WATCH_HPP_
+#define NERF__STOP_WATCH_HPP_
+
+#include <chrono>
+#include <string>
+
+class ScopeWatch
+{
+public:
+ ScopeWatch(const std::string & scope_name);
+ ~ScopeWatch();
+
+private:
+ std::chrono::steady_clock::time_point t_point_;
+ std::string scope_name_;
+};
+
+class Timer
+{
+public:
+ void start() { start_time_ = std::chrono::steady_clock::now(); }
+ int64_t elapsed_milli_seconds() const
+ {
+ auto elapsed = std::chrono::steady_clock::now() - start_time_;
+    return std::chrono::duration_cast<std::chrono::milliseconds>(elapsed).count();
+ }
+ double elapsed_seconds() const { return elapsed_milli_seconds() / 1000.0; }
+
+private:
+ std::chrono::steady_clock::time_point start_time_;
+};
+
+#endif // NERF__STOP_WATCH_HPP_
diff --git a/localization/nerf_based_localizer/src/nerf/utils.cpp b/localization/nerf_based_localizer/src/nerf/utils.cpp
new file mode 100644
index 0000000000000..4c72509422381
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/utils.cpp
@@ -0,0 +1,80 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Utils/Utils.cpp
+//
+// Created by ppwang on 2023/4/4.
+//
+
#include "utils.hpp"

#include "common.hpp"

#include <opencv2/opencv.hpp>

#include <torch/torch.h>

#include <stdexcept>
#include <string>
#include <vector>
+
+using Tensor = torch::Tensor;
+
+Tensor utils::read_image_tensor(const std::string & path)
+{
+ cv::Mat img = cv::imread(path, cv::IMREAD_UNCHANGED);
+ cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
+ img.convertTo(img, CV_32FC3, 1.0 / 255.0);
+ Tensor img_tensor =
+ torch::from_blob(img.data, {img.rows, img.cols, img.channels()}, torch::kFloat32).clone();
+ return img_tensor;
+}
+
+bool utils::write_image_tensor(const std::string & path, Tensor img)
+{
+ img = img.contiguous();
+ img = (img * 255.f).clamp(0, 255).to(torch::kUInt8).to(torch::kCPU);
+ cv::Mat img_mat(img.size(0), img.size(1), CV_8UC3, img.data_ptr());
+ cv::cvtColor(img_mat, img_mat, cv::COLOR_RGB2BGR);
+ cv::imwrite(path, img_mat);
+ return true;
+}
+
+Tensor utils::resize_image(Tensor image, const int resize_height, const int resize_width)
+{
+ const int height = image.size(0);
+ const int width = image.size(1);
+ if (height == resize_height && width == resize_width) {
+ return image;
+ }
+
+ // change HWC to CHW
+ image = image.permute({2, 0, 1});
+ image = image.unsqueeze(0); // add batch dim
+
+ // Resize
+ std::vector size = {resize_height, resize_width};
+ image = torch::nn::functional::interpolate(
+ image, torch::nn::functional::InterpolateFuncOptions().size(size));
+
+ // change CHW to HWC
+ image = image.squeeze(0); // remove batch dim
+ image = image.permute({1, 2, 0});
+ return image;
+}
+
+float utils::calc_loss(Tensor pred_image, Tensor gt_image)
+{
+ Tensor diff = pred_image - gt_image;
+ Tensor loss = (diff * diff).mean(-1);
+ Tensor score = loss.numel() / (loss.sum() + 1e-6f);
+ return score.mean().item();
+}
diff --git a/localization/nerf_based_localizer/src/nerf/utils.hpp b/localization/nerf_based_localizer/src/nerf/utils.hpp
new file mode 100644
index 0000000000000..1942c3b71c2a3
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf/utils.hpp
@@ -0,0 +1,39 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is derived from the following file.
+// https://github.com/Totoro97/f2-nerf/blob/main/src/Utils/Utils.h
+//
+// Created by ppwang on 2022/5/11.
+//
+
+#ifndef NERF__UTILS_HPP_
+#define NERF__UTILS_HPP_
+
#include <torch/torch.h>

#include <string>
+
+namespace utils
+{
+using Tensor = torch::Tensor;
+
+Tensor read_image_tensor(const std::string & path);
+bool write_image_tensor(const std::string & path, Tensor img);
+Tensor resize_image(Tensor image, const int resize_height, const int resize_width);
+float calc_loss(Tensor pred_image, Tensor gt_image);
+
+} // namespace utils
+
+#endif // NERF__UTILS_HPP_
diff --git a/localization/nerf_based_localizer/src/nerf_based_localizer.cpp b/localization/nerf_based_localizer/src/nerf_based_localizer.cpp
new file mode 100644
index 0000000000000..5b89179fc9a81
--- /dev/null
+++ b/localization/nerf_based_localizer/src/nerf_based_localizer.cpp
@@ -0,0 +1,378 @@
+// Copyright 2023 Autoware Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "nerf_based_localizer.hpp"
+
+#include "nerf/stop_watch.hpp"
+#include "nerf/utils.hpp"
+
// NOTE(review): include targets below were reconstructed; the angle-bracket
// contents were lost in extraction — confirm against the original file.
#include <Eigen/Eigen>
#include <rclcpp/rclcpp.hpp>
#include <tf2_eigen/tf2_eigen.hpp>

#include <torch/torch.h>

#include <algorithm>
+
+geometry_msgs::msg::Pose transform_pose(
+ const geometry_msgs::msg::Pose & pose, const geometry_msgs::msg::TransformStamped & transform)
+{
+ Eigen::Quaterniond R1(
+ transform.transform.rotation.w, transform.transform.rotation.x, transform.transform.rotation.y,
+ transform.transform.rotation.z);
+ Eigen::Vector3d t1(
+ transform.transform.translation.x, transform.transform.translation.y,
+ transform.transform.translation.z);
+
+ Eigen::Quaterniond R2(
+ pose.orientation.w, pose.orientation.x, pose.orientation.y, pose.orientation.z);
+ Eigen::Vector3d t2(pose.position.x, pose.position.y, pose.position.z);
+
+ Eigen::Quaterniond R = R2 * R1;
+ Eigen::Vector3d t = R2._transformVector(t1) + t2;
+
+ geometry_msgs::msg::Pose result_pose;
+ result_pose.orientation.x = R.x();
+ result_pose.orientation.y = R.y();
+ result_pose.orientation.z = R.z();
+ result_pose.orientation.w = R.w();
+ result_pose.position.x = t.x();
+ result_pose.position.y = t.y();
+ result_pose.position.z = t.z();
+
+ return result_pose;
+}
+
// Construct the NeRF-based localizer node: declare parameters, build the core
// Localizer, and wire up the subscriptions, publishers and services.
// NOTE(review): the declare_parameter / create_subscription / create_publisher /
// create_service calls below appear to have lost their template arguments
// (e.g. declare_parameter<int>(...)) during extraction — confirm the exact
// type arguments against the original file before building.
NerfBasedLocalizer::NerfBasedLocalizer(
  const std::string & name_space, const rclcpp::NodeOptions & options)
: Node("nerf_based_localizer", name_space, options),
  tf_buffer_(this->get_clock()),
  tf_listener_(tf_buffer_),
  tf2_broadcaster_(*this),
  map_frame_("map"),
  particle_num_(this->declare_parameter("particle_num")),
  output_covariance_(this->declare_parameter("output_covariance")),
  iteration_num_(this->declare_parameter("iteration_num")),
  learning_rate_(this->declare_parameter("learning_rate")),
  is_activated_(true),  // the node starts active; can be toggled via the trigger service
  optimization_mode_(this->declare_parameter("optimization_mode"))
{
  // Parameters forwarded to the core NeRF localizer.
  LocalizerParam param;
  param.train_result_dir = this->declare_parameter("train_result_dir");
  param.render_pixel_num = this->declare_parameter("render_pixel_num");
  param.noise_position_x = this->declare_parameter("noise_position_x");
  param.noise_position_y = this->declare_parameter("noise_position_y");
  param.noise_position_z = this->declare_parameter("noise_position_z");
  param.noise_rotation_x = this->declare_parameter("noise_rotation_x");
  param.noise_rotation_y = this->declare_parameter("noise_rotation_y");
  param.noise_rotation_z = this->declare_parameter("noise_rotation_z");
  param.resize_factor = this->declare_parameter("resize_factor");
  param.sample_num_per_ray = this->declare_parameter("sample_num_per_ray");
  localizer_ = Localizer(param);

  // Inputs: initial pose estimate (map frame) and camera image.
  initial_pose_with_covariance_subscriber_ =
    this->create_subscription(
      "~/input/pose", 100,
      std::bind(&NerfBasedLocalizer::callback_initial_pose, this, std::placeholders::_1));

  image_subscriber_ = this->create_subscription(
    "~/input/image", rclcpp::SensorDataQoS().keep_last(0),
    std::bind(&NerfBasedLocalizer::callback_image, this, std::placeholders::_1));

  // Outputs: optimized pose (plain and with covariance), match score, and the
  // NeRF-rendered image for visual inspection.
  nerf_pose_publisher_ =
    this->create_publisher("~/output/pose", 10);
  nerf_pose_with_covariance_publisher_ =
    this->create_publisher(
      "~/output/pose_with_covariance", 10);
  nerf_score_publisher_ = this->create_publisher("~/output/score", 10);
  nerf_image_publisher_ = this->create_publisher("~/output/image", 10);

  // On-demand pose optimization service (used e.g. by the pose initializer).
  service_ = this->create_service(
    "~/service/optimize_pose",
    std::bind(&NerfBasedLocalizer::service, this, std::placeholders::_1, std::placeholders::_2),
    rclcpp::ServicesQoS().get_rmw_qos_profile());

  // Activation toggle service.
  service_trigger_node_ = this->create_service(
    "~/service/trigger_node",
    std::bind(
      &NerfBasedLocalizer::service_trigger_node, this, std::placeholders::_1,
      std::placeholders::_2),
    rclcpp::ServicesQoS().get_rmw_qos_profile());

  RCLCPP_DEBUG(this->get_logger(), "nerf_based_localizer is created.");
}
+
+void NerfBasedLocalizer::callback_initial_pose(
+ const geometry_msgs::msg::PoseWithCovarianceStamped::ConstSharedPtr initial_pose_msg_ptr)
+{
+ // if rosbag restart, clear buffer
+ if (!initial_pose_msg_ptr_array_.empty()) {
+ const builtin_interfaces::msg::Time & t_front =
+ initial_pose_msg_ptr_array_.front()->header.stamp;
+ const builtin_interfaces::msg::Time & t_msg = initial_pose_msg_ptr->header.stamp;
+ if (t_front.sec > t_msg.sec || (t_front.sec == t_msg.sec && t_front.nanosec > t_msg.nanosec)) {
+ initial_pose_msg_ptr_array_.clear();
+ }
+ }
+
+ if (initial_pose_msg_ptr->header.frame_id == map_frame_) {
+ initial_pose_msg_ptr_array_.push_back(initial_pose_msg_ptr);
+ if (initial_pose_msg_ptr_array_.size() > 1) {
+ initial_pose_msg_ptr_array_.pop_front();
+ }
+ } else {
+ RCLCPP_ERROR(this->get_logger(), "initial_pose_with_covariance is not in map frame.");
+ std::exit(1);
+ }
+}
+
// Run one localization step per incoming image: optimize the pose against the
// latest buffered initial pose, then publish pose, pose-with-covariance,
// match score, and the NeRF-rendered image.
void NerfBasedLocalizer::callback_image(const sensor_msgs::msg::Image::ConstSharedPtr image_msg_ptr)
{
  // Remember the camera frame for later TF lookups, and keep only the most
  // recent image in the buffer.
  target_frame_ = image_msg_ptr->header.frame_id;
  image_msg_ptr_array_.push_back(image_msg_ptr);
  if (image_msg_ptr_array_.size() > 1) {
    image_msg_ptr_array_.pop_front();
  }

  if (!is_activated_) {
    RCLCPP_ERROR(this->get_logger(), "NerfBasedLocalizer is not activated in callback_image.");
    return;
  }

  // Localization needs an initial pose; skip this image if none has arrived.
  if (initial_pose_msg_ptr_array_.empty()) {
    RCLCPP_ERROR(this->get_logger(), "initial_pose_with_covariance is not received.");
    return;
  }

  // Consume the most recent initial pose (the buffer holds at most one).
  const geometry_msgs::msg::PoseWithCovarianceStamped::ConstSharedPtr pose_base_link =
    initial_pose_msg_ptr_array_.back();
  initial_pose_msg_ptr_array_.pop_back();

  // Process
  const auto [pose_msg, image_msg, score_msg] = localize(pose_base_link->pose.pose, *image_msg_ptr);

  // (1) publish nerf_pose
  geometry_msgs::msg::PoseStamped pose_stamped_msg;
  pose_stamped_msg.header.frame_id = map_frame_;
  pose_stamped_msg.header.stamp = image_msg_ptr->header.stamp;
  pose_stamped_msg.pose = pose_msg;
  nerf_pose_publisher_->publish(pose_stamped_msg);

  // (2) publish nerf_pose_with_covariance
  // Covariance is a fixed configured value, not estimated: diagonal entries
  // 0/7/14 are x/y/z position variance; 21/28/35 are roll/pitch/yaw variance
  // (10x the position value).
  geometry_msgs::msg::PoseWithCovarianceStamped pose_with_cov_msg;
  pose_with_cov_msg.header.frame_id = map_frame_;
  pose_with_cov_msg.header.stamp = image_msg_ptr->header.stamp;
  pose_with_cov_msg.pose.pose = pose_msg;
  pose_with_cov_msg.pose.covariance[0] = output_covariance_;
  pose_with_cov_msg.pose.covariance[7] = output_covariance_;
  pose_with_cov_msg.pose.covariance[14] = output_covariance_;
  pose_with_cov_msg.pose.covariance[21] = output_covariance_ * 10;
  pose_with_cov_msg.pose.covariance[28] = output_covariance_ * 10;
  pose_with_cov_msg.pose.covariance[35] = output_covariance_ * 10;
  nerf_pose_with_covariance_publisher_->publish(pose_with_cov_msg);

  // (3) publish score
  nerf_score_publisher_->publish(score_msg);

  // (4) publish image
  nerf_image_publisher_->publish(image_msg);
}
+
// Service handler: optimize the requested pose against the most recently
// received camera image and return the result.
// Fails (success = false) when no image has been received yet.
void NerfBasedLocalizer::service(
  const tier4_localization_msgs::srv::PoseWithCovarianceStamped::Request::SharedPtr req,
  tier4_localization_msgs::srv::PoseWithCovarianceStamped::Response::SharedPtr res)
{
  RCLCPP_DEBUG(this->get_logger(), "start NerfBasedLocalizer::service");

  if (image_msg_ptr_array_.empty()) {
    RCLCPP_ERROR(this->get_logger(), "image is not received.");
    res->success = false;
    return;
  }

  // Get the most recent image (the buffer holds at most one element, so
  // back() is the latest — the previous comment saying "oldest" was wrong).
  const sensor_msgs::msg::Image::ConstSharedPtr image_msg_ptr = image_msg_ptr_array_.back();

  // Process
  const auto [pose_msg, image_msg, score_msg] =
    localize(req->pose_with_covariance.pose.pose, *image_msg_ptr);

  // The response covariance is copied from the request unchanged; only the
  // pose itself is optimized.
  res->success = true;
  res->pose_with_covariance.header.frame_id = map_frame_;
  res->pose_with_covariance.header.stamp = image_msg_ptr->header.stamp;
  res->pose_with_covariance.pose.pose = pose_msg;
  res->pose_with_covariance.pose.covariance = req->pose_with_covariance.pose.covariance;

  RCLCPP_DEBUG(this->get_logger(), "finish NerfBasedLocalizer::service");
}
+
+std::tuple
+NerfBasedLocalizer::localize(
+ const geometry_msgs::msg::Pose & pose_msg, const sensor_msgs::msg::Image & image_msg)
+{
+ Timer timer;
+ timer.start();
+
+ // Get data of image_ptr
+ // Accessing header information
+ const std_msgs::msg::Header header = image_msg.header;
+
+ // Accessing image properties
+ const uint32_t width = image_msg.width;
+ const uint32_t height = image_msg.height;
+
+ RCLCPP_DEBUG_STREAM(
+ this->get_logger(), "Image received. width: " << width << ", height: " << height);
+
+ // Accessing image data
+ torch::Tensor image_tensor = torch::tensor(image_msg.data);
+ image_tensor = image_tensor.view({height, width, 3});
+ image_tensor = image_tensor.index({Slc(0, 850)});
+ image_tensor = image_tensor.to(torch::kFloat32);
+ image_tensor /= 255.0;
+ image_tensor = image_tensor.flip(2); // BGR to RGB
+ image_tensor =
+ utils::resize_image(image_tensor, localizer_.infer_height(), localizer_.infer_width());
+ image_tensor = image_tensor.to(torch::kCUDA);
+
+ geometry_msgs::msg::PoseWithCovarianceStamped pose_camera;
+ try {
+ geometry_msgs::msg::TransformStamped transform =
+ tf_buffer_.lookupTransform("base_link", target_frame_, tf2::TimePointZero);
+ pose_camera.pose.pose = transform_pose(pose_msg, transform);
+ } catch (tf2::TransformException & ex) {
+ RCLCPP_WARN(this->get_logger(), "%s", ex.what());
+ }
+
+ const geometry_msgs::msg::Pose pose = pose_camera.pose.pose;
+
+ Eigen::Quaternionf quat_in(
+ pose.orientation.w, pose.orientation.x, pose.orientation.y, pose.orientation.z);
+ Eigen::Matrix3f rot_in = quat_in.toRotationMatrix();
+
+ torch::Tensor initial_pose = torch::eye(4);
+ initial_pose[0][0] = rot_in(0, 0);
+ initial_pose[0][1] = rot_in(0, 1);
+ initial_pose[0][2] = rot_in(0, 2);
+ initial_pose[0][3] = pose.position.x;
+ initial_pose[1][0] = rot_in(1, 0);
+ initial_pose[1][1] = rot_in(1, 1);
+ initial_pose[1][2] = rot_in(1, 2);
+ initial_pose[1][3] = pose.position.y;
+ initial_pose[2][0] = rot_in(2, 0);
+ initial_pose[2][1] = rot_in(2, 1);
+ initial_pose[2][2] = rot_in(2, 2);
+ initial_pose[2][3] = pose.position.z;
+ initial_pose = initial_pose.to(torch::kCUDA);
+ initial_pose = initial_pose.to(torch::kFloat32);
+ RCLCPP_DEBUG_STREAM(this->get_logger(), "pose_before:\n" << initial_pose);
+
+ initial_pose = localizer_.camera2nerf(initial_pose);
+
+ // run NeRF
+ torch::Tensor optimized_pose;
+ std::vector particles;
+
+ if (optimization_mode_ == 0) {
+ const float noise_coeff = 1.0f;
+ particles = localizer_.optimize_pose_by_random_search(
+ initial_pose, image_tensor, particle_num_, noise_coeff);
+ optimized_pose = Localizer::calc_average_pose(particles);
+ } else {
+ std::vector optimized_poses = localizer_.optimize_pose_by_differential(
+ initial_pose, image_tensor, iteration_num_, learning_rate_);
+ optimized_pose = optimized_poses.back();
+ }
+
+ torch::Tensor nerf_image = localizer_.render_image(optimized_pose);
+ const float score = utils::calc_loss(nerf_image, image_tensor);
+
+ RCLCPP_DEBUG_STREAM(this->get_logger(), "score = " << score);
+
+ optimized_pose = localizer_.nerf2camera(optimized_pose);
+
+ RCLCPP_DEBUG_STREAM(this->get_logger(), "pose_after:\n" << optimized_pose);
+
+ geometry_msgs::msg::Pose result_pose_camera;
+ result_pose_camera.position.x = optimized_pose[0][3].item();
+ result_pose_camera.position.y = optimized_pose[1][3].item();
+ result_pose_camera.position.z = optimized_pose[2][3].item();
+ Eigen::Matrix3f rot_out;
+ rot_out << optimized_pose[0][0].item(), optimized_pose[0][1].item(),
+ optimized_pose[0][2].item(), optimized_pose[1][0].item(),
+ optimized_pose[1][1].item(), optimized_pose[1][2].item(),
+ optimized_pose[2][0].item(), optimized_pose[2][1].item(),
+ optimized_pose[2][2].item();
+ Eigen::Quaternionf quat_out(rot_out);
+ result_pose_camera.orientation.x = quat_out.x();
+ result_pose_camera.orientation.y = quat_out.y();
+ result_pose_camera.orientation.z = quat_out.z();
+ result_pose_camera.orientation.w = quat_out.w();
+
+ geometry_msgs::msg::Pose result_pose_base_link;
+ try {
+ geometry_msgs::msg::TransformStamped transform =
+ tf_buffer_.lookupTransform(target_frame_, "base_link", tf2::TimePointZero);
+ result_pose_base_link = transform_pose(result_pose_camera, transform);
+ } catch (tf2::TransformException & ex) {
+ RCLCPP_WARN(this->get_logger(), "%s", ex.what());
+ }
+
+ nerf_image = nerf_image * 255;
+ nerf_image = nerf_image.to(torch::kUInt8);
+ nerf_image = nerf_image.to(torch::kCPU);
+ nerf_image = nerf_image.contiguous();
+ sensor_msgs::msg::Image nerf_image_msg;
+ nerf_image_msg.header = header;
+ nerf_image_msg.width = nerf_image.size(1);
+ nerf_image_msg.height = nerf_image.size(0);
+ nerf_image_msg.step = nerf_image.size(1) * 3;
+ nerf_image_msg.encoding = "rgb8";
+ nerf_image_msg.data.resize(nerf_image.numel());
+ std::copy(
+ nerf_image.data_ptr(), nerf_image.data_ptr