diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index f6057fdd..fc1afef8 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -1,7 +1,7 @@
{
- "name": "NerfStudio Dev Container",
+ "name": "Neurad-Studio Dev Container",
"dockerComposeFile": "docker-compose.yml",
- "service": "nerfstudio",
+ "service": "neurad-studio",
"workspaceFolder": "/workspace",
"shutdownAction": "stopCompose",
"extensions": [
diff --git a/.devcontainer/docker-compose.yml b/.devcontainer/docker-compose.yml
index 5cbf79cc..9ac8c770 100644
--- a/.devcontainer/docker-compose.yml
+++ b/.devcontainer/docker-compose.yml
@@ -1,9 +1,9 @@
version: "3.8"
services:
- nerfstudio:
+ neurad-studio:
shm_size: '12gb'
- image: nerfstudio:latest
+ image: neurad-studio:latest
build:
context: ..
dockerfile: ./Dockerfile
@@ -25,4 +25,4 @@ services:
devices:
- driver: nvidia
count: 1
- capabilities: [ gpu ]
\ No newline at end of file
+ capabilities: [ gpu ]
diff --git a/.github/workflows/core_code_checks.yml b/.github/workflows/core_code_checks.yml
index 556432bf..1b4e9aae 100644
--- a/.github/workflows/core_code_checks.yml
+++ b/.github/workflows/core_code_checks.yml
@@ -26,9 +26,6 @@ jobs:
- name: Install dependencies
run: |
pip install --upgrade --upgrade-strategy eager -e .[dev]
- - name: Run license checks
- run: |
- ./nerfstudio/scripts/licensing/license_headers.sh --check
- name: Check notebook cell metadata
run: |
python ./nerfstudio/scripts/docs/add_nb_tags.py --check
@@ -36,9 +33,6 @@ jobs:
run: ruff check docs/ nerfstudio/ tests/ --output-format=github
- name: Run Ruff Formatter
run: ruff format docs/ nerfstudio/ tests/ --diff
- - name: Run Pyright
- run: |
- pyright
- name: Test with pytest
run: |
pytest
diff --git a/.github/workflows/doc.yml b/.github/workflows/doc.yml
deleted file mode 100644
index 7b46b342..00000000
--- a/.github/workflows/doc.yml
+++ /dev/null
@@ -1,34 +0,0 @@
-name: Docs
-on:
- push:
- branches: [main]
- pull_request:
- branches: [main]
- workflow_dispatch:
-
-permissions:
- contents: write
-jobs:
- docs:
- runs-on: ubuntu-latest
- steps:
- - uses: actions/checkout@v3
- - uses: actions/setup-python@v3
- with:
- python-version: '3.9'
- - name: Install dependencies
- run: |
- pip install .[docs]
- - name: Sphinx build
- # fail on warnings
- run: |
- sphinx-build docs _build -W --keep-going
- - name: Deploy
- uses: peaceiris/actions-gh-pages@v3
- with:
- publish_branch: gh-pages
- github_token: ${{ secrets.GITHUB_TOKEN }}
- publish_dir: _build/
- force_orphan: true
- cname: docs.nerf.studio
- if: github.event_name != 'pull_request'
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
deleted file mode 100644
index b7598453..00000000
--- a/.github/workflows/publish.yml
+++ /dev/null
@@ -1,31 +0,0 @@
-# This workflows will upload a Python Package using twine when a release is created
-# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
-
-name: Upload Python Package
-
-on:
- release:
- types: [created]
-
-jobs:
- deploy:
- runs-on: ubuntu-latest
-
- steps:
- - uses: actions/checkout@v2
- - name: Set up Python
- uses: actions/setup-python@v1
- with:
- python-version: '3.8'
- - name: Install dependencies
- run: |
- python -m pip install build twine
- - name: Strip unsupported tags in README
- run: |
- sed -i '//,//d' README.md
- - name: Build and publish
- env:
- PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
- run: |
- python -m build
- twine upload --username __token__ --password $PYPI_TOKEN dist/*
diff --git a/.gitignore b/.gitignore
index 04522bdf..078875ca 100644
--- a/.gitignore
+++ b/.gitignore
@@ -165,6 +165,7 @@ cython_debug/
outputs/
exports/
renders/
+checkpoints/
*.mp4
*.gif
*.zip
diff --git a/Dockerfile b/Dockerfile
index f4bfeca9..802f0d54 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -101,6 +101,7 @@ RUN git clone --branch 3.8 https://github.com/colmap/colmap.git --single-branch
# Upgrade pip and install packages.
RUN python3.10 -m pip install --no-cache-dir --upgrade pip setuptools pathtools promise pybind11
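+# Use bash for the RUN commands below so that bash-only parameter expansions like ${CUDA_VER//./} work.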
+SHELL ["/bin/bash", "-c"]
# Install pytorch and submodules
RUN CUDA_VER=${CUDA_VERSION%.*} && CUDA_VER=${CUDA_VER//./} && python3.10 -m pip install --no-cache-dir \
torch==2.0.1+cu${CUDA_VER} \
@@ -137,13 +138,18 @@ RUN git clone --recursive https://github.com/cvg/pixel-perfect-sfm.git && \
python3.10 -m pip install --no-cache-dir -e . && \
cd ..
-RUN python3.10 -m pip install --no-cache-dir omegaconf
-# Copy nerfstudio folder and give ownership to user.
+# Install waymo-open-dataset
+RUN python3.10 -m pip install --no-cache-dir waymo-open-dataset-tf-2-11-0==1.6.1
+
+# Copy nerfstudio folder.
ADD . /nerfstudio
# Install nerfstudio dependencies.
RUN cd /nerfstudio && python3.10 -m pip install --no-cache-dir -e .
+# Make sure viser client is built
+RUN python3.10 -c "import viser; viser.ViserServer()"
+
# Change working directory
WORKDIR /workspace
diff --git a/README.md b/README.md
index d5896662..600fcd8d 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,3 @@
-
[Quickstart](#quickstart) ·
@@ -33,7 +33,40 @@ This is the official code release of the CVPR 2024 paper _NeuRAD: Neural Renderi
In line with Nerfstudio's mission, this is a contributor-friendly repo with the goal of building a community where users can more easily build upon each other's contributions.
-Do you have feature requests or want to add **your** new AD-NeRF model? Or maybe provide structures for a new dataset? **We welcome [contributions](https://docs.nerf.studio/reference/contributing.html)!**
+Do you have feature requests or want to add **your** new AD-NeRF model? Or maybe provide structures for a new dataset? **We welcome contributions!**
+
+
# Quickstart
@@ -48,10 +81,10 @@ Our installation steps largely follow Nerfstudio, with some added dataset-specif
### Create environment
-NeuRAD requires `python >= 3.8`. We recommend using conda to manage dependencies. Make sure to install [Conda](https://docs.conda.io/miniconda.html) before proceeding.
+NeuRAD requires `python >= 3.10`. We recommend using conda to manage dependencies. Make sure to install [Conda](https://docs.conda.io/miniconda.html) before proceeding.
```bash
-conda create --name neurad -y python=3.8
+conda create --name neurad -y python=3.10
conda activate neurad
pip install --upgrade pip
```
@@ -64,18 +97,27 @@ Install PyTorch with CUDA (this repo has been tested with CUDA 11.7 and CUDA 11.
For CUDA 11.8:
```bash
-pip install torch==2.1.2+cu118 torchvision==0.16.2+cu118 --extra-index-url https://download.pytorch.org/whl/cu118
+pip install torch==2.0.1+cu118 torchvision==0.15.2+cu118 --extra-index-url https://download.pytorch.org/whl/cu118
conda install -c "nvidia/label/cuda-11.8.0" cuda-toolkit
+
+# Some systems need to upgrade dill prior to installing tiny-cuda-nn
+pip install dill --upgrade
+
pip install ninja git+https://github.com/NVlabs/tiny-cuda-nn/#subdirectory=bindings/torch
```
+For Waymo Open Dataset v2 support (requires Python 3.10; this package's dependencies are very strict, so it cannot be added to pyproject.toml and must be installed first):
+```bash
+pip install waymo-open-dataset-tf-2-11-0==1.6.1
+```
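+
+A quick import check (a minimal sketch) can confirm that the package and its strict dependencies resolved correctly:
+```bash
+python -c "from waymo_open_dataset import v2; print('waymo-open-dataset OK')"
+```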
+
We refer to [Nerfstudio](https://github.com/nerfstudio-project/nerfstudio/blob/v1.0.3/docs/quickstart/installation.md) for more installation support.
### Installing NeuRAD
```bash
-git clone https://github.com/georghess/neurad.git
-cd neurad
+git clone https://github.com/georghess/neurad-studio.git
+cd neurad-studio
pip install --upgrade pip setuptools
pip install -e .
```
@@ -90,7 +132,8 @@ The following will train a _NeuRAD_ model, our recommended model for real world
### Data preparation
-Begin by downloading [PandaSet](https://www.kaggle.com/datasets/usharengaraju/pandaset-dataset/data) and place it under ```data/pandaset```.
+Begin by downloading [PandaSet](https://huggingface.co/datasets/georghess/pandaset) and unzipping it under ```data/pandaset```.
+The dataset is no longer hosted by Scale, but can be downloaded from the provided Hugging Face link.
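+
+One way to fetch it is with the Hugging Face CLI (a sketch, assuming `huggingface-cli` is installed; the downloaded archives still need to be unzipped into ```data/pandaset```):
+```bash
+huggingface-cli download georghess/pandaset --repo-type dataset --local-dir data/pandaset
+```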
### Training
@@ -113,6 +156,10 @@ Navigating to the link at the end of the terminal will load the webviewer. If yo
+### Troubleshooting
+
+If you run into issues, it could be that training takes up too much memory. You can try adjusting the model parameters according to the neurad-tiny [vscode launch config](https://github.com/georghess/neurad-studio/blob/master/.vscode/launch.json#L51).
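+
+These parameters can also be overridden directly on the command line; listing the available options is a quick way to find the corresponding flags (a minimal sketch of the workflow):
+```bash
+ns-train neurad --help
+```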
+
### Resume from checkpoint / visualize existing run
It is possible to load a pretrained model by running
@@ -180,16 +227,23 @@ To add a dataset, create `nerfstudio/data/dataparsers/mydataset.py` containing o
| Data | Cameras | Lidars |
| --------------------------------------------------------------------------------------------- | -------------- | ----------------------------------------------------------------- |
| [nuScenes](https://www.nuscenes.org/) | 6 cameras | 32-beam lidar |
-| [ZOD](https://zod.zenseact.com/) | 1 camera | 128-beam + 2 x 16-beam lidars |
+| [ZOD](https://zod.zenseact.com/) ([Annotations](https://github.com/user-attachments/files/15773566/auto_annotations.zip)) | 1 camera | 128-beam + 2 x 16-beam lidars |
| [Argoverse 2](https://www.argoverse.org/av2.html) | 7 ring cameras + 2 stereo cameras | 2 x 32-beam lidars |
-| [PandaSet](https://pandaset.org/) | 6 cameras | 64-beam lidar |
-| [KITTIMOT](https://www.cvlibs.net/datasets/kitti/eval_tracking.php) | 2 stereo cameras | 64-beam lidar
-
+| [PandaSet](https://pandaset.org/) ([huggingface download](https://huggingface.co/datasets/georghess/pandaset)) | 6 cameras | 64-beam lidar |
+| [KITTIMOT](https://www.cvlibs.net/datasets/kitti/eval_tracking.php) ([Timestamps](https://www.cvlibs.net/datasets/kitti/raw_data.php)) | 2 stereo cameras | 64-beam lidar |
+| [Waymo v2](https://waymo.com/open/) | 5 cameras | 64-beam lidar |
+A brief introduction to the Waymo dataparser for NeuRAD can be found in [waymo_dataparser.md](./nerfstudio/data/dataparsers/waymo_dataparser.md).
## Adding Methods
-Nerfstudio has made it easy to add new methods, see [here](https://docs.nerf.studio/developer_guides/new_methods.html) for details. We plan to examplify this using our UniSim reimplementation, to be released soon.
+Nerfstudio has made it easy to add new methods, see [here](https://docs.nerf.studio/developer_guides/new_methods.html) for details. We have added [our UniSim reimplementation](https://github.com/carlinds/unisim) as a plugin, which can be run as any other method using the `ns-train` command:
+```bash
+ns-train unisim pandaset-data --data data/pandaset
+```
+Then follow the instructions in the terminal.
+
+See [our UniSim repo](https://github.com/carlinds/unisim) for reference on how to add a new method as a plugin.
# Key features
- Dataparser for multiple autonomous driving datasets including
@@ -205,20 +259,17 @@ Nerfstudio has made it easy to add new methods, see [here](https://docs.nerf.stu
# Planned Features/TODOs
-- [ ] UniSim plug-in
+- [ ] 3DGS implementation supporting dynamic objects
+- [x] UniSim plug-in
- [x] Release code
# Built On
-
-
-
-
- Collaboration friendly studio for NeRFs
@@ -240,6 +291,20 @@ If you use this code or find our paper useful, please consider citing:
# Contributors
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+\+ [nerfstudio contributors](https://github.com/nerfstudio-project/nerfstudio/graphs/contributors)
diff --git a/apptainer_recipe b/apptainer_recipe
index 0b75583d..30fb5faf 100644
--- a/apptainer_recipe
+++ b/apptainer_recipe
@@ -57,6 +57,9 @@ TCNN_CUDA_ARCHITECTURES="90;89;86;80;75" python3.10 -m pip install --no-cache-di
# Install nerfstudio dependencies.
cd /nerfstudio && python3.10 -m pip install --no-cache-dir -e .
+# Make sure viser client is built
+python3.10 -c "import viser; viser.ViserServer()"
+
# Change working directory
mkdir -p /workspace
cd /workspace
diff --git a/docs/_static/imgs/EN_Avancez_CH_black.png b/docs/_static/imgs/EN_Avancez_CH_black.png
new file mode 100644
index 00000000..66a32398
Binary files /dev/null and b/docs/_static/imgs/EN_Avancez_CH_black.png differ
diff --git a/docs/_static/imgs/EN_Avancez_CH_white.png b/docs/_static/imgs/EN_Avancez_CH_white.png
new file mode 100644
index 00000000..c06701f1
Binary files /dev/null and b/docs/_static/imgs/EN_Avancez_CH_white.png differ
diff --git a/docs/_static/imgs/LiU_secondary_1_black-PNG.png b/docs/_static/imgs/LiU_secondary_1_black-PNG.png
new file mode 100644
index 00000000..4b556b7d
Binary files /dev/null and b/docs/_static/imgs/LiU_secondary_1_black-PNG.png differ
diff --git a/docs/_static/imgs/LiU_secondary_1_white-PNG.png b/docs/_static/imgs/LiU_secondary_1_white-PNG.png
new file mode 100644
index 00000000..4205f15b
Binary files /dev/null and b/docs/_static/imgs/LiU_secondary_1_white-PNG.png differ
diff --git a/docs/_static/imgs/LundUniversity_C2line_BLACK.png b/docs/_static/imgs/LundUniversity_C2line_BLACK.png
new file mode 100644
index 00000000..5a585120
Binary files /dev/null and b/docs/_static/imgs/LundUniversity_C2line_BLACK.png differ
diff --git a/docs/_static/imgs/LundUniversity_C2line_NEG.png b/docs/_static/imgs/LundUniversity_C2line_NEG.png
new file mode 100644
index 00000000..1c343071
Binary files /dev/null and b/docs/_static/imgs/LundUniversity_C2line_NEG.png differ
diff --git a/docs/_static/imgs/NeuRAD-RS-Waymo-Front.png b/docs/_static/imgs/NeuRAD-RS-Waymo-Front.png
new file mode 100644
index 00000000..10befb02
Binary files /dev/null and b/docs/_static/imgs/NeuRAD-RS-Waymo-Front.png differ
diff --git a/docs/_static/imgs/NeuRAD-RS-Waymo-Left.png b/docs/_static/imgs/NeuRAD-RS-Waymo-Left.png
new file mode 100644
index 00000000..3ed25d9f
Binary files /dev/null and b/docs/_static/imgs/NeuRAD-RS-Waymo-Left.png differ
diff --git a/docs/_static/imgs/NeuRAD-RS-Waymo-Right.png b/docs/_static/imgs/NeuRAD-RS-Waymo-Right.png
new file mode 100644
index 00000000..eb5666d2
Binary files /dev/null and b/docs/_static/imgs/NeuRAD-RS-Waymo-Right.png differ
diff --git a/docs/_static/imgs/WASP-logotype-white.png b/docs/_static/imgs/WASP-logotype-white.png
new file mode 100644
index 00000000..85666ad2
Binary files /dev/null and b/docs/_static/imgs/WASP-logotype-white.png differ
diff --git a/docs/_static/imgs/WASP_logotyp_grey_180116.png b/docs/_static/imgs/WASP_logotyp_grey_180116.png
new file mode 100644
index 00000000..63fdf3ce
Binary files /dev/null and b/docs/_static/imgs/WASP_logotyp_grey_180116.png differ
diff --git a/docs/_static/imgs/ZEN_Vertical_logo_black.svg b/docs/_static/imgs/ZEN_Vertical_logo_black.svg
new file mode 100644
index 00000000..04982695
--- /dev/null
+++ b/docs/_static/imgs/ZEN_Vertical_logo_black.svg
@@ -0,0 +1 @@
+
diff --git a/docs/_static/imgs/ZEN_Vertical_logo_white.svg b/docs/_static/imgs/ZEN_Vertical_logo_white.svg
new file mode 100644
index 00000000..7ad20708
--- /dev/null
+++ b/docs/_static/imgs/ZEN_Vertical_logo_white.svg
@@ -0,0 +1 @@
+
diff --git a/docs/_static/imgs/neurad_logo.png b/docs/_static/imgs/neurad_logo.png
new file mode 100644
index 00000000..d13a720d
Binary files /dev/null and b/docs/_static/imgs/neurad_logo.png differ
diff --git a/docs/_static/imgs/neurad_logo_horizontal_dark.png b/docs/_static/imgs/neurad_logo_horizontal_dark.png
new file mode 100644
index 00000000..20cfc958
Binary files /dev/null and b/docs/_static/imgs/neurad_logo_horizontal_dark.png differ
diff --git a/docs/_static/imgs/neurad_logo_horizontal_light.png b/docs/_static/imgs/neurad_logo_horizontal_light.png
new file mode 100644
index 00000000..74ceb1b9
Binary files /dev/null and b/docs/_static/imgs/neurad_logo_horizontal_light.png differ
diff --git a/docs/_static/imgs/neurad_logo_with_text.png b/docs/_static/imgs/neurad_logo_with_text.png
index 62b5dbfb..581b2395 100644
Binary files a/docs/_static/imgs/neurad_logo_with_text.png and b/docs/_static/imgs/neurad_logo_with_text.png differ
diff --git a/docs/_static/imgs/neurad_logo_with_text_dark.png b/docs/_static/imgs/neurad_logo_with_text_dark.png
index f84fa45e..c384435a 100644
Binary files a/docs/_static/imgs/neurad_logo_with_text_dark.png and b/docs/_static/imgs/neurad_logo_with_text_dark.png differ
diff --git a/nerfstudio/cameras/cameras.py b/nerfstudio/cameras/cameras.py
index c865cd13..a723970b 100644
--- a/nerfstudio/cameras/cameras.py
+++ b/nerfstudio/cameras/cameras.py
@@ -921,12 +921,22 @@ def _compute_rays_for_vr180(
if self.metadata and "rolling_shutter_offsets" in self.metadata and "velocities" in self.metadata:
cam_idx = camera_indices.squeeze(-1)
- heights, rows = self.height[cam_idx], coords[..., 0:1]
- duration = self.metadata["rolling_shutter_offsets"][cam_idx].diff()
- time_offsets = rows / heights * duration + self.metadata["rolling_shutter_offsets"][cam_idx][..., 0:1]
+ offsets = self.metadata["rolling_shutter_offsets"][cam_idx]
+ duration = offsets.diff()
+ if "rs_direction" in metadata and metadata["rs_direction"] == "Horizontal":
+ # wod (LEFT_TO_RIGHT or RIGHT_TO_LEFT)
+ width, cols = self.width[cam_idx], coords[..., 1:2]
+ time_offsets = cols / width * duration + offsets[..., 0:1]
+ else:
+ # pandaset (TOP_TO_BOTTOM)
+ heights, rows = self.height[cam_idx], coords[..., 0:1]
+ time_offsets = rows / heights * duration + offsets[..., 0:1]
+
origins = origins + self.metadata["velocities"][cam_idx] * time_offsets
times = times + time_offsets
del metadata["rolling_shutter_offsets"] # it has served its purpose
+ if "rs_direction" in metadata:
+ del metadata["rs_direction"] # it has served its purpose
return RayBundle(
origins=origins,
diff --git a/nerfstudio/cameras/lidars.py b/nerfstudio/cameras/lidars.py
index 811439ce..9e9b5af9 100644
--- a/nerfstudio/cameras/lidars.py
+++ b/nerfstudio/cameras/lidars.py
@@ -22,6 +22,7 @@
import numpy as np
import torch
+#import torch._dynamo
from jaxtyping import Float, Int
from matplotlib import pyplot as plt
from torch import Tensor
@@ -35,6 +36,7 @@
from nerfstudio.utils.misc import strtobool, torch_compile
from nerfstudio.utils.tensor_dataclass import TensorDataclass
+#torch._dynamo.config.suppress_errors = True
TORCH_DEVICE = Union[torch.device, str] # pylint: disable=invalid-name
HORIZONTAL_BEAM_DIVERGENCE = 3.0e-3 # radians, or meters at a distance of 1m
@@ -50,6 +52,7 @@ class LidarType(Enum):
VELODYNE64E = auto()
VELODYNE128 = auto()
PANDAR64 = auto()
+ WOD64 = auto()
LIDAR_MODEL_TO_TYPE = {
@@ -59,6 +62,7 @@ class LidarType(Enum):
"VELODYNE64E": LidarType.VELODYNE64E,
"VELODYNE128": LidarType.VELODYNE128,
"PANDAR64": LidarType.PANDAR64,
+ "WOD64": LidarType.WOD64,
}
diff --git a/nerfstudio/configs/dataparser_configs.py b/nerfstudio/configs/dataparser_configs.py
index efe37b56..23b7846c 100644
--- a/nerfstudio/configs/dataparser_configs.py
+++ b/nerfstudio/configs/dataparser_configs.py
@@ -24,19 +24,19 @@
from nerfstudio.data.dataparsers.argoverse2_dataparser import Argoverse2DataParserConfig
from nerfstudio.data.dataparsers.base_dataparser import DataParserConfig
from nerfstudio.data.dataparsers.kittimot_dataparser import KittiMotDataParserConfig
-from nerfstudio.data.dataparsers.minimal_dataparser import MinimalDataParserConfig
from nerfstudio.data.dataparsers.nuscenes_dataparser import NuScenesDataParserConfig
from nerfstudio.data.dataparsers.pandaset_dataparser import PandaSetDataParserConfig
+from nerfstudio.data.dataparsers.wod_dataparser import WoDParserConfig
from nerfstudio.data.dataparsers.zod_dataparser import ZodDataParserConfig
from nerfstudio.plugins.registry_dataparser import discover_dataparsers
dataparsers = {
- "minimal-parser": MinimalDataParserConfig(),
"kittimot-data": KittiMotDataParserConfig(),
"nuscenes-data": NuScenesDataParserConfig(),
"argoverse2-data": Argoverse2DataParserConfig(),
"zod-data": ZodDataParserConfig(),
"pandaset-data": PandaSetDataParserConfig(),
+ "wod-data": WoDParserConfig(),
}
external_dataparsers, _ = discover_dataparsers()
diff --git a/nerfstudio/configs/external_methods.py b/nerfstudio/configs/external_methods.py
index 06579495..26b4d21c 100644
--- a/nerfstudio/configs/external_methods.py
+++ b/nerfstudio/configs/external_methods.py
@@ -43,20 +43,19 @@ class ExternalMethod:
external_methods = []
-# TODO: add UniSim
-# external_methods.append(
-# ExternalMethod(
-# """[bold yellow]UniSim[/bold yellow]
-
-# To enable UniSim, you must install it first by running:
-# [grey]pip install git+https://github.com/.../UniSim[/grey]""",
-# configurations=[
-# ("unisim", "UniSim reproduction, as specified in the paper"),
-# ("unisim-tweaked", "UniSim with tweaked hyperparameters"),
-# ],
-# pip_package="git+https://github.com/.../unisim",
-# )
-# )
+external_methods.append(
+ ExternalMethod(
+ """[bold yellow]UniSim[/bold yellow]
+
+To enable UniSim, you must install it first by running:
+ [grey]pip install git+https://github.com/carlinds/unisim[/grey]""",
+ configurations=[
+ ("unisim", "UniSim reproduction, as specified in the paper"),
+ ("unisim++", "UniSim with some improvements/tweaks"),
+ ],
+ pip_package="git+https://github.com/carlinds/unisim",
+ )
+)
@dataclass
diff --git a/nerfstudio/configs/method_configs.py b/nerfstudio/configs/method_configs.py
index 9c6986fd..1f2d4f9a 100644
--- a/nerfstudio/configs/method_configs.py
+++ b/nerfstudio/configs/method_configs.py
@@ -21,6 +21,7 @@
from collections import OrderedDict
from copy import deepcopy
+from pathlib import Path
from typing import Dict, Union
import tyro
@@ -31,7 +32,7 @@
from nerfstudio.data.datamanagers.ad_datamanager import ADDataManagerConfig
from nerfstudio.data.datamanagers.full_images_datamanager import FullImageDatamanagerConfig
from nerfstudio.data.datamanagers.parallel_datamanager import ParallelDataManagerConfig
-from nerfstudio.data.dataparsers.ad_dataparser import ADDataParserConfig
+from nerfstudio.data.dataparsers.pandaset_dataparser import PandaSetDataParserConfig
from nerfstudio.engine.optimizers import AdamOptimizerConfig, AdamWOptimizerConfig, RAdamOptimizerConfig
from nerfstudio.engine.schedulers import ExponentialDecaySchedulerConfig
from nerfstudio.engine.trainer import TrainerConfig
@@ -60,7 +61,7 @@
mixed_precision=True,
pipeline=VanillaPipelineConfig(
datamanager=ParallelDataManagerConfig(
- dataparser=ADDataParserConfig(),
+ dataparser=PandaSetDataParserConfig(),
train_num_rays_per_batch=4096,
eval_num_rays_per_batch=4096,
),
@@ -96,7 +97,7 @@
mixed_precision=True,
pipeline=VanillaPipelineConfig(
datamanager=ParallelDataManagerConfig(
- dataparser=ADDataParserConfig(),
+ dataparser=PandaSetDataParserConfig(),
train_num_rays_per_batch=8192,
eval_num_rays_per_batch=4096,
),
@@ -140,7 +141,7 @@
mixed_precision=True,
pipeline=VanillaPipelineConfig(
datamanager=ParallelDataManagerConfig(
- dataparser=ADDataParserConfig(),
+ dataparser=PandaSetDataParserConfig(),
train_num_rays_per_batch=16384,
eval_num_rays_per_batch=4096,
),
@@ -187,6 +188,7 @@
max_num_iterations=30000,
mixed_precision=True,
pipeline=ADPipelineConfig(
+ datamanager=ADDataManagerConfig(dataparser=PandaSetDataParserConfig()),
calc_fid_steps=(99999999,),
model=LidarNerfactoModelConfig(
eval_num_rays_per_chunk=1 << 15,
@@ -221,7 +223,7 @@
mixed_precision=False,
pipeline=VanillaPipelineConfig(
datamanager=FullImageDatamanagerConfig(
- dataparser=ADDataParserConfig(), # load_3D_points=True),
+ dataparser=PandaSetDataParserConfig(sequence="028"), # use static sequence
cache_images_type="uint8",
),
model=SplatfactoModelConfig(),
@@ -270,7 +272,7 @@
mixed_precision=False,
pipeline=VanillaPipelineConfig(
datamanager=FullImageDatamanagerConfig(
- dataparser=ADDataParserConfig(), # load_3D_points=True),
+ dataparser=PandaSetDataParserConfig(sequence="028"), # use static sequence
cache_images_type="uint8",
),
model=SplatfactoModelConfig(
@@ -321,7 +323,7 @@
mixed_precision=True,
pipeline=ADPipelineConfig(
calc_fid_steps=(99999999,),
- datamanager=ADDataManagerConfig(dataparser=ADDataParserConfig(add_missing_points=True)),
+ datamanager=ADDataManagerConfig(dataparser=PandaSetDataParserConfig(add_missing_points=True)),
model=NeuRADModelConfig(
eval_num_rays_per_chunk=1 << 15,
camera_optimizer=CameraOptimizerConfig(mode="off"), # SO3xR3
@@ -360,6 +362,7 @@
method_configs["neurad-scaleopt"].pipeline.model.camera_optimizer = ScaledCameraOptimizerConfig(
weights=(1.0, 1.0, 0.01, 0.01, 0.01, 1.0), # xrot, yrot, zrot, xtrans, ytrans, ztrans
trans_l2_penalty=(1e-2, 1e-2, 1e-3), # x, y, z
+ mode="SO3xR3",
)
@@ -371,6 +374,7 @@ def _scaled_neurad_training(config: TrainerConfig, scale: float, newname: str) -
config.steps_per_eval_image = int(config.steps_per_eval_image * scale)
config.steps_per_eval_all_images = int(config.steps_per_eval_all_images * scale)
config.steps_per_save = int(config.steps_per_save * scale)
+ assert isinstance(config.pipeline, ADPipelineConfig)
config.pipeline.calc_fid_steps = tuple(int(scale * s) for s in config.pipeline.calc_fid_steps)
for optimizer in config.optimizers.values():
optimizer["scheduler"].max_steps = int(optimizer["scheduler"].max_steps * scale)
@@ -380,7 +384,6 @@ def _scaled_neurad_training(config: TrainerConfig, scale: float, newname: str) -
# Bigger, better, longer, stronger
method_configs["neurader"] = _scaled_neurad_training(method_configs["neurad"], 2.5, "neurader")
-method_configs["neurader"].method_name = "neurader"
for optimizer in method_configs["neurader"].optimizers.values():
optimizer["optimizer"].lr *= 0.5
optimizer["scheduler"].lr_final *= 0.5
@@ -402,6 +405,9 @@ def _scaled_neurad_training(config: TrainerConfig, scale: float, newname: str) -
method_configs["neuradest-scaleopt"] = _scaled_neurad_training(
method_configs["neurader-scaleopt"], 3, "neuradest-scaleopt"
)
+for optimizer in method_configs["neuradest-scaleopt"].optimizers.values():
+ optimizer["optimizer"].lr *= 0.5
+ optimizer["scheduler"].lr_final *= 0.5
# Configurations matching the paper (disable temporal appearance and actor flip)
method_configs["neurad-paper"] = deepcopy(method_configs["neurad"])
diff --git a/nerfstudio/data/datamanagers/ad_datamanager.py b/nerfstudio/data/datamanagers/ad_datamanager.py
index 39fe6985..170a80d6 100644
--- a/nerfstudio/data/datamanagers/ad_datamanager.py
+++ b/nerfstudio/data/datamanagers/ad_datamanager.py
@@ -80,9 +80,12 @@ def change_patch_sampler(self, patch_scale: int, patch_size: int):
return
# Change train
- for func_queue in self.func_queues:
- func_queue.put((_worker_change_patch_sampler, (patch_scale, patch_size), {}))
- self.clear_data_queue() # remove any old, invalid, batch
+ if self.use_mp:
+ for func_queue in self.func_queues:
+ func_queue.put((_worker_change_patch_sampler, (patch_scale, patch_size), {}))
+ self.clear_data_queue() # remove any old, invalid, batch
+ else:
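+ # no worker processes to signal, so apply the patch sampler change to the single in-process worker directly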
+ _worker_change_patch_sampler(self.data_procs[0], patch_scale, patch_size)
# Change eval
self.eval_pixel_sampler.patch_scale = patch_scale
diff --git a/nerfstudio/data/dataparsers/ad_dataparser.py b/nerfstudio/data/dataparsers/ad_dataparser.py
index 94bce83f..73ddb663 100644
--- a/nerfstudio/data/dataparsers/ad_dataparser.py
+++ b/nerfstudio/data/dataparsers/ad_dataparser.py
@@ -130,7 +130,7 @@ class ADDataParser(DataParser):
@property
def actor_transform(self) -> Tensor:
- """The transform to convert from our actor frame (x-right, y-forward, z-up) to the original actor frame."""
+ """Transform to convert from our actor frame (x-right, y-forward, z-up) to the original actor frame (3x4)."""
return torch.eye(4)[:3, :]
def _get_cameras(self) -> Tuple[Cameras, List[Path]]:
@@ -224,7 +224,7 @@ def _generate_dataparser_outputs(self, split="train"):
mask_filenames=None, # TODO: handle masks
dataparser_scale=1.0, # no scaling
dataparser_transform=dataparser_transform,
- actor_transform=self.actor_transform,
+ actor_transform=self.actor_transform[:3, :].float(),
time_offset=time_offset,
metadata={
"lidars": lidars,
@@ -276,8 +276,8 @@ def _filter_based_on_time(
end_time = times.max().item()
start_time = times.min().item()
duration = end_time - start_time
- start_time += duration * self.config.dataset_start_fraction
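+ # compute end_time from the original start_time, before start_time is shifted by dataset_start_fraction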
end_time = start_time + duration * self.config.dataset_end_fraction
+ start_time += duration * self.config.dataset_start_fraction
cameras, img_filenames = _filter_sensordata_on_time(cameras, img_filenames, start_time, end_time)
lidars, pc_filenames = _filter_sensordata_on_time(lidars, pc_filenames, start_time, end_time)
diff --git a/nerfstudio/data/dataparsers/argoverse2_dataparser.py b/nerfstudio/data/dataparsers/argoverse2_dataparser.py
index 138de0a0..7283e3c0 100644
--- a/nerfstudio/data/dataparsers/argoverse2_dataparser.py
+++ b/nerfstudio/data/dataparsers/argoverse2_dataparser.py
@@ -14,7 +14,6 @@
"""Dataparser for the argoverse2 dataset."""
-
from dataclasses import dataclass, field
from pathlib import Path
from typing import Dict, List, Literal, Tuple, Type
@@ -143,7 +142,7 @@ class Argoverse2DataParserConfig(ADDataParserConfig):
"""target class to instantiate"""
sequence: str = "2b044433-ddc1-3580-b560-d46474934089"
"""name of the sequence to use"""
- data: Path = Path("data/argoverse2")
+ data: Path = Path("data/av2")
"""path to the dataset"""
cameras: Tuple[
Literal[
@@ -187,9 +186,9 @@ class Argoverse2(ADDataParser):
@property
def actor_transform(self) -> torch.Tensor:
"""Argo uses x-forward, so we need to rotate to x-right."""
- wlh_to_lwh = np.eye(4)
+ wlh_to_lwh = np.eye(4, dtype=np.float32)
wlh_to_lwh[:3, :3] = WLH_TO_LWH
- return torch.from_numpy(wlh_to_lwh)
+ return torch.from_numpy(wlh_to_lwh)[:3, :]
def _get_cameras(self) -> Tuple[Cameras, List[Path]]:
"""Returns camera info and image filenames."""
@@ -346,14 +345,13 @@ def _read_lidars(self, lidars: Lidars, filepaths: List[Path]) -> List[torch.Tens
missing_points = []
+ assert lidars.times is not None # typehints
+ assert lidars.metadata is not None # typehints
+ down2up = lidars.metadata["down2up"].view(len(lidars.times), 3, 4)[0]
if self.config.add_missing_points:
- assert lidars.metadata is not None # typehints
- assert lidars.times is not None # typehints
- down2up = lidars.metadata["down2up"].view(len(lidars.times), 3, 4)[0]
poses_down = lidars.metadata["poses_down"].view(len(lidars.times), 4, 4)
poses_up = lidars.lidar_to_worlds
times = lidars.times
-
log_pose_df = io_utils.read_feather(
self.av2._data_dir / self.config.sequence / "city_SE3_egovehicle.feather"
)
diff --git a/nerfstudio/data/dataparsers/base_dataparser.py b/nerfstudio/data/dataparsers/base_dataparser.py
index 9ec52eea..ca5596a8 100644
--- a/nerfstudio/data/dataparsers/base_dataparser.py
+++ b/nerfstudio/data/dataparsers/base_dataparser.py
@@ -68,7 +68,7 @@ class DataparserOutputs:
"""
dataparser_transform: Float[Tensor, "3 4"] = torch.eye(4)[:3, :]
"""Transform applied by the dataparser to the entire scene."""
- actor_transform: Float[Tensor, "3 4"] = torch.eye(4)[:3, :]
+ actor_transform: Float[Tensor, "3 4"] = torch.eye(4, dtype=torch.float32)[:3, :]
"""Transform applied by the dataparser to each actor's local frame."""
dataparser_scale: float = 1.0
"""Scale applied by the dataparser."""
diff --git a/nerfstudio/data/dataparsers/kittimot_dataparser.py b/nerfstudio/data/dataparsers/kittimot_dataparser.py
index ac7520af..4df6850f 100644
--- a/nerfstudio/data/dataparsers/kittimot_dataparser.py
+++ b/nerfstudio/data/dataparsers/kittimot_dataparser.py
@@ -65,7 +65,7 @@
MARS_SEQ_TO_START_FRAME["0006"] = 5 # taken from the paper
MARS_SEQ_TO_END_FRAME["0006"] = 260 # taken from the paper
-RIGHT_FRONT_UP2RIGHT_DOWN_FRONT = np.array([[1, 0, 0, 0], [0, 0, -1, 0], [0, 1, 0, 0], [0, 0, 0, 1]])
+RIGHT_FRONT_UP2RIGHT_DOWN_FRONT = np.array([[1.0, 0, 0, 0], [0, 0, -1.0, 0], [0, 1.0, 0, 0], [0, 0, 0, 1.0]])
@dataclass
@@ -77,7 +77,7 @@ class KittiMotDataParserConfig(ADDataParserConfig):
sequence: str = "0006"
"""Name of the scene."""
data: Path = Path("data/kittimot")
- """Path to ZOD dataset."""
+ """Path to KITTI-MOT dataset."""
split: Literal["training"] = "training" # we do not have labels for testing set...
"""Which split to use."""
cameras: Tuple[Literal["image_02", "image_03", "none", "all"], ...] = ("image_02", "image_03")
@@ -107,7 +107,7 @@ class KittiMot(ADDataParser):
@property
def actor_transform(self) -> Tensor:
"""The transform needed to convert the actor poses to our desired format (x-right, y-forward, z-up)."""
- return torch.from_numpy(RIGHT_FRONT_UP2RIGHT_DOWN_FRONT)
+ return torch.from_numpy(RIGHT_FRONT_UP2RIGHT_DOWN_FRONT)[:3, :]
def _get_cameras(self) -> Tuple[Cameras, List[Path]]:
"""Returns camera info and image filenames."""
diff --git a/nerfstudio/data/dataparsers/minimal_dataparser.py b/nerfstudio/data/dataparsers/minimal_dataparser.py
deleted file mode 100644
index afcef15b..00000000
--- a/nerfstudio/data/dataparsers/minimal_dataparser.py
+++ /dev/null
@@ -1,105 +0,0 @@
-# Copyright 2022 the Regents of the University of California, Nerfstudio Team and contributors. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-Data parser for pre-prepared datasets for all cameras, with no additional processing needed
-Optional fields - semantics, mask_filenames, cameras.distortion_params, cameras.times
-"""
-
-from dataclasses import dataclass, field
-from pathlib import Path
-from typing import Type
-
-import numpy as np
-import torch
-
-from nerfstudio.cameras.cameras import Cameras
-from nerfstudio.data.dataparsers.base_dataparser import DataParser, DataParserConfig, DataparserOutputs, Semantics
-from nerfstudio.data.scene_box import SceneBox
-
-
-@dataclass
-class MinimalDataParserConfig(DataParserConfig):
- """Minimal dataset config"""
-
- _target: Type = field(default_factory=lambda: MinimalDataParser)
- """target class to instantiate"""
- data: Path = Path("/home/nikhil/nerfstudio-main/tests/data/lego_test/minimal_parser")
-
-
-@dataclass
-class MinimalDataParser(DataParser):
- """Minimal DatasetParser"""
-
- config: MinimalDataParserConfig
-
- def _generate_dataparser_outputs(self, split="train"):
- filepath = self.config.data / f"{split}.npz"
- data = np.load(filepath, allow_pickle=True)
-
- image_filenames = [filepath.parent / path for path in data["image_filenames"].tolist()]
- mask_filenames = None
- if "mask_filenames" in data.keys():
- mask_filenames = [filepath.parent / path for path in data["mask_filenames"].tolist()]
-
- if "semantics" in data.keys():
- semantics = data["semantics"].item()
- metadata = {
- "semantics": Semantics(
- filenames=[filepath.parent / path for path in semantics["filenames"].tolist()],
- classes=semantics["classes"].tolist(),
- colors=torch.from_numpy(semantics["colors"]),
- mask_classes=semantics["mask_classes"].tolist(),
- )
- }
- else:
- metadata = {}
-
- scene_box_aabb = torch.from_numpy(data["scene_box"])
- scene_box = SceneBox(aabb=scene_box_aabb)
-
- camera_np = data["cameras"].item()
- distortion_params = None
- if "distortion_params" in camera_np.keys():
- distortion_params = torch.from_numpy(camera_np["distortion_params"])
- cameras = Cameras(
- fx=torch.from_numpy(camera_np["fx"]),
- fy=torch.from_numpy(camera_np["fy"]),
- cx=torch.from_numpy(camera_np["cx"]),
- cy=torch.from_numpy(camera_np["cy"]),
- distortion_params=distortion_params,
- height=torch.from_numpy(camera_np["height"]),
- width=torch.from_numpy(camera_np["width"]),
- camera_to_worlds=torch.from_numpy(camera_np["camera_to_worlds"])[:, :3, :4],
- camera_type=torch.from_numpy(camera_np["camera_type"]),
- times=torch.from_numpy(camera_np["times"]) if "times" in camera_np.keys() else None,
- )
-
- applied_scale = 1.0
- applied_transform = torch.eye(4, dtype=torch.float32)[:3, :]
- if "applied_scale" in data.keys():
- applied_scale = float(data["applied_scale"])
- if "applied_transform" in data.keys():
- applied_transform = data["applied_transform"].astype(np.float32)
- assert applied_transform.shape == (3, 4)
-
- dataparser_outputs = DataparserOutputs(
- image_filenames=image_filenames,
- cameras=cameras,
- scene_box=scene_box,
- mask_filenames=mask_filenames,
- dataparser_transform=applied_transform,
- dataparser_scale=applied_scale,
- metadata=metadata,
- )
- return dataparser_outputs
diff --git a/nerfstudio/data/dataparsers/nuscenes_dataparser.py b/nerfstudio/data/dataparsers/nuscenes_dataparser.py
index 53eb2351..a59a35c9 100644
--- a/nerfstudio/data/dataparsers/nuscenes_dataparser.py
+++ b/nerfstudio/data/dataparsers/nuscenes_dataparser.py
@@ -64,10 +64,10 @@
# So we need to rotate the actor coordinate system by 90 degrees around z-axis
WLH_TO_LWH = np.array(
[
- [0, 1, 0, 0],
- [-1, 0, 0, 0],
- [0, 0, 1, 0],
- [0, 0, 0, 1],
+ [0, 1.0, 0, 0],
+ [-1.0, 0, 0, 0],
+ [0, 0, 1.0, 0],
+ [0, 0, 0, 1.0],
]
)
HORIZONTAL_BEAM_DIVERGENCE = 0.00333333333 # radians, given as 4 inches at 100 feet
@@ -129,7 +129,7 @@ class NuScenesDataParserConfig(ADDataParserConfig):
"""target class to instantiate"""
sequence: str = "0103"
"""Name of the scene."""
- data: Path = Path("/mnt/local/NuScenes")
+ data: Path = Path("data/nuscenes")
"""Path to NuScenes dataset."""
version: Literal["v1.0-mini", "v1.0-trainval"] = "v1.0-trainval"
"""Dataset version."""
@@ -183,7 +183,7 @@ class NuScenes(ADDataParser):
@property
def actor_transform(self) -> torch.Tensor:
"""Nuscenes uses x-forward, so we need to rotate to x-right."""
- return torch.from_numpy(WLH_TO_LWH)
+ return torch.from_numpy(WLH_TO_LWH)[:3, :]
def _get_cameras(self) -> Tuple[Cameras, List[Path]]:
if "all" in self.config.cameras:
diff --git a/nerfstudio/data/dataparsers/pandaset_dataparser.py b/nerfstudio/data/dataparsers/pandaset_dataparser.py
index d6fd61fb..2191d3f7 100644
--- a/nerfstudio/data/dataparsers/pandaset_dataparser.py
+++ b/nerfstudio/data/dataparsers/pandaset_dataparser.py
@@ -13,6 +13,7 @@
# limitations under the License.
"""Data parser for PandaSet dataset"""
+import os
from collections import defaultdict
from dataclasses import dataclass, field
from functools import lru_cache
@@ -46,7 +47,7 @@
}
PANDASET_SEQ_LEN = 80
-EXTRINSICS_FILE_PATH = "./nerfstudio/data/dataparsers/pandaset_extrinsics.yaml"
+EXTRINSICS_FILE_PATH = os.path.join(os.path.dirname(__file__), "pandaset_extrinsics.yaml")
MAX_RELECTANCE_VALUE = 255.0
ALLOWED_RIGID_CLASSES = (
@@ -113,6 +114,8 @@ class PandaSetDataParserConfig(ADDataParserConfig):
_target: Type = field(default_factory=lambda: PandaSet)
"""target class to instantiate"""
+ data: Path = Path("data/pandaset")
+ """Directory specifying location of data."""
sequence: str = "001"
"""Name of the scene."""
cameras: Tuple[Literal["front", "front_left", "front_right", "back", "left", "right", "none", "all"], ...] = (
diff --git a/nerfstudio/data/dataparsers/waymo_dataparser.md b/nerfstudio/data/dataparsers/waymo_dataparser.md
new file mode 100644
index 00000000..316f73a2
--- /dev/null
+++ b/nerfstudio/data/dataparsers/waymo_dataparser.md
@@ -0,0 +1,52 @@
+# NeuRAD on Waymo Open Dataset
+
+## About
+Thanks to the excellent work of NeuRAD, we reproduce some results on the Waymo Open Dataset.
+
+Our goal in reproducing and open-sourcing this Waymo dataparser for NeuRAD is to provide a basic reference for the self-driving community and to inspire more work.
+
+In the same folder, there is [wod_dataparser.py](./wod_dataparser.py), which follows the [README - Adding Datasets](https://github.com/georghess/neurad-studio?tab=readme-ov-file#adding-datasets) suggestions. In addition, we added [wod_utils.py](./wod_utils.py), which does the main work of converting/exporting the Waymo dataset.
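+
+With the dataparser registered as `wod-data`, training can be launched like for any other supported dataset (a minimal sketch; the data path is an assumption and should point to your local WOD v2 parquet folders):
+```bash
+ns-train neurad wod-data --data /data/dataset/wod
+```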
+
+In addition, we have added rolling shutter support for the Waymo dataset, since its rolling shutter direction is horizontal instead of the vertical one in PandaSet. Here are some examples of the comparison results (on sequence 10588):
+![](./../../../docs/_static/imgs/NeuRAD-RS-Waymo-Front.png)
+![](./../../../docs/_static/imgs/NeuRAD-RS-Waymo-Left.png)
+![](./../../../docs/_static/imgs/NeuRAD-RS-Waymo-Right.png)
+
+
+### Benchmark between Pandaset & Waymo
+| Dataset | Sequence | Frames | Cameras | PSNR | SSIM | LPIPS |
+|--- |--- |--- |--- |--- |--- |--- |
+| Pandaset | 006 | 80 | FC | 25.1562 | 0.8044 | 0.1575 |
+| Pandaset | 011 | 80 | 360 | 26.3919 | 0.8057 | 0.2029 |
+| Waymo | 10588771936253546636 | 50 | FC | 27.5555 | 0.8547 | 0.121 |
+| Waymo | 473735159277431842 | 150 | FC | 29.1758 | 0.8717 | 0.1592 |
+| Waymo | 4468278022208380281 | ALL | FC | 30.5247 | 0.8787 | 0.1701 |
+
+Notes: All of the above results were obtained with the same hyperparameters and configurations as in the NeuRAD paper (**Appendix A**).
+
+### Results
+#### Waymo RGB rendering - Sequence 10588 - 3 cameras (FC_LEFT, FC, FC_RIGHT)
+[![Sequence 10588 - 3 cameras](http://img.youtube.com/vi/eR1bHeh7p8A/0.jpg)](https://www.youtube.com/watch?v=eR1bHeh7p8A)
+> Top is ground truth, bottom is rendered.
+
+#### Actor removal - Sequence 20946 - FC cameras
+[![Sequence 20946](http://img.youtube.com/vi/mkMdzAvTez4/0.jpg)](https://www.youtube.com/watch?v=mkMdzAvTez4)
+> Left is ground truth, right is rendered.
+
+#### Novel view synthesis - Sequence 20946 - Ego vehicle 1m up
+[![Ego vehicle 1m up](http://img.youtube.com/vi/U8VRboWLj_c/0.jpg)](https://www.youtube.com/watch?v=U8VRboWLj_c)
+> Left is ground truth, right is rendered.
+
+#### Novel view synthesis - Sequence 20946 - Ego vehicle 1m left
+[![Ego vehicle 1m left](http://img.youtube.com/vi/q_HFmc6JPzQ/0.jpg)](https://www.youtube.com/watch?v=q_HFmc6JPzQ)
+> Left is ground truth, right is rendered.
+
+## Links
+
+Results were obtained with the Waymo Open Dataset [v2.0.0, gcloud link](https://console.cloud.google.com/storage/browser/waymo_open_dataset_v_2_0_0).
+
+## Contributors
+
+- Lei Lei, Leddartech
+- Julien Stanguennec, Leddartech
+- Pierre Merriaux, Leddartech
\ No newline at end of file
diff --git a/nerfstudio/data/dataparsers/wod_dataparser.py b/nerfstudio/data/dataparsers/wod_dataparser.py
new file mode 100644
index 00000000..9b2602f0
--- /dev/null
+++ b/nerfstudio/data/dataparsers/wod_dataparser.py
@@ -0,0 +1,289 @@
+"""Data parser for Waymo Open Dataset"""
+
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Dict, List, Literal, Optional, Tuple, Type
+
+import numpy as np
+import torch
+import transforms3d
+from waymo_open_dataset.v2.perception import camera_image
+
+from nerfstudio.cameras.cameras import Cameras, CameraType
+from nerfstudio.cameras.lidars import Lidars, LidarType
+from nerfstudio.data.dataparsers.ad_dataparser import DUMMY_DISTANCE_VALUE, ADDataParser, ADDataParserConfig
+from nerfstudio.data.dataparsers.base_dataparser import DataparserOutputs
+from nerfstudio.data.dataparsers.wod_utils import (
+ WOD64_ELEVATION_MAPPING,
+ ExportImages,
+ ExportLidar,
+ ObjectsID,
+ ParquetReader,
+ SelectedTimestamp,
+)
+
+WOD_ELEVATION_MAPPING = {"Wod64": WOD64_ELEVATION_MAPPING}
+WOD_AZIMUT_RESOLUTION = {"Wod64": 0.140625}
+WOD_SKIP_ELEVATION_CHANNELS = {"Wod64": ()}
+
+HORIZONTAL_BEAM_DIVERGENCE = 2.4e-3 # radians
+VERTICAL_BEAM_DIVERGENCE = 1.5e-3 # radians
+
+ALLOWED_DEFORMABLE_CLASSES = (
+ "TYPE_PEDESTRIAN",
+ "TYPE_CYCLIST",
+)
+
+ALLOWED_RIGID_CLASSES = (
+ "TYPE_VEHICLE",
+ "TYPE_SIGN",
+)
+WOD_CAMERA_NAME_2_ID = {e.name: e.value for e in camera_image.CameraName if e.name != "UNKNOWN"}
+
+@dataclass
+class WoDParserConfig(ADDataParserConfig):
+ """Waymo Open Dataset config."""
+
+ _target: Type = field(default_factory=lambda: WoD)
+ """target class to instantiate"""
+ sequence: str = "10588771936253546636_2300_000_2320_000"
+ """Name of the scene (ie: so-called context_name)."""
+ data: Path = Path("/data/dataset/wod/")
+ """Raw dataset path to WOD"""
+ parquet_dir: str = "training"
+ """Subdirectory containing the parquet files. Change to "validation" when the sequence belongs to the validation split."""
+ output_folder: Path = Path("/data/dataset/wod/images")
+ """Output folder for the exported images. By default it is placed under the WOD dataset path."""
+ train_split_fraction: float = 0.5
+ """The percent of images to use for training. The remaining images are for eval."""
+ start_frame: int = 0
+ """Start frame."""
+ end_frame: Optional[int] = None
+ """End frame. When set to None, the last frame of the sequence is used."""
+ dataset_end_fraction: float = 1.0
+ """At what fraction of the dataset to end. Values other than 1.0 are not supported by the current WOD implementation."""
+ cameras: Tuple[Literal["FRONT", "FRONT_LEFT", "FRONT_RIGHT", "SIDE_LEFT", "SIDE_RIGHT"], ...] = (
+ "FRONT",
+ "FRONT_LEFT",
+ "FRONT_RIGHT",
+ # "SIDE_LEFT",
+ # "SIDE_RIGHT",
+ )
+ """Which cameras to use."""
+ lidars: Tuple[Literal["Top"], ...] = ("Top",)
+ """Which lidars to use, only lidar TOP is supported."""
+ load_cuboids: bool = True
+ """Whether to load cuboid annotations."""
+ cuboids_ids: Optional[Tuple[int,...]] = None
+ """Selection of cuboids_ids if cuboid_annotations is set to True. If None, all dynamic cuboids will be exported."""
+ annotation_interval: float = 0.1 # 10 Hz of capture
+ """Interval between annotations in seconds."""
+ correct_cuboid_time: bool = True
+ """Whether to correct the cuboid time to match the actual time of observation, not the end of the lidar sweep."""
+ min_lidar_dist: Tuple[float, float, float] = (1.0, 1.0, 1.0)
+ """Wod Top lidar is x-forward, y-left, z-up."""
+ add_missing_points: bool = True
+ """Whether to add missing points (rays that did not return) to the point clouds."""
+ lidar_elevation_mapping: Dict[str, Dict] = field(default_factory=lambda: WOD_ELEVATION_MAPPING)
+ """Elevation mapping for each lidar."""
+ skip_elevation_channels: Dict[str, Tuple] = field(default_factory=lambda: WOD_SKIP_ELEVATION_CHANNELS)
+ """Channels to skip when adding missing points."""
+ lidar_azimuth_resolution: Dict[str, float] = field(default_factory=lambda: WOD_AZIMUT_RESOLUTION)
+ """Azimuth resolution for each lidar."""
+ rolling_shutter_offsets: Tuple[float, float] = (-0.022, 0.022)
+ """In Waymo the image time is captured either left_2_right or right_2_left with cols."""
+ paint_points: bool = True
+ """Whether to paint the points in the point cloud."""
+
+
+@dataclass
+class WoD(ADDataParser):
+ """Waymo Open Dataset DatasetParser"""
+
+ config: WoDParserConfig
+
+ def _get_cameras(self) -> Tuple[Cameras, List[Path]]:
+ """Images are exported from the parquet files to jpg files in the dataset folder, and the filepaths are returned together with the Cameras."""
+
+ output_folder_name = f"{self.config.sequence}_start{self.config.start_frame}_end{self.config.end_frame}"
+ output_folder_name += "_cameras_" + "_".join([str(id) for id in self.cameras_ids])
+ images_output_folder: Path = self.config.output_folder / output_folder_name # type: ignore
+
+ export_images = ExportImages(
+ self.parquet_reader,
+ output_folder = str(images_output_folder),
+ select_ts = self.select_ts,
+ cameras_ids = self.cameras_ids,
+ )
+
+ data_out, (rolling_shutter, rolling_shutter_direction) = export_images.process()
+ rolling_shutter = round(rolling_shutter, 3)
+ rs_offsets = (-rolling_shutter, rolling_shutter)
+
+ # rolling shutter offsets are reversed when the readout goes right to left
+ if rolling_shutter_direction == 4: # RIGHT_TO_LEFT
+ rs_offsets = (rolling_shutter, -rolling_shutter)
+
+ self.config.rolling_shutter_offsets = rs_offsets
+ rs_direction = "Horizontal" if rolling_shutter_direction in (2, 4) else "Vertical"
+
+ img_filenames = []
+ intrinsics = []
+ poses = []
+ idxs = []
+ heights = []
+ widths = []
+ times = []
+ for frame in data_out["frames"]:
+ img_filenames.append(str(images_output_folder/frame["file_path"]))
+ poses.append(frame["transform_matrix"])
+ intrinsic = np.array(
+ [
+ [frame["f_u"], 0, frame["c_u"]],
+ [0, frame["f_v"], frame["c_v"]],
+ [0, 0, 1],
+ ]
+ )
+ intrinsics.append(intrinsic)
+ idxs.append(frame["sensor_id"])
+ heights.append(frame["h"])
+ widths.append(frame["w"])
+ times.append(frame["time"])
+
+ intrinsics = torch.tensor(np.array(intrinsics), dtype=torch.float32)
+ poses = torch.tensor(np.array(poses), dtype=torch.float32)
+ times = torch.tensor(times, dtype=torch.float64)
+ idxs = torch.tensor(idxs).int().unsqueeze(-1)
+ cameras = Cameras(
+ fx=intrinsics[:, 0, 0],
+ fy=intrinsics[:, 1, 1],
+ cx=intrinsics[:, 0, 2],
+ cy=intrinsics[:, 1, 2],
+ height=torch.tensor(heights),
+ width=torch.tensor(widths),
+ camera_to_worlds=poses[:, :3, :4],
+ camera_type=CameraType.PERSPECTIVE,
+ times=times,
+ metadata={
+ "sensor_idxs": idxs,
+ "rs_direction": rs_direction,
+ },
+ )
+ return cameras, img_filenames
+
+ def _get_lidars(self) -> Tuple[Lidars, Tuple[List[torch.Tensor], List[torch.Tensor]]]:
+ """The WoD implementation of _get_lidars returns the lidar points and missing points directly as tensors, while
+ other dataparsers return paths to files containing the point clouds, which are then processed by the _read_lidars
+ function in _generate_dataparser_outputs. With WoD all lidar point clouds are stored in parquet files, and they
+ eventually end up in memory in the DataparserOutputs object, so most of the work is done within _get_lidars.
+
+ :return: Tuple[Lidars, Tuple[List[Point-clouds], List[MissingPointsPcd]]]
+ """
+ if self.config.load_cuboids:
+ objects_id_to_extract = list(self.config.cuboids_ids) if self.config.cuboids_ids is not None else self.objects_id.dynamic_id
+ else:
+ objects_id_to_extract = []
+
+ export_lidar = ExportLidar(self.parquet_reader, self.select_ts, self.objects_id, self.config.output_folder)
+ poses, pts_lidar_list, missing_pts_list, times, actors = export_lidar.process(objects_id_to_extract=objects_id_to_extract)
+
+ # save actors for later trajectories calculation
+ self.actors = actors
+
+ pts_lidar_list = [torch.from_numpy(pts) for pts in pts_lidar_list]
+ missing_pts_list = [torch.from_numpy(pts) for pts in missing_pts_list]
+
+ times = torch.tensor(times, dtype=torch.float64)
+ idxs = torch.zeros_like(times).int().unsqueeze(-1)
+
+ poses = torch.from_numpy(np.array(poses))
+ lidars = Lidars(
+ lidar_to_worlds=poses[:, :3, :4],
+ lidar_type=LidarType.WOD64,
+ times=times,
+ metadata={"sensor_idxs": idxs},
+ horizontal_beam_divergence=HORIZONTAL_BEAM_DIVERGENCE,
+ vertical_beam_divergence=VERTICAL_BEAM_DIVERGENCE,
+ valid_lidar_distance_threshold=DUMMY_DISTANCE_VALUE / 2,
+ )
+ return lidars, (pts_lidar_list, missing_pts_list)
+
+ def _read_lidars(
+ self, lidars: Lidars, pts_list_tuple: Tuple[List[torch.Tensor], List[torch.Tensor]]
+ ) -> List[torch.Tensor]:
+ """Reads the point clouds from the given filenames. Should be in x,y,z,r,t order. t is optional."""
+
+ pts_lidar_list, missing_pts_list = pts_list_tuple
+ if self.config.add_missing_points:
+ """Currently this part has been done during wod_export, here we only concatenate together.
+ For future modification, refer to _read_lidars method from pandaset_dataparser.py
+ """
+ point_clouds = [torch.cat([pc, missing], dim=0) for pc, missing in zip(pts_lidar_list, missing_pts_list)]
+ else:
+ point_clouds = pts_lidar_list
+
+ lidars.lidar_to_worlds = lidars.lidar_to_worlds.float()
+ return point_clouds
+
+ def _get_actor_trajectories(self) -> List[Dict]:
+ """Returns a list of actor trajectories.
+
+ Each trajectory is a dictionary with the following keys:
+ - poses: the poses of the actor (float32)
+ - timestamps: the timestamps of the actor (float64)
+ - dims: the dimensions of the actor, wlh order (float32)
+ - label: the label of the actor (str)
+ - stationary: whether the actor is stationary (bool)
+ - symmetric: whether the actor is expected to be symmetric (bool)
+ - deformable: whether the actor is expected to be deformable (e.g. pedestrian)
+ """
+ trajs_list = []
+ allowed_classes = ALLOWED_RIGID_CLASSES
+ if self.config.include_deformable_actors:
+ allowed_classes += ALLOWED_DEFORMABLE_CLASSES
+
+ rot_minus_90 = np.eye(4)
+ rot_minus_90[:3, :3] = transforms3d.euler.euler2mat(0.0, 0.0, -np.pi/2)
+
+ for index, actor in self.actors.items():
+ actor_type = actor["label"]
+
+ if actor_type not in allowed_classes:
+ continue
+ poses = np.array(actor["poses"]) @ rot_minus_90
+ timestamps = actor["timestamps"]
+ actor_dimensions = self.objects_id.id2box_dimensions[index] # (length, width, height)
+ l, w, h = actor_dimensions.values()
+ dims = np.array([w, l, h], dtype=np.float32)
+
+ symmetric = actor_type == "TYPE_VEHICLE"
+ deformable = actor_type in ALLOWED_DEFORMABLE_CLASSES
+
+ trajs_list.append(
+ {
+ "poses": torch.tensor(poses).float(),
+ "timestamps": torch.tensor(timestamps, dtype=torch.float64),
+ "dims": torch.tensor(dims, dtype=torch.float32),
+ "label": actor_type,
+ "stationary": False, # Only 'export' dynamic objects from ExportLidar
+ "symmetric": symmetric,
+ "deformable": deformable,
+ }
+ )
+ return trajs_list
+
+ def _generate_dataparser_outputs(self, split="train") -> DataparserOutputs:
+ assert self.config.dataset_end_fraction == 1.0, f"The WoD dataparser only supports dataset_end_fraction == 1.0, got {self.config.dataset_end_fraction}"
+ self.cameras_ids = [WOD_CAMERA_NAME_2_ID[cam] for cam in self.config.cameras]
+ parquet_dir = str(self.config.data / self.config.parquet_dir)
+ self.parquet_reader = ParquetReader(self.config.sequence, dataset_dir=parquet_dir)
+ self.select_ts = SelectedTimestamp(self.parquet_reader, self.config.start_frame, self.config.end_frame)
+ self.objects_id = ObjectsID(self.parquet_reader, self.select_ts)
+
+ return super()._generate_dataparser_outputs(split)
+
+
+if __name__ == "__main__":
+ wod_test = WoD(config=WoDParserConfig())
+ do = wod_test._generate_dataparser_outputs()
+ print(do)
diff --git a/nerfstudio/data/dataparsers/wod_utils.py b/nerfstudio/data/dataparsers/wod_utils.py
new file mode 100644
index 00000000..a42244ec
--- /dev/null
+++ b/nerfstudio/data/dataparsers/wod_utils.py
@@ -0,0 +1,697 @@
+from __future__ import annotations
+
+import warnings
+from copy import deepcopy
+from typing import Optional
+
+# Disable annoying warnings from PyArrow used under the hood.
+warnings.simplefilter(action="ignore", category=FutureWarning)
+import glob
+import os
+from dataclasses import asdict, dataclass
+from typing import Dict, List, Literal, Tuple, Type, TypedDict
+
+import dask.dataframe as dd
+import numpy as np
+import numpy.typing as npt
+import tensorflow as tf
+tf.config.set_visible_devices([], 'GPU') # Not useful for parsing data.
+import transforms3d
+from tqdm import tqdm
+from typing_extensions import NotRequired
+from waymo_open_dataset import v2
+from waymo_open_dataset.utils import box_utils, transform_utils
+from waymo_open_dataset.v2.perception import box as _v2_box
+from waymo_open_dataset.v2.perception import camera_image as _v2_camera_image
+from waymo_open_dataset.v2.perception import context as _v2_context
+from waymo_open_dataset.v2.perception import lidar as _v2_lidar
+from waymo_open_dataset.v2.perception import pose as _v2_pose
+from waymo_open_dataset.v2.perception.utils.lidar_utils import convert_range_image_to_cartesian
+from waymo_open_dataset.wdl_limited.camera.ops import py_camera_model_ops
+
+WOD64_ELEVATION_MAPPING = {
+ 0: 2.5028389775650304,
+ 1: 2.321411751659905,
+ 2: 2.160192256145731,
+ 3: 1.9888398480248883,
+ 4: 1.8209349283573786,
+ 5: 1.6502418044970433,
+ 6: 1.4938679389287557,
+ 7: 1.3221564279311344,
+ 8: 1.1632512247221256,
+ 9: 0.9913750200128197,
+ 10: 0.8101498633691424,
+ 11: 0.6482041237244122,
+ 12: 0.48336997052669073,
+ 13: 0.3201589105532588,
+ 14: 0.16462286430089693,
+ 15: -0.011621928777127347,
+ 16: -0.1892787856748749,
+ 17: -0.34201145065403127,
+ 18: -0.5054471288374568,
+ 19: -0.6827621682735187,
+ 20: -0.8449790324744345,
+ 21: -1.0197501521052226,
+ 22: -1.1886280361746464,
+ 23: -1.3669402000816122,
+ 24: -1.5409274243550963,
+ 25: -1.7570629940063032,
+ 26: -1.9649363657632477,
+ 27: -2.1894398590475905,
+ 28: -2.4374471868305987,
+ 29: -2.6683997977793497,
+ 30: -2.9254801778651274,
+ 31: -3.208793362354923,
+ 32: -3.4652440977914574,
+ 33: -3.770654905928011,
+ 34: -4.068046596015399,
+ 35: -4.365557254206326,
+ 36: -4.68136205944531,
+ 37: -5.023904856877318,
+ 38: -5.360837632630594,
+ 39: -5.715495138382295,
+ 40: -6.091110098376429,
+ 41: -6.457270941426794,
+ 42: -6.8451480987631,
+ 43: -7.24803061771811,
+ 44: -7.645534995724646,
+ 45: -8.08179034271091,
+ 46: -8.522502366939104,
+ 47: -8.957247796204939,
+ 48: -9.421474930460981,
+ 49: -9.885265834826649,
+ 50: -10.369068098135806,
+ 51: -10.829727642824542,
+ 52: -11.332199121554261,
+ 53: -11.822915504645561,
+ 54: -12.364441979859368,
+ 55: -12.908557767713962,
+ 56: -13.437836414956127,
+ 57: -13.983840803683233,
+ 58: -14.537462865288743,
+ 59: -15.076443690248071,
+ 60: -15.689281398977771,
+ 61: -16.300273448699592,
+ 62: -16.911934322750316,
+ 63: -17.546811286086175,
+} # degrees
+
+DATA_FREQUENCY = 10.0 # 10 Hz
+DUMMY_DISTANCE_VALUE = 2e3 # meters, used for missing points
+TIME_OFFSET = 50e-3  # 50 ms time offset in seconds; half the scanning period
+
+class ActorsDict(TypedDict):
+ poses: List[np.ndarray]
+ timestamps: List[float]
+ label: str
+
+class ImageFrame(TypedDict):
+    file_path: str
+    transform_matrix: List[List[float]]
+    frame_id: int
+    time: float
+    sensor_id: int
+    f_u: float
+    f_v: float
+    c_u: float
+    c_v: float
+    k1: float
+    k2: float
+    p1: float
+    p2: float
+    k3: float
+    h: int
+    w: int
+
+
+def get_camera_names():
+ return [f"{e.value}:{e.name}" for e in _v2_camera_image.CameraName if e.name != "UNKNOWN"]
+
+def get_mock_timestamps(points: npt.NDArray[np.float32]) -> npt.NDArray[np.float32]:
+ """Get mock relative timestamps for the wod points."""
+ # the wod has x forward, y left, z up and the sweep is split behind the car.
+ # it is also rotating clockwise, meaning that the angles close to -pi are the
+ # first ones in the sweep and the ones close to pi are the last ones in the sweep.
+ angles = -np.arctan2(points[:, 1], points[:, 0]) # N, [-pi, pi]
+ # angles += np.pi # N, [0, 2pi]
+    # see how much of the rotation has finished
+    fraction_of_rotation = angles / (2 * np.pi)  # N, [-0.5, 0.5]
+ # get the pseudo timestamps based on the total rotation time
+ timestamps = fraction_of_rotation * 1.0 / DATA_FREQUENCY
+ return timestamps
+
+class ParquetReader:
+ def __init__(self, context_name: str, dataset_dir: str = "/data/dataset/wod/training", nb_partitions: int = 120):
+ self.context_name = context_name
+ self.dataset_dir = dataset_dir
+ self.nb_partitions = nb_partitions
+
+ def read(self, tag: str) -> dd.DataFrame:
+ """Creates a Dask DataFrame for the component specified by its tag."""
+ paths = glob.glob(f"{self.dataset_dir}/{tag}/{self.context_name}.parquet")
+ return dd.read_parquet(paths, npartitions=self.nb_partitions) # type: ignore
+
+ def __call__(self, tag: str) -> dd.DataFrame:
+ return self.read(tag)
+
+
+class SelectedTimestamp:
+ def __init__(self, reader: ParquetReader, start_frame: int = 0, end_frame: Optional[int] = None):
+ cam_image_df = reader("camera_image")
+ cam_image_df = cam_image_df["key.frame_timestamp_micros"]
+ self.ts_list = np.unique(np.array(cam_image_df.compute()))
+ self.ts_selected = self.ts_list[start_frame:end_frame]
+
+ def __len__(self) -> int:
+ return len(self.ts_selected)
+
+ def sequence_len(self) -> int:
+ return len(self.ts_list)
+
+ def get_selected_ts(self) -> List[int]:
+ return self.ts_selected.tolist()
+
+ def is_selected(self, ts: int) -> bool:
+ return ts in self.ts_selected
+
+ def ts2frame_idx(self, ts: int) -> int:
+ if ts not in self.ts_selected:
+ raise IndexError(f"{ts} is not in selected timestamps")
+ return np.where(self.ts_selected == ts)[0][0]
+
+
+class ObjectsID:
+    '''Helper for extracting static/dynamic object IDs to be processed by the ExportLidar class.'''
+
+ def __init__(self, reader: ParquetReader, selected_ts: SelectedTimestamp, speed_static_threshold: float = 0.2):
+ self.reader = reader
+ self.speed_static_threshold = speed_static_threshold
+ self.dynamic_id: list[int] = []
+ self.dynamic_uuid: list[str] = []
+ self.dynamic_type: list[str] = []
+ self.id2uuid: dict[int, str] = {}
+ self.uuid2id: dict[str, int] = {}
+ self.id2box_dimensions: dict[int, dict[str, float]] = {}
+ self.selected_ts = selected_ts
+ self.keep_id_after_lidar_extraction = []
+ self.build_dict()
+
+ def build_dict(self):
+ lidar_box_df = self.reader("lidar_box")
+
+ lidar_box_df2 = (
+ lidar_box_df.groupby(["key.segment_context_name", "key.laser_object_id"]).agg(list).reset_index()
+ )
+
+ for object_id, (_, r) in enumerate(lidar_box_df2.iterrows()):
+ LiDARBoxCom = v2.LiDARBoxComponent.from_dict(r)
+ ts_mask = np.isin(np.array(LiDARBoxCom.key.frame_timestamp_micros), self.selected_ts.get_selected_ts())
+ if not np.any(ts_mask):
+ continue
+ dimensions = LiDARBoxCom.box.size
+
+ length, width, height = (
+ np.array(dimensions.x)[ts_mask][0],
+ np.array(dimensions.y)[ts_mask][0],
+ np.array(dimensions.z)[ts_mask][0],
+ )
+
+ self.id2box_dimensions[object_id] = {"length": length, "width": width, "height": height}
+ object_uuid = LiDARBoxCom.key.laser_object_id
+ self.id2uuid[object_id] = object_uuid
+            # an object is considered dynamic only if its speed exceeds the threshold in at least one selected frame
+ speed = np.array(
+ [
+ np.array(LiDARBoxCom.speed.x)[ts_mask], # type: ignore
+ np.array(LiDARBoxCom.speed.y)[ts_mask], # type: ignore
+ np.array(LiDARBoxCom.speed.z)[ts_mask], # type: ignore
+ ]
+ )
+ speed = speed[~np.isnan(speed).any(axis=1)]
+ speed = np.linalg.norm(speed, axis=0)
+ dynamic = np.any(speed > self.speed_static_threshold)
+ if dynamic:
+ self.dynamic_id.append(object_id)
+ self.dynamic_uuid.append(object_uuid)
+ self.dynamic_type.append(_v2_box.BoxType(LiDARBoxCom.type[0]).name) # type: ignore
+
+ for id, uuid in self.id2uuid.items():
+ self.uuid2id[uuid] = id
+
+ def is_dynamic(self, id: int | str):
+ if isinstance(id, int):
+ return id in self.dynamic_id
+ if isinstance(id, str):
+ return self.uuid2id[id] in self.dynamic_id
+
+ def get_box_dimensions(self, id: int | str):
+ if isinstance(id, int):
+ return self.id2box_dimensions[id]
+ if isinstance(id, str):
+ return self.id2box_dimensions[self.uuid2id[id]]
+
+ def get_box_coordinates(self, dynamic_only: bool = True) -> Dict[str, np.ndarray]:
+ lidar_box_df = self.reader("lidar_box")
+
+ lidar_box_df2 = (
+ lidar_box_df.groupby(["key.segment_context_name", "key.laser_object_id"]).agg(list).reset_index()
+ )
+
+ objects_coordinates = {}
+ for object_id, (_, r) in enumerate(lidar_box_df2.iterrows()):
+ LiDARBoxCom = v2.LiDARBoxComponent.from_dict(r)
+ ts_mask = np.isin(np.array(LiDARBoxCom.key.frame_timestamp_micros), self.selected_ts.get_selected_ts())
+ if not np.any(ts_mask):
+ continue
+
+ object_uuid = LiDARBoxCom.key.laser_object_id
+ object_id = self.uuid2id[object_uuid]
+
+ if dynamic_only:
+ if object_id in self.dynamic_id:
+ objects_coordinates[object_id] = LiDARBoxCom.box.center
+ else:
+ objects_coordinates[object_id] = LiDARBoxCom.box.center
+
+ return objects_coordinates
+
+ def print_dynamic(self):
+ for id, type in zip(self.dynamic_id, self.dynamic_type):
+ print(f"{id}:{type}, ", end="")
+
+
+class ExportImages:
+    '''
+    Creates the image folder, saves the images into it, and `process()` returns a tuple with:
+    - a list of image dicts (image path, frame_id, time, pose (nerf), sensor_id, intrinsics)
+    - a tuple of rolling shutter information (duration and direction)
+
+    :param reader: ParquetReader object
+    :param select_ts: SelectedTimestamp object
+    :param output_folder: Root folder where images will be saved.
+    :param cameras_ids: Which camera ids to export, defaults to list(range(1, len(get_camera_names()) + 1))
+    '''
+
+ IMAGE_FOLDER = "images"
+
+ def __init__(
+ self,
+ reader: ParquetReader,
+ select_ts: SelectedTimestamp,
+ output_folder: str,
+ cameras_ids: List[int] = list(range(1, len(get_camera_names()) + 1)),
+ ):
+ self.reader: ParquetReader = reader
+ self.select_ts = select_ts
+ self.cameras_ids = cameras_ids
+
+ self.output_folder = os.path.join(output_folder, self.IMAGE_FOLDER)
+ if not os.path.exists(self.output_folder):
+ os.makedirs(self.output_folder)
+
+ def process(self) -> Tuple[dict[str,List[ImageFrame]], Tuple[float, int]]:
+ cam_calib = self.reader("camera_calibration")
+ camera_calib = {}
+ data_out:dict[str,List[ImageFrame]] = {}
+
+ data_out["frames"] = []
+ for i, (_, r) in enumerate(cam_calib.iterrows()):
+ calib = v2.CameraCalibrationComponent.from_dict(r)
+ camera_calib["cam" + v2.perception.camera_image.CameraName(calib.key.camera_name).name] = ( # type: ignore
+ calib.extrinsic.transform.reshape(4, 4) # type: ignore
+ )
+ camera_calib["cam" + v2.perception.camera_image.CameraName(calib.key.camera_name).name + "_intrinsics"] = ( # type: ignore
+ asdict(calib.intrinsic) | {"h": calib.height, "w": calib.width} # type: ignore
+ )
+ # rolling shutter direction for offset calculation
+ rolling_shutter_direction = calib.rolling_shutter_direction
+
+ print("Camera processing...")
+ cam_image_df = self.reader("camera_image")
+ cam_image_df = cam_image_df[
+ (cam_image_df["key.camera_name"].isin(self.cameras_ids)) # type: ignore
+ & (cam_image_df["key.frame_timestamp_micros"].isin(self.select_ts.get_selected_ts())) # type: ignore
+ ]
+ camera_poses = []
+ rolling_shutter_list = []
+ for i, (_, r) in tqdm(enumerate(cam_image_df.iterrows())): # type: ignore
+ CamComp = v2.CameraImageComponent.from_dict(r)
+ tr_image = CamComp.pose.transform.reshape(4, 4) # type: ignore
+ delta_time = (
+ CamComp.rolling_shutter_params.camera_readout_done_time
+ + CamComp.rolling_shutter_params.camera_trigger_time
+ ) / 2 - CamComp.pose_timestamp
+
+ rolling_shutter = (
+ CamComp.rolling_shutter_params.camera_readout_done_time
+ - CamComp.rolling_shutter_params.camera_trigger_time
+ ) / 2
+ rolling_shutter_list.append(rolling_shutter)
+
+ avx, avy, avz = (
+ CamComp.velocity.angular_velocity.x,
+ CamComp.velocity.angular_velocity.y,
+ CamComp.velocity.angular_velocity.z,
+ )
+ skm = np.array([[0, -avz, avy], [avz, 0, -avx], [-avy, avx, 0]])
+ r_image = tr_image[:3, :3]
+
+            # first-order pose propagation: R(t + dt) ~= (I + dt * [w]_x) @ R(t), i.e. R + dt * ([w]_x @ R)
+            r_updated = (np.eye(3) + delta_time * skm) @ r_image
+ t_updated = tr_image[:3, 3] + delta_time * np.array(
+ [
+ CamComp.velocity.linear_velocity.x,
+ CamComp.velocity.linear_velocity.y,
+ CamComp.velocity.linear_velocity.z,
+ ]
+ )
+ tr_updated = np.eye(4)
+ tr_updated[:3, 3] = t_updated
+ tr_updated[:3, :3] = r_updated
+
+ frame_id = self.select_ts.ts2frame_idx(CamComp.key.frame_timestamp_micros)
+ filename = f"{v2.perception.camera_image.CameraName(CamComp.key.camera_name).name}_{frame_id:08d}.jpg" # type: ignore
+
+ nerfstudio2waymo = np.eye(4)
+ nerfstudio2waymo[:3, :3] = np.array([[0, -1, 0], [0, 0, 1], [-1, 0, 0]]).T
+ # opencv2waymo = np.eye(4)
+ # opencv2waymo[:3,:3] = np.array([[0,-1,0],[0,0,-1],[1,0,0]]).T
+ calib = camera_calib["cam" + v2.perception.camera_image.CameraName(CamComp.key.camera_name).name] # type: ignore
+ camera_poses.append(tr_updated @ calib @ nerfstudio2waymo)
+ data_out["frames"].append(
+ {
+ "file_path": os.path.join(self.IMAGE_FOLDER, filename),
+ "transform_matrix": (camera_poses[-1]).tolist(),
+ "frame_id": int(frame_id),
+ "time": delta_time + CamComp.pose_timestamp,
+                    "sensor_id": CamComp.key.camera_name - 1,  # sensor_id for NeuRAD; in WOD, 0 == UNKNOWN
+ }
+ | camera_calib[
+ "cam" + v2.perception.camera_image.CameraName(CamComp.key.camera_name).name + "_intrinsics" # type: ignore
+ ]
+ )
+
+ save_file = os.path.join(self.output_folder, filename)
+ if not os.path.exists(save_file):
+ with open(save_file, "wb") as binary_file:
+ binary_file.write(CamComp.image)
+
+        # mean rolling shutter duration over all processed images
+        rolling_shutter = sum(rolling_shutter_list) / len(rolling_shutter_list)
+ return (data_out, (rolling_shutter, rolling_shutter_direction))
+
+
+class ExportLidar:
+    '''Utility class for extracting the lidar point cloud and objects from the parquet files of the WoD v2 dataset.'''
+
+ def __init__(
+ self,
+ reader: ParquetReader,
+ select_ts: SelectedTimestamp,
+ objects_id: ObjectsID,
+ output_folder: str,
+ extract_objects=True,
+ cameras_ids: List[int] = list(range(1, len(get_camera_names()) + 1)),
+ ):
+ self.reader: ParquetReader = reader
+ self.select_ts = select_ts
+ self.cameras_ids = cameras_ids
+
+ self.output_folder = output_folder
+ self.extract_objects = extract_objects
+ self.objects_id = objects_id
+ self.cameras_calibration = None
+
+ def convert_range_image_to_point_cloud(
+ self,
+ range_image: _v2_lidar.RangeImage,
+ calibration: _v2_context.LiDARCalibrationComponent,
+ pixel_pose: Optional[_v2_lidar.PoseRangeImage] = None,
+ frame_pose: Optional[_v2_pose.VehiclePoseComponent] = None,
+ keep_polar_features=False,
+ ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
+        """Converts one range image from polar coordinates to a point cloud.
+
+        Same as in the WOD API, but additionally returns the missing-points cloud and the range image mask.
+
+ Args:
+ range_image: One range image return captured by a LiDAR sensor.
+ calibration: Parameters for calibration of a LiDAR sensor.
+ pixel_pose: If not none, it sets pose for each range image pixel.
+          frame_pose: This must be set when `pixel_pose` is set.
+ keep_polar_features: If true, keep the features from the polar range image
+ (i.e. range, intensity, and elongation) as the first features in the
+ output range image.
+
+        Returns:
+          A tuple of three tensors. Point tensors are [N, D], where D is 3 if
+          keep_polar_features is False (x, y, z) and 6 if keep_polar_features is True
+          (range, intensity, elongation, x, y, z).
+          1. Lidar point cloud.
+          2. Missing-points point cloud.
+          3. Range image mask of valid returns (range below the dummy distance).
+
+ """
+
+ # missing points are found directly from range image
+ val_clone = deepcopy(range_image.tensor.numpy()) # type: ignore
+ no_return = val_clone[..., 0] == -1 # where range is -1
+ val_clone[..., 0][no_return] = DUMMY_DISTANCE_VALUE
+ # re-assign the field
+ object.__setattr__(range_image, "values", val_clone.flatten())
+
+ # From range image, missing points do not have a pose.
+ # So we replace their pose with the vehicle pose.
+ # pixel pose & frame pose
+ pixel_pose_clone = deepcopy(pixel_pose.tensor.numpy()) # type: ignore
+ pixel_pose_mask = pixel_pose_clone[..., 0] == 0
+ tr_orig = frame_pose.world_from_vehicle.transform.reshape(4, 4) # type: ignore
+ rot = tr_orig[:3, :3]
+ x, y, z = tr_orig[:3, 3]
+ yaw, pitch, roll = transforms3d.euler.mat2euler(rot, "szyx")
+ # ` [roll, pitch, yaw, x, y, z]`
+ pixel_pose_clone[..., 0][pixel_pose_mask] = roll
+ pixel_pose_clone[..., 1][pixel_pose_mask] = pitch
+ pixel_pose_clone[..., 2][pixel_pose_mask] = yaw
+ pixel_pose_clone[..., 3][pixel_pose_mask] = x
+ pixel_pose_clone[..., 4][pixel_pose_mask] = y
+ pixel_pose_clone[..., 5][pixel_pose_mask] = z
+ # re-assign the field
+ object.__setattr__(pixel_pose, "values", pixel_pose_clone.flatten())
+
+ range_image_cartesian = convert_range_image_to_cartesian(
+ range_image=range_image,
+ calibration=calibration,
+ pixel_pose=pixel_pose,
+ frame_pose=frame_pose,
+ keep_polar_features=keep_polar_features,
+ )
+
+ range_image_tensor = range_image.tensor
+        range_image_mask = DUMMY_DISTANCE_VALUE / 2 > range_image_tensor[..., 0]  # type: ignore
+ points_tensor = tf.gather_nd(range_image_cartesian, tf.compat.v1.where(range_image_mask))
+ missing_points_tensor = tf.gather_nd(range_image_cartesian, tf.compat.v1.where(~range_image_mask))
+
+ return points_tensor, missing_points_tensor, range_image_mask
+
+ def is_within_box_3d(self, point, box, name=None):
+ """Checks whether a point is in a 3d box given a set of points and boxes.
+
+ Args:
+ point: [N, 3] tensor. Inner dims are: [x, y, z].
+ box: [M, 7] tensor. Inner dims are: [center_x, center_y, center_z, length,
+ width, height, heading].
+ name: tf name scope.
+
+ Returns:
+          point_in_box: [N, M] boolean tensor.
+          point_in_box_frame: box-frame coordinates of the points that fall inside a box.
+
+ """
+
+ with tf.compat.v1.name_scope(name, "IsWithinBox3D", [point, box]):
+ center = box[:, 0:3]
+ dim = box[:, 3:6]
+ heading = box[:, 6]
+ # [M, 3, 3]
+ rotation = transform_utils.get_yaw_rotation(heading)
+ # [M, 4, 4]
+ transform = transform_utils.get_transform(rotation, center)
+ # [M, 4, 4]
+ transform = tf.linalg.inv(transform)
+ # [M, 3, 3]
+ rotation = transform[:, 0:3, 0:3] # type: ignore
+ # [M, 3]
+ translation = transform[:, 0:3, 3] # type: ignore
+
+ # [N, M, 3]
+ point_in_box_frame = tf.einsum("nj,mij->nmi", point, rotation) + translation
+ # [N, M, 3]
+ point_in_box = tf.logical_and(
+ tf.logical_and(point_in_box_frame <= dim * 0.5, point_in_box_frame >= -dim * 0.5),
+ tf.reduce_all(tf.not_equal(dim, 0), axis=-1, keepdims=True),
+ )
+ # [N, M]
+ point_in_box = tf.cast(
+ tf.reduce_prod(input_tensor=tf.cast(point_in_box, dtype=tf.uint8), axis=-1), dtype=tf.bool
+ )
+
+ return point_in_box, point_in_box_frame[point_in_box]
+
+ def _load_camera_calibration(self):
+        '''Loads the camera calibration from the parquet file into a dictionary.'''
+ cam_calib_df = self.reader("camera_calibration").compute()
+ self.cameras_calibration = {}
+ for i, (_, r) in enumerate(cam_calib_df.iterrows()):
+ calib = v2.CameraCalibrationComponent.from_dict(r)
+ self.cameras_calibration[calib.key.camera_name] = calib
+
+ def process(
+ self,
+ objects_id_to_extract: List[int] = []
+ ) -> Tuple[List[np.ndarray], List[np.ndarray], List[np.ndarray], List[float], Dict[int,ActorsDict]]:
+
+ print("Lidar processing...")
+ objects_uuid_to_extract = [self.objects_id.id2uuid[object_id_to_extract] for object_id_to_extract in objects_id_to_extract]
+
+ self._load_camera_calibration()
+ lidar_calib = self.reader("lidar_calibration").compute()
+
+ lidar_df = self.reader("lidar").compute()
+ lidar_df = lidar_df[
+ (lidar_df["key.laser_name"] == _v2_lidar.LaserName.TOP.value) # Only lidar TOP is used
+ & (lidar_df["key.frame_timestamp_micros"].isin(self.select_ts.get_selected_ts()))
+ ]
+
+ lidar_pose_df = self.reader("lidar_pose").compute()
+
+ vehicle_pose_df = self.reader("vehicle_pose").compute()
+ vehicle_pose_df = vehicle_pose_df[
+ vehicle_pose_df["key.frame_timestamp_micros"].isin(self.select_ts.get_selected_ts())
+ ]
+
+ lidar_box_df = self.reader("lidar_box").compute()
+ lidar_box_df = lidar_box_df[lidar_box_df["key.frame_timestamp_micros"].isin(self.select_ts.get_selected_ts())]
+
+ pts_lidar_list = []
+ missing_pts_list = []
+ poses = []
+ times = []
+
+ # Neurad actor trajectories
+ actors:Dict[int,ActorsDict] = {}
+
+ for i, (_, r) in tqdm(enumerate(lidar_df.iterrows())):
+ LidarComp = v2.LiDARComponent.from_dict(r)
+ lidar_pose_df_ = lidar_pose_df[
+ (lidar_pose_df["key.frame_timestamp_micros"] == LidarComp.key.frame_timestamp_micros)
+ & (lidar_pose_df["key.laser_name"] == _v2_lidar.LaserName.TOP.value)
+ ]
+ LidarPoseComp = v2.LiDARPoseComponent.from_dict(lidar_pose_df_.iloc[0])
+ lidar_calib_ = lidar_calib[lidar_calib["key.laser_name"] == _v2_lidar.LaserName.TOP.value]
+ LidarCalibComp = v2.LiDARCalibrationComponent.from_dict(lidar_calib_.iloc[0])
+ vehicle_pose_df_ = vehicle_pose_df[
+ vehicle_pose_df["key.frame_timestamp_micros"] == LidarComp.key.frame_timestamp_micros
+ ]
+ VehiclePoseCom = v2.VehiclePoseComponent.from_dict(vehicle_pose_df_.iloc[0])
+
+ lidar_box_df_ = lidar_box_df[
+ (lidar_box_df["key.frame_timestamp_micros"] == LidarComp.key.frame_timestamp_micros)
+ & (lidar_box_df["key.laser_object_id"].isin(self.objects_id.dynamic_uuid))
+ ]
+
+ pts_lidar, missing_pts, _ = self.convert_range_image_to_point_cloud(
+ LidarComp.range_image_return1,
+ LidarCalibComp,
+ LidarPoseComp.range_image_return1,
+ VehiclePoseCom,
+ keep_polar_features=True,
+ )
+ missing_pts = missing_pts.numpy()
+
+ # compute timestamp for each lidar frame
+            time = LidarComp.key.frame_timestamp_micros / 1e6 + TIME_OFFSET  # convert to seconds and offset to mid-sweep
+ times.append(time)
+
+ timestamps = get_mock_timestamps(pts_lidar[:, 3:6]) # (N, 6)->(..., x,y,z)
+ timestamps = np.expand_dims(timestamps, axis=1)
+
+ timestamps_miss = get_mock_timestamps(missing_pts[:, 3:6]) # (N, 6)->(..., x,y,z)
+ timestamps_miss = np.expand_dims(timestamps_miss, axis=1)
+
+ pts_lidar = pts_lidar.numpy()
+ intensity = pts_lidar[:, 1:2] # (range, intensity, elongation, x, y, z) => (N, 1)
+ intensity = self._normalize(intensity) # => [0.0, 1.0]
+
+ pts_lidar = np.hstack((pts_lidar[:, 3:6], np.ones((pts_lidar.shape[0], 1))))
+
+ pts_lidar_in_vehicle = pts_lidar
+ l2v = LidarCalibComp.extrinsic.transform.reshape(4, 4) # type: ignore
+ pts_lidar_sensor = (np.linalg.inv(l2v) @ pts_lidar_in_vehicle.T).T[:, :3]
+ v2w = VehiclePoseCom.world_from_vehicle.transform.reshape(4, 4) # type: ignore
+ l2w = v2w @ l2v
+
+ pts_lidar_world = (v2w @ pts_lidar_in_vehicle.T).T[:, :3]
+
+ lidar_box_df_selected_boxes = lidar_box_df_[lidar_box_df_['key.laser_object_id'].isin(objects_uuid_to_extract)]
+ for _, lidar_box in lidar_box_df_selected_boxes.iterrows():
+
+ v1_box = tf.transpose(
+ tf.constant(
+ [
+ lidar_box["[LiDARBoxComponent].box.center.x"],
+ lidar_box["[LiDARBoxComponent].box.center.y"],
+ lidar_box["[LiDARBoxComponent].box.center.z"],
+ lidar_box["[LiDARBoxComponent].box.size.x"],
+ lidar_box["[LiDARBoxComponent].box.size.y"],
+ lidar_box["[LiDARBoxComponent].box.size.z"],
+ lidar_box["[LiDARBoxComponent].box.heading"],
+ ],
+ dtype=tf.float32,
+ )
+ )
+ v1_box = tf.reshape(v1_box,(1,-1))
+ v1_box_world = box_utils.transform_box(
+ v1_box, VehiclePoseCom.world_from_vehicle.transform.reshape((4, 4)).astype("float32"), tf.eye(4) # type: ignore
+ )
+ mask_object = box_utils.is_within_box_3d(pts_lidar_world[:, :3], v1_box_world).numpy() # type: ignore
+ mask_object = np.any(mask_object, axis=1)
+
+                # actor timestamp: mean timestamp of the lidar points inside the bbox; frame timestamp if the box is empty
+                in_box_ts = timestamps[mask_object]
+                object_timestamp = time + in_box_ts.mean() if in_box_ts.size > 0 else time
+
+ # actor pose
+ # actor ids
+ uuids = lidar_box["key.laser_object_id"]
+ actor_id = self.objects_id.uuid2id[uuids]
+
+ # actor type
+ type_ = lidar_box["[LiDARBoxComponent].type"]
+ type_names = _v2_box.BoxType(type_).name
+
+ tr_object = np.eye(4)
+ tr_object[:3, :3] = transforms3d.euler.euler2mat(0, 0, v1_box_world.numpy().ravel()[6]) # type: ignore
+ tr_object[:3, 3] = v1_box_world.numpy().ravel()[:3] # type: ignore
+
+ if actor_id in actors:
+ actors[actor_id]["poses"].append(tr_object)
+ actors[actor_id]["timestamps"].append(object_timestamp)
+ else:
+ actors[actor_id] = {"poses": [tr_object], "timestamps": [object_timestamp], "label": type_names}
+
+ pts_lidar = np.hstack((pts_lidar_sensor, intensity, timestamps)) # => (N, 5) == (x, y, z, int, t)
+ pts_lidar_list.append(pts_lidar)
+
+ missing_intensity = np.zeros_like(missing_pts[:, 1:2]) # 0 for missing point intensity
+ missing_pts_list.append(np.hstack((missing_pts[:, 3:6], missing_intensity, timestamps_miss)))
+
+ poses.append(l2w)
+
+ return poses, pts_lidar_list, missing_pts_list, times, actors
+
+ def _normalize(self, points: np.ndarray) -> np.ndarray:
+ max_ = points.max()
+ min_ = points.min()
+
+ points = (points - min_) / (max_ - min_)
+ return points
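
The classes above are meant to be composed: `ParquetReader` loads the per-component parquet files, `SelectedTimestamp` restricts them to a frame range, `ObjectsID` classifies objects as static or dynamic, and `ExportImages`/`ExportLidar` produce the camera and lidar data consumed by the dataparser. A minimal usage sketch follows, assuming the default WoD v2 training layout; the context name and output folder are placeholders. It mirrors how the `WoD` dataparser wires these helpers together in `_generate_dataparser_outputs` (see the top of this diff).

```python
from nerfstudio.data.dataparsers.wod_utils import (
    ExportImages,
    ExportLidar,
    ObjectsID,
    ParquetReader,
    SelectedTimestamp,
)

# Placeholders: a WoD v2 context (segment) name and an output folder.
reader = ParquetReader("<context_name>", dataset_dir="/data/dataset/wod/training")
select_ts = SelectedTimestamp(reader, start_frame=0, end_frame=50)
objects_id = ObjectsID(reader, select_ts)

# Save the camera images and collect per-image poses/intrinsics plus rolling-shutter info.
image_data, (rolling_shutter, rs_direction) = ExportImages(reader, select_ts, "/tmp/wod_export").process()
print(len(image_data["frames"]), "images exported")

# Extract lidar sweeps, per-sweep poses, and trajectories for all dynamic actors.
poses, points, missing_points, times, actors = ExportLidar(
    reader, select_ts, objects_id, "/tmp/wod_export"
).process(objects_id_to_extract=objects_id.dynamic_id)
```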
diff --git a/nerfstudio/data/dataparsers/zod_dataparser.py b/nerfstudio/data/dataparsers/zod_dataparser.py
index 3487072d..2ec4273b 100644
--- a/nerfstudio/data/dataparsers/zod_dataparser.py
+++ b/nerfstudio/data/dataparsers/zod_dataparser.py
@@ -61,9 +61,6 @@
)
} # these are channel indices that correspond to a low elevation angle, as per the VLS128 manual.
-HOOD_HEIGHT = 740
-
-
HORIZONTAL_BEAM_DIVERGENCE = 3.0e-3 # radians, or meters at a distance of 1m
VERTICAL_BEAM_DIVERGENCE = 1.5e-3 # radians, or meters at a distance of 1m
HOOD_HEIGHT = 750 # px
@@ -194,7 +191,7 @@ class Zod(ADDataParser):
@property
def actor_transform(self) -> torch.Tensor:
"""ZOD uses x-forward, so we need to rotate to x-right."""
- return torch.from_numpy(WLH_TO_LWH)
+ return torch.from_numpy(WLH_TO_LWH)[:3, :]
def _get_lane_shift_sign(self, sequence: str) -> Literal[-1, 1]:
return LANE_SHIFT_SIGN.get(sequence, 1)
diff --git a/nerfstudio/engine/trainer.py b/nerfstudio/engine/trainer.py
index 48f1bb0b..565638e3 100644
--- a/nerfstudio/engine/trainer.py
+++ b/nerfstudio/engine/trainer.py
@@ -60,8 +60,8 @@ class MetricTrackerConfig(InstantiateConfig):
_target: Type = field(default_factory=lambda: MetricTracker)
"""target class to instantiate"""
- metric: Optional[str] = "psnr"
- """The metric to track for early stopping and checkpoint saving."""
+ metric: Optional[str] = None
+ """The metric to track for early stopping and checkpoint saving. None means no metric tracking."""
higher_is_better: bool = True
"""Whether a higher value of the metric is better."""
margin: float = 0.0
@@ -76,6 +76,8 @@ def __init__(self, config: MetricTrackerConfig) -> None:
self.best, self.latest = None, None
def did_degrade(self, fallback: bool = False) -> bool:
+ if self.config.metric is None:
+ return False # no metric to track
if (self.latest is None) or (self.best is None):
return fallback # we can't tell
# apply margin to the best value (to be robust to noise in the metric)
@@ -87,10 +89,10 @@ def reset_latest(self) -> None:
def update(self, metrics: Dict[str, float]) -> None:
self.latest = metrics.get(self.config.metric, None) if self.config.metric else None
- if isinstance(self.latest, torch.Tensor):
- self.latest = self.latest.item()
if self.latest is None:
return
+ if isinstance(self.latest, torch.Tensor):
+ self.latest = self.latest.item()
if self.best is None:
self.best = self.latest
elif self._is_new_better(self.best, self.latest):
@@ -143,7 +145,6 @@ class TrainerConfig(ExperimentConfig):
gradient_accumulation_steps: Dict[str, int] = field(default_factory=lambda: {})
"""Number of steps to accumulate gradients over. Contains a mapping of {param_group:num}"""
-
class Trainer:
"""Trainer class
diff --git a/nerfstudio/exporter/exporter_utils.py b/nerfstudio/exporter/exporter_utils.py
index 40a12212..44b3cca3 100644
--- a/nerfstudio/exporter/exporter_utils.py
+++ b/nerfstudio/exporter/exporter_utils.py
@@ -1,3 +1,4 @@
+# Copyright 2024 the authors of NeuRAD and contributors.
# Copyright 2022 the Regents of the University of California, Nerfstudio Team and contributors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -19,12 +20,12 @@
from __future__ import annotations
+import platform
import sys
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
import numpy as np
-import pymeshlab
import torch
from jaxtyping import Float
from rich.progress import BarColumn, Progress, TaskProgressColumn, TextColumn, TimeRemainingColumn
@@ -42,6 +43,12 @@
# need it.
import open3d as o3d
+try:
+ import pymeshlab
+except ImportError:
+ if platform.machine() not in ["aarch64", "arm64"]:
+ raise # pymeshlab is not available on ARM, so import error is expected
+
@dataclass
class Mesh:
diff --git a/nerfstudio/exporter/tsdf_utils.py b/nerfstudio/exporter/tsdf_utils.py
index 30159131..48107e97 100644
--- a/nerfstudio/exporter/tsdf_utils.py
+++ b/nerfstudio/exporter/tsdf_utils.py
@@ -1,3 +1,4 @@
+# Copyright 2024 the authors of NeuRAD and contributors.
# Copyright 2022 the Regents of the University of California, Nerfstudio Team and contributors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -19,12 +20,12 @@
from __future__ import annotations
+import platform
from dataclasses import dataclass, field
from pathlib import Path
from typing import List, Optional, Tuple, Union
import numpy as np
-import pymeshlab
import torch
import torch.nn.functional as F
from jaxtyping import Bool, Float
@@ -35,6 +36,12 @@
from nerfstudio.pipelines.base_pipeline import Pipeline
from nerfstudio.utils.rich_utils import CONSOLE
+try:
+ import pymeshlab
+except ImportError:
+ if platform.machine() not in ["aarch64", "arm64"]:
+ raise # pymeshlab is not available on ARM, so import error is expected
+
TORCH_DEVICE = Union[torch.device, str]
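
Both exporter modules now use the same guarded import, so ARM users (where pymeshlab wheels are unavailable) can still import them, while other platforms keep failing loudly on a missing dependency. A generic sketch of the pattern, with a hypothetical call site that is not part of this patch:

```python
import platform

try:
    import pymeshlab  # optional dependency, only needed for mesh export
except ImportError:
    pymeshlab = None
    if platform.machine() not in ["aarch64", "arm64"]:
        raise  # on non-ARM platforms a missing pymeshlab is a real installation problem


def load_mesh(path: str):  # hypothetical call site, for illustration only
    if pymeshlab is None:
        raise RuntimeError("pymeshlab is unavailable on this platform; mesh processing is disabled")
    ms = pymeshlab.MeshSet()
    ms.load_new_mesh(path)
    return ms
```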
diff --git a/nerfstudio/field_components/neurad_encoding.py b/nerfstudio/field_components/neurad_encoding.py
index 6e7b6d41..672a9331 100644
--- a/nerfstudio/field_components/neurad_encoding.py
+++ b/nerfstudio/field_components/neurad_encoding.py
@@ -26,6 +26,7 @@
from nerfstudio.model_components.dynamic_actors import DynamicActors
from nerfstudio.utils.math import GaussiansStd
from nerfstudio.utils.poses import inverse as pose_inverse
+from nerfstudio.utils.rich_utils import CONSOLE
EPS = 1.0e-7
@@ -106,8 +107,9 @@ def __init__(
max_res=config.static.max_res,
log2_hashmap_size=config.static.log2_hashmap_size,
)
-
- if config.actor.use_4d_hashgrid:
+ if config.actor.use_4d_hashgrid and implementation == "torch":
+            CONSOLE.print("4D hashgrid is not supported with the torch implementation, falling back to multiple grids.")
+ if config.actor.use_4d_hashgrid and implementation == "tcnn":
self._get_actor_features = self._get_actor_features_fast
n_grids, n_input_dims = 1, 4
else:
diff --git a/nerfstudio/model_components/dynamic_actors.py b/nerfstudio/model_components/dynamic_actors.py
index 3fbb4352..76520b02 100644
--- a/nerfstudio/model_components/dynamic_actors.py
+++ b/nerfstudio/model_components/dynamic_actors.py
@@ -52,6 +52,7 @@ def __init__(self, config: DynamicActorsConfig, trajectories: List[dict]):
"lateral": 0.0,
"longitudinal": 0.0,
"rotation": 0.0,
+ "index": -1.0,
}
self.actor_lateral_shift = ViewerSlider(
name="Actor lateral shift (m)",
@@ -80,6 +81,15 @@ def __init__(self, config: DynamicActorsConfig, trajectories: List[dict]):
cb_hook=lambda obj: self.actor_editing.update({"rotation": obj.value}),
)
+ self.actor_index_to_edit = ViewerSlider(
+ name="Actor index to edit",
+ default_value=self.actor_editing["index"],
+ min_value=-1.0,
+ max_value=len(trajectories),
+ step=1.0,
+ cb_hook=lambda obj: self.actor_editing.update({"index": obj.value}),
+ )
+
def actor_bounds(self):
return self.actor_sizes / 2 + self.actor_padding
@@ -145,8 +155,13 @@ def get_world2boxes(self, query_times: Tensor, flatten: bool = True):
def edit_boxes2world(self, boxes2world: Tensor):
with torch.no_grad():
+ if self.actor_editing["index"] == -1.0:
+ indices = torch.arange(self.n_actors, device=boxes2world.device)
+ else:
+ indices = torch.tensor([self.actor_editing["index"]], device=boxes2world.device, dtype=torch.int)
+
if abs(self.actor_editing["longitudinal"]) > 0.0 or abs(self.actor_editing["lateral"]) > 0.0:
- boxes2world[..., 3] = boxes2world @ torch.tensor(
+ boxes2world[:, indices, :, 3] = boxes2world[:, indices] @ torch.tensor(
[self.actor_editing["lateral"], self.actor_editing["longitudinal"], 0.0, 1.0],
device=boxes2world.device,
)
@@ -160,7 +175,7 @@ def edit_boxes2world(self, boxes2world: Tensor):
],
device=boxes2world.device,
)
- boxes2world[..., :3, :3] = rotation_yaw @ boxes2world[..., :3, :3]
+ boxes2world[:, indices, :3, :3] = rotation_yaw @ boxes2world[:, indices, :3, :3]
return boxes2world
def get_boxes2world(self, query_times: Tensor, flatten: bool = True):
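
With the new slider, an `index` of -1 keeps the previous behaviour (edit every actor), while any other value restricts the edit to that single actor. Below is a self-contained sketch of the indexed translation, under the assumption that `boxes2world` holds (num_times, num_actors, 4, 4) actor-to-world poses.

```python
import torch


def shift_actors(boxes2world: torch.Tensor, lateral: float, longitudinal: float, index: float) -> torch.Tensor:
    # -1 means "edit all actors", matching the slider default above
    if index == -1.0:
        indices = torch.arange(boxes2world.shape[1])
    else:
        indices = torch.tensor([int(index)])
    # translate the selected actors along their own (box-frame) axes, as in edit_boxes2world
    boxes2world[:, indices, :, 3] = boxes2world[:, indices] @ torch.tensor(
        [lateral, longitudinal, 0.0, 1.0], dtype=boxes2world.dtype
    )
    return boxes2world


poses = torch.eye(4).repeat(2, 3, 1, 1)  # 2 timestamps, 3 actors, identity poses
poses = shift_actors(poses, lateral=0.5, longitudinal=0.0, index=1.0)  # only actor 1 is shifted
```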
diff --git a/nerfstudio/models/neurad.py b/nerfstudio/models/neurad.py
index acd9b983..b7854471 100644
--- a/nerfstudio/models/neurad.py
+++ b/nerfstudio/models/neurad.py
@@ -54,7 +54,9 @@
from nerfstudio.model_components.renderers import AccumulationRenderer, DepthRenderer, FeatureRenderer, NormalsRenderer
from nerfstudio.models.ad_model import ADModel, ADModelConfig
from nerfstudio.utils import colormaps
+from nerfstudio.utils.external import TCNN_EXISTS
from nerfstudio.utils.math import chamfer_distance
+from nerfstudio.utils.printing import print_tcnn_speed_warning
from nerfstudio.viewer.server.viewer_elements import ViewerSlider
EPS = 1e-7
@@ -172,6 +174,9 @@ class NeuRADModel(ADModel):
def populate_modules(self):
"""Set the fields and modules."""
super().populate_modules()
+ if self.config.implementation == "tcnn" and not TCNN_EXISTS:
+ print_tcnn_speed_warning("NeuRAD")
+ self.config.implementation = "torch"
self.field = self.config.field.setup(
actors=self.dynamic_actors,
static_scale=self.scene_box.aabb.max(),
diff --git a/nerfstudio/scripts/closed_loop/main.py b/nerfstudio/scripts/closed_loop/main.py
index b4f3c590..00006266 100644
--- a/nerfstudio/scripts/closed_loop/main.py
+++ b/nerfstudio/scripts/closed_loop/main.py
@@ -13,29 +13,31 @@
# limitations under the License.
from __future__ import annotations
+import base64
import io
+from typing import Literal, Union
import numpy as np
import torch
import tyro
import uvicorn
-from fastapi import FastAPI, Response
+from fastapi import FastAPI, HTTPException, Response
from PIL import Image
from torch import Tensor
-from nerfstudio.scripts.closed_loop.models import ActorTrajectory, RenderInput
+from nerfstudio.scripts.closed_loop.models import ActorTrajectory, ImageFormat, RenderInput
from nerfstudio.scripts.closed_loop.server import ClosedLoopServer
app = FastAPI()
@app.get("/alive")
-async def alive() -> bool:
+def alive() -> bool:
return True
@app.get("/get_actors")
-async def get_actors() -> list[ActorTrajectory]:
+def get_actors() -> list[ActorTrajectory]:
"""Get actor trajectories."""
actor_trajectories = cl_server.get_actor_trajectories()
actor_trajectories = [ActorTrajectory.from_torch(act_traj) for act_traj in actor_trajectories]
@@ -43,31 +45,54 @@ async def get_actors() -> list[ActorTrajectory]:
@app.post("/update_actors")
-async def update_actors(actor_trajectories: list[ActorTrajectory]) -> None:
+def update_actors(actor_trajectories: list[ActorTrajectory]) -> None:
"""Update actor trajectories (keys correspond to actor uuids)."""
torch_actor_trajectories = [act_traj.to_torch() for act_traj in actor_trajectories]
cl_server.update_actor_trajectories(torch_actor_trajectories)
-@app.post("/render_image", response_class=Response, responses={200: {"content": {"image/png": {}}}})
-async def render_image(data: RenderInput) -> Response:
+@app.post(
+ "/render_image",
+ response_class=Response,
+ responses={200: {"content": {"text/plain": {}, "image/png": {}, "image/jpeg": {}}}},
+)
+def get_image(data: RenderInput) -> Response:
torch_pose = torch.tensor(data.pose, dtype=torch.float32)
render = cl_server.get_image(torch_pose, data.timestamp, data.camera_name)
- return Response(content=_torch_to_png(render), media_type="image/png")
+ if data.image_format == ImageFormat.raw:
+ return Response(content=_torch_to_bytestr(render), media_type="text/plain")
+ elif data.image_format == ImageFormat.png:
+ return Response(content=_torch_to_img(render, "png"), media_type="image/png")
+ elif data.image_format in (ImageFormat.jpg, ImageFormat.jpeg):
+        return Response(content=_torch_to_img(render, "jpeg"), media_type="image/jpeg")
+ else:
+ raise HTTPException(
+ status_code=400, detail=f"Invalid image format: {data.image_format}, must be 'raw', 'png', 'jpg', or 'jpeg'"
+ )
@app.get("/start_time")
-async def get_start_time() -> int:
+def get_start_time() -> int:
return int(cl_server.min_time * 1e6)
-def _torch_to_png(render: Tensor) -> bytes:
- """Convert a torch tensor to a PNG image."""
+def _torch_to_bytestr(render: Tensor) -> bytes:
+ """Convert a torch tensor to a base64 encoded bytestring."""
+ buff = io.BytesIO()
+ img = (render * 255).to(torch.uint8).cpu()
+ torch.save(img, buff)
+ return base64.b64encode(buff.getvalue())
+
+
+def _torch_to_img(render: Tensor, format: Union[Literal["jpeg"], Literal["png"]]) -> bytes:
+ """Convert a torch tensor to a PNG or JPG image."""
+ if format not in ("jpeg", "png"):
+ raise ValueError(f"Invalid format: {format}")
+
img = Image.fromarray((render * 255).cpu().numpy().astype(np.uint8))
- image_stream = io.BytesIO()
- img.save(image_stream, format="PNG")
- image_bytes = image_stream.getvalue()
- return image_bytes
+ buff = io.BytesIO()
+ img.save(buff, format=format.upper())
+ return buff.getvalue()
if __name__ == "__main__":
diff --git a/nerfstudio/scripts/closed_loop/models.py b/nerfstudio/scripts/closed_loop/models.py
index 50297a0b..16490c10 100644
--- a/nerfstudio/scripts/closed_loop/models.py
+++ b/nerfstudio/scripts/closed_loop/models.py
@@ -13,6 +13,7 @@
# limitations under the License.
from __future__ import annotations
+from enum import Enum
from typing import List, TypedDict
import torch
@@ -20,6 +21,13 @@
from torch import Tensor
+class ImageFormat(str, Enum):
+    raw = "raw"  # returns a raw tensor; works well when client and server run on the same machine
+ png = "png" # more suitable if sent over network
+ jpg = "jpg" # more suitable if sent over network, pseudo for jpeg
+ jpeg = "jpeg" # more suitable if sent over network
+
+
class TrajectoryDict(TypedDict):
uuid: str
poses: Tensor
@@ -66,3 +74,5 @@ class RenderInput(BaseModel):
"""Timestamp in microseconds"""
camera_name: str
"""Camera name"""
+ image_format: ImageFormat = ImageFormat.raw
+ """What format to return the image in. Defaults to raw tensor."""
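
A possible client for the updated `/render_image` endpoint is sketched below. The base URL, camera name, and pose shape are assumptions; only the endpoint paths, the `RenderInput` fields, and the `raw` encoding (a base64 string of a torch-saved uint8 tensor) come from the code above.

```python
import base64
import io

import requests
import torch

BASE_URL = "http://localhost:8000"  # placeholder; depends on how the ClosedLoopServer is launched

payload = {
    "pose": torch.eye(4).tolist(),  # camera-to-world pose (shape assumed here)
    "timestamp": requests.get(f"{BASE_URL}/start_time").json(),  # microseconds
    "camera_name": "FRONT",  # placeholder camera name
    "image_format": "raw",  # or "png" / "jpg" / "jpeg"
}
resp = requests.post(f"{BASE_URL}/render_image", json=payload)

# "raw" responses are a base64-encoded, torch-saved uint8 tensor of shape (H, W, 3)
image = torch.load(io.BytesIO(base64.b64decode(resp.content)))
```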
diff --git a/nerfstudio/scripts/closed_loop/server.py b/nerfstudio/scripts/closed_loop/server.py
index 14bf6f16..507a16b3 100644
--- a/nerfstudio/scripts/closed_loop/server.py
+++ b/nerfstudio/scripts/closed_loop/server.py
@@ -113,7 +113,7 @@ def get_image(self, pose: Tensor, timestamp: int, camera_name: str) -> Tensor:
)
correction_matrices = self.model.camera_optimizer(
torch.tensor([nearest_train_cam_idx], device=self.model.device)
- )
+ ).to(self.model.device)
ray_bundle.origins = ray_bundle.origins + correction_matrices[:, :3, 3]
ray_bundle.directions = torch.einsum("ij,...j->...i", correction_matrices[0, :3, :3], ray_bundle.directions)
@@ -125,12 +125,11 @@ def update_actor_trajectories(self, new_trajectories: list[TrajectoryDict]):
device = self.model.device
modified_trajectories = []
actor_ids = []
- actor_transform = self.actor_transform.to(device)
for traj in new_trajectories:
timestamps_in_seconds = traj["timestamps"].to(torch.float64) / 1e6
modified_trajectories.append(
{
- "poses": self.world_transform @ traj["poses"].to(device) @ actor_transform,
+ "poses": self.world_transform @ traj["poses"].to(device) @ self.actor_transform,
"timestamps": (timestamps_in_seconds - self.min_time).to(device),
"dims": traj["dims"].to(device),
"symmetric": True, # TODO
@@ -159,7 +158,7 @@ def get_actor_trajectories(self) -> list[TrajectoryDict]:
for actor_idx in range(poses.shape[1]):
trajs.append(
{
- "poses": (world_inverse @ poses[:, actor_idx]).cpu() @ actor_transform_inv,
+ "poses": (world_inverse @ poses[:, actor_idx] @ actor_transform_inv).cpu(),
"timestamps": timestamps.cpu().long(),
"dims": self.model.dynamic_actors.actor_sizes[actor_idx].cpu(),
"uuid": self.actor_uuids[actor_idx],
diff --git a/nerfstudio/scripts/completions/install.py b/nerfstudio/scripts/completions/install.py
index 67e0189d..14c746b9 100644
--- a/nerfstudio/scripts/completions/install.py
+++ b/nerfstudio/scripts/completions/install.py
@@ -1,3 +1,4 @@
+# Copyright 2024 the authors of NeuRAD and contributors.
# Copyright 2022 the Regents of the University of California, Nerfstudio Team and contributors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -45,7 +46,7 @@
def _get_all_entry_points() -> List[str]:
# TODO: we should filter out entrypoints that are not tyro CLIs.
- entry_points = importlib_metadata.distribution("nerfstudio").entry_points
+ entry_points = importlib_metadata.distribution("neurad-studio").entry_points
return [x.name for x in entry_points]
diff --git a/nerfstudio/scripts/datasets/process_project_aria.py b/nerfstudio/scripts/datasets/process_project_aria.py
deleted file mode 100644
index fe487483..00000000
--- a/nerfstudio/scripts/datasets/process_project_aria.py
+++ /dev/null
@@ -1,253 +0,0 @@
-# Copyright 2022 the Regents of the University of California, Nerfstudio Team and contributors. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import json
-import sys
-import threading
-from dataclasses import dataclass
-from pathlib import Path
-from typing import Any, Dict, List, cast
-
-import numpy as np
-import open3d as o3d
-import tyro
-from PIL import Image
-
-try:
- from projectaria_tools.core import mps
- from projectaria_tools.core.data_provider import VrsDataProvider, create_vrs_data_provider
- from projectaria_tools.core.mps.utils import filter_points_from_confidence
- from projectaria_tools.core.sophus import SE3
-except ImportError:
- print("projectaria_tools import failed, please install with pip3 install projectaria-tools'[all]'")
- sys.exit(1)
-
-ARIA_CAMERA_MODEL = "FISHEYE624"
-
-# The Aria coordinate system is different than the Blender/NerfStudio coordinate system.
-# Blender / Nerfstudio: +Z = back, +Y = up, +X = right
-# Surreal: +Z = forward, +Y = down, +X = right
-T_ARIA_NERFSTUDIO = SE3.from_matrix(
- np.array(
- [
- [1.0, 0.0, 0.0, 0.0],
- [0.0, -1.0, 0.0, 0.0],
- [0.0, 0.0, -1.0, 0.0],
- [0.0, 0.0, 0.0, 1.0],
- ]
- )
-)
-
-
-@dataclass
-class AriaCameraCalibration:
- fx: float
- fy: float
- cx: float
- cy: float
- distortion_params: np.ndarray
- width: int
- height: int
- t_device_camera: SE3
-
-
-@dataclass
-class AriaImageFrame:
- camera: AriaCameraCalibration
- file_path: str
- t_world_camera: SE3
- timestamp_ns: float
-
-
-@dataclass
-class TimedPoses:
- timestamps_ns: np.ndarray
- t_world_devices: List[SE3]
-
-
-def get_camera_calibs(provider: VrsDataProvider) -> Dict[str, AriaCameraCalibration]:
- """Retrieve the per-camera factory calibration from within the VRS."""
-
- factory_calib = {}
- name = "camera-rgb"
- device_calib = provider.get_device_calibration()
- assert device_calib is not None, "Could not find device calibration"
- sensor_calib = device_calib.get_camera_calib(name)
- assert sensor_calib is not None, f"Could not find sensor calibration for {name}"
-
- width = sensor_calib.get_image_size()[0].item()
- height = sensor_calib.get_image_size()[1].item()
- intrinsics = sensor_calib.projection_params()
-
- factory_calib[name] = AriaCameraCalibration(
- fx=intrinsics[0],
- fy=intrinsics[0],
- cx=intrinsics[1],
- cy=intrinsics[2],
- distortion_params=intrinsics[3:15],
- width=width,
- height=height,
- t_device_camera=sensor_calib.get_transform_device_camera(),
- )
-
- return factory_calib
-
-
-def read_trajectory_csv_to_dict(file_iterable_csv: str) -> TimedPoses:
- closed_loop_traj = mps.read_closed_loop_trajectory(file_iterable_csv) # type: ignore
-
- timestamps_secs, poses = zip(
- *[(it.tracking_timestamp.total_seconds(), it.transform_world_device) for it in closed_loop_traj]
- )
-
- SEC_TO_NANOSEC = 1e9
- return TimedPoses(
- timestamps_ns=(np.array(timestamps_secs) * SEC_TO_NANOSEC).astype(int),
- t_world_devices=poses,
- )
-
-
-def to_aria_image_frame(
- provider: VrsDataProvider,
- index: int,
- name_to_camera: Dict[str, AriaCameraCalibration],
- t_world_devices: TimedPoses,
- output_dir: Path,
-) -> AriaImageFrame:
- name = "camera-rgb"
-
- camera_calibration = name_to_camera[name]
- stream_id = provider.get_stream_id_from_label(name)
- assert stream_id is not None, f"Could not find stream {name}"
-
- # Get the image corresponding to this index
- image_data = provider.get_image_data_by_index(stream_id, index)
- img = Image.fromarray(image_data[0].to_numpy_array())
- capture_time_ns = image_data[1].capture_timestamp_ns
-
- file_path = f"{output_dir}/{name}_{capture_time_ns}.jpg"
- threading.Thread(target=lambda: img.save(file_path)).start()
-
- # Find the nearest neighbor pose with the closest timestamp to the capture time.
- nearest_pose_idx = np.searchsorted(t_world_devices.timestamps_ns, capture_time_ns)
- nearest_pose_idx = np.minimum(nearest_pose_idx, len(t_world_devices.timestamps_ns) - 1)
- assert nearest_pose_idx != -1, f"Could not find pose for {capture_time_ns}"
- t_world_device = t_world_devices.t_world_devices[nearest_pose_idx]
-
- # Compute the world to camera transform.
- t_world_camera = t_world_device @ camera_calibration.t_device_camera @ T_ARIA_NERFSTUDIO
-
- return AriaImageFrame(
- camera=camera_calibration,
- file_path=file_path,
- t_world_camera=t_world_camera,
- timestamp_ns=capture_time_ns,
- )
-
-
-def to_nerfstudio_frame(frame: AriaImageFrame) -> Dict:
- return {
- "fl_x": frame.camera.fx,
- "fl_y": frame.camera.fy,
- "cx": frame.camera.cx,
- "cy": frame.camera.cy,
- "distortion_params": frame.camera.distortion_params.tolist(),
- "w": frame.camera.width,
- "h": frame.camera.height,
- "file_path": frame.file_path,
- "transform_matrix": frame.t_world_camera.to_matrix().tolist(),
- "timestamp": frame.timestamp_ns,
- }
-
-
-@dataclass
-class ProcessProjectAria:
- """Processes Project Aria data i.e. a VRS of the raw recording streams and the MPS attachments
- that provide poses, calibration, and 3d points. More information on MPS data can be found at:
- https://facebookresearch.github.io/projectaria_tools/docs/ARK/mps.
- """
-
- vrs_file: Path
- """Path to the VRS file."""
- mps_data_dir: Path
- """Path to Project Aria Machine Perception Services (MPS) attachments."""
- output_dir: Path
- """Path to the output directory."""
-
- def main(self) -> None:
- """Generate a nerfstudio dataset from ProjectAria data (VRS) and MPS attachments."""
- # Create output directory if it doesn't exist.
- self.output_dir = self.output_dir.absolute()
- self.output_dir.mkdir(parents=True, exist_ok=True)
-
- provider = create_vrs_data_provider(str(self.vrs_file.absolute()))
- assert provider is not None, "Cannot open file"
-
- name_to_camera = get_camera_calibs(provider)
-
- print("Getting poses from closed loop trajectory CSV...")
- trajectory_csv = self.mps_data_dir / "closed_loop_trajectory.csv"
- t_world_devices = read_trajectory_csv_to_dict(str(trajectory_csv.absolute()))
-
- name = "camera-rgb"
- stream_id = provider.get_stream_id_from_label(name)
-
- # create an AriaImageFrame for each image in the VRS.
- print("Creating Aria frames...")
- aria_frames = [
- to_aria_image_frame(provider, index, name_to_camera, t_world_devices, self.output_dir)
- for index in range(0, provider.get_num_data(stream_id))
- ]
-
- # create the NerfStudio frames from the AriaImageFrames.
- print("Creating NerfStudio frames...")
- CANONICAL_RGB_VALID_RADIUS = 707.5
- CANONICAL_RGB_WIDTH = 1408
- rgb_valid_radius = CANONICAL_RGB_VALID_RADIUS * (aria_frames[0].camera.width / CANONICAL_RGB_WIDTH)
- nerfstudio_frames = {
- "camera_model": ARIA_CAMERA_MODEL,
- "frames": [to_nerfstudio_frame(frame) for frame in aria_frames],
- "fisheye_crop_radius": rgb_valid_radius,
- }
-
- # save global point cloud, which is useful for Gaussian Splatting.
- points_path = self.mps_data_dir / "global_points.csv.gz"
- if not points_path.exists():
- # MPS point cloud output was renamed in Aria's December 4th, 2023 update.
- # https://facebookresearch.github.io/projectaria_tools/docs/ARK/sw_release_notes#project-aria-updates-aria-mobile-app-v140-and-changes-to-mps
- points_path = self.mps_data_dir / "semidense_points.csv.gz"
-
- if points_path.exists():
- print("Found global points, saving to PLY...")
- points_data = mps.read_global_point_cloud(str(points_path)) # type: ignore
- points_data = filter_points_from_confidence(points_data)
- pcd = o3d.geometry.PointCloud()
- pcd.points = o3d.utility.Vector3dVector(np.array([cast(Any, it).position_world for it in points_data]))
- ply_file_path = self.output_dir / "global_points.ply"
- o3d.io.write_point_cloud(str(ply_file_path), pcd)
-
- nerfstudio_frames["ply_file_path"] = "global_points.ply"
- else:
- print("No global points found!")
-
- # write the json out to disk as transforms.json
- print("Writing transforms.json")
- transform_file = self.output_dir / "transforms.json"
- with open(transform_file, "w", encoding="UTF-8"):
- transform_file.write_text(json.dumps(nerfstudio_frames))
-
-
-if __name__ == "__main__":
- tyro.extras.set_accent_color("bright_yellow")
- tyro.cli(ProcessProjectAria).main()
diff --git a/nerfstudio/scripts/exporter.py b/nerfstudio/scripts/exporter.py
index 467ad88f..4e3337f7 100644
--- a/nerfstudio/scripts/exporter.py
+++ b/nerfstudio/scripts/exporter.py
@@ -1,3 +1,4 @@
+# Copyright 2024 the authors of NeuRAD and contributors.
# Copyright 2022 the Regents of the University of California, Nerfstudio Team and contributors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -16,7 +17,6 @@
Script for exporting NeRF into other formats.
"""
-
from __future__ import annotations
import json
@@ -40,7 +40,6 @@
from nerfstudio.exporter import texture_utils, tsdf_utils
from nerfstudio.exporter.exporter_utils import collect_camera_poses, generate_point_cloud, get_mesh_from_filename
from nerfstudio.exporter.marching_cubes import generate_mesh_with_multires_marching_cubes
-from nerfstudio.fields.sdf_field import SDFField # noqa
from nerfstudio.models.splatfacto import SplatfactoModel
from nerfstudio.pipelines.base_pipeline import Pipeline, VanillaPipeline
from nerfstudio.utils.eval_utils import eval_setup
diff --git a/nerfstudio/scripts/process_data.py b/nerfstudio/scripts/process_data.py
index 9956a908..ffbc284f 100644
--- a/nerfstudio/scripts/process_data.py
+++ b/nerfstudio/scripts/process_data.py
@@ -1,3 +1,4 @@
+# Copyright 2024 the authors of NeuRAD and contributors.
# Copyright 2022 the Regents of the University of California, Nerfstudio Team and contributors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -493,29 +494,6 @@ def main(self) -> None:
Annotated[ProcessODM, tyro.conf.subcommand(name="odm")],
]
-# Add aria subcommand if projectaria_tools is installed.
-try:
- import projectaria_tools
-except ImportError:
- projectaria_tools = None
-
-if projectaria_tools is not None:
- from nerfstudio.scripts.datasets.process_project_aria import ProcessProjectAria
-
- # Note that Union[A, Union[B, C]] == Union[A, B, C].
- Commands = Union[Commands, Annotated[ProcessProjectAria, tyro.conf.subcommand(name="aria")]]
-else:
- Commands = Union[
- Commands,
- Annotated[
- NotInstalled,
- tyro.conf.subcommand(
- name="aria",
- description="**Not installed.** Processing Project Aria data requires `pip install projectaria_tools'[all]'`.",
- ),
- ],
- ]
-
def entrypoint():
"""Entrypoint for use with pyproject scripts."""
diff --git a/nerfstudio/scripts/render.py b/nerfstudio/scripts/render.py
index 13b29f69..91ac52c9 100644
--- a/nerfstudio/scripts/render.py
+++ b/nerfstudio/scripts/render.py
@@ -34,6 +34,7 @@
import mediapy as media
import numpy as np
+import plotly.graph_objs as go
import torch
import tyro
import viser.transforms as tf
@@ -619,8 +620,10 @@ class RenderInterpolated(BaseRender):
"""Shift to apply to all actor poses."""
actor_removal_time: Optional[float] = None
"""Time at which to remove all actors."""
- actor_stop_time: Optional[float] = 3.2
+ actor_stop_time: Optional[float] = None
"""Time at which to stop all actors."""
+ actor_indices: Optional[List[int]] = None
+ """Indices of actors to modify. If None, modify all actors."""
def main(self) -> None:
"""Main function."""
@@ -662,7 +665,7 @@ def main(self) -> None:
sensor_indices = torch.tensor([0]).unsqueeze(0)
cameras.metadata["sensor_idxs"] = torch.zeros_like(cameras.camera_type, dtype=torch.int64)
- modify_actors(pipeline, self.actor_shift, self.actor_removal_time, self.actor_stop_time)
+ modify_actors(pipeline, self.actor_shift, self.actor_removal_time, self.actor_stop_time, self.actor_indices)
for sensor_index in sensor_indices:
mask = (cameras.metadata["sensor_idxs"] == sensor_index).squeeze(-1)
@@ -722,19 +725,24 @@ def main(self) -> None:
)
-def modify_actors(pipeline, actor_shift, actor_removal_time, actor_stop_time):
+def modify_actors(pipeline, actor_shift, actor_removal_time, actor_stop_time, actor_indices):
actor_shift = torch.nn.Parameter(torch.tensor(actor_shift, dtype=torch.float32, device=pipeline.model.device))
with torch.no_grad():
- pipeline.model.dynamic_actors.actor_positions += actor_shift
+ if actor_indices is not None:
+ indices = torch.tensor(actor_indices, device=pipeline.model.device, dtype=torch.int)
+ else:
+ indices = torch.arange(pipeline.model.dynamic_actors.actor_positions.shape[1], device=pipeline.model.device)
+
+ pipeline.model.dynamic_actors.actor_positions[:, indices, :] += actor_shift
if actor_removal_time is not None:
no_actor_mask = pipeline.model.dynamic_actors.unique_timestamps > actor_removal_time
- pipeline.model.dynamic_actors.actor_present_at_time[no_actor_mask, :] = False
+ pipeline.model.dynamic_actors.actor_present_at_time[no_actor_mask, indices] = False
if actor_stop_time is not None:
actor_stop_idx = torch.searchsorted(pipeline.model.dynamic_actors.unique_timestamps, actor_stop_time)
- freeze_position = pipeline.model.dynamic_actors.actor_positions[actor_stop_idx].unsqueeze(0)
- freeze_rotation = pipeline.model.dynamic_actors.actor_rotations_6d[actor_stop_idx].unsqueeze(0)
- pipeline.model.dynamic_actors.actor_positions[actor_stop_idx:] = freeze_position
- pipeline.model.dynamic_actors.actor_rotations_6d[actor_stop_idx:] = freeze_rotation
+ freeze_position = pipeline.model.dynamic_actors.actor_positions[actor_stop_idx, indices].unsqueeze(0)
+ freeze_rotation = pipeline.model.dynamic_actors.actor_rotations_6d[actor_stop_idx, indices].unsqueeze(0)
+ pipeline.model.dynamic_actors.actor_positions[actor_stop_idx:, indices] = freeze_position
+ pipeline.model.dynamic_actors.actor_rotations_6d[actor_stop_idx:, indices] = freeze_rotation
def get_shifted_camera_path(cameras, shift, shift_time, shift_steps, interpolation_steps):
@@ -882,11 +890,24 @@ class DatasetRender(BaseRender):
shift: Tuple[float, float, float] = (0, 0, 0)
"""Shift to apply to the camera pose."""
+
+ actor_shift: Tuple[float, ...] = (0.0, 0.0, 0.0)
+ """Shift to apply to all actor poses."""
+ actor_removal_time: Optional[float] = None
+ """Time at which to remove all actors."""
+ actor_stop_time: Optional[float] = None
+ """Time at which to stop all actors."""
+ actor_indices: Optional[List[int]] = None
+ """Indices of actors to modify. If None, modify all actors."""
+
calculate_and_save_metrics: bool = False
"""Whether to calculate and save metrics."""
metrics_filename: Path = Path("metrics.pkl")
"""Filename to save the metrics to."""
+ render_point_clouds: bool = False
+ """Whether to render point clouds."""
+
def main(self):
config: TrainerConfig
@@ -921,6 +942,8 @@ def update_config(config: TrainerConfig) -> TrainerConfig:
data_manager_config = config.pipeline.datamanager
assert isinstance(data_manager_config, (VanillaDataManagerConfig, FullImageDatamanagerConfig))
+ modify_actors(pipeline, self.actor_shift, self.actor_removal_time, self.actor_stop_time, self.actor_indices)
+
self.output_path.mkdir(exist_ok=True, parents=True)
metrics_out = dict()
for split in self.pose_source.split("+"):
@@ -932,12 +955,14 @@ def update_config(config: TrainerConfig) -> TrainerConfig:
dataset = datamanager.train_dataset
dataparser_outputs = getattr(dataset, "_dataparser_outputs", datamanager.train_dataparser_outputs)
+ lidar_dataset = datamanager.train_lidar_dataset
else:
with _disable_datamanager_setup(data_manager_config._target): # pylint: disable=protected-access
datamanager = data_manager_config.setup(test_mode=split, device=pipeline.device)
dataset = datamanager.eval_dataset
dataparser_outputs = getattr(dataset, "_dataparser_outputs", None)
+ lidar_dataset = datamanager.eval_lidar_dataset
if dataparser_outputs is None:
dataparser_outputs = datamanager.dataparser.get_dataparser_outputs(split=datamanager.test_split)
dataset.cameras.height = (
@@ -964,6 +989,11 @@ def update_config(config: TrainerConfig) -> TrainerConfig:
device=datamanager.device,
num_workers=datamanager.world_size * 4,
)
+ lidar_dataloader = FixedIndicesEvalDataloader(
+ dataset=lidar_dataset,
+ device=datamanager.device,
+ num_workers=datamanager.world_size * 4,
+ )
images_root = Path(os.path.commonpath(dataparser_outputs.image_filenames))
with Progress(
TextColumn(f":movie_camera: Rendering split {split} :movie_camera:"),
@@ -1106,6 +1136,39 @@ def update_config(config: TrainerConfig) -> TrainerConfig:
else:
raise ValueError(f"Unknown image format {self.image_format}")
+ if self.render_point_clouds:
+ with Progress(
+ TextColumn(f":movie_camera: Rendering lidars for split {split} :movie_camera:"),
+ BarColumn(),
+ TaskProgressColumn(
+ text_format="[progress.percentage]{task.completed}/{task.total:>.0f}({task.percentage:>3.1f}%)",
+ show_speed=True,
+ ),
+ ItersPerSecColumn(suffix="fps"),
+ TimeRemainingColumn(elapsed_when_finished=False, compact=False),
+ TimeElapsedColumn(),
+ ) as progress:
+ with torch.no_grad():
+ output_path = self.output_path / split / "lidar"
+ output_path.mkdir(exist_ok=True, parents=True)
+ for lidar_idx, (lidar, batch) in enumerate(
+ progress.track(lidar_dataloader, total=len(lidar_dataloader))
+ ):
+ lidar_output, _ = pipeline.model.get_outputs_for_lidar(lidar, batch=batch)
+ points_in_local = lidar_output["points"]
+ if "ray_drop_prob" in lidar_output:
+ points_in_local = points_in_local[(lidar_output["ray_drop_prob"] < 0.5).squeeze(-1)]
+
+ points_in_world = to_world(lidar.lidar_to_worlds[0], points_in_local)
+ # get ground truth for comparison
+ gt_point_in_world = to_world(lidar.lidar_to_worlds[0], batch["lidar"][..., :3])
+ plot_lidar_points(
+ gt_point_in_world.cpu().detach().numpy(), output_path / f"gt-lidar_{lidar_idx}.png"
+ )
+ plot_lidar_points(
+ points_in_world.cpu().detach().numpy(), output_path / f"lidar_{lidar_idx}.png"
+ )
+
if self.calculate_and_save_metrics:
metrics_out_path = Path(self.output_path, self.metrics_filename)
with open(metrics_out_path, "wb") as f:
@@ -1123,6 +1186,89 @@ def update_config(config: TrainerConfig) -> TrainerConfig:
CONSOLE.print(Panel(table, title="[bold][green]:tada: Render on split {} Complete :tada:[/bold]", expand=False))
+def to_world(l2w, points):
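+ """Map points to world coordinates by homogeneous multiplication with the transform l2w."""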
+ points_in_world = (
+ l2w
+ @ torch.cat(
+ [
+ points,
+ torch.ones_like(points[..., :1]),
+ ],
+ dim=-1,
+ ).unsqueeze(-1)
+ ).squeeze()
+ return points_in_world
+
+
+def plot_lidar_points(points, output_path, cmin=-6.0, cmax=5.0, width=1920, height=1080, ranges=[100, 200, 10]):
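+ """Save a 3D scatter plot of lidar points, colored by height (z), to output_path."""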
+ x = points[:, 0]
+ y = points[:, 1]
+ z = points[:, 2]
+
+ # Create a 3D scatter plot
+ trace = go.Scatter3d(
+ x=x,
+ y=y,
+ z=z,
+ mode="markers",
+ marker=dict(
+ size=1.0,
+ color=z,
+ colorscale="Viridis",
+ opacity=0.8,
+ cmin=cmin,
+ cmax=cmax,
+ ),
+ )
+
+ x_range, y_range, z_range = ranges
+
+ # Compute the aspect ratio
+ max_range = 2 * max(x_range, y_range, z_range)
+ aspect_ratio = dict(x=x_range / max_range, y=y_range / max_range, z=z_range / max_range)
+
+ # Define the camera position
+ camera = dict(
+ up=dict(x=0, y=0, z=1),
+ center=dict(x=0, y=0, z=0),
+ eye=dict(x=0.0, y=-0.07, z=0.02),
+ )
+ layout = go.Layout(
+ scene=dict(
+ xaxis=dict(
+ title="",
+ range=[-x_range, x_range],
+ showticklabels=False,
+ ticks="",
+ showline=False,
+ showgrid=False,
+ ),
+ yaxis=dict(
+ title="",
+ range=[-y_range, y_range],
+ showticklabels=False,
+ ticks="",
+ showline=False,
+ showgrid=False,
+ ),
+ zaxis=dict(
+ title="",
+ range=[-z_range, z_range],
+ showticklabels=False,
+ ticks="",
+ showline=False,
+ showgrid=False,
+ ),
+ aspectmode="manual",
+ aspectratio=aspect_ratio,
+ camera=camera,
+ )
+ )
+
+ fig = go.Figure(data=[trace], layout=layout)
+ fig.write_image(output_path, width=width, height=height, scale=1)
+
+
def streamline_ad_config(config):
if getattr(config.pipeline.datamanager, "num_processes", None):
config.pipeline.datamanager.num_processes = 0
diff --git a/nerfstudio/viewer/viewer.py b/nerfstudio/viewer/viewer.py
index a1b614d8..72a61de4 100644
--- a/nerfstudio/viewer/viewer.py
+++ b/nerfstudio/viewer/viewer.py
@@ -13,7 +13,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-""" Manage the state of the viewer """
+"""Manage the state of the viewer"""
+
from __future__ import annotations
import threading
@@ -135,7 +136,7 @@ def __init__(
viser.theme.TitlebarButton(
text="Github",
icon="GitHub",
- href="https://github.com/nerfstudio-project/nerfstudio",
+ href="https://github.com/georghess/neurad-studio",
),
viser.theme.TitlebarButton(
text="Documentation",
@@ -144,10 +145,10 @@ def __init__(
),
)
image = viser.theme.TitlebarImage(
- image_url_light="https://docs.nerf.studio/_static/imgs/logo.png",
- image_url_dark="https://docs.nerf.studio/_static/imgs/logo-dark.png",
- image_alt="NerfStudio Logo",
- href="https://docs.nerf.studio/",
+ image_url_light="https://raw.githubusercontent.com/georghess/neurad-studio/main/docs/_static/imgs/neurad_logo_horizontal_light.png",
+ image_url_dark="https://raw.githubusercontent.com/georghess/neurad-studio/main/docs/_static/imgs/neurad_logo_horizontal_dark.png",
+ image_alt="Neurad Logo",
+ href="https://github.com/georghess/neurad-studio",
)
titlebar_theme = viser.theme.TitlebarConfig(buttons=buttons, image=image)
self.viser_server.configure_theme(
diff --git a/pyproject.toml b/pyproject.toml
index 7ed90adb..283721f7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -3,12 +3,12 @@ requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"
[project]
-name = "nerfstudio"
-version = "1.0.3"
-description = "All-in-one repository for state-of-the-art NeRFs"
+name = "neurad-studio"
+version = "0.1.0"
+description = "Neural Rendering methods that are specialized for Autonomous Driving (NeuRAD)."
readme = "README.md"
license = { text="Apache 2.0"}
-requires-python = ">=3.8.0"
+requires-python = ">=3.10.0"
classifiers = [
"Development Status :: 3 - Alpha",
"Programming Language :: Python",
@@ -63,19 +63,16 @@ dependencies = [
"xatlas",
"trimesh>=3.20.2",
"timm==0.6.7",
- "gsplat>=0.1.9",
+ "gsplat==0.1.11",
"pytorch-msssim",
"pathos",
"packaging",
"zod>=0.1.7",
"pandaset@git+https://github.com/scaleapi/pandaset-devkit.git#egg=pandaset&subdirectory=python",
"av2==0.2.1",
+ "fastapi[all]==0.110",
]
-[project.urls]
-"Documentation" = "https://docs.nerf.studio"
-
-
[project.optional-dependencies]
# Generative related dependencies
@@ -100,10 +97,6 @@ dev = [
"diffusers==0.16.1",
"opencv-stubs==0.0.7",
"transformers==4.29.2",
- "pyright==1.1.331",
- # NOTE: Disabling projectaria-tools because it doesn't have prebuilt windows wheels
- # Syntax comes from here: https://pip.pypa.io/en/stable/reference/requirement-specifiers/
- "projectaria-tools>=1.3.1; sys_platform != 'win32'",
# pin torch to <=2.1 to fix https://github.com/pytorch/pytorch/issues/118736
"torch>=1.13.1,<2.2",
]
@@ -142,7 +135,7 @@ ns-dev-sync-viser-message-defs = "nerfstudio.scripts.viewer.sync_viser_message_d
include = ["nerfstudio*"]
[tool.setuptools.package-data]
-"*" = ["*.cu", "*.json", "py.typed", "setup.bash", "setup.zsh"]
+"*" = ["*.cu", "*.json", "py.typed", "setup.bash", "setup.zsh", "*.yaml"]
[tool.pytest.ini_options]
addopts = "-n=4 --typeguard-packages=nerfstudio --disable-warnings"
@@ -161,7 +154,7 @@ reportMissingImports = "warning"
reportMissingTypeStubs = false
reportPrivateImportUsage = false
-pythonVersion = "3.8"
+pythonVersion = "3.10"
pythonPlatform = "Linux"
[tool.ruff]
diff --git a/tests/data/configs/test_config1.yml b/tests/data/configs/test_config1.yml
deleted file mode 100644
index 67ce18cd..00000000
--- a/tests/data/configs/test_config1.yml
+++ /dev/null
@@ -1,192 +0,0 @@
-!!python/object:nerfstudio.engine.trainer.TrainerConfig
-_target: !!python/name:nerfstudio.engine.trainer.Trainer ''
-data: null
-experiment_name: unnamed
-load_checkpoint: !!python/object/apply:pathlib.PosixPath
-- outputs
-- unnamed
-- nerfacto
-- 2023-05-24_204526
-- config.yml
-load_config: null
-load_dir: null
-load_step: null
-log_gradients: false
-logging: !!python/object:nerfstudio.configs.base_config.LoggingConfig
- local_writer: !!python/object:nerfstudio.configs.base_config.LocalWriterConfig
- _target: !!python/name:nerfstudio.utils.writer.LocalWriter ''
- enable: true
- max_log_size: 10
- stats_to_track: !!python/tuple
- - !!python/object/apply:nerfstudio.utils.writer.EventName
- - Train Iter (time)
- - !!python/object/apply:nerfstudio.utils.writer.EventName
- - Train Rays / Sec
- - !!python/object/apply:nerfstudio.utils.writer.EventName
- - Test PSNR
- - !!python/object/apply:nerfstudio.utils.writer.EventName
- - Vis Rays / Sec
- - !!python/object/apply:nerfstudio.utils.writer.EventName
- - Test Rays / Sec
- - !!python/object/apply:nerfstudio.utils.writer.EventName
- - ETA (time)
- max_buffer_size: 20
- profiler: basic
- relative_log_dir: !!python/object/apply:pathlib.PosixPath []
- steps_per_log: 10
-machine: !!python/object:nerfstudio.configs.base_config.MachineConfig
- dist_url: auto
- machine_rank: 0
- num_devices: 1
- num_machines: 1
- seed: 42
-max_num_iterations: 30000
-method_name: nerfacto
-mixed_precision: true
-optimizers:
- fields:
- optimizer: !!python/object:nerfstudio.engine.optimizers.AdamOptimizerConfig
- _target: &id001 !!python/name:torch.optim.adam.Adam ''
- eps: 1.0e-15
- lr: 0.01
- max_norm: null
- weight_decay: 0
- scheduler: !!python/object:nerfstudio.engine.schedulers.ExponentialDecaySchedulerConfig
- _target: &id002 !!python/name:nerfstudio.engine.schedulers.ExponentialDecayScheduler ''
- lr_final: 0.0001
- lr_pre_warmup: 1.0e-08
- max_steps: 200000
- ramp: cosine
- warmup_steps: 0
- proposal_networks:
- optimizer: !!python/object:nerfstudio.engine.optimizers.AdamOptimizerConfig
- _target: *id001
- eps: 1.0e-15
- lr: 0.01
- max_norm: null
- weight_decay: 0
- scheduler: !!python/object:nerfstudio.engine.schedulers.ExponentialDecaySchedulerConfig
- _target: *id002
- lr_final: 0.0001
- lr_pre_warmup: 1.0e-08
- max_steps: 200000
- ramp: cosine
- warmup_steps: 0
-output_dir: !!python/object/apply:pathlib.PosixPath
-- outputs
-pipeline: !!python/object:nerfstudio.pipelines.base_pipeline.VanillaPipelineConfig
- _target: !!python/name:nerfstudio.pipelines.base_pipeline.VanillaPipeline ''
- datamanager: !!python/object:nerfstudio.data.datamanagers.base_datamanager.VanillaDataManagerConfig
- _target: !!python/name:nerfstudio.data.datamanagers.base_datamanager.VanillaDataManager ''
- camera_optimizer: !!python/object:nerfstudio.cameras.camera_optimizers.CameraOptimizerConfig
- _target: !!python/name:nerfstudio.cameras.camera_optimizers.CameraOptimizer ''
- mode: 'off'
- optimizer: !!python/object:nerfstudio.engine.optimizers.AdamOptimizerConfig
- _target: *id001
- eps: 1.0e-08
- lr: 0.0006
- max_norm: null
- weight_decay: 0.01
- orientation_noise_std: 0.0
- param_group: camera_opt
- position_noise_std: 0.0
- scheduler: !!python/object:nerfstudio.engine.schedulers.ExponentialDecaySchedulerConfig
- _target: *id002
- lr_final: 6.0e-06
- lr_pre_warmup: 1.0e-08
- max_steps: 200000
- ramp: cosine
- warmup_steps: 0
- camera_res_scale_factor: 1.0
- collate_fn: !!python/name:nerfstudio.data.utils.nerfstudio_collate.nerfstudio_collate ''
- data: null
- dataparser: !!python/object:nerfstudio.data.dataparsers.minimal_dataparser.MinimalDataParserConfig
- _target: !!python/name:nerfstudio.data.dataparsers.minimal_dataparser.MinimalDataParser ''
- data: !!python/object/apply:pathlib.PosixPath
- - /
- - home
- - kulhajon
- - projects
- - nerf-playground
- - data
- - varun
- - cheezit-scaled
- eval_num_images_to_sample_from: -1
- eval_num_rays_per_batch: 4096
- eval_num_times_to_repeat_images: -1
- patch_size: 1
- train_num_images_to_sample_from: -1
- train_num_rays_per_batch: 4096
- train_num_times_to_repeat_images: -1
- model: !!python/object:nerfstudio.models.nerfacto.NerfactoModelConfig
- _target: !!python/name:nerfstudio.models.nerfacto.NerfactoModel ''
- background_color: last_sample
- collider_params:
- far_plane: 6.0
- near_plane: 2.0
- disable_scene_contraction: false
- distortion_loss_mult: 0.002
- enable_collider: true
- eval_num_rays_per_chunk: 32768
- far_plane: 1000.0
- hidden_dim: 64
- hidden_dim_color: 64
- hidden_dim_transient: 64
- interlevel_loss_mult: 1.0
- log2_hashmap_size: 19
- loss_coefficients:
- rgb_loss_coarse: 1.0
- rgb_loss_fine: 1.0
- max_res: 2048
- near_plane: 0.05
- num_levels: 16
- num_nerf_samples_per_ray: 48
- num_proposal_iterations: 2
- num_proposal_samples_per_ray: !!python/tuple
- - 256
- - 96
- orientation_loss_mult: 0.0001
- pred_normal_loss_mult: 0.001
- predict_normals: false
- proposal_initial_sampler: piecewise
- proposal_net_args_list:
- - hidden_dim: 16
- log2_hashmap_size: 17
- max_res: 128
- num_levels: 5
- use_linear: false
- - hidden_dim: 16
- log2_hashmap_size: 17
- max_res: 256
- num_levels: 5
- use_linear: false
- proposal_update_every: 5
- proposal_warmup: 5000
- proposal_weights_anneal_max_num_iters: 1000
- proposal_weights_anneal_slope: 10.0
- use_average_appearance_embedding: true
- use_gradient_scaling: false
- use_proposal_weight_anneal: true
- use_same_proposal_network: false
- use_single_jitter: true
-project_name: nerfstudio-project
-relative_model_dir: !!python/object/apply:pathlib.PosixPath
-- nerfstudio_models
-save_only_latest_checkpoint: true
-steps_per_eval_all_images: 25000
-steps_per_eval_batch: 500
-steps_per_eval_image: 500
-steps_per_save: 2000
-timestamp: 2023-05-24_205104
-use_grad_scaler: false
-viewer: !!python/object:nerfstudio.configs.base_config.ViewerConfig
- image_format: jpeg
- jpeg_quality: 90
- max_num_display_images: 512
- num_rays_per_chunk: 32768
- quit_on_train_completion: false
- relative_log_filename: viewer_log_filename.txt
- websocket_host: 0.0.0.0
- websocket_port: null
- websocket_port_default: 7007
-vis: viewer+wandb
diff --git a/tests/data/configs/test_config2.yml b/tests/data/configs/test_config2.yml
deleted file mode 100644
index 90737789..00000000
--- a/tests/data/configs/test_config2.yml
+++ /dev/null
@@ -1,186 +0,0 @@
-!!python/object:nerfstudio.engine.trainer.TrainerConfig
-_target: !!python/name:nerfstudio.engine.trainer.Trainer ''
-data: null
-experiment_name: unnamed
-load_checkpoint: null
-load_config: null
-load_dir: null
-load_step: null
-log_gradients: false
-logging: !!python/object:nerfstudio.configs.base_config.LoggingConfig
- local_writer: !!python/object:nerfstudio.configs.base_config.LocalWriterConfig
- _target: !!python/name:nerfstudio.utils.writer.LocalWriter ''
- enable: true
- max_log_size: 10
- stats_to_track: !!python/tuple
- - !!python/object/apply:nerfstudio.utils.writer.EventName
- - Train Iter (time)
- - !!python/object/apply:nerfstudio.utils.writer.EventName
- - Train Rays / Sec
- - !!python/object/apply:nerfstudio.utils.writer.EventName
- - Test PSNR
- - !!python/object/apply:nerfstudio.utils.writer.EventName
- - Vis Rays / Sec
- - !!python/object/apply:nerfstudio.utils.writer.EventName
- - Test Rays / Sec
- - !!python/object/apply:nerfstudio.utils.writer.EventName
- - ETA (time)
- max_buffer_size: 20
- profiler: basic
- relative_log_dir: !!python/object/apply:pathlib.PosixPath []
- steps_per_log: 10
-machine: !!python/object:nerfstudio.configs.base_config.MachineConfig
- dist_url: auto
- machine_rank: 0
- num_devices: 1
- num_machines: 1
- seed: 42
-max_num_iterations: 30000
-method_name: nerfacto
-mixed_precision: true
-optimizers:
- fields:
- optimizer: !!python/object:nerfstudio.engine.optimizers.AdamOptimizerConfig
- _target: &id001 !!python/name:torch.optim.adam.Adam ''
- eps: 1.0e-15
- lr: 0.01
- max_norm: null
- weight_decay: 0
- scheduler: !!python/object:nerfstudio.engine.schedulers.ExponentialDecaySchedulerConfig
- _target: &id002 !!python/name:nerfstudio.engine.schedulers.ExponentialDecayScheduler ''
- lr_final: 0.0001
- lr_pre_warmup: 1.0e-08
- max_steps: 200000
- ramp: cosine
- warmup_steps: 0
- proposal_networks:
- optimizer: !!python/object:nerfstudio.engine.optimizers.AdamOptimizerConfig
- _target: *id001
- eps: 1.0e-15
- lr: 0.01
- max_norm: null
- weight_decay: 0
- scheduler: !!python/object:nerfstudio.engine.schedulers.ExponentialDecaySchedulerConfig
- _target: *id002
- lr_final: 0.0001
- lr_pre_warmup: 1.0e-08
- max_steps: 200000
- ramp: cosine
- warmup_steps: 0
-output_dir: !!python/object/apply:pathlib.PosixPath
-- outputs
-pipeline: !!python/object:nerfstudio.pipelines.base_pipeline.VanillaPipelineConfig
- _target: !!python/name:nerfstudio.pipelines.base_pipeline.VanillaPipeline ''
- datamanager: !!python/object:nerfstudio.data.datamanagers.base_datamanager.VanillaDataManagerConfig
- _target: !!python/name:nerfstudio.data.datamanagers.base_datamanager.VanillaDataManager ''
- camera_optimizer: !!python/object:nerfstudio.cameras.camera_optimizers.CameraOptimizerConfig
- _target: !!python/name:nerfstudio.cameras.camera_optimizers.CameraOptimizer ''
- mode: 'off'
- optimizer: !!python/object:nerfstudio.engine.optimizers.AdamOptimizerConfig
- _target: *id001
- eps: 1.0e-08
- lr: 0.0006
- max_norm: null
- weight_decay: 0.01
- orientation_noise_std: 0.0
- param_group: camera_opt
- position_noise_std: 0.0
- scheduler: !!python/object:nerfstudio.engine.schedulers.ExponentialDecaySchedulerConfig
- _target: *id002
- lr_final: 6.0e-06
- lr_pre_warmup: 1.0e-08
- max_steps: 200000
- ramp: cosine
- warmup_steps: 0
- camera_res_scale_factor: 1.0
- data: null
- dataparser: !!python/object:nerfstudio.data.dataparsers.minimal_dataparser.MinimalDataParserConfig
- _target: !!python/name:nerfstudio.data.dataparsers.minimal_dataparser.MinimalDataParser ''
- data: !!python/object/apply:pathlib.PosixPath
- - /
- - home
- - kulhajon
- - projects
- - nerf-playground
- - data
- - varun
- - cheezit-scaled
- eval_num_images_to_sample_from: -1
- eval_num_rays_per_batch: 4096
- eval_num_times_to_repeat_images: -1
- patch_size: 1
- train_num_images_to_sample_from: -1
- train_num_rays_per_batch: 4096
- train_num_times_to_repeat_images: -1
- model: !!python/object:nerfstudio.models.nerfacto.NerfactoModelConfig
- _target: !!python/name:nerfstudio.models.nerfacto.NerfactoModel ''
- background_color: last_sample
- collider_params:
- far_plane: 6.0
- near_plane: 2.0
- disable_scene_contraction: false
- distortion_loss_mult: 0.002
- enable_collider: true
- eval_num_rays_per_chunk: 32768
- far_plane: 1000.0
- hidden_dim: 64
- hidden_dim_color: 64
- hidden_dim_transient: 64
- interlevel_loss_mult: 1.0
- log2_hashmap_size: 19
- loss_coefficients:
- rgb_loss_coarse: 1.0
- rgb_loss_fine: 1.0
- max_res: 2048
- near_plane: 0.05
- num_levels: 16
- num_nerf_samples_per_ray: 48
- num_proposal_iterations: 2
- num_proposal_samples_per_ray: !!python/tuple
- - 256
- - 96
- orientation_loss_mult: 0.0001
- pred_normal_loss_mult: 0.001
- predict_normals: false
- proposal_initial_sampler: piecewise
- proposal_net_args_list:
- - hidden_dim: 16
- log2_hashmap_size: 17
- max_res: 128
- num_levels: 5
- use_linear: false
- - hidden_dim: 16
- log2_hashmap_size: 17
- max_res: 256
- num_levels: 5
- use_linear: false
- proposal_update_every: 5
- proposal_warmup: 5000
- proposal_weights_anneal_max_num_iters: 1000
- proposal_weights_anneal_slope: 10.0
- use_average_appearance_embedding: true
- use_gradient_scaling: false
- use_proposal_weight_anneal: true
- use_same_proposal_network: false
- use_single_jitter: true
-project_name: nerfstudio-project
-relative_model_dir: !!python/object/apply:pathlib.PosixPath
-- nerfstudio_models
-save_only_latest_checkpoint: true
-steps_per_eval_all_images: 25000
-steps_per_eval_batch: 500
-steps_per_eval_image: 500
-steps_per_save: 2000
-timestamp: 2023-05-24_204526
-use_grad_scaler: false
-viewer: !!python/object:nerfstudio.configs.base_config.ViewerConfig
- image_format: jpeg
- jpeg_quality: 90
- max_num_display_images: 512
- num_rays_per_chunk: 32768
- quit_on_train_completion: false
- relative_log_filename: viewer_log_filename.txt
- websocket_host: 0.0.0.0
- websocket_port: null
- websocket_port_default: 7007
-vis: viewer+wandb
diff --git a/tests/data/lego_test/train/r_0.png b/tests/data/lego_test/train/r_0.png
deleted file mode 100644
index ab93a6d2..00000000
Binary files a/tests/data/lego_test/train/r_0.png and /dev/null differ
diff --git a/tests/data/lego_test/transforms_train.json b/tests/data/lego_test/transforms_train.json
deleted file mode 100644
index c398a6b5..00000000
--- a/tests/data/lego_test/transforms_train.json
+++ /dev/null
@@ -1,35 +0,0 @@
-{
- "camera_angle_x": 0.6911112070083618,
- "frames": [
- {
- "file_path": "./train/r_0",
- "rotation": 0.012566370614359171,
- "transform_matrix": [
- [
- -0.9999021887779236,
- 0.004192245192825794,
- -0.013345719315111637,
- -0.05379832163453102
- ],
- [
- -0.013988681137561798,
- -0.2996590733528137,
- 0.95394366979599,
- 3.845470428466797
- ],
- [
- -4.656612873077393e-10,
- 0.9540371894836426,
- 0.29968830943107605,
- 1.2080823183059692
- ],
- [
- 0.0,
- 0.0,
- 0.0,
- 1.0
- ]
- ]
- }
- ]
-}
\ No newline at end of file
diff --git a/tests/data/lego_test/transforms_val.json b/tests/data/lego_test/transforms_val.json
deleted file mode 100644
index b41732ec..00000000
--- a/tests/data/lego_test/transforms_val.json
+++ /dev/null
@@ -1,35 +0,0 @@
-{
- "camera_angle_x": 0.6911112070083618,
- "frames": [
- {
- "file_path": "./val/r_0",
- "rotation": 0.012566370614359171,
- "transform_matrix": [
- [
- -0.963964581489563,
- -0.2611401677131653,
- 0.0507759265601635,
- 0.2046843022108078
- ],
- [
- 0.26603081822395325,
- -0.9462433457374573,
- 0.18398693203926086,
- 0.7416750192642212
- ],
- [
- 7.450580596923828e-09,
- 0.1908649355173111,
- 0.9816163182258606,
- 3.957021951675415
- ],
- [
- 0.0,
- 0.0,
- 0.0,
- 1.0
- ]
- ]
- }
- ]
-}
\ No newline at end of file
diff --git a/tests/data/lego_test/val/r_0.png b/tests/data/lego_test/val/r_0.png
deleted file mode 100644
index ab93a6d2..00000000
Binary files a/tests/data/lego_test/val/r_0.png and /dev/null differ
diff --git a/tests/data/minimal_parser/train.npz b/tests/data/minimal_parser/train.npz
deleted file mode 100644
index 9d8acbb1..00000000
Binary files a/tests/data/minimal_parser/train.npz and /dev/null differ
diff --git a/tests/data/minimal_parser/val.npz b/tests/data/minimal_parser/val.npz
deleted file mode 100644
index 9d8acbb1..00000000
Binary files a/tests/data/minimal_parser/val.npz and /dev/null differ
diff --git a/tests/data/test_datamanager.py b/tests/data/test_datamanager.py
deleted file mode 100644
index 0547b736..00000000
--- a/tests/data/test_datamanager.py
+++ /dev/null
@@ -1,155 +0,0 @@
-import pickle
-from pathlib import Path
-from typing import Any
-
-import pytest
-import torch
-import yaml
-
-from nerfstudio.cameras.cameras import Cameras
-from nerfstudio.configs.base_config import InstantiateConfig
-from nerfstudio.data.datamanagers.base_datamanager import (
- DataparserOutputs,
- VanillaDataManager,
- VanillaDataManagerConfig,
-)
-from nerfstudio.data.datasets.base_dataset import InputDataset
-from nerfstudio.data.datasets.depth_dataset import DepthDataset
-
-
-class DummyDataParser:
- def __init__(self, *args, **kwargs):
- pass
-
- def __getattr__(self, __name: str) -> Any:
- if __name.startswith("_"):
- return object.__getattribute__(self, __name)
- return None
-
- def get_dataparser_outputs(self, *args, **kwargs):
- return DataparserOutputs(
- [],
- Cameras(
- torch.ones((0, 3, 4)),
- torch.ones((0, 1)),
- torch.ones((0, 1)),
- torch.ones((0, 1)),
- torch.ones((0, 1)),
- 10,
- 10,
- ),
- metadata={"depth_filenames": [], "depth_unit_scale_factor": 1.0},
- )
-
-
-@pytest.fixture
-def config():
- config = VanillaDataManagerConfig()
- setattr(config, "dataparser", InstantiateConfig(_target=DummyDataParser))
- setattr(config.dataparser, "data", None)
- return config
-
-
-def test_data_manager_type_inference(config):
- # Mock for a faster test
-
- assert VanillaDataManager[DepthDataset](config).dataset_type is DepthDataset
- assert VanillaDataManager(config).dataset_type is InputDataset
-
- class tmp2(VanillaDataManager[DepthDataset]):
- pass
-
- assert tmp2(config).dataset_type is DepthDataset
-
- class tmp(VanillaDataManager):
- pass
-
- assert tmp(config).dataset_type is InputDataset
-
-
-class _pickle_enabled_tmp(VanillaDataManager):
- pass
-
-
-class _pickle_enabled_tmp2(VanillaDataManager[DepthDataset]):
- pass
-
-
-def test_data_manager_type_can_be_pickled(config):
- # Mock for a faster test
- assert VanillaDataManager[DepthDataset](config).dataset_type is DepthDataset
- obj = pickle.loads(pickle.dumps(VanillaDataManager[DepthDataset](config)))
- assert obj.dataset_type is DepthDataset
- assert isinstance(obj, VanillaDataManager)
-
- assert VanillaDataManager(config).dataset_type is InputDataset
- obj = pickle.loads(pickle.dumps(VanillaDataManager(config)))
- assert obj.dataset_type is InputDataset
- assert isinstance(obj, VanillaDataManager)
-
- assert _pickle_enabled_tmp(config).dataset_type is InputDataset
- obj = pickle.loads(pickle.dumps(_pickle_enabled_tmp(config)))
- assert obj.dataset_type is InputDataset
- assert isinstance(obj, _pickle_enabled_tmp)
-
- assert _pickle_enabled_tmp2(config).dataset_type is DepthDataset
- obj = pickle.loads(pickle.dumps(_pickle_enabled_tmp2(config)))
- assert obj.dataset_type is DepthDataset
- assert isinstance(obj, _pickle_enabled_tmp2)
-
-
-def test_data_manager_type_can_be_serialized(config):
- # Mock for a faster test
-
- assert VanillaDataManager(config).dataset_type is InputDataset
- obj = yaml.load(yaml.dump(VanillaDataManager(config)), Loader=yaml.UnsafeLoader)
- assert obj.dataset_type is InputDataset
- assert isinstance(obj, VanillaDataManager)
-
- assert VanillaDataManager[DepthDataset](config).dataset_type is DepthDataset
- obj = yaml.load(yaml.dump(VanillaDataManager[DepthDataset](config)), Loader=yaml.UnsafeLoader)
- assert obj.dataset_type is DepthDataset
- assert isinstance(obj, VanillaDataManager)
-
- class tmp(VanillaDataManager):
- pass
-
- try:
- globals()["tmp"] = tmp
- assert tmp(config).dataset_type is InputDataset
- obj = yaml.load(yaml.dump(tmp(config)), Loader=yaml.UnsafeLoader)
- assert obj.dataset_type is InputDataset
- assert isinstance(obj, tmp)
- finally:
- globals().pop("tmp")
-
- class tmp2(VanillaDataManager[DepthDataset]):
- pass
-
- try:
- globals()["tmp2"] = tmp2
-
- assert tmp2(config).dataset_type is DepthDataset
- obj = yaml.load(yaml.dump(tmp2(config)), Loader=yaml.UnsafeLoader)
- assert obj.dataset_type is DepthDataset
- assert isinstance(obj, tmp2)
- finally:
- globals().pop("tmp2")
-
-
-def _dummy_function():
- return True
-
-
-def test_deserialize_config1():
- with open(Path(__file__).parent / "configs" / "test_config1.yml", "r") as f:
- config_str = f.read()
- obj = yaml.load(config_str, Loader=yaml.Loader)
- obj.pipeline.datamanager.collate_fn([1, 2, 3])
-
-
-def test_deserialize_config2():
- with open(Path(__file__).parent / "configs" / "test_config2.yml", "r") as f:
- config_str = f.read()
- obj = yaml.load(config_str, Loader=yaml.Loader)
- obj.pipeline.datamanager.collate_fn([1, 2, 3])
diff --git a/tests/dataparsers/test_nerfstudio_dataparser.py b/tests/dataparsers/test_nerfstudio_dataparser.py
deleted file mode 100644
index 3f7bf06d..00000000
--- a/tests/dataparsers/test_nerfstudio_dataparser.py
+++ /dev/null
@@ -1,95 +0,0 @@
-"""
-Nerfstudio dataparser
-"""
-
-import json
-from pathlib import Path
-
-import numpy as np
-import pytest
-from PIL import Image
-from pytest import fixture
-
-
-@fixture
-def mocked_dataset(tmp_path: Path):
- """Mocked dataset with transforms"""
- (tmp_path / "images_4").mkdir()
- frames = []
- for i in range(10):
- Image.new("RGB", (100, 150)).save(tmp_path / "images_4" / f"img_{i}.png")
- frames.append(
- {
- "file_path": f"img_{i}.png",
- "transform_matrix": np.eye(4).tolist(),
- }
- )
- with (tmp_path / "transforms.json").open("w+", encoding="utf8") as f:
- json.dump({"fl_x": 2, "fl_y": 3, "cx": 4, "cy": 5, "h": 150, "w": 100, "frames": frames}, f)
- return tmp_path
-
-
-@pytest.mark.parametrize("orientation_method", ["up", "none", "pca"])
-def test_nerfstudio_dataparser_no_filelist(mocked_dataset, orientation_method):
- """Tests basic load"""
- assert (mocked_dataset / "images_4").exists()
- from nerfstudio.data.dataparsers.nerfstudio_dataparser import (
- DataparserOutputs,
- Nerfstudio,
- NerfstudioDataParserConfig,
- )
-
- parser: Nerfstudio = NerfstudioDataParserConfig(
- data=mocked_dataset,
- downscale_factor=4,
- orientation_method=orientation_method,
- center_method="none",
- auto_scale_poses=False,
- ).setup()
-
- paths = set()
- for split in ("train", "test", "val"):
- out = parser.get_dataparser_outputs(split)
- assert isinstance(out, DataparserOutputs)
- assert len(out.image_filenames) > 0
- paths.update(out.image_filenames)
- train_files = set(parser.get_dataparser_outputs("train").image_filenames)
- assert len(train_files.intersection(parser.get_dataparser_outputs("val").image_filenames)) == 0
- assert len(train_files.intersection(parser.get_dataparser_outputs("test").image_filenames)) == 0
- assert len(paths) == 10
-
-
-def test_nerfstudio_dataparser_split_filelist(mocked_dataset):
- """Tests basic load"""
- assert (mocked_dataset / "images_4").exists()
- with open(mocked_dataset / "transforms.json", "r+") as f:
- data = json.load(f)
- data["train_filenames"] = ["img_0.png", "img_1.png"]
- data["val_filenames"] = ["img_2.png", "img_3.png"]
- data["test_filenames"] = ["img_4.png", "img_5.png"]
- f.seek(0)
- f.truncate(0)
- json.dump(data, f)
-
- from nerfstudio.data.dataparsers.nerfstudio_dataparser import Nerfstudio, NerfstudioDataParserConfig
-
- parser: Nerfstudio = NerfstudioDataParserConfig(
- data=mocked_dataset,
- downscale_factor=4,
- orientation_method="none",
- center_method="none",
- auto_scale_poses=False,
- ).setup()
-
- assert parser.get_dataparser_outputs("train").image_filenames == [
- mocked_dataset / "images_4/img_0.png",
- mocked_dataset / "images_4/img_1.png",
- ]
- assert parser.get_dataparser_outputs("val").image_filenames == [
- mocked_dataset / "images_4/img_2.png",
- mocked_dataset / "images_4/img_3.png",
- ]
- assert parser.get_dataparser_outputs("test").image_filenames == [
- mocked_dataset / "images_4/img_4.png",
- mocked_dataset / "images_4/img_5.png",
- ]
diff --git a/tests/model_components/test_renderers.py b/tests/model_components/test_renderers.py
index a1e09838..b0933e52 100644
--- a/tests/model_components/test_renderers.py
+++ b/tests/model_components/test_renderers.py
@@ -69,7 +69,7 @@ def test_depth_renderer():
ray_samples = RaySamples(
frustums=frustums,
- camera_indices=torch.ones((num_samples, 1)),
+ camera_indices=torch.ones((num_samples, 1), dtype=torch.long),
deltas=torch.ones((num_samples, 1)),
)
diff --git a/tests/pipelines/test_vanilla_pipeline.py b/tests/pipelines/test_vanilla_pipeline.py
index 3bb73e06..93cc5995 100644
--- a/tests/pipelines/test_vanilla_pipeline.py
+++ b/tests/pipelines/test_vanilla_pipeline.py
@@ -36,6 +36,10 @@ def to(self, *args, **kwargs):
"Mocked to"
return self
+ def get_num_train_data(self):
+ """Mocked get_num_train_data"""
+ return 0
+
def test_load_state_dict():
"""Test pipeline load_state_dict calls model's load_state_dict"""
diff --git a/tests/plugins/test_registry.py b/tests/plugins/test_registry.py
index c17d88e1..9a973c75 100644
--- a/tests/plugins/test_registry.py
+++ b/tests/plugins/test_registry.py
@@ -19,10 +19,12 @@
@dataclass
class TestConfigClass(MethodSpecification):
- config: TrainerConfig = TrainerConfig(
- method_name="test-method",
- pipeline=VanillaPipelineConfig(),
- optimizers={},
+ config: TrainerConfig = field(
+ default_factory=lambda: TrainerConfig(
+ method_name="test-method",
+ pipeline=VanillaPipelineConfig(),
+ optimizers={},
+ )
)
description: str = "Test description"
diff --git a/tests/process_data/test_process_images.py b/tests/process_data/test_process_images.py
deleted file mode 100644
index fd506b1f..00000000
--- a/tests/process_data/test_process_images.py
+++ /dev/null
@@ -1,210 +0,0 @@
-"""
-Process images test
-"""
-import os
-from pathlib import Path
-
-import numpy as np
-import torch
-from PIL import Image
-
-from nerfstudio.data.dataparsers.nerfstudio_dataparser import NerfstudioDataParserConfig
-from nerfstudio.data.utils.colmap_parsing_utils import (
- Camera,
- Image as ColmapImage,
- Point3D,
- qvec2rotmat,
- write_cameras_binary,
- write_images_binary,
- write_points3D_binary,
-)
-from nerfstudio.process_data.images_to_nerfstudio_dataset import ImagesToNerfstudioDataset
-
-
-def random_quaternion(num_poses: int):
- """
- Generates random rotation quaternion.
- """
- u, v, w = np.random.uniform(size=(3, num_poses))
- quaternion = np.stack(
- (
- np.sqrt(1 - u) * np.sin(2 * np.pi * v),
- np.sqrt(1 - u) * np.cos(2 * np.pi * v),
- np.sqrt(u) * np.sin(2 * np.pi * w),
- np.sqrt(u) * np.cos(2 * np.pi * w),
- ),
- -1,
- )
- return quaternion
-
-
-def test_process_images_skip_colmap(tmp_path: Path):
- """
- Test ns-process-data images
- """
- # Mock a colmap sparse model
- width = 100
- height = 150
- sparse_path = tmp_path / "sparse" / "0"
- sparse_path.mkdir(exist_ok=True, parents=True)
- (tmp_path / "images").mkdir(exist_ok=True, parents=True)
- write_cameras_binary(
- {1: Camera(1, "OPENCV", width, height, [110, 110, 50, 75, 0, 0, 0, 0, 0, 0])},
- sparse_path / "cameras.bin",
- )
- write_points3D_binary(
- {
- 1: Point3D(
- id=1,
- xyz=np.array([0, 0, 0]),
- rgb=np.array([0, 0, 0]),
- error=np.array([0]),
- image_ids=np.array([1]),
- point2D_idxs=np.array([0]),
- ),
- },
- sparse_path / "points3D.bin",
- )
- frames = {}
- num_frames = 10
- qvecs = random_quaternion(num_frames)
- tvecs = np.random.uniform(size=(num_frames, 3))
- original_poses = np.concatenate(
- (
- np.concatenate(
- (
- np.stack(list(map(qvec2rotmat, qvecs))),
- tvecs[:, :, None],
- ),
- -1,
- ),
- np.array([[[0, 0, 0, 1]]], dtype=qvecs.dtype).repeat(num_frames, 0),
- ),
- -2,
- )
- for i in range(num_frames):
- frames[i + 1] = ColmapImage(i + 1, qvecs[i], tvecs[i], 1, f"image_{i}.png", [], [])
- Image.new("RGB", (width, height)).save(tmp_path / "images" / f"image_{i}.png")
- write_images_binary(frames, sparse_path / "images.bin")
-
- # Mock missing COLMAP and ffmpeg in the dev env
- old_path = os.environ.get("PATH", "")
- os.environ["PATH"] = str(tmp_path / "mocked_bin") + f":{old_path}"
- (tmp_path / "mocked_bin").mkdir()
- (tmp_path / "mocked_bin" / "colmap").touch(mode=0o777)
- (tmp_path / "mocked_bin" / "ffmpeg").touch(mode=0o777)
-
- # Convert images into a NerfStudio dataset
- cmd = ImagesToNerfstudioDataset(
- data=tmp_path / "images", output_dir=tmp_path / "nerfstudio", colmap_model_path=sparse_path, skip_colmap=True
- )
- cmd.main()
- os.environ["PATH"] = old_path
-
- assert (tmp_path / "nerfstudio" / "transforms.json").exists()
- parser = NerfstudioDataParserConfig(
- data=tmp_path / "nerfstudio",
- downscale_factor=None,
- orientation_method="none", # orientation_method,
- center_method="none",
- auto_scale_poses=False,
- ).setup()
- outputs = parser.get_dataparser_outputs("train")
- assert len(outputs.image_filenames) == 9
- assert torch.is_tensor(outputs.dataparser_transform)
-
- # Test if the original poses can be obtained back
- gt_poses = original_poses[[0, 1, 2, 3, 4, 5, 6, 7, 9]]
- dataparser_poses = outputs.transform_poses_to_original_space(outputs.cameras.camera_to_worlds, "opencv").numpy()
- dataparser_poses = np.concatenate(
- (dataparser_poses, np.array([[[0, 0, 0, 1]]]).repeat(len(dataparser_poses), 0)), 1
- )
- dataparser_poses = np.linalg.inv(dataparser_poses)
- np.testing.assert_allclose(gt_poses, dataparser_poses, rtol=0, atol=1e-5)
-
-
-def test_process_images_recursively_skip_colmap(tmp_path: Path):
- """
- Test ns-process-data images when images contains subdirectories"
- """
- # Mock a colmap sparse model
- width = 100
- height = 150
- sparse_path = tmp_path / "sparse" / "0"
- sparse_path.mkdir(exist_ok=True, parents=True)
- (tmp_path / "images").mkdir(exist_ok=True, parents=True)
- write_cameras_binary(
- {1: Camera(1, "OPENCV", width, height, [110, 110, 50, 75, 0, 0, 0, 0, 0, 0])},
- sparse_path / "cameras.bin",
- )
- write_points3D_binary(
- {
- 1: Point3D(
- id=1,
- xyz=np.array([0, 0, 0]),
- rgb=np.array([0, 0, 0]),
- error=np.array([0]),
- image_ids=np.array([1]),
- point2D_idxs=np.array([0]),
- ),
- },
- sparse_path / "points3D.bin",
- )
- frames = {}
- num_frames = 9
- num_subdirs = 3
- qvecs = random_quaternion(num_frames)
- tvecs = np.random.uniform(size=(num_frames, 3))
- original_poses = np.concatenate(
- (
- np.concatenate(
- (
- np.stack(list(map(qvec2rotmat, qvecs))),
- tvecs[:, :, None],
- ),
- -1,
- ),
- np.array([[[0, 0, 0, 1]]], dtype=qvecs.dtype).repeat(num_frames, 0),
- ),
- -2,
- )
- for i in range(num_frames):
- subdir = f"subdir_{num_frames // num_subdirs}"
- frames[i + 1] = ColmapImage(i + 1, qvecs[i], tvecs[i], 1, f"{subdir}/image_{i}.png", [], [])
- (tmp_path / "images" / subdir).mkdir(parents=True, exist_ok=True)
- Image.new("RGB", (width, height)).save(tmp_path / "images" / subdir / f"image_{i}.png")
- write_images_binary(frames, sparse_path / "images.bin")
-
- # Mock missing COLMAP and ffmpeg in the dev env
- old_path = os.environ.get("PATH", "")
- os.environ["PATH"] = str(tmp_path / "mocked_bin") + f":{old_path}"
- (tmp_path / "mocked_bin").mkdir()
- (tmp_path / "mocked_bin" / "colmap").touch(mode=0o777)
- (tmp_path / "mocked_bin" / "ffmpeg").touch(mode=0o777)
-
- # Convert images into a NerfStudio dataset
- cmd = ImagesToNerfstudioDataset(
- data=tmp_path / "images", output_dir=tmp_path / "nerfstudio", colmap_model_path=sparse_path, skip_colmap=True
- )
- cmd.main()
- os.environ["PATH"] = old_path
-
- assert (tmp_path / "nerfstudio" / "transforms.json").exists()
- parser = NerfstudioDataParserConfig(
- data=tmp_path / "nerfstudio",
- downscale_factor=None,
- orientation_method="none", # orientation_method,
- center_method="none",
- auto_scale_poses=False,
- ).setup()
- outputs = parser.get_dataparser_outputs("train")
- assert len(outputs.image_filenames) == 9
- assert torch.is_tensor(outputs.dataparser_transform)
-
- # Test if the original poses can be obtained back
- dataparser_poses = outputs.transform_poses_to_original_space(outputs.cameras.camera_to_worlds, "opencv").numpy()
- dataparser_poses = np.concatenate(
- (dataparser_poses, np.array([[[0, 0, 0, 1]]]).repeat(len(dataparser_poses), 0)), 1
- )
- dataparser_poses = np.linalg.inv(dataparser_poses)
- np.testing.assert_allclose(original_poses, dataparser_poses, rtol=0, atol=1e-5)
diff --git a/tests/test_train.py b/tests/test_train.py
deleted file mode 100644
index f1b3faf9..00000000
--- a/tests/test_train.py
+++ /dev/null
@@ -1,93 +0,0 @@
-"""
-Default test to make sure train runs
-"""
-
-from __future__ import annotations
-
-from pathlib import Path
-
-import pytest
-
-from nerfstudio.configs.method_configs import method_configs
-from nerfstudio.data.dataparsers.blender_dataparser import BlenderDataParserConfig
-from nerfstudio.data.dataparsers.minimal_dataparser import MinimalDataParserConfig
-from nerfstudio.engine.trainer import TrainerConfig
-from nerfstudio.models.vanilla_nerf import VanillaModelConfig
-from nerfstudio.scripts.train import train_loop
-
-BLACKLIST = [
- "base",
- "semantic-nerfw",
- "instant-ngp",
- "instant-ngp-bounded",
- "nerfacto-big",
- "phototourism",
- "depth-nerfacto",
- "neus",
- "generfacto",
- "neus-facto",
- "splatfacto",
- "splatfacto-big",
-]
-
-
-def set_reduced_config(config: TrainerConfig, tmp_path: Path):
- """Reducing the config settings to speedup test"""
- config.machine.device_type = "cpu"
- if hasattr(config.pipeline.model, "implementation"):
- setattr(config.pipeline.model, "implementation", "torch")
- config.mixed_precision = False
- config.use_grad_scaler = False
- config.max_num_iterations = 2
- # reduce dataset factors; set dataset to test
- config.pipeline.datamanager.dataparser = BlenderDataParserConfig(data=Path("tests/data/lego_test"))
- config.pipeline.datamanager.train_num_images_to_sample_from = 1
- config.pipeline.datamanager.train_num_rays_per_batch = 4
-
- # use tensorboard logging instead of wandb
- config.vis = "tensorboard"
- config.logging.relative_log_dir = Path("/tmp/")
-
- # reduce model factors
- if hasattr(config.pipeline.model, "num_coarse_samples"):
- assert isinstance(config.pipeline.model, VanillaModelConfig)
- config.pipeline.model.num_coarse_samples = 4
- if hasattr(config.pipeline.model, "num_importance_samples"):
- assert isinstance(config.pipeline.model, VanillaModelConfig)
- config.pipeline.model.num_importance_samples = 4
- # remove viewer
- config.viewer.quit_on_train_completion = True
-
- # timestamp & output directory
- config.set_timestamp()
- config.output_dir = tmp_path / "outputs"
-
- return config
-
-
-@pytest.mark.filterwarnings("ignore::DeprecationWarning")
-def test_train(tmp_path: Path):
- """test run train script works properly"""
- all_config_names = method_configs.keys()
- for config_name in all_config_names:
- if config_name in BLACKLIST:
- print("skipping", config_name)
- continue
- print(f"testing run for: {config_name}")
- config = method_configs[config_name]
- config = set_reduced_config(config, tmp_path)
-
- train_loop(local_rank=0, world_size=0, config=config)
-
-
-def test_simple_io(tmp_path: Path):
- """test to check minimal data IO works correctly"""
- config = method_configs["vanilla-nerf"]
- config.pipeline.datamanager.dataparser = MinimalDataParserConfig(data=Path("tests/data/minimal_parser"))
- config = set_reduced_config(config, tmp_path)
- train_loop(local_rank=0, world_size=0, config=config)
-
-
-if __name__ == "__main__":
- test_train()
- test_simple_io()