From df53a72d4b8cc298d7d2b5931c5a47eacb5a7991 Mon Sep 17 00:00:00 2001 From: Piero Toffanin Date: Mon, 11 Mar 2024 14:23:47 -0400 Subject: [PATCH 01/19] CPU gsplat implementation scaffold --- CMakeLists.txt | 45 ++++++--- gsplat.hpp | 5 + model.cpp | 2 +- project_gaussians.cpp | 2 +- rasterize_gaussians.cpp | 2 +- simple_trainer.cpp | 27 ----- spherical_harmonics.cpp | 2 +- vendor/gsplat-cpu/bindings.h | 130 ++++++++++++++++++++++++ vendor/gsplat-cpu/gsplat_cpu.cpp | 163 +++++++++++++++++++++++++++++++ 9 files changed, 333 insertions(+), 45 deletions(-) create mode 100644 gsplat.hpp create mode 100644 vendor/gsplat-cpu/bindings.h create mode 100644 vendor/gsplat-cpu/gsplat_cpu.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index e298ada..5e4d3dd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,14 +9,14 @@ if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel." FORCE) endif() -enable_language(${GPU_RUNTIME}) -set(CMAKE_${GPU_RUNTIME}_STANDARD 17) -set(${GPU_RUNTIME}_STANDARD 17) - if(GPU_RUNTIME STREQUAL "CUDA") set(CMAKE_CUDA_ARCHITECTURES 70 75) - find_package(CUDAToolkit REQUIRED) -else() + find_package(CUDAToolkit) + if (CUDAToolkit-NOTFOUND) + message(WARNING "CUDA toolkit not found, building with CPU support only") + set(GPU_RUNTIME "CPU") + endif() +elseif(GPU_RUNTIME STREQUAL "HIP") set(USE_HIP ON CACHE BOOL "Use HIP for GPU acceleration") if(NOT DEFINED HIP_PATH) @@ -40,6 +40,15 @@ else() list(APPEND CMAKE_PREFIX_PATH "${ROCM_ROOT}") endif() +if((GPU_RUNTIME STREQUAL "CUDA") OR (GPU_RUNTIME STREQUAL "HIP")) + enable_language(${GPU_RUNTIME}) + set(CMAKE_${GPU_RUNTIME}_STANDARD 17) + set(${GPU_RUNTIME}_STANDARD 17) +else() + # CPU + set(CMAKE_CXX_STANDARD 17) +endif() + if (NOT WIN32 AND NOT APPLE) set(STDPPFS_LIBRARY stdc++fs) endif() @@ -52,13 +61,18 @@ if (NOT WIN32 AND NOT APPLE) endif() set(OpenCV_LIBS opencv_core opencv_imgproc opencv_highgui opencv_calib3d) -add_library(gsplat vendor/gsplat/forward.cu vendor/gsplat/backward.cu vendor/gsplat/bindings.cu vendor/gsplat/helpers.cuh) -if(GPU_RUNTIME STREQUAL "CUDA") - set(GPU_LIBRARIES "cuda") - target_link_libraries(gsplat PUBLIC cuda) -else(GPU_RUNTIME STREQUAL "HIP") - set(GPU_INCLUDE_DIRS "${ROCM_ROOT}/include") - target_compile_definitions(gsplat PRIVATE USE_HIP __HIP_PLATFORM_AMD__) +if((GPU_RUNTIME STREQUAL "CUDA") OR (GPU_RUNTIME STREQUAL "HIP")) + add_library(gsplat vendor/gsplat/forward.cu vendor/gsplat/backward.cu vendor/gsplat/bindings.cu vendor/gsplat/helpers.cuh) + if(GPU_RUNTIME STREQUAL "CUDA") + set(GPU_LIBRARIES "cuda") + target_link_libraries(gsplat PUBLIC cuda) + set_target_properties(gsplat PROPERTIES CUDA_ARCHITECTURES "70;75") + else(GPU_RUNTIME STREQUAL "HIP") + set(GPU_INCLUDE_DIRS "${ROCM_ROOT}/include") + target_compile_definitions(gsplat PRIVATE USE_HIP __HIP_PLATFORM_AMD__) + endif() +else() + add_library(gsplat vendor/gsplat-cpu/gsplat_cpu.cpp) endif() target_include_directories(gsplat PRIVATE @@ -67,7 +81,6 @@ target_include_directories(gsplat PRIVATE ${TORCH_INCLUDE_DIRS} ) set_target_properties(gsplat PROPERTIES LINKER_LANGUAGE CXX) -set_target_properties(gsplat PROPERTIES CUDA_ARCHITECTURES "70;75") add_executable(opensplat opensplat.cpp point_io.cpp nerfstudio.cpp model.cpp kdtree_tensor.cpp spherical_harmonics.cpp cv_utils.cpp utils.cpp project_gaussians.cpp rasterize_gaussians.cpp ssim.cpp optim_scheduler.cpp colmap.cpp input_data.cpp tensor_math.cpp) set_property(TARGET opensplat PROPERTY CXX_STANDARD 17) @@ -75,6 +88,8 @@ target_include_directories(opensplat PRIVATE ${PROJECT_SOURCE_DIR}/vendor/glm ${ target_link_libraries(opensplat PUBLIC ${STDPPFS_LIBRARY} ${GPU_LIBRARIES} gsplat ${TORCH_LIBRARIES} ${OpenCV_LIBS}) if(GPU_RUNTIME STREQUAL "HIP") target_compile_definitions(opensplat PRIVATE USE_HIP __HIP_PLATFORM_AMD__) +elseif(GPU_RUNTIME STREQUAL "CUDA") + target_compile_definitions(opensplat PRIVATE USE_CUDA) endif() if(OPENSPLAT_BUILD_SIMPLE_TRAINER) @@ -84,6 +99,8 @@ if(OPENSPLAT_BUILD_SIMPLE_TRAINER) set_property(TARGET simple_trainer PROPERTY CXX_STANDARD 17) if(GPU_RUNTIME STREQUAL "HIP") target_compile_definitions(simple_trainer PRIVATE USE_HIP __HIP_PLATFORM_AMD__) + elseif(GPU_RUNTIME STREQUAL "CUDA") + target_compile_definitions(simple_trainer PRIVATE USE_CUDA) endif() endif() diff --git a/gsplat.hpp b/gsplat.hpp new file mode 100644 index 0000000..c0b2408 --- /dev/null +++ b/gsplat.hpp @@ -0,0 +1,5 @@ +#if defined(USE_HIP) || defined(USE_CUDA) +#include "vendor/gsplat/bindings.h" +#else +#include "vendor/gsplat-cpu/bindings.h" +#endif \ No newline at end of file diff --git a/model.cpp b/model.cpp index 915c334..401be84 100644 --- a/model.cpp +++ b/model.cpp @@ -4,7 +4,7 @@ #include "project_gaussians.hpp" #include "rasterize_gaussians.hpp" #include "tensor_math.hpp" -#include "vendor/gsplat/config.h" +#include "gsplat.hpp" torch::Tensor randomQuatTensor(long long n){ torch::Tensor u = torch::rand(n); diff --git a/project_gaussians.cpp b/project_gaussians.cpp index f479919..d899362 100644 --- a/project_gaussians.cpp +++ b/project_gaussians.cpp @@ -1,5 +1,5 @@ #include "project_gaussians.hpp" -#include "vendor/gsplat/bindings.h" +#include "gsplat.hpp" variable_list ProjectGaussians::forward(AutogradContext *ctx, torch::Tensor means, diff --git a/rasterize_gaussians.cpp b/rasterize_gaussians.cpp index f7cb655..42d3bdc 100644 --- a/rasterize_gaussians.cpp +++ b/rasterize_gaussians.cpp @@ -1,5 +1,5 @@ #include "rasterize_gaussians.hpp" -#include "vendor/gsplat/bindings.h" +#include "gsplat.hpp" #include "vendor/gsplat/config.h" std::tuple() << std::endl; diff --git a/spherical_harmonics.cpp b/spherical_harmonics.cpp index c88b462..c6d4036 100644 --- a/spherical_harmonics.cpp +++ b/spherical_harmonics.cpp @@ -1,5 +1,5 @@ #include "spherical_harmonics.hpp" -#include "vendor/gsplat/bindings.h" +#include "gsplat.hpp" int numShBases(int degree){ switch(degree){ diff --git a/vendor/gsplat-cpu/bindings.h b/vendor/gsplat-cpu/bindings.h new file mode 100644 index 0000000..12daf87 --- /dev/null +++ b/vendor/gsplat-cpu/bindings.h @@ -0,0 +1,130 @@ +#include +#include +#include +#include +#include + +torch::Tensor compute_sh_forward_tensor( + unsigned num_points, + unsigned degree, + unsigned degrees_to_use, + torch::Tensor &viewdirs, + torch::Tensor &coeffs +); + +torch::Tensor compute_sh_backward_tensor( + unsigned num_points, + unsigned degree, + unsigned degrees_to_use, + torch::Tensor &viewdirs, + torch::Tensor &v_colors +); + +std::tuple< + torch::Tensor, + torch::Tensor, + torch::Tensor, + torch::Tensor, + torch::Tensor, + torch::Tensor> +project_gaussians_forward_tensor( + const int num_points, + torch::Tensor &means3d, + torch::Tensor &scales, + const float glob_scale, + torch::Tensor &quats, + torch::Tensor &viewmat, + torch::Tensor &projmat, + const float fx, + const float fy, + const float cx, + const float cy, + const unsigned img_height, + const unsigned img_width, + const std::tuple tile_bounds, + const float clip_thresh +); + +std::tuple< + torch::Tensor, + torch::Tensor, + torch::Tensor, + torch::Tensor, + torch::Tensor> +project_gaussians_backward_tensor( + const int num_points, + torch::Tensor &means3d, + torch::Tensor &scales, + const float glob_scale, + torch::Tensor &quats, + torch::Tensor &viewmat, + torch::Tensor &projmat, + const float fx, + const float fy, + const float cx, + const float cy, + const unsigned img_height, + const unsigned img_width, + torch::Tensor &cov3d, + torch::Tensor &radii, + torch::Tensor &conics, + torch::Tensor &v_xy, + torch::Tensor &v_depth, + torch::Tensor &v_conic +); + + +std::tuple map_gaussian_to_intersects_tensor( + const int num_points, + const int num_intersects, + const torch::Tensor &xys, + const torch::Tensor &depths, + const torch::Tensor &radii, + const torch::Tensor &cum_tiles_hit, + const std::tuple tile_bounds +); + +torch::Tensor get_tile_bin_edges_tensor( + int num_intersects, + const torch::Tensor &isect_ids_sorted +); + +std::tuple< + torch::Tensor, + torch::Tensor, + torch::Tensor +> rasterize_forward_tensor( + const std::tuple tile_bounds, + const std::tuple block, + const std::tuple img_size, + const torch::Tensor &gaussian_ids_sorted, + const torch::Tensor &tile_bins, + const torch::Tensor &xys, + const torch::Tensor &conics, + const torch::Tensor &colors, + const torch::Tensor &opacities, + const torch::Tensor &background +); + +std:: + tuple< + torch::Tensor, // dL_dxy + torch::Tensor, // dL_dconic + torch::Tensor, // dL_dcolors + torch::Tensor // dL_dopacity + > + rasterize_backward_tensor( + const unsigned img_height, + const unsigned img_width, + const torch::Tensor &gaussians_ids_sorted, + const torch::Tensor &tile_bins, + const torch::Tensor &xys, + const torch::Tensor &conics, + const torch::Tensor &colors, + const torch::Tensor &opacities, + const torch::Tensor &background, + const torch::Tensor &final_Ts, + const torch::Tensor &final_idx, + const torch::Tensor &v_output, // dL_dout_color + const torch::Tensor &v_output_alpha + ); diff --git a/vendor/gsplat-cpu/gsplat_cpu.cpp b/vendor/gsplat-cpu/gsplat_cpu.cpp new file mode 100644 index 0000000..9d557d1 --- /dev/null +++ b/vendor/gsplat-cpu/gsplat_cpu.cpp @@ -0,0 +1,163 @@ +#include "bindings.h" + +#include +#include +#include +#include +#include + +using namespace torch::indexing; + +torch::Tensor compute_sh_forward_tensor( + unsigned num_points, + unsigned degree, + unsigned degrees_to_use, + torch::Tensor &viewdirs, + torch::Tensor &coeffs +){ + return torch::Tensor(); +} + +torch::Tensor compute_sh_backward_tensor( + unsigned num_points, + unsigned degree, + unsigned degrees_to_use, + torch::Tensor &viewdirs, + torch::Tensor &v_colors +){ + return torch::Tensor(); +} + +std::tuple< + torch::Tensor, + torch::Tensor, + torch::Tensor, + torch::Tensor, + torch::Tensor, + torch::Tensor> +project_gaussians_forward_tensor( + const int num_points, + torch::Tensor &means3d, + torch::Tensor &scales, + const float glob_scale, + torch::Tensor &quats, + torch::Tensor &viewmat, + torch::Tensor &projmat, + const float fx, + const float fy, + const float cx, + const float cy, + const unsigned img_height, + const unsigned img_width, + const std::tuple tile_bounds, + const float clip_thresh +){ + float fovx = 0.5f * static_cast(img_width) / fx; + float fovy = 0.5f * static_cast(img_height) / fy; + + // clip_near_plane + torch::Tensor R = viewmat.index({"...", Slice(None, 3), Slice(None, 3)}); + torch::Tensor T = viewmat.index({"...", Slice(None, 3), 3}); + torch::Tensor pView = torch::matmul(R, means3d.index({"...", None})).index({"...", 0}) + T; + torch::Tensor isClose = pView.index({"...", 2}) < clip_thresh; + + + + + return std::make_tuple(torch::Tensor(), torch::Tensor(), torch::Tensor(), torch::Tensor(), torch::Tensor(), torch::Tensor()); +} + +std::tuple< + torch::Tensor, + torch::Tensor, + torch::Tensor, + torch::Tensor, + torch::Tensor> +project_gaussians_backward_tensor( + const int num_points, + torch::Tensor &means3d, + torch::Tensor &scales, + const float glob_scale, + torch::Tensor &quats, + torch::Tensor &viewmat, + torch::Tensor &projmat, + const float fx, + const float fy, + const float cx, + const float cy, + const unsigned img_height, + const unsigned img_width, + torch::Tensor &cov3d, + torch::Tensor &radii, + torch::Tensor &conics, + torch::Tensor &v_xy, + torch::Tensor &v_depth, + torch::Tensor &v_conic +){ + return std::make_tuple(torch::Tensor(), torch::Tensor(), torch::Tensor(), torch::Tensor(), torch::Tensor()); +} + + +std::tuple map_gaussian_to_intersects_tensor( + const int num_points, + const int num_intersects, + const torch::Tensor &xys, + const torch::Tensor &depths, + const torch::Tensor &radii, + const torch::Tensor &cum_tiles_hit, + const std::tuple tile_bounds +){ + return std::make_tuple(torch::Tensor(), torch::Tensor()); +} + +torch::Tensor get_tile_bin_edges_tensor( + int num_intersects, + const torch::Tensor &isect_ids_sorted +){ + return torch::Tensor(); +} + +std::tuple< + torch::Tensor, + torch::Tensor, + torch::Tensor +> rasterize_forward_tensor( + const std::tuple tile_bounds, + const std::tuple block, + const std::tuple img_size, + const torch::Tensor &gaussian_ids_sorted, + const torch::Tensor &tile_bins, + const torch::Tensor &xys, + const torch::Tensor &conics, + const torch::Tensor &colors, + const torch::Tensor &opacities, + const torch::Tensor &background +){ + return std::make_tuple(torch::Tensor(), torch::Tensor(), torch::Tensor()); +} + + +std:: + tuple< + torch::Tensor, // dL_dxy + torch::Tensor, // dL_dconic + torch::Tensor, // dL_dcolors + torch::Tensor // dL_dopacity + > + rasterize_backward_tensor( + const unsigned img_height, + const unsigned img_width, + const torch::Tensor &gaussians_ids_sorted, + const torch::Tensor &tile_bins, + const torch::Tensor &xys, + const torch::Tensor &conics, + const torch::Tensor &colors, + const torch::Tensor &opacities, + const torch::Tensor &background, + const torch::Tensor &final_Ts, + const torch::Tensor &final_idx, + const torch::Tensor &v_output, // dL_dout_color + const torch::Tensor &v_output_alpha + ){ + return std::make_tuple(torch::Tensor(), torch::Tensor(), torch::Tensor(), torch::Tensor()); +} \ No newline at end of file From 0fc488a95eb9376464a4f107ccc6a22f66205554 Mon Sep 17 00:00:00 2001 From: Piero Toffanin Date: Mon, 11 Mar 2024 16:28:06 -0400 Subject: [PATCH 02/19] Add compute_cov2d_bounds --- vendor/gsplat-cpu/bindings.h | 2 + vendor/gsplat-cpu/gsplat_cpu.cpp | 82 ++++++++++++++++++++++++++++++-- 2 files changed, 81 insertions(+), 3 deletions(-) diff --git a/vendor/gsplat-cpu/bindings.h b/vendor/gsplat-cpu/bindings.h index 12daf87..d831b4b 100644 --- a/vendor/gsplat-cpu/bindings.h +++ b/vendor/gsplat-cpu/bindings.h @@ -1,3 +1,5 @@ +// Originally based on https://github.dev/nerfstudio-project/gsplat + #include #include #include diff --git a/vendor/gsplat-cpu/gsplat_cpu.cpp b/vendor/gsplat-cpu/gsplat_cpu.cpp index 9d557d1..7b2f31b 100644 --- a/vendor/gsplat-cpu/gsplat_cpu.cpp +++ b/vendor/gsplat-cpu/gsplat_cpu.cpp @@ -1,3 +1,6 @@ +// Originally based on https://github.dev/nerfstudio-project/gsplat +// This implementation is licensed under the AGPLv3 + #include "bindings.h" #include @@ -8,6 +11,32 @@ using namespace torch::indexing; +torch::Tensor quatToRotMat(const torch::Tensor &quat){ + auto u = torch::unbind(torch::nn::functional::normalize(quat, torch::nn::functional::NormalizeFuncOptions().dim(-1)), -1); + torch::Tensor w = u[0]; + torch::Tensor x = u[1]; + torch::Tensor y = u[2]; + torch::Tensor z = u[3]; + return torch::stack({ + torch::stack({ + 1.0 - 2.0 * (y.pow(2) + z.pow(2)), + 2.0 * (x * y - w * z), + 2.0 * (x * z + w * y) + }, -1), + torch::stack({ + 2.0 * (x * y + w * z), + 1.0 - 2.0 * (x.pow(2) + z.pow(2)), + 2.0 * (y * z - w * x) + }, -1), + torch::stack({ + 2.0 * (x * z - w * y), + 2.0 * (y * z + w * x), + 1.0 - 2.0 * (x.pow(2) + y.pow(2)) + }, -1) + }, -2); + +} + torch::Tensor compute_sh_forward_tensor( unsigned num_points, unsigned degree, @@ -56,12 +85,59 @@ project_gaussians_forward_tensor( float fovy = 0.5f * static_cast(img_height) / fy; // clip_near_plane - torch::Tensor R = viewmat.index({"...", Slice(None, 3), Slice(None, 3)}); - torch::Tensor T = viewmat.index({"...", Slice(None, 3), 3}); - torch::Tensor pView = torch::matmul(R, means3d.index({"...", None})).index({"...", 0}) + T; + torch::Tensor Rclip = viewmat.index({"...", Slice(None, 3), Slice(None, 3)}); + torch::Tensor Tclip = viewmat.index({"...", Slice(None, 3), 3}); + torch::Tensor pView = torch::matmul(Rclip, means3d.index({"...", None})).index({"...", 0}) + Tclip; torch::Tensor isClose = pView.index({"...", 2}) < clip_thresh; + // scale_rot_to_cov3d + torch::Tensor R = quatToRotMat(quats); + torch::Tensor M = R * glob_scale * scales.index({"...", None, Slice()}); + torch::Tensor cov3d = torch::matmul(M, M.transpose(-1, -2)); + + // project_cov3d_ewa + torch::Tensor W = viewmat.index({"...", Slice(None, 3), Slice(None, 3)}); + torch::Tensor p = viewmat.index({"...", Slice(None, 3), 3}); + torch::Tensor t = torch::matmul(W, means3d.index({"...", None})).index({"...", 0}) + p; + + torch::Tensor limX = 1.3f * torch::tensor({fovx}, means3d.device()); + torch::Tensor limY = 1.3f * torch::tensor({fovy}, means3d.device()); + + torch::Tensor minLimX = t.index({"...", 2}) * torch::min(limX, torch::max(-limX, t.index({"...", 0}) / t.index({"...", 2}))); + torch::Tensor minLimY = t.index({"...", 2}) * torch::min(limY, torch::max(-limY, t.index({"...", 1}) / t.index({"...", 2}))); + t = torch::cat({minLimX.index({"...", None}), minLimY.index({"...", None}), t.index({"...", 2, None})}, -1); + torch::Tensor rz = 1.0f / t.index({"...", 2}); + torch::Tensor rz2 = rz.pow(2); + + torch::Tensor J = torch::stack({ + torch::stack({fx * rz, torch::zeros_like(rz), -fx * t.index({"...", 0}) * rz2}, -1), + torch::stack({torch::zeros_like(rz), fy * rz, -fy * t.index({"...", 1}) * rz2}, -1) + }, -2); + + torch::Tensor T = torch::matmul(J, W); + torch::Tensor cov2d = torch::matmul(T, torch::matmul(cov3d, T.transpose(-1, -2))); + + // Add blur along axes + cov2d.index_put_({"...", 0, 0}, cov2d.index({"...", 0, 0}) + 0.3f); + cov2d.index_put_({"...", 1, 1}, cov2d.index({"...", 1, 1}) + 0.3f); + + // compute_cov2d_bounds + float eps = 1e-6f; + torch::Tensor det = cov2d.index({"...", 0, 0}) * cov2d.index({"...", 1, 1}) - cov2d.index({"...", 0, 1}).pow(2); + det = torch::clamp_min(det, eps); + torch::Tensor conic = torch::stack({ + cov2d.index({"...", 1, 1}) / det, + -cov2d.index({"...", 0, 1}) / det, + cov2d.index({"...", 0, 0}) / det + }, -1); + + torch::Tensor b = (cov2d.index({"...", 0, 0}) + cov2d.index({"...", 1, 1})) / 2.0f; + torch::Tensor sq = torch::sqrt(torch::clamp_min(b.pow(2) - det, 0.1f)); + torch::Tensor v1 = b + sq; + torch::Tensor v2 = b - sq; + torch::Tensor radius = torch::ceil(3.0f * torch::sqrt(torch::max(v1, v2))); + torch::Tensor detValid = det > eps; return std::make_tuple(torch::Tensor(), torch::Tensor(), torch::Tensor(), torch::Tensor(), torch::Tensor(), torch::Tensor()); From 9bc7d587888cf458011ae047e1f6bc26bc9147fd Mon Sep 17 00:00:00 2001 From: Piero Toffanin Date: Tue, 12 Mar 2024 11:07:48 -0400 Subject: [PATCH 03/19] project_gaussians_forward_tensor implemented --- CMakeLists.txt | 4 +-- simple_trainer.cpp | 4 +-- vendor/gsplat-cpu/gsplat_cpu.cpp | 43 +++++++++++++++++++++++++++++--- 3 files changed, 43 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5e4d3dd..6c584f9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,10 +3,10 @@ project(opensplat) set(OPENSPLAT_BUILD_SIMPLE_TRAINER OFF CACHE BOOL "Build simple trainer applications") set(GPU_RUNTIME "CUDA" CACHE STRING "HIP or CUDA") -set(OPENCV_DIR "OPENCV_DIR-NOTFOUND" CACHE "OPENCV_DIR" "Path to the OPENCV installation directory") +set(OPENCV_DIR "OPENCV_DIR-NOTFOUND" CACHE PATH "Path to the OPENCV installation directory") if(NOT CMAKE_BUILD_TYPE) -set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel." FORCE) + set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel." FORCE) endif() if(GPU_RUNTIME STREQUAL "CUDA") diff --git a/simple_trainer.cpp b/simple_trainer.cpp index 7e54dea..d09f4fc 100644 --- a/simple_trainer.cpp +++ b/simple_trainer.cpp @@ -64,10 +64,10 @@ int main(int argc, char **argv){ // torch::Tensor block = torch::tensor({BLOCK_X, BLOCK_Y, 1}, device); // Init gaussians -#ifdef USE_HIP -#else +#ifdef USE_CUDA torch::cuda::manual_seed_all(0); #endif + torch::manual_seed(0); // Random points, scales and colors torch::Tensor means = 2.0 * (torch::rand({numPoints, 3}, device) - 0.5); // Positions [-1, 1] diff --git a/vendor/gsplat-cpu/gsplat_cpu.cpp b/vendor/gsplat-cpu/gsplat_cpu.cpp index 7b2f31b..7432610 100644 --- a/vendor/gsplat-cpu/gsplat_cpu.cpp +++ b/vendor/gsplat-cpu/gsplat_cpu.cpp @@ -2,12 +2,12 @@ // This implementation is licensed under the AGPLv3 #include "bindings.h" +#include "../gsplat/config.h" #include #include #include #include -#include using namespace torch::indexing; @@ -37,6 +37,24 @@ torch::Tensor quatToRotMat(const torch::Tensor &quat){ } +std::tuple getTileBbox(torch::Tensor &pixCenter, const torch::Tensor &pixRadius, const std::tuple &tileBounds){ + torch::Tensor tileSize = torch::tensor({BLOCK_X, BLOCK_Y}, torch::TensorOptions().dtype(torch::kFloat32).device(pixCenter.device())); + torch::Tensor tileCenter = pixCenter / tileSize; + torch::Tensor tileRadius = pixRadius.index({"...", None}) / tileSize; + torch::Tensor topLeft = (tileCenter - tileRadius).to(torch::kInt32); + torch::Tensor bottomRight = (tileCenter + tileRadius).to(torch::kInt32) + 1; + torch::Tensor tileMin = torch::stack({ + torch::clamp(topLeft.index({"...", 0}), 0, std::get<0>(tileBounds)), + torch::clamp(topLeft.index({"...", 1}), 0, std::get<1>(tileBounds)) + }, -1); + torch::Tensor tileMax = torch::stack({ + torch::clamp(bottomRight.index({"...", 0}), 0, std::get<0>(tileBounds)), + torch::clamp(bottomRight.index({"...", 1}), 0, std::get<1>(tileBounds)) + }, -1); + + return std::make_tuple(tileMin, tileMax); +} + torch::Tensor compute_sh_forward_tensor( unsigned num_points, unsigned degree, @@ -88,7 +106,7 @@ project_gaussians_forward_tensor( torch::Tensor Rclip = viewmat.index({"...", Slice(None, 3), Slice(None, 3)}); torch::Tensor Tclip = viewmat.index({"...", Slice(None, 3), 3}); torch::Tensor pView = torch::matmul(Rclip, means3d.index({"...", None})).index({"...", 0}) + Tclip; - torch::Tensor isClose = pView.index({"...", 2}) < clip_thresh; + // torch::Tensor isClose = pView.index({"...", 2}) < clip_thresh; // scale_rot_to_cov3d torch::Tensor R = quatToRotMat(quats); @@ -137,10 +155,27 @@ project_gaussians_forward_tensor( torch::Tensor v1 = b + sq; torch::Tensor v2 = b - sq; torch::Tensor radius = torch::ceil(3.0f * torch::sqrt(torch::max(v1, v2))); - torch::Tensor detValid = det > eps; + // torch::Tensor detValid = det > eps; + + // project_pix + torch::Tensor pHom = torch::nn::functional::pad(means3d, torch::nn::functional::PadFuncOptions({0, 1}).mode(torch::kConstant).value(1.0f)); + pHom = torch::einsum("...ij,...j->...i", {projmat, pHom}); + torch::Tensor rw = 1.0f / torch::clamp_min(pHom.index({"...", 3}), eps); + torch::Tensor pProj = pHom.index({"...", Slice(None, 3)}) * rw.index({"...", None}); + torch::Tensor u = 0.5f * ((pProj.index({"...", 0}) + 1.0f) * static_cast(img_height) - 1.0f); + torch::Tensor v = 0.5f * ((pProj.index({"...", 1}) + 1.0f) * static_cast(img_width) - 1.0f); + torch::Tensor xys = torch::stack({u, v}, -1); // center + + auto bbox = getTileBbox(xys, radius, tile_bounds); + torch::Tensor tileMin = std::get<0>(bbox); + torch::Tensor tileMax = std::get<1>(bbox); + torch::Tensor numTilesHit = (tileMax.index({"...", 0}) - tileMin.index({"...", 0})) * + (tileMax.index({"...", 1}) - tileMin.index({"...", 1})); + torch::Tensor depths = pView.index({"...", 2}); + torch::Tensor radii = radius.to(torch::kInt32); - return std::make_tuple(torch::Tensor(), torch::Tensor(), torch::Tensor(), torch::Tensor(), torch::Tensor(), torch::Tensor()); + return std::make_tuple(cov3d, xys, depths, radii, conic, numTilesHit ); } std::tuple< From 5781d52e444c89a09d79ec003706a98bc1861db6 Mon Sep 17 00:00:00 2001 From: Piero Toffanin Date: Tue, 12 Mar 2024 17:22:17 +0000 Subject: [PATCH 04/19] Implemented rasterize_forward --- vendor/gsplat-cpu/gsplat_cpu.cpp | 83 +++++++++++++++++++++++++++++++- 1 file changed, 82 insertions(+), 1 deletion(-) diff --git a/vendor/gsplat-cpu/gsplat_cpu.cpp b/vendor/gsplat-cpu/gsplat_cpu.cpp index 7432610..ef0feb9 100644 --- a/vendor/gsplat-cpu/gsplat_cpu.cpp +++ b/vendor/gsplat-cpu/gsplat_cpu.cpp @@ -218,6 +218,33 @@ std::tuple map_gaussian_to_intersects_tensor( const torch::Tensor &cum_tiles_hit, const std::tuple tile_bounds ){ + num_intersects = cum_tiles_hit[-1] + isect_ids = torch.zeros(num_intersects, dtype=torch.int64, device=xys.device) + gaussian_ids = torch.zeros(num_intersects, dtype=torch.int32, device=xys.device) + + for idx in range(num_points): + if radii[idx] <= 0: + break + + tile_min, tile_max = get_tile_bbox(xys[idx], radii[idx], tile_bounds) + + cur_idx = 0 if idx == 0 else cum_tiles_hit[idx - 1].item() + + # Get raw byte representation of the float value at the given index + raw_bytes = struct.pack("f", depths[idx]) + + # Interpret those bytes as an int32_t + depth_id_n = struct.unpack("i", raw_bytes)[0] + + for i in range(tile_min[1], tile_max[1]): + for j in range(tile_min[0], tile_max[0]): + tile_id = i * tile_bounds[0] + j + isect_ids[cur_idx] = (tile_id << 32) | depth_id_n + gaussian_ids[cur_idx] = idx + cur_idx += 1 + + return isect_ids, gaussian_ids + return std::make_tuple(torch::Tensor(), torch::Tensor()); } @@ -244,7 +271,61 @@ std::tuple< const torch::Tensor &opacities, const torch::Tensor &background ){ - return std::make_tuple(torch::Tensor(), torch::Tensor(), torch::Tensor()); + int channels = colors.size(1); + int width = std::get<1>(img_size); + int height = std::get<0>(img_size); + torch::Device device = xys.device(); + + torch::Tensor outImg = torch::zeros({width, height, channels}, torch::TensorOptions().dtype(torch::kFloat32).device(device)); + torch::Tensor finalTs = torch::zeros({width, height, channels}, torch::TensorOptions().dtype(torch::kFloat32).device(device)); + torch::Tensor finalIdx = torch::zeros({width, height, channels}, torch::TensorOptions().dtype(torch::kInt32).device(device)); + + + for (int i = 0; i < width; i++){ + for (int j = 0; j < height; j++){ + int tileId = (i / std::get<0>(block)) * std::get<0>(tile_bounds) + (j / std::get<1>(block)); + int tileBinStart = tile_bins[tileId][0].item(); + int tileBinEnd = tile_bins[tileId][1].item(); + float T = 1.0f; + + int idx = tileBinStart; + for (; idx < tileBinEnd; idx++){ + torch::Tensor gaussianId = gaussian_ids_sorted[idx]; + torch::Tensor conic = conics[gaussianId]; + torch::Tensor center = xys[gaussianId]; + torch::Tensor delta = center - torch::tensor({j, i}, torch::TensorOptions().dtype(torch::kFloat32).device(device)); + + torch::Tensor sigma = ( + 0.5f + * (conic[0] * delta[0] * delta[0] + conic[2] * delta[1] * delta[1]) + + conic[1] * delta[0] * delta[1] + ); + + if (sigma.item() < 0.0f) continue; + + float alpha = (std::min)(0.999f, (opacities[gaussianId] * torch::exp(-sigma)).item()); + + if (alpha < 1.0f / 255.0f) continue; + + float nextT = T * (1.0f - alpha); + + if (nextT <= 1e-4f){ + idx -= 1; + break; + } + + float vis = alpha * T; + outImg[i][j] += vis * colors[gaussianId]; + T = nextT; + } + + finalTs[i][j] = T; + finalIdx[i][j] = idx; + outImg[i][j] += T * background; + } + } + + return std::make_tuple(outImg, finalTs, finalIdx); } From 6499b5d1f9dfa58f6a8916b0a85d537ae7e71c43 Mon Sep 17 00:00:00 2001 From: Piero Toffanin Date: Tue, 12 Mar 2024 19:15:32 +0000 Subject: [PATCH 05/19] Work on map_gaussian_to_intersects_tensor --- vendor/gsplat-cpu/gsplat_cpu.cpp | 64 +++++++++++++++++++------------- 1 file changed, 39 insertions(+), 25 deletions(-) diff --git a/vendor/gsplat-cpu/gsplat_cpu.cpp b/vendor/gsplat-cpu/gsplat_cpu.cpp index ef0feb9..2509d33 100644 --- a/vendor/gsplat-cpu/gsplat_cpu.cpp +++ b/vendor/gsplat-cpu/gsplat_cpu.cpp @@ -218,32 +218,46 @@ std::tuple map_gaussian_to_intersects_tensor( const torch::Tensor &cum_tiles_hit, const std::tuple tile_bounds ){ - num_intersects = cum_tiles_hit[-1] - isect_ids = torch.zeros(num_intersects, dtype=torch.int64, device=xys.device) - gaussian_ids = torch.zeros(num_intersects, dtype=torch.int32, device=xys.device) - - for idx in range(num_points): - if radii[idx] <= 0: - break - - tile_min, tile_max = get_tile_bbox(xys[idx], radii[idx], tile_bounds) - - cur_idx = 0 if idx == 0 else cum_tiles_hit[idx - 1].item() - - # Get raw byte representation of the float value at the given index - raw_bytes = struct.pack("f", depths[idx]) - - # Interpret those bytes as an int32_t - depth_id_n = struct.unpack("i", raw_bytes)[0] - - for i in range(tile_min[1], tile_max[1]): - for j in range(tile_min[0], tile_max[0]): - tile_id = i * tile_bounds[0] + j - isect_ids[cur_idx] = (tile_id << 32) | depth_id_n - gaussian_ids[cur_idx] = idx - cur_idx += 1 + torch::Device device = xys.device(); + int numIntersects = cum_tiles_hit[-1].item(); + torch::Tensor isectIds = torch::zeros(numIntersects, torch::TensorOptions().dtype(torch::kInt64).device(device)); + torch::Tensor gaussianIds = torch::zeros(numIntersects, torch::TensorOptions().dtype(torch::kInt32).device(device)); + for (int idx = 0; idx < num_points; idx++){ + if (radii[idx].item() <= 0.0f) break; + + auto bbox = getTileBbox(xys[idx], radii[idx], tile_bounds); + torch::Tensor tileMin = std::get<0>(bbox); + torch::Tensor tileMax = std::get<1>(bbox); + int curIdx; + + if (idx == 0){ + curIdx = 0; + }else{ + curIdx = cum_tiles_hit[idx - 1].item(); + } - return isect_ids, gaussian_ids + int32_t depthIdN = static_cast(depths[idx].item()); + int iStart = tileMin[1].item(); + int iEnd = tileMax[1].item(); + int jStart = tileMin[0].item(); + int jEnd = tileMax[0].item(); + int b = std::get<0>(tile_bounds); + + for (int i = iStart; i < iEnd; i++){ + for (int j = jStart; j < jEnd; j++){ + int tileId = i * b + j; + isectIds[curIdx] + } + } + } + // for i in range(tile_min[1], tile_max[1]): + // for j in range(tile_min[0], tile_max[0]): + // tile_id = i * tile_bounds[0] + j + // isect_ids[cur_idx] = (tile_id << 32) | depth_id_n + // gaussian_ids[cur_idx] = idx + // cur_idx += 1 + + // return isect_ids, gaussian_ids return std::make_tuple(torch::Tensor(), torch::Tensor()); } From 9f8c4d3643270e367a76ef30f3815a42e8cba323 Mon Sep 17 00:00:00 2001 From: Piero Toffanin Date: Tue, 12 Mar 2024 19:56:22 +0000 Subject: [PATCH 06/19] Forward pass working (very slow) --- vendor/gsplat-cpu/gsplat_cpu.cpp | 51 ++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 15 deletions(-) diff --git a/vendor/gsplat-cpu/gsplat_cpu.cpp b/vendor/gsplat-cpu/gsplat_cpu.cpp index 2509d33..a53ce12 100644 --- a/vendor/gsplat-cpu/gsplat_cpu.cpp +++ b/vendor/gsplat-cpu/gsplat_cpu.cpp @@ -37,7 +37,7 @@ torch::Tensor quatToRotMat(const torch::Tensor &quat){ } -std::tuple getTileBbox(torch::Tensor &pixCenter, const torch::Tensor &pixRadius, const std::tuple &tileBounds){ +std::tuple getTileBbox(const torch::Tensor &pixCenter, const torch::Tensor &pixRadius, const std::tuple &tileBounds){ torch::Tensor tileSize = torch::tensor({BLOCK_X, BLOCK_Y}, torch::TensorOptions().dtype(torch::kFloat32).device(pixCenter.device())); torch::Tensor tileCenter = pixCenter / tileSize; torch::Tensor tileRadius = pixRadius.index({"...", None}) / tileSize; @@ -245,28 +245,45 @@ std::tuple map_gaussian_to_intersects_tensor( for (int i = iStart; i < iEnd; i++){ for (int j = jStart; j < jEnd; j++){ - int tileId = i * b + j; - isectIds[curIdx] + int64_t tileId = i * b + j; + isectIds[curIdx] = (tileId << 32) | depthIdN; + gaussianIds[curIdx] = idx; + curIdx += 1; } } } - // for i in range(tile_min[1], tile_max[1]): - // for j in range(tile_min[0], tile_max[0]): - // tile_id = i * tile_bounds[0] + j - // isect_ids[cur_idx] = (tile_id << 32) | depth_id_n - // gaussian_ids[cur_idx] = idx - // cur_idx += 1 - // return isect_ids, gaussian_ids - - return std::make_tuple(torch::Tensor(), torch::Tensor()); + return std::make_tuple(isectIds, gaussianIds); } torch::Tensor get_tile_bin_edges_tensor( int num_intersects, const torch::Tensor &isect_ids_sorted ){ - return torch::Tensor(); + torch::Tensor tileBins = torch::zeros({num_intersects, 2}, torch::TensorOptions().dtype(torch::kInt32).device(isect_ids_sorted.device())); + + for (int idx = 0; idx < num_intersects; idx++){ + int64_t curTileIdx = isect_ids_sorted[idx].item() >> 32; + + if (idx == 0){ + tileBins[curTileIdx][0] = 0; + continue; + } + + if (idx == num_intersects - 1){ + tileBins[curTileIdx][1] = num_intersects; + break; + } + + int64_t prevTileIdx = isect_ids_sorted[idx - 1].item() >> 32; + + if (curTileIdx != prevTileIdx){ + tileBins[prevTileIdx][1] = idx; + tileBins[curTileIdx][0] = idx; + } + } + + return tileBins; } std::tuple< @@ -294,10 +311,14 @@ std::tuple< torch::Tensor finalTs = torch::zeros({width, height, channels}, torch::TensorOptions().dtype(torch::kFloat32).device(device)); torch::Tensor finalIdx = torch::zeros({width, height, channels}, torch::TensorOptions().dtype(torch::kInt32).device(device)); - + int blockX = std::get<0>(block); + int blockY = std::get<1>(block); + int tileBoundsX = std::get<0>(tile_bounds); + for (int i = 0; i < width; i++){ + std::cout << i << std::endl; for (int j = 0; j < height; j++){ - int tileId = (i / std::get<0>(block)) * std::get<0>(tile_bounds) + (j / std::get<1>(block)); + int tileId = (i / blockX) * tileBoundsX + (j / blockY); int tileBinStart = tile_bins[tileId][0].item(); int tileBinEnd = tile_bins[tileId][1].item(); float T = 1.0f; From 863e1170990c54901763ea2a1bb9ad3449d61780 Mon Sep 17 00:00:00 2001 From: Piero Toffanin Date: Wed, 13 Mar 2024 17:31:19 +0000 Subject: [PATCH 07/19] PoC CPU simple_trainer working (slow) --- CMakeLists.txt | 2 +- project_gaussians.cpp | 20 +++++++++++--------- project_gaussians.hpp | 2 ++ rasterize_gaussians.cpp | 9 +++++---- simple_trainer.cpp | 14 +++++++------- vendor/gsplat-cpu/gsplat_cpu.cpp | 15 +++++++++------ 6 files changed, 35 insertions(+), 27 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6c584f9..9d1eff4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -93,7 +93,7 @@ elseif(GPU_RUNTIME STREQUAL "CUDA") endif() if(OPENSPLAT_BUILD_SIMPLE_TRAINER) - add_executable(simple_trainer simple_trainer.cpp project_gaussians.cpp rasterize_gaussians.cpp) + add_executable(simple_trainer simple_trainer.cpp project_gaussians.cpp rasterize_gaussians.cpp cv_utils.cpp) target_include_directories(simple_trainer PRIVATE ${PROJECT_SOURCE_DIR}/vendor/glm ${GPU_INCLUDE_DIRS}) target_link_libraries(simple_trainer PUBLIC ${GPU_LIBRARIES} gsplat ${TORCH_LIBRARIES} ${OpenCV_LIBS}) set_property(TARGET simple_trainer PROPERTY CXX_STANDARD 17) diff --git a/project_gaussians.cpp b/project_gaussians.cpp index d899362..029d589 100644 --- a/project_gaussians.cpp +++ b/project_gaussians.cpp @@ -30,16 +30,18 @@ variable_list ProjectGaussians::forward(AutogradContext *ctx, torch::Tensor conics = std::get<4>(t); torch::Tensor numTilesHit = std::get<5>(t); - ctx->saved_data["imgHeight"] = imgHeight; - ctx->saved_data["imgWidth"] = imgWidth; - ctx->saved_data["numPoints"] = numPoints; - ctx->saved_data["globScale"] = globScale; - ctx->saved_data["fx"] = fx; - ctx->saved_data["fy"] = fy; - ctx->saved_data["cx"] = cx; - ctx->saved_data["cy"] = cy; + if (ctx != nullptr){ + ctx->saved_data["imgHeight"] = imgHeight; + ctx->saved_data["imgWidth"] = imgWidth; + ctx->saved_data["numPoints"] = numPoints; + ctx->saved_data["globScale"] = globScale; + ctx->saved_data["fx"] = fx; + ctx->saved_data["fy"] = fy; + ctx->saved_data["cx"] = cx; + ctx->saved_data["cy"] = cy; + ctx->save_for_backward({ means, scales, quats, viewMat, projMat, cov3d, radii, conics }); + } - ctx->save_for_backward({ means, scales, quats, viewMat, projMat, cov3d, radii, conics }); return { xys, depths, radii, conics, numTilesHit, cov3d }; } diff --git a/project_gaussians.hpp b/project_gaussians.hpp index ece9399..d84f53f 100644 --- a/project_gaussians.hpp +++ b/project_gaussians.hpp @@ -27,4 +27,6 @@ class ProjectGaussians : public Function{ }; + + #endif \ No newline at end of file diff --git a/rasterize_gaussians.cpp b/rasterize_gaussians.cpp index 42d3bdc..bf664c4 100644 --- a/rasterize_gaussians.cpp +++ b/rasterize_gaussians.cpp @@ -82,10 +82,11 @@ torch::Tensor RasterizeGaussians::forward(AutogradContext *ctx, // Map of tile bin IDs torch::Tensor finalIdx = std::get<2>(t); - ctx->saved_data["imgWidth"] = imgWidth; - ctx->saved_data["imgHeight"] = imgHeight; - - ctx->save_for_backward({ gaussianIdsSorted, tileBins, xys, conics, colors, opacity, background, finalTs, finalIdx }); + if (ctx != nullptr){ + ctx->saved_data["imgWidth"] = imgWidth; + ctx->saved_data["imgHeight"] = imgHeight; + ctx->save_for_backward({ gaussianIdsSorted, tileBins, xys, conics, colors, opacity, background, finalTs, finalIdx }); + } return outImg; } diff --git a/simple_trainer.cpp b/simple_trainer.cpp index d09f4fc..1dbb849 100644 --- a/simple_trainer.cpp +++ b/simple_trainer.cpp @@ -27,8 +27,8 @@ using namespace torch::indexing; int main(int argc, char **argv){ - int width = 256, - height = 256; + int width = 16, + height = 16; int numPoints = 100000; int iterations = 1000; float learningRate = 0.01; @@ -108,7 +108,7 @@ int main(int argc, char **argv){ torch::nn::MSELoss mseLoss; for (size_t i = 0; i < iterations; i++){ - auto p = ProjectGaussians::apply(means, scales, 1, + auto p = ProjectGaussians::forward(nullptr, means, scales, 1, quats, viewMat, viewMat, focal, focal, width / 2, @@ -117,7 +117,7 @@ int main(int argc, char **argv){ width, tileBounds); - torch::Tensor outImg = RasterizeGaussians::apply( + torch::Tensor outImg = RasterizeGaussians::forward(nullptr, p[0], // xys p[1], // depths p[2], // radii, @@ -137,8 +137,8 @@ int main(int argc, char **argv){ std::cout << "Iteration " << std::to_string(i + 1) << "/" << std::to_string(iterations) << " Loss: " << loss.item() << std::endl; - // cv::Mat image = tensorToImage(outImg.detach().cpu()); - // cv::cvtColor(image, image, cv::COLOR_RGB2BGR); - // cv::imwrite("render/" + std::to_string(i + 1) + ".png", image); + cv::Mat image = tensorToImage(outImg.detach().cpu()); + cv::cvtColor(image, image, cv::COLOR_RGB2BGR); + cv::imwrite("render/" + std::to_string(i + 1) + ".png", image); } } \ No newline at end of file diff --git a/vendor/gsplat-cpu/gsplat_cpu.cpp b/vendor/gsplat-cpu/gsplat_cpu.cpp index a53ce12..52aecff 100644 --- a/vendor/gsplat-cpu/gsplat_cpu.cpp +++ b/vendor/gsplat-cpu/gsplat_cpu.cpp @@ -236,7 +236,9 @@ std::tuple map_gaussian_to_intersects_tensor( curIdx = cum_tiles_hit[idx - 1].item(); } - int32_t depthIdN = static_cast(depths[idx].item()); + float depth = depths[idx].item(); + int32_t depthIdN = *(reinterpret_cast(&depth)); + int iStart = tileMin[1].item(); int iEnd = tileMax[1].item(); int jStart = tileMin[0].item(); @@ -246,7 +248,7 @@ std::tuple map_gaussian_to_intersects_tensor( for (int i = iStart; i < iEnd; i++){ for (int j = jStart; j < jEnd; j++){ int64_t tileId = i * b + j; - isectIds[curIdx] = (tileId << 32) | depthIdN; + isectIds[curIdx] = static_cast(tileId << 32) | depthIdN; gaussianIds[curIdx] = idx; curIdx += 1; } @@ -263,7 +265,7 @@ torch::Tensor get_tile_bin_edges_tensor( torch::Tensor tileBins = torch::zeros({num_intersects, 2}, torch::TensorOptions().dtype(torch::kInt32).device(isect_ids_sorted.device())); for (int idx = 0; idx < num_intersects; idx++){ - int64_t curTileIdx = isect_ids_sorted[idx].item() >> 32; + int32_t curTileIdx = static_cast(isect_ids_sorted[idx].item() >> 32); if (idx == 0){ tileBins[curTileIdx][0] = 0; @@ -275,7 +277,7 @@ torch::Tensor get_tile_bin_edges_tensor( break; } - int64_t prevTileIdx = isect_ids_sorted[idx - 1].item() >> 32; + int32_t prevTileIdx = static_cast(isect_ids_sorted[idx - 1].item() >> 32); if (curTileIdx != prevTileIdx){ tileBins[prevTileIdx][1] = idx; @@ -322,13 +324,14 @@ std::tuple< int tileBinStart = tile_bins[tileId][0].item(); int tileBinEnd = tile_bins[tileId][1].item(); float T = 1.0f; - + torch::Tensor ji = torch::tensor({j, i}, torch::TensorOptions().dtype(torch::kFloat32).device(device)); + int idx = tileBinStart; for (; idx < tileBinEnd; idx++){ torch::Tensor gaussianId = gaussian_ids_sorted[idx]; torch::Tensor conic = conics[gaussianId]; torch::Tensor center = xys[gaussianId]; - torch::Tensor delta = center - torch::tensor({j, i}, torch::TensorOptions().dtype(torch::kFloat32).device(device)); + torch::Tensor delta = center - ji; torch::Tensor sigma = ( 0.5f From cc8166b7e8b886d9eda6a7191037232d9b283b12 Mon Sep 17 00:00:00 2001 From: Piero Toffanin Date: Fri, 15 Mar 2024 18:14:15 +0000 Subject: [PATCH 08/19] PoC bounding box based rendering --- project_gaussians.cpp | 4 +- rasterize_gaussians.cpp | 13 +++- rasterize_gaussians.hpp | 1 + simple_trainer.cpp | 5 +- vendor/gsplat-cpu/bindings.h | 4 +- vendor/gsplat-cpu/gsplat_cpu.cpp | 123 ++++++++++++++++++++++++++++++- 6 files changed, 141 insertions(+), 9 deletions(-) diff --git a/project_gaussians.cpp b/project_gaussians.cpp index 029d589..6781078 100644 --- a/project_gaussians.cpp +++ b/project_gaussians.cpp @@ -29,6 +29,8 @@ variable_list ProjectGaussians::forward(AutogradContext *ctx, torch::Tensor radii = std::get<3>(t); torch::Tensor conics = std::get<4>(t); torch::Tensor numTilesHit = std::get<5>(t); + torch::Tensor cov2d = std::get<6>(t); + if (ctx != nullptr){ ctx->saved_data["imgHeight"] = imgHeight; @@ -42,7 +44,7 @@ variable_list ProjectGaussians::forward(AutogradContext *ctx, ctx->save_for_backward({ means, scales, quats, viewMat, projMat, cov3d, radii, conics }); } - return { xys, depths, radii, conics, numTilesHit, cov3d }; + return { xys, depths, radii, conics, numTilesHit, cov3d, cov2d }; } tensor_list ProjectGaussians::backward(AutogradContext *ctx, tensor_list grad_outputs) { diff --git a/rasterize_gaussians.cpp b/rasterize_gaussians.cpp index bf664c4..e1e01f0 100644 --- a/rasterize_gaussians.cpp +++ b/rasterize_gaussians.cpp @@ -2,6 +2,8 @@ #include "gsplat.hpp" #include "vendor/gsplat/config.h" +#include "cv_utils.hpp" // TODO REMOVE + std::tuple(t); + cv::Mat image = tensorToImage(outImg.detach().cpu()); + cv::cvtColor(image, image, cv::COLOR_RGB2BGR); + cv::imwrite("test.png", image); + std::cout << "WROTE"; + exit(1); + + // Map of alpha-inverse (1 - finalTs = alpha) torch::Tensor finalTs = std::get<1>(t); diff --git a/rasterize_gaussians.hpp b/rasterize_gaussians.hpp index f73a85f..a97f69d 100644 --- a/rasterize_gaussians.hpp +++ b/rasterize_gaussians.hpp @@ -27,6 +27,7 @@ class RasterizeGaussians : public Function{ torch::Tensor numTilesHit, torch::Tensor colors, torch::Tensor opacity, + torch::Tensor cov2d, int imgHeight, int imgWidth, torch::Tensor background); diff --git a/simple_trainer.cpp b/simple_trainer.cpp index 1dbb849..38660b1 100644 --- a/simple_trainer.cpp +++ b/simple_trainer.cpp @@ -27,8 +27,8 @@ using namespace torch::indexing; int main(int argc, char **argv){ - int width = 16, - height = 16; + int width = 128, + height = 128; int numPoints = 100000; int iterations = 1000; float learningRate = 0.01; @@ -125,6 +125,7 @@ int main(int argc, char **argv){ p[4], // numTilesHit torch::sigmoid(rgbs), torch::sigmoid(opacities), + p[6], // cov2d height, width, background); diff --git a/vendor/gsplat-cpu/bindings.h b/vendor/gsplat-cpu/bindings.h index d831b4b..aa9504e 100644 --- a/vendor/gsplat-cpu/bindings.h +++ b/vendor/gsplat-cpu/bindings.h @@ -28,6 +28,7 @@ std::tuple< torch::Tensor, torch::Tensor, torch::Tensor, + torch::Tensor, torch::Tensor> project_gaussians_forward_tensor( const int num_points, @@ -105,7 +106,8 @@ std::tuple< const torch::Tensor &conics, const torch::Tensor &colors, const torch::Tensor &opacities, - const torch::Tensor &background + const torch::Tensor &background, + const torch::Tensor &cov2d ); std:: diff --git a/vendor/gsplat-cpu/gsplat_cpu.cpp b/vendor/gsplat-cpu/gsplat_cpu.cpp index 52aecff..7472c26 100644 --- a/vendor/gsplat-cpu/gsplat_cpu.cpp +++ b/vendor/gsplat-cpu/gsplat_cpu.cpp @@ -81,6 +81,7 @@ std::tuple< torch::Tensor, torch::Tensor, torch::Tensor, + torch::Tensor, torch::Tensor> project_gaussians_forward_tensor( const int num_points, @@ -101,6 +102,8 @@ project_gaussians_forward_tensor( ){ float fovx = 0.5f * static_cast(img_width) / fx; float fovy = 0.5f * static_cast(img_height) / fy; + + // TODO: no need to recompute W,p,t below (they are the same) // clip_near_plane torch::Tensor Rclip = viewmat.index({"...", Slice(None, 3), Slice(None, 3)}); @@ -175,7 +178,7 @@ project_gaussians_forward_tensor( torch::Tensor depths = pView.index({"...", 2}); torch::Tensor radii = radius.to(torch::kInt32); - return std::make_tuple(cov3d, xys, depths, radii, conic, numTilesHit ); + return std::make_tuple(cov3d, xys, depths, radii, conic, numTilesHit, cov2d ); } std::tuple< @@ -302,23 +305,134 @@ std::tuple< const torch::Tensor &conics, const torch::Tensor &colors, const torch::Tensor &opacities, - const torch::Tensor &background + const torch::Tensor &background, + const torch::Tensor &cov2d ){ int channels = colors.size(1); int width = std::get<1>(img_size); int height = std::get<0>(img_size); + int numPoints = xys.size(0); + torch::Device device = xys.device(); torch::Tensor outImg = torch::zeros({width, height, channels}, torch::TensorOptions().dtype(torch::kFloat32).device(device)); - torch::Tensor finalTs = torch::zeros({width, height, channels}, torch::TensorOptions().dtype(torch::kFloat32).device(device)); + torch::Tensor finalTs = torch::ones({width, height, channels}, torch::TensorOptions().dtype(torch::kFloat32).device(device)); torch::Tensor finalIdx = torch::zeros({width, height, channels}, torch::TensorOptions().dtype(torch::kInt32).device(device)); + const float alphaThresh = 1.0f / 255.0f; + int idx = 0; + for (; idx < numPoints; idx++){ + torch::Tensor gaussianId = gaussian_ids_sorted[idx]; + torch::Tensor conic = conics[gaussianId]; + torch::Tensor center = xys[gaussianId]; + + float sqx = 3.0f * std::sqrt(cov2d[gaussianId][0][0].item()); + float sqy = 3.0f * std::sqrt(cov2d[gaussianId][1][1].item()); + + int minx = (std::max)(0, static_cast(std::floor(center[1].item() - sqy)) - 2); + int maxx = (std::min)(width, static_cast(std::ceil(center[1].item() + sqy)) + 2); + int miny = (std::max)(0, static_cast(std::floor(center[0].item() - sqx)) - 2); + int maxy = (std::min)(height, static_cast(std::ceil(center[0].item() + sqx)) + 2); + + for (int i = minx; i < maxx; i++){ + for (int j = miny; j < maxy; j++){ + torch::Tensor ji = torch::tensor({j, i}, torch::TensorOptions().dtype(torch::kFloat32).device(device)); + + torch::Tensor delta = center - ji; + torch::Tensor sigma = ( + 0.5f + * (conic[0] * delta[0] * delta[0] + conic[2] * delta[1] * delta[1]) + + conic[1] * delta[0] * delta[1] + ); + + if (sigma.item() < 0.0f) continue; + float alpha = (std::min)(0.999f, (opacities[gaussianId] * torch::exp(-sigma)).item()); + if (alpha < alphaThresh) continue; + + torch::Tensor T = finalTs[i][j]; + torch::Tensor nextT = T * (1.0f - alpha); + + // if (nextT.item() <= 1e-4f){ + // idx -= 1; + // break; + // } + + // outImg[i][j] = torch::tensor({1.0f, 1.0f, 1.0f}); + outImg[i][j] += alpha * T * colors[gaussianId]; + finalTs[i][j] = nextT; + } + } + } + + return std::make_tuple(outImg, finalTs, finalIdx); + +/* + int minx = 99999; + int miny = 99999; + int maxx = 0; + int maxy = 0; + for (int i = 0; i < width; i++){ + std::cout << i << std::endl; + for (int j = 0; j < height; j++){ + float T = 1.0f; + torch::Tensor ji = torch::tensor({j, i}, torch::TensorOptions().dtype(torch::kFloat32).device(device)); + + int idx = 0; + for (; idx < 1; idx++){ + torch::Tensor gaussianId = gaussian_ids_sorted[idx]; + torch::Tensor conic = conics[gaussianId]; + torch::Tensor center = xys[gaussianId]; + torch::Tensor delta = center - ji; + + torch::Tensor sigma = ( + 0.5f + * (conic[0] * delta[0] * delta[0] + conic[2] * delta[1] * delta[1]) + + conic[1] * delta[0] * delta[1] + ); + + if (sigma.item() < 0.0f) continue; + + float alpha = (std::min)(0.999f, (opacities[gaussianId] * torch::exp(-sigma)).item()); + + if (alpha < 1.0f / 255.0f) continue; + + float nextT = T * (1.0f - alpha); + + if (nextT <= 1e-4f){ + idx -= 1; + break; + } + + float vis = alpha * T; + // outImg[i][j] = torch::tensor({1.0f, 1.0f, 1.0f}); + outImg[i][j] += vis * colors[gaussianId]; + + maxx = (std::max)(i, maxx); + maxy = (std::max)(j, maxy); + minx = (std::min)(i, minx); + miny = (std::min)(j, miny); + + + T = nextT; + } + + finalTs[i][j] = T; + finalIdx[i][j] = idx; + // outImg[i][j] += T * background; + } + } + + std::cout << "[" << minx << ", " << miny << "], [" << maxx << ", " << maxy << "]" << std::endl; + + return std::make_tuple(outImg, finalTs, finalIdx); +*/ +/* + int blockX = std::get<0>(block); int blockY = std::get<1>(block); int tileBoundsX = std::get<0>(tile_bounds); for (int i = 0; i < width; i++){ - std::cout << i << std::endl; for (int j = 0; j < height; j++){ int tileId = (i / blockX) * tileBoundsX + (j / blockY); int tileBinStart = tile_bins[tileId][0].item(); @@ -364,6 +478,7 @@ std::tuple< } return std::make_tuple(outImg, finalTs, finalIdx); + */ } From e0f8b537f42b331aad4f22f7e30c0816299df90a Mon Sep 17 00:00:00 2001 From: Piero Toffanin Date: Fri, 15 Mar 2024 20:22:14 +0000 Subject: [PATCH 09/19] Work on rasterizer --- rasterize_gaussians.cpp | 3 +- vendor/gsplat-cpu/bindings.h | 3 +- vendor/gsplat-cpu/gsplat_cpu.cpp | 106 +++++++++++++++++++++---------- 3 files changed, 78 insertions(+), 34 deletions(-) diff --git a/rasterize_gaussians.cpp b/rasterize_gaussians.cpp index e1e01f0..deee76c 100644 --- a/rasterize_gaussians.cpp +++ b/rasterize_gaussians.cpp @@ -76,7 +76,8 @@ torch::Tensor RasterizeGaussians::forward(AutogradContext *ctx, colors, opacity, background, - cov2d); + cov2d, + depths); // Final image torch::Tensor outImg = std::get<0>(t); diff --git a/vendor/gsplat-cpu/bindings.h b/vendor/gsplat-cpu/bindings.h index aa9504e..f9ccd8f 100644 --- a/vendor/gsplat-cpu/bindings.h +++ b/vendor/gsplat-cpu/bindings.h @@ -107,7 +107,8 @@ std::tuple< const torch::Tensor &colors, const torch::Tensor &opacities, const torch::Tensor &background, - const torch::Tensor &cov2d + const torch::Tensor &cov2d, + const torch::Tensor &depths ); std:: diff --git a/vendor/gsplat-cpu/gsplat_cpu.cpp b/vendor/gsplat-cpu/gsplat_cpu.cpp index 7472c26..8e0a4c0 100644 --- a/vendor/gsplat-cpu/gsplat_cpu.cpp +++ b/vendor/gsplat-cpu/gsplat_cpu.cpp @@ -178,6 +178,8 @@ project_gaussians_forward_tensor( torch::Tensor depths = pView.index({"...", 2}); torch::Tensor radii = radius.to(torch::kInt32); + // TODO: compute camDepths as pProj.index({"...", 2}); + return std::make_tuple(cov3d, xys, depths, radii, conic, numTilesHit, cov2d ); } @@ -306,12 +308,26 @@ std::tuple< const torch::Tensor &colors, const torch::Tensor &opacities, const torch::Tensor &background, - const torch::Tensor &cov2d + const torch::Tensor &cov2d, + const torch::Tensor &depths ){ + // torch::NoGradGuard noGrad; + int channels = colors.size(1); int width = std::get<1>(img_size); int height = std::get<0>(img_size); int numPoints = xys.size(0); + float *pDepths = static_cast(depths.data_ptr()); + + std::vector< size_t > gIndices( numPoints ); + std::iota( gIndices.begin(), gIndices.end(), 0 ); + std::sort(gIndices.begin(), gIndices.end(), [&pDepths](int a, int b){ + return pDepths[a] < pDepths[b]; + }); + + std::cout << pDepths[0] << std::endl; + + std::cout << pDepths[100]; torch::Device device = xys.device(); @@ -319,53 +335,79 @@ std::tuple< torch::Tensor finalTs = torch::ones({width, height, channels}, torch::TensorOptions().dtype(torch::kFloat32).device(device)); torch::Tensor finalIdx = torch::zeros({width, height, channels}, torch::TensorOptions().dtype(torch::kInt32).device(device)); + torch::Tensor sqCov2dX = 3.0f * torch::sqrt(cov2d.index({"...", 0, 0})); + torch::Tensor sqCov2dY = 3.0f * torch::sqrt(cov2d.index({"...", 1, 1})); + + int32_t *pGaussianIds = static_cast(gaussian_ids_sorted.data_ptr()); + float *pConics = static_cast(conics.data_ptr()); + float *pCenters = static_cast(xys.data_ptr()); + float *pSqCov2dX = static_cast(sqCov2dX.data_ptr()); + float *pSqCov2dY = static_cast(sqCov2dY.data_ptr()); + float *pOpacities = static_cast(opacities.data_ptr()); + + float *pOutImg = static_cast(outImg.data_ptr()); + float *pFinalTs = static_cast(finalTs.data_ptr()); + int32_t *pFinalIdx = static_cast(finalIdx.data_ptr()); + float *pColors = static_cast(colors.data_ptr()); + + float bgX = background[0].item(); + float bgY = background[1].item(); + float bgZ = background[2].item(); + const float alphaThresh = 1.0f / 255.0f; + float T = 1.0f; int idx = 0; for (; idx < numPoints; idx++){ - torch::Tensor gaussianId = gaussian_ids_sorted[idx]; - torch::Tensor conic = conics[gaussianId]; - torch::Tensor center = xys[gaussianId]; - - float sqx = 3.0f * std::sqrt(cov2d[gaussianId][0][0].item()); - float sqy = 3.0f * std::sqrt(cov2d[gaussianId][1][1].item()); + int32_t gaussianId = gIndices[idx]; - int minx = (std::max)(0, static_cast(std::floor(center[1].item() - sqy)) - 2); - int maxx = (std::min)(width, static_cast(std::ceil(center[1].item() + sqy)) + 2); - int miny = (std::max)(0, static_cast(std::floor(center[0].item() - sqx)) - 2); - int maxy = (std::min)(height, static_cast(std::ceil(center[0].item() + sqx)) + 2); + float A = pConics[gaussianId * 3 + 0]; + float B = pConics[gaussianId * 3 + 1]; + float C = pConics[gaussianId * 3 + 2]; + + float gX = pCenters[gaussianId * 2 + 0]; + float gY = pCenters[gaussianId * 2 + 1]; + + float sqx = pSqCov2dX[gaussianId]; + float sqy = pSqCov2dY[gaussianId]; + + int minx = (std::max)(0, static_cast(std::floor(gY - sqy)) - 2); + int maxx = (std::min)(width, static_cast(std::ceil(gY + sqy)) + 2); + int miny = (std::max)(0, static_cast(std::floor(gX - sqx)) - 2); + int maxy = (std::min)(height, static_cast(std::ceil(gX + sqx)) + 2); for (int i = minx; i < maxx; i++){ for (int j = miny; j < maxy; j++){ - torch::Tensor ji = torch::tensor({j, i}, torch::TensorOptions().dtype(torch::kFloat32).device(device)); - - torch::Tensor delta = center - ji; - torch::Tensor sigma = ( + float xCam = gX - j; + float yCam = gY - i; + float sigma = ( 0.5f - * (conic[0] * delta[0] * delta[0] + conic[2] * delta[1] * delta[1]) - + conic[1] * delta[0] * delta[1] + * (A * xCam * xCam + C * yCam * yCam) + + B * xCam * yCam ); - if (sigma.item() < 0.0f) continue; - float alpha = (std::min)(0.999f, (opacities[gaussianId] * torch::exp(-sigma)).item()); + if (sigma < 0.0f) continue; + float alpha = (std::min)(0.999f, (pOpacities[gaussianId] * std::exp(-sigma))); if (alpha < alphaThresh) continue; - torch::Tensor T = finalTs[i][j]; - torch::Tensor nextT = T * (1.0f - alpha); + size_t pixIdx = (i * height + j); + float T = pFinalTs[pixIdx]; + float nextT = T * (1.0f - alpha); - // if (nextT.item() <= 1e-4f){ - // idx -= 1; - // break; - // } + float alphaT = alpha * T; - // outImg[i][j] = torch::tensor({1.0f, 1.0f, 1.0f}); - outImg[i][j] += alpha * T * colors[gaussianId]; - finalTs[i][j] = nextT; + pOutImg[pixIdx * 3 + 0] += alphaT * (pColors[gaussianId * 3 + 0] + bgX); + pOutImg[pixIdx * 3 + 1] += alphaT * (pColors[gaussianId * 3 + 1] + bgY); + pOutImg[pixIdx * 3 + 2] += alphaT * (pColors[gaussianId * 3 + 2] + bgZ); + + pFinalTs[pixIdx] = nextT; + pFinalIdx[pixIdx] = idx; } } } return std::make_tuple(outImg, finalTs, finalIdx); + /* int minx = 99999; int miny = 99999; @@ -418,16 +460,16 @@ std::tuple< finalTs[i][j] = T; finalIdx[i][j] = idx; - // outImg[i][j] += T * background; + outImg[i][j] += T * background; } } std::cout << "[" << minx << ", " << miny << "], [" << maxx << ", " << maxy << "]" << std::endl; return std::make_tuple(outImg, finalTs, finalIdx); + */ /* - int blockX = std::get<0>(block); int blockY = std::get<1>(block); int tileBoundsX = std::get<0>(tile_bounds); @@ -446,7 +488,7 @@ std::tuple< torch::Tensor conic = conics[gaussianId]; torch::Tensor center = xys[gaussianId]; torch::Tensor delta = center - ji; - +pGaussianIds torch::Tensor sigma = ( 0.5f * (conic[0] * delta[0] * delta[0] + conic[2] * delta[1] * delta[1]) @@ -478,7 +520,7 @@ std::tuple< } return std::make_tuple(outImg, finalTs, finalIdx); - */ +*/ } From df60f5226e124277014ab190cb8dedfffb11c71a Mon Sep 17 00:00:00 2001 From: Piero Toffanin Date: Sat, 16 Mar 2024 19:41:31 +0000 Subject: [PATCH 10/19] Build CUDA+CPU working --- CMakeLists.txt | 26 ++-- gsplat.hpp | 6 +- project_gaussians.cpp | 167 ++++++++++++--------- project_gaussians.hpp | 28 +++- rasterize_gaussians.cpp | 244 +++++++++++++++++++++---------- rasterize_gaussians.hpp | 21 ++- simple_trainer.cpp | 43 ++++-- tile_bounds.hpp | 5 + vendor/gsplat-cpu/bindings.h | 16 +- vendor/gsplat-cpu/gsplat_cpu.cpp | 35 ++--- 10 files changed, 368 insertions(+), 223 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9d1eff4..21687eb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -40,13 +40,11 @@ elseif(GPU_RUNTIME STREQUAL "HIP") list(APPEND CMAKE_PREFIX_PATH "${ROCM_ROOT}") endif() +set(CMAKE_CXX_STANDARD 17) if((GPU_RUNTIME STREQUAL "CUDA") OR (GPU_RUNTIME STREQUAL "HIP")) enable_language(${GPU_RUNTIME}) set(CMAKE_${GPU_RUNTIME}_STANDARD 17) set(${GPU_RUNTIME}_STANDARD 17) -else() - # CPU - set(CMAKE_CXX_STANDARD 17) endif() if (NOT WIN32 AND NOT APPLE) @@ -61,8 +59,10 @@ if (NOT WIN32 AND NOT APPLE) endif() set(OpenCV_LIBS opencv_core opencv_imgproc opencv_highgui opencv_calib3d) +set(GSPLAT_LIBS gsplat_cpu) if((GPU_RUNTIME STREQUAL "CUDA") OR (GPU_RUNTIME STREQUAL "HIP")) add_library(gsplat vendor/gsplat/forward.cu vendor/gsplat/backward.cu vendor/gsplat/bindings.cu vendor/gsplat/helpers.cuh) + list(APPEND GSPLAT_LIBS gsplat) if(GPU_RUNTIME STREQUAL "CUDA") set(GPU_LIBRARIES "cuda") target_link_libraries(gsplat PUBLIC cuda) @@ -71,21 +71,21 @@ if((GPU_RUNTIME STREQUAL "CUDA") OR (GPU_RUNTIME STREQUAL "HIP")) set(GPU_INCLUDE_DIRS "${ROCM_ROOT}/include") target_compile_definitions(gsplat PRIVATE USE_HIP __HIP_PLATFORM_AMD__) endif() -else() - add_library(gsplat vendor/gsplat-cpu/gsplat_cpu.cpp) + target_include_directories(gsplat PRIVATE + ${PROJECT_SOURCE_DIR}/vendor/glm + ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} + ${TORCH_INCLUDE_DIRS} + ) + set_target_properties(gsplat PROPERTIES LINKER_LANGUAGE CXX) endif() -target_include_directories(gsplat PRIVATE - ${PROJECT_SOURCE_DIR}/vendor/glm - ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} - ${TORCH_INCLUDE_DIRS} -) -set_target_properties(gsplat PROPERTIES LINKER_LANGUAGE CXX) +add_library(gsplat_cpu vendor/gsplat-cpu/gsplat_cpu.cpp) +target_include_directories(gsplat_cpu PRIVATE ${TORCH_INCLUDE_DIRS}) add_executable(opensplat opensplat.cpp point_io.cpp nerfstudio.cpp model.cpp kdtree_tensor.cpp spherical_harmonics.cpp cv_utils.cpp utils.cpp project_gaussians.cpp rasterize_gaussians.cpp ssim.cpp optim_scheduler.cpp colmap.cpp input_data.cpp tensor_math.cpp) set_property(TARGET opensplat PROPERTY CXX_STANDARD 17) target_include_directories(opensplat PRIVATE ${PROJECT_SOURCE_DIR}/vendor/glm ${GPU_INCLUDE_DIRS}) -target_link_libraries(opensplat PUBLIC ${STDPPFS_LIBRARY} ${GPU_LIBRARIES} gsplat ${TORCH_LIBRARIES} ${OpenCV_LIBS}) +target_link_libraries(opensplat PUBLIC ${STDPPFS_LIBRARY} ${GPU_LIBRARIES} ${GSPLAT_LIBS} ${TORCH_LIBRARIES} ${OpenCV_LIBS}) if(GPU_RUNTIME STREQUAL "HIP") target_compile_definitions(opensplat PRIVATE USE_HIP __HIP_PLATFORM_AMD__) elseif(GPU_RUNTIME STREQUAL "CUDA") @@ -95,7 +95,7 @@ endif() if(OPENSPLAT_BUILD_SIMPLE_TRAINER) add_executable(simple_trainer simple_trainer.cpp project_gaussians.cpp rasterize_gaussians.cpp cv_utils.cpp) target_include_directories(simple_trainer PRIVATE ${PROJECT_SOURCE_DIR}/vendor/glm ${GPU_INCLUDE_DIRS}) - target_link_libraries(simple_trainer PUBLIC ${GPU_LIBRARIES} gsplat ${TORCH_LIBRARIES} ${OpenCV_LIBS}) + target_link_libraries(simple_trainer PUBLIC ${GPU_LIBRARIES} ${GSPLAT_LIBS} ${TORCH_LIBRARIES} ${OpenCV_LIBS}) set_property(TARGET simple_trainer PROPERTY CXX_STANDARD 17) if(GPU_RUNTIME STREQUAL "HIP") target_compile_definitions(simple_trainer PRIVATE USE_HIP __HIP_PLATFORM_AMD__) diff --git a/gsplat.hpp b/gsplat.hpp index c0b2408..fab3520 100644 --- a/gsplat.hpp +++ b/gsplat.hpp @@ -1,5 +1,5 @@ #if defined(USE_HIP) || defined(USE_CUDA) #include "vendor/gsplat/bindings.h" -#else -#include "vendor/gsplat-cpu/bindings.h" -#endif \ No newline at end of file +#endif + +#include "vendor/gsplat-cpu/bindings.h" \ No newline at end of file diff --git a/project_gaussians.cpp b/project_gaussians.cpp index 6781078..160ad3f 100644 --- a/project_gaussians.cpp +++ b/project_gaussians.cpp @@ -1,7 +1,94 @@ #include "project_gaussians.hpp" #include "gsplat.hpp" -variable_list ProjectGaussians::forward(AutogradContext *ctx, +// variable_list ProjectGaussians::forward(AutogradContext *ctx, +// torch::Tensor means, +// torch::Tensor scales, +// float globScale, +// torch::Tensor quats, +// torch::Tensor viewMat, +// torch::Tensor projMat, +// float fx, +// float fy, +// float cx, +// float cy, +// int imgHeight, +// int imgWidth, +// TileBounds tileBounds, +// float clipThresh +// ){ + +// int numPoints = means.size(0); + +// auto t = project_gaussians_forward_tensor(numPoints, means, scales, globScale, +// quats, viewMat, projMat, fx, fy, +// cx, cy, imgHeight, imgWidth, tileBounds, clipThresh); +// torch::Tensor cov3d = std::get<0>(t); +// torch::Tensor xys = std::get<1>(t); +// torch::Tensor depths = std::get<2>(t); +// torch::Tensor radii = std::get<3>(t); +// torch::Tensor conics = std::get<4>(t); +// torch::Tensor numTilesHit = std::get<5>(t); + +// ctx->saved_data["imgHeight"] = imgHeight; +// ctx->saved_data["imgWidth"] = imgWidth; +// ctx->saved_data["numPoints"] = numPoints; +// ctx->saved_data["globScale"] = globScale; +// ctx->saved_data["fx"] = fx; +// ctx->saved_data["fy"] = fy; +// ctx->saved_data["cx"] = cx; +// ctx->saved_data["cy"] = cy; +// ctx->save_for_backward({ means, scales, quats, viewMat, projMat, cov3d, radii, conics }); + +// return { xys, depths, radii, conics, numTilesHit, cov3d }; +// } + +// tensor_list ProjectGaussians::backward(AutogradContext *ctx, tensor_list grad_outputs) { +// torch::Tensor v_xys = grad_outputs[0]; +// torch::Tensor v_depths = grad_outputs[1]; +// torch::Tensor v_radii = grad_outputs[2]; +// torch::Tensor v_conics = grad_outputs[3]; +// torch::Tensor v_numTiles = grad_outputs[4]; +// torch::Tensor v_cov3d = grad_outputs[5]; + +// variable_list saved = ctx->get_saved_variables(); +// torch::Tensor means = saved[0]; +// torch::Tensor scales = saved[1]; +// torch::Tensor quats = saved[2]; +// torch::Tensor viewMat = saved[3]; +// torch::Tensor projMat = saved[4]; +// torch::Tensor cov3d = saved[5]; +// torch::Tensor radii = saved[6]; +// torch::Tensor conics = saved[7]; + +// auto t = project_gaussians_backward_tensor(ctx->saved_data["numPoints"].toInt(), +// means, scales, ctx->saved_data["globScale"].toDouble(), +// quats, viewMat, projMat, +// ctx->saved_data["fx"].toDouble(), ctx->saved_data["fy"].toDouble(), +// ctx->saved_data["cx"].toDouble(), ctx->saved_data["cy"].toDouble(), +// ctx->saved_data["imgHeight"].toInt(), ctx->saved_data["imgWidth"].toInt(), +// cov3d, radii, +// conics, v_xys, v_depths, v_conics); +// torch::Tensor none; + +// return {std::get<2>(t), // v_mean +// std::get<3>(t), // v_scale +// none, // globScale +// std::get<4>(t), // v_quat +// none, // viewMat +// none, // projMat +// none, // fx +// none, // fy +// none, // cx +// none, // cy +// none, // imgHeight +// none, // imgWidth +// none, // tileBounds +// none // clipThresh +// }; +// } + +variable_list ProjectGaussiansCPU::Apply( torch::Tensor means, torch::Tensor scales, float globScale, @@ -14,80 +101,20 @@ variable_list ProjectGaussians::forward(AutogradContext *ctx, float cy, int imgHeight, int imgWidth, - TileBounds tileBounds, float clipThresh ){ int numPoints = means.size(0); - auto t = project_gaussians_forward_tensor(numPoints, means, scales, globScale, + auto t = project_gaussians_forward_tensor_cpu(numPoints, means, scales, globScale, quats, viewMat, projMat, fx, fy, - cx, cy, imgHeight, imgWidth, tileBounds, clipThresh); - torch::Tensor cov3d = std::get<0>(t); - torch::Tensor xys = std::get<1>(t); - torch::Tensor depths = std::get<2>(t); - torch::Tensor radii = std::get<3>(t); - torch::Tensor conics = std::get<4>(t); - torch::Tensor numTilesHit = std::get<5>(t); - torch::Tensor cov2d = std::get<6>(t); + cx, cy, imgHeight, imgWidth, clipThresh); + + torch::Tensor xys = std::get<0>(t); + torch::Tensor radii = std::get<1>(t); + torch::Tensor conics = std::get<2>(t); + torch::Tensor cov2d = std::get<3>(t); + torch::Tensor camDepths = std::get<4>(t); - - if (ctx != nullptr){ - ctx->saved_data["imgHeight"] = imgHeight; - ctx->saved_data["imgWidth"] = imgWidth; - ctx->saved_data["numPoints"] = numPoints; - ctx->saved_data["globScale"] = globScale; - ctx->saved_data["fx"] = fx; - ctx->saved_data["fy"] = fy; - ctx->saved_data["cx"] = cx; - ctx->saved_data["cy"] = cy; - ctx->save_for_backward({ means, scales, quats, viewMat, projMat, cov3d, radii, conics }); - } - - return { xys, depths, radii, conics, numTilesHit, cov3d, cov2d }; -} - -tensor_list ProjectGaussians::backward(AutogradContext *ctx, tensor_list grad_outputs) { - torch::Tensor v_xys = grad_outputs[0]; - torch::Tensor v_depths = grad_outputs[1]; - torch::Tensor v_radii = grad_outputs[2]; - torch::Tensor v_conics = grad_outputs[3]; - torch::Tensor v_numTiles = grad_outputs[4]; - torch::Tensor v_cov3d = grad_outputs[5]; - - variable_list saved = ctx->get_saved_variables(); - torch::Tensor means = saved[0]; - torch::Tensor scales = saved[1]; - torch::Tensor quats = saved[2]; - torch::Tensor viewMat = saved[3]; - torch::Tensor projMat = saved[4]; - torch::Tensor cov3d = saved[5]; - torch::Tensor radii = saved[6]; - torch::Tensor conics = saved[7]; - - auto t = project_gaussians_backward_tensor(ctx->saved_data["numPoints"].toInt(), - means, scales, ctx->saved_data["globScale"].toDouble(), - quats, viewMat, projMat, - ctx->saved_data["fx"].toDouble(), ctx->saved_data["fy"].toDouble(), - ctx->saved_data["cx"].toDouble(), ctx->saved_data["cy"].toDouble(), - ctx->saved_data["imgHeight"].toInt(), ctx->saved_data["imgWidth"].toInt(), - cov3d, radii, - conics, v_xys, v_depths, v_conics); - torch::Tensor none; - - return {std::get<2>(t), // v_mean - std::get<3>(t), // v_scale - none, // globScale - std::get<4>(t), // v_quat - none, // viewMat - none, // projMat - none, // fx - none, // fy - none, // cx - none, // cy - none, // imgHeight - none, // imgWidth - none, // tileBounds - none // clipThresh - }; + return { xys, radii, conics, cov2d, camDepths }; } \ No newline at end of file diff --git a/project_gaussians.hpp b/project_gaussians.hpp index d84f53f..eb41247 100644 --- a/project_gaussians.hpp +++ b/project_gaussians.hpp @@ -6,9 +6,29 @@ using namespace torch::autograd; -class ProjectGaussians : public Function{ +// class ProjectGaussians : public Function{ +// public: +// static variable_list forward(AutogradContext *ctx, +// torch::Tensor means, +// torch::Tensor scales, +// float globScale, +// torch::Tensor quats, +// torch::Tensor viewMat, +// torch::Tensor projMat, +// float fx, +// float fy, +// float cx, +// float cy, +// int imgHeight, +// int imgWidth, +// TileBounds tileBounds, +// float clipThresh = 0.01); +// static tensor_list backward(AutogradContext *ctx, tensor_list grad_outputs); +// }; + +class ProjectGaussiansCPU{ public: - static variable_list forward(AutogradContext *ctx, + static variable_list Apply( torch::Tensor means, torch::Tensor scales, float globScale, @@ -21,12 +41,8 @@ class ProjectGaussians : public Function{ float cy, int imgHeight, int imgWidth, - TileBounds tileBounds, float clipThresh = 0.01); - static tensor_list backward(AutogradContext *ctx, tensor_list grad_outputs); }; - - #endif \ No newline at end of file diff --git a/rasterize_gaussians.cpp b/rasterize_gaussians.cpp index deee76c..f1058f1 100644 --- a/rasterize_gaussians.cpp +++ b/rasterize_gaussians.cpp @@ -37,15 +37,120 @@ std::tuple block = std::make_tuple(BLOCK_X, BLOCK_Y, 1); +// std::tuple imgSize = std::make_tuple(imgWidth, imgHeight, 1); + +// torch::Tensor cumTilesHit = torch::cumsum(numTilesHit, 0, torch::kInt32); +// int numIntersects = cumTilesHit[cumTilesHit.size(0) - 1].item(); + +// auto b = binAndSortGaussians(numPoints, numIntersects, xys, depths, radii, cumTilesHit, tileBounds); +// torch::Tensor gaussianIdsSorted = std::get<3>(b); +// torch::Tensor tileBins = std::get<4>(b); + +// auto t = rasterize_forward_tensor(tileBounds, block, imgSize, +// gaussianIdsSorted, +// tileBins, +// xys, +// conics, +// colors, +// opacity, +// background); +// // Final image +// torch::Tensor outImg = std::get<0>(t); + +// // Map of alpha-inverse (1 - finalTs = alpha) +// torch::Tensor finalTs = std::get<1>(t); + +// // Map of tile bin IDs +// torch::Tensor finalIdx = std::get<2>(t); + +// ctx->saved_data["imgWidth"] = imgWidth; +// ctx->saved_data["imgHeight"] = imgHeight; +// ctx->save_for_backward({ gaussianIdsSorted, tileBins, xys, conics, colors, opacity, background, finalTs, finalIdx }); + +// return outImg; +// } + +// tensor_list RasterizeGaussians::backward(AutogradContext *ctx, tensor_list grad_outputs) { +// torch::Tensor v_outImg = grad_outputs[0]; +// int imgHeight = ctx->saved_data["imgHeight"].toInt(); +// int imgWidth = ctx->saved_data["imgWidth"].toInt(); + +// variable_list saved = ctx->get_saved_variables(); +// torch::Tensor gaussianIdsSorted = saved[0]; +// torch::Tensor tileBins = saved[1]; +// torch::Tensor xys = saved[2]; +// torch::Tensor conics = saved[3]; +// torch::Tensor colors = saved[4]; +// torch::Tensor opacity = saved[5]; +// torch::Tensor background = saved[6]; +// torch::Tensor finalTs = saved[7]; +// torch::Tensor finalIdx = saved[8]; + +// // torch::Tensor v_outAlpha = torch::zeros({imgHeight, imgWidth}, torch::TensorOptions().device(v_outImg.get_device()); +// torch::Tensor v_outAlpha = torch::zeros_like(v_outImg.index({"...", 0})); + +// auto t = rasterize_backward_tensor(imgHeight, imgWidth, +// gaussianIdsSorted, +// tileBins, +// xys, +// conics, +// colors, +// opacity, +// background, +// finalTs, +// finalIdx, +// v_outImg, +// v_outAlpha); + +// torch::Tensor v_xy = std::get<0>(t); +// torch::Tensor v_conic = std::get<1>(t); +// torch::Tensor v_colors = std::get<2>(t); +// torch::Tensor v_opacity = std::get<3>(t); +// torch::Tensor none; + +// return { v_xy, +// none, // depths +// none, // radii +// v_conic, +// none, // numTilesHit +// v_colors, +// v_opacity, +// none, // imgHeight +// none, // imgWidth +// none // background +// }; +// } + + +torch::Tensor RasterizeGaussiansCPU::forward(AutogradContext *ctx, torch::Tensor xys, - torch::Tensor depths, torch::Tensor radii, torch::Tensor conics, - torch::Tensor numTilesHit, torch::Tensor colors, torch::Tensor opacity, torch::Tensor cov2d, + torch::Tensor camDepths, int imgHeight, int imgWidth, torch::Tensor background @@ -53,31 +158,15 @@ torch::Tensor RasterizeGaussians::forward(AutogradContext *ctx, int numPoints = xys.size(0); - TileBounds tileBounds = std::make_tuple( - (imgWidth + BLOCK_X - 1) / BLOCK_X, - (imgHeight + BLOCK_Y - 1) / BLOCK_Y, - 1 - ); - std::tuple block = std::make_tuple(BLOCK_X, BLOCK_Y, 1); - std::tuple imgSize = std::make_tuple(imgWidth, imgHeight, 1); - - torch::Tensor cumTilesHit = torch::cumsum(numTilesHit, 0, torch::kInt32); - int numIntersects = cumTilesHit[cumTilesHit.size(0) - 1].item(); - - auto b = binAndSortGaussians(numPoints, numIntersects, xys, depths, radii, cumTilesHit, tileBounds); - torch::Tensor gaussianIdsSorted = std::get<3>(b); - torch::Tensor tileBins = std::get<4>(b); - - auto t = rasterize_forward_tensor(tileBounds, block, imgSize, - gaussianIdsSorted, - tileBins, + auto t = rasterize_forward_tensor_cpu(imgWidth, imgHeight, xys, conics, colors, opacity, background, cov2d, - depths); + camDepths + ); // Final image torch::Tensor outImg = std::get<0>(t); @@ -87,69 +176,68 @@ torch::Tensor RasterizeGaussians::forward(AutogradContext *ctx, std::cout << "WROTE"; exit(1); - // Map of alpha-inverse (1 - finalTs = alpha) torch::Tensor finalTs = std::get<1>(t); - // Map of tile bin IDs + // Map of gaussian IDs torch::Tensor finalIdx = std::get<2>(t); - if (ctx != nullptr){ - ctx->saved_data["imgWidth"] = imgWidth; - ctx->saved_data["imgHeight"] = imgHeight; - ctx->save_for_backward({ gaussianIdsSorted, tileBins, xys, conics, colors, opacity, background, finalTs, finalIdx }); - } + ctx->saved_data["imgWidth"] = imgWidth; + ctx->saved_data["imgHeight"] = imgHeight; + ctx->save_for_backward({ xys, conics, colors, opacity, background, finalTs, finalIdx }); return outImg; } -tensor_list RasterizeGaussians::backward(AutogradContext *ctx, tensor_list grad_outputs) { - torch::Tensor v_outImg = grad_outputs[0]; - int imgHeight = ctx->saved_data["imgHeight"].toInt(); - int imgWidth = ctx->saved_data["imgWidth"].toInt(); - - variable_list saved = ctx->get_saved_variables(); - torch::Tensor gaussianIdsSorted = saved[0]; - torch::Tensor tileBins = saved[1]; - torch::Tensor xys = saved[2]; - torch::Tensor conics = saved[3]; - torch::Tensor colors = saved[4]; - torch::Tensor opacity = saved[5]; - torch::Tensor background = saved[6]; - torch::Tensor finalTs = saved[7]; - torch::Tensor finalIdx = saved[8]; - - // torch::Tensor v_outAlpha = torch::zeros({imgHeight, imgWidth}, torch::TensorOptions().device(v_outImg.get_device()); - torch::Tensor v_outAlpha = torch::zeros_like(v_outImg.index({"...", 0})); +tensor_list RasterizeGaussiansCPU::backward(AutogradContext *ctx, tensor_list grad_outputs) { + // torch::Tensor v_outImg = grad_outputs[0]; + // int imgHeight = ctx->saved_data["imgHeight"].toInt(); + // int imgWidth = ctx->saved_data["imgWidth"].toInt(); + + // variable_list saved = ctx->get_saved_variables(); + // torch::Tensor gaussianIdsSorted = saved[0]; + // torch::Tensor tileBins = saved[1]; + // torch::Tensor xys = saved[2]; + // torch::Tensor conics = saved[3]; + // torch::Tensor colors = saved[4]; + // torch::Tensor opacity = saved[5]; + // torch::Tensor background = saved[6]; + // torch::Tensor finalTs = saved[7]; + // torch::Tensor finalIdx = saved[8]; + + // // torch::Tensor v_outAlpha = torch::zeros({imgHeight, imgWidth}, torch::TensorOptions().device(v_outImg.get_device()); + // torch::Tensor v_outAlpha = torch::zeros_like(v_outImg.index({"...", 0})); - auto t = rasterize_backward_tensor(imgHeight, imgWidth, - gaussianIdsSorted, - tileBins, - xys, - conics, - colors, - opacity, - background, - finalTs, - finalIdx, - v_outImg, - v_outAlpha); - - torch::Tensor v_xy = std::get<0>(t); - torch::Tensor v_conic = std::get<1>(t); - torch::Tensor v_colors = std::get<2>(t); - torch::Tensor v_opacity = std::get<3>(t); - torch::Tensor none; - - return { v_xy, - none, // depths - none, // radii - v_conic, - none, // numTilesHit - v_colors, - v_opacity, - none, // imgHeight - none, // imgWidth - none // background - }; -} \ No newline at end of file + // auto t = rasterize_backward_tensor(imgHeight, imgWidth, + // gaussianIdsSorted, + // tileBins, + // xys, + // conics, + // colors, + // opacity, + // background, + // finalTs, + // finalIdx, + // v_outImg, + // v_outAlpha); + + // torch::Tensor v_xy = std::get<0>(t); + // torch::Tensor v_conic = std::get<1>(t); + // torch::Tensor v_colors = std::get<2>(t); + // torch::Tensor v_opacity = std::get<3>(t); + // torch::Tensor none; + + // return { v_xy, + // none, // depths + // none, // radii + // v_conic, + // none, // numTilesHit + // v_colors, + // v_opacity, + // none, // imgHeight + // none, // imgWidth + // none // background + // }; +} + + diff --git a/rasterize_gaussians.hpp b/rasterize_gaussians.hpp index a97f69d..4d8229a 100644 --- a/rasterize_gaussians.hpp +++ b/rasterize_gaussians.hpp @@ -17,17 +17,32 @@ std::tuple{ +// class RasterizeGaussians : public Function{ +// public: +// static torch::Tensor forward(AutogradContext *ctx, +// torch::Tensor xys, +// torch::Tensor depths, +// torch::Tensor radii, +// torch::Tensor conics, +// torch::Tensor numTilesHit, +// torch::Tensor colors, +// torch::Tensor opacity, +// int imgHeight, +// int imgWidth, +// torch::Tensor background); +// static tensor_list backward(AutogradContext *ctx, tensor_list grad_outputs); +// }; + +class RasterizeGaussiansCPU : public Function{ public: static torch::Tensor forward(AutogradContext *ctx, torch::Tensor xys, - torch::Tensor depths, torch::Tensor radii, torch::Tensor conics, - torch::Tensor numTilesHit, torch::Tensor colors, torch::Tensor opacity, torch::Tensor cov2d, + torch::Tensor camDepths, int imgHeight, int imgWidth, torch::Tensor background); diff --git a/simple_trainer.cpp b/simple_trainer.cpp index 38660b1..495bd3e 100644 --- a/simple_trainer.cpp +++ b/simple_trainer.cpp @@ -23,9 +23,6 @@ using namespace torch::indexing; - - - int main(int argc, char **argv){ int width = 128, height = 128; @@ -34,7 +31,7 @@ int main(int argc, char **argv){ float learningRate = 0.01; torch::Device device = torch::kCPU; - if (torch::cuda::is_available()) { + if (torch::cuda::is_available() && !(argc == 2 && std::string(argv[1]) == "--cpu")){ std::cout << "Using CUDA" << std::endl; device = torch::kCUDA; }else{ @@ -108,24 +105,44 @@ int main(int argc, char **argv){ torch::nn::MSELoss mseLoss; for (size_t i = 0; i < iterations; i++){ - auto p = ProjectGaussians::forward(nullptr, means, scales, 1, + // auto p = ProjectGaussians::Apply(means, scales, 1, + // quats, viewMat, viewMat, + // focal, focal, + // width / 2, + // height / 2, + // height, + // width, + // tileBounds); + + // torch::Tensor outImg = RasterizeGaussians::apply( + // p[0], // xys + // p[1], // depths + // p[2], // radii, + // p[3], // conics + // p[4], // numTilesHit + // torch::sigmoid(rgbs), + // torch::sigmoid(opacities), + // p[6], // cov2d + // height, + // width, + // background); + + auto p = ProjectGaussiansCPU::Apply(means, scales, 1, quats, viewMat, viewMat, focal, focal, width / 2, height / 2, height, - width, - tileBounds); + width); - torch::Tensor outImg = RasterizeGaussians::forward(nullptr, + torch::Tensor outImg = RasterizeGaussiansCPU::apply( p[0], // xys - p[1], // depths - p[2], // radii, - p[3], // conics - p[4], // numTilesHit + p[1], // radii, + p[2], // conics torch::sigmoid(rgbs), torch::sigmoid(opacities), - p[6], // cov2d + p[3], // cov2d + p[4], // camDepths height, width, background); diff --git a/tile_bounds.hpp b/tile_bounds.hpp index 7b29dc2..4c1330b 100644 --- a/tile_bounds.hpp +++ b/tile_bounds.hpp @@ -1,3 +1,8 @@ +#ifndef TILE_BOUNDS_H +#define TILE_BOUNDS_H + #include typedef std::tuple TileBounds; + +#endif \ No newline at end of file diff --git a/vendor/gsplat-cpu/bindings.h b/vendor/gsplat-cpu/bindings.h index f9ccd8f..5f90386 100644 --- a/vendor/gsplat-cpu/bindings.h +++ b/vendor/gsplat-cpu/bindings.h @@ -27,10 +27,8 @@ std::tuple< torch::Tensor, torch::Tensor, torch::Tensor, - torch::Tensor, - torch::Tensor, torch::Tensor> -project_gaussians_forward_tensor( +project_gaussians_forward_tensor_cpu( const int num_points, torch::Tensor &means3d, torch::Tensor &scales, @@ -44,7 +42,6 @@ project_gaussians_forward_tensor( const float cy, const unsigned img_height, const unsigned img_width, - const std::tuple tile_bounds, const float clip_thresh ); @@ -96,19 +93,16 @@ std::tuple< torch::Tensor, torch::Tensor, torch::Tensor -> rasterize_forward_tensor( - const std::tuple tile_bounds, - const std::tuple block, - const std::tuple img_size, - const torch::Tensor &gaussian_ids_sorted, - const torch::Tensor &tile_bins, +> rasterize_forward_tensor_cpu( + const int width, + const int height, const torch::Tensor &xys, const torch::Tensor &conics, const torch::Tensor &colors, const torch::Tensor &opacities, const torch::Tensor &background, const torch::Tensor &cov2d, - const torch::Tensor &depths + const torch::Tensor &camDepths ); std:: diff --git a/vendor/gsplat-cpu/gsplat_cpu.cpp b/vendor/gsplat-cpu/gsplat_cpu.cpp index 8e0a4c0..893a1d6 100644 --- a/vendor/gsplat-cpu/gsplat_cpu.cpp +++ b/vendor/gsplat-cpu/gsplat_cpu.cpp @@ -80,10 +80,8 @@ std::tuple< torch::Tensor, torch::Tensor, torch::Tensor, - torch::Tensor, - torch::Tensor, torch::Tensor> -project_gaussians_forward_tensor( +project_gaussians_forward_tensor_cpu( const int num_points, torch::Tensor &means3d, torch::Tensor &scales, @@ -97,7 +95,6 @@ project_gaussians_forward_tensor( const float cy, const unsigned img_height, const unsigned img_width, - const std::tuple tile_bounds, const float clip_thresh ){ float fovx = 0.5f * static_cast(img_width) / fx; @@ -169,18 +166,10 @@ project_gaussians_forward_tensor( torch::Tensor v = 0.5f * ((pProj.index({"...", 1}) + 1.0f) * static_cast(img_width) - 1.0f); torch::Tensor xys = torch::stack({u, v}, -1); // center - auto bbox = getTileBbox(xys, radius, tile_bounds); - torch::Tensor tileMin = std::get<0>(bbox); - torch::Tensor tileMax = std::get<1>(bbox); - torch::Tensor numTilesHit = (tileMax.index({"...", 0}) - tileMin.index({"...", 0})) * - (tileMax.index({"...", 1}) - tileMin.index({"...", 1})); - - torch::Tensor depths = pView.index({"...", 2}); torch::Tensor radii = radius.to(torch::kInt32); + torch::Tensor camDepths = pProj.index({"...", 2}); - // TODO: compute camDepths as pProj.index({"...", 2}); - - return std::make_tuple(cov3d, xys, depths, radii, conic, numTilesHit, cov2d ); + return std::make_tuple(xys, radii, conic, cov2d, camDepths); } std::tuple< @@ -297,27 +286,22 @@ std::tuple< torch::Tensor, torch::Tensor, torch::Tensor -> rasterize_forward_tensor( - const std::tuple tile_bounds, - const std::tuple block, - const std::tuple img_size, - const torch::Tensor &gaussian_ids_sorted, - const torch::Tensor &tile_bins, +> rasterize_forward_tensor_cpu( + const int width, + const int height, const torch::Tensor &xys, const torch::Tensor &conics, const torch::Tensor &colors, const torch::Tensor &opacities, const torch::Tensor &background, const torch::Tensor &cov2d, - const torch::Tensor &depths + const torch::Tensor &camDepths ){ - // torch::NoGradGuard noGrad; + torch::NoGradGuard noGrad; int channels = colors.size(1); - int width = std::get<1>(img_size); - int height = std::get<0>(img_size); int numPoints = xys.size(0); - float *pDepths = static_cast(depths.data_ptr()); + float *pDepths = static_cast(camDepths.data_ptr()); std::vector< size_t > gIndices( numPoints ); std::iota( gIndices.begin(), gIndices.end(), 0 ); @@ -338,7 +322,6 @@ std::tuple< torch::Tensor sqCov2dX = 3.0f * torch::sqrt(cov2d.index({"...", 0, 0})); torch::Tensor sqCov2dY = 3.0f * torch::sqrt(cov2d.index({"...", 1, 1})); - int32_t *pGaussianIds = static_cast(gaussian_ids_sorted.data_ptr()); float *pConics = static_cast(conics.data_ptr()); float *pCenters = static_cast(xys.data_ptr()); float *pSqCov2dX = static_cast(sqCov2dX.data_ptr()); From 25e724447a28f8ea11d53cf39f49cf04068cad3c Mon Sep 17 00:00:00 2001 From: Piero Toffanin Date: Sun, 17 Mar 2024 17:52:41 +0000 Subject: [PATCH 11/19] Simultaneous GPU+CPU build --- project_gaussians.cpp | 156 +++++++++--------- project_gaussians.hpp | 38 ++--- rasterize_gaussians.cpp | 208 ++++++++++++------------ rasterize_gaussians.hpp | 30 ++-- simple_trainer.cpp | 112 +++++++------ vendor/gsplat-cpu/bindings.h | 62 +------ vendor/gsplat-cpu/gsplat_cpu.cpp | 269 +------------------------------ 7 files changed, 287 insertions(+), 588 deletions(-) diff --git a/project_gaussians.cpp b/project_gaussians.cpp index 160ad3f..895ee82 100644 --- a/project_gaussians.cpp +++ b/project_gaussians.cpp @@ -1,92 +1,92 @@ #include "project_gaussians.hpp" #include "gsplat.hpp" -// variable_list ProjectGaussians::forward(AutogradContext *ctx, -// torch::Tensor means, -// torch::Tensor scales, -// float globScale, -// torch::Tensor quats, -// torch::Tensor viewMat, -// torch::Tensor projMat, -// float fx, -// float fy, -// float cx, -// float cy, -// int imgHeight, -// int imgWidth, -// TileBounds tileBounds, -// float clipThresh -// ){ +variable_list ProjectGaussians::forward(AutogradContext *ctx, + torch::Tensor means, + torch::Tensor scales, + float globScale, + torch::Tensor quats, + torch::Tensor viewMat, + torch::Tensor projMat, + float fx, + float fy, + float cx, + float cy, + int imgHeight, + int imgWidth, + TileBounds tileBounds, + float clipThresh + ){ -// int numPoints = means.size(0); + int numPoints = means.size(0); -// auto t = project_gaussians_forward_tensor(numPoints, means, scales, globScale, -// quats, viewMat, projMat, fx, fy, -// cx, cy, imgHeight, imgWidth, tileBounds, clipThresh); -// torch::Tensor cov3d = std::get<0>(t); -// torch::Tensor xys = std::get<1>(t); -// torch::Tensor depths = std::get<2>(t); -// torch::Tensor radii = std::get<3>(t); -// torch::Tensor conics = std::get<4>(t); -// torch::Tensor numTilesHit = std::get<5>(t); + auto t = project_gaussians_forward_tensor(numPoints, means, scales, globScale, + quats, viewMat, projMat, fx, fy, + cx, cy, imgHeight, imgWidth, tileBounds, clipThresh); + torch::Tensor cov3d = std::get<0>(t); + torch::Tensor xys = std::get<1>(t); + torch::Tensor depths = std::get<2>(t); + torch::Tensor radii = std::get<3>(t); + torch::Tensor conics = std::get<4>(t); + torch::Tensor numTilesHit = std::get<5>(t); -// ctx->saved_data["imgHeight"] = imgHeight; -// ctx->saved_data["imgWidth"] = imgWidth; -// ctx->saved_data["numPoints"] = numPoints; -// ctx->saved_data["globScale"] = globScale; -// ctx->saved_data["fx"] = fx; -// ctx->saved_data["fy"] = fy; -// ctx->saved_data["cx"] = cx; -// ctx->saved_data["cy"] = cy; -// ctx->save_for_backward({ means, scales, quats, viewMat, projMat, cov3d, radii, conics }); + ctx->saved_data["imgHeight"] = imgHeight; + ctx->saved_data["imgWidth"] = imgWidth; + ctx->saved_data["numPoints"] = numPoints; + ctx->saved_data["globScale"] = globScale; + ctx->saved_data["fx"] = fx; + ctx->saved_data["fy"] = fy; + ctx->saved_data["cx"] = cx; + ctx->saved_data["cy"] = cy; + ctx->save_for_backward({ means, scales, quats, viewMat, projMat, cov3d, radii, conics }); -// return { xys, depths, radii, conics, numTilesHit, cov3d }; -// } + return { xys, depths, radii, conics, numTilesHit, cov3d }; +} -// tensor_list ProjectGaussians::backward(AutogradContext *ctx, tensor_list grad_outputs) { -// torch::Tensor v_xys = grad_outputs[0]; -// torch::Tensor v_depths = grad_outputs[1]; -// torch::Tensor v_radii = grad_outputs[2]; -// torch::Tensor v_conics = grad_outputs[3]; -// torch::Tensor v_numTiles = grad_outputs[4]; -// torch::Tensor v_cov3d = grad_outputs[5]; +tensor_list ProjectGaussians::backward(AutogradContext *ctx, tensor_list grad_outputs) { + torch::Tensor v_xys = grad_outputs[0]; + torch::Tensor v_depths = grad_outputs[1]; + torch::Tensor v_radii = grad_outputs[2]; + torch::Tensor v_conics = grad_outputs[3]; + torch::Tensor v_numTiles = grad_outputs[4]; + torch::Tensor v_cov3d = grad_outputs[5]; -// variable_list saved = ctx->get_saved_variables(); -// torch::Tensor means = saved[0]; -// torch::Tensor scales = saved[1]; -// torch::Tensor quats = saved[2]; -// torch::Tensor viewMat = saved[3]; -// torch::Tensor projMat = saved[4]; -// torch::Tensor cov3d = saved[5]; -// torch::Tensor radii = saved[6]; -// torch::Tensor conics = saved[7]; + variable_list saved = ctx->get_saved_variables(); + torch::Tensor means = saved[0]; + torch::Tensor scales = saved[1]; + torch::Tensor quats = saved[2]; + torch::Tensor viewMat = saved[3]; + torch::Tensor projMat = saved[4]; + torch::Tensor cov3d = saved[5]; + torch::Tensor radii = saved[6]; + torch::Tensor conics = saved[7]; -// auto t = project_gaussians_backward_tensor(ctx->saved_data["numPoints"].toInt(), -// means, scales, ctx->saved_data["globScale"].toDouble(), -// quats, viewMat, projMat, -// ctx->saved_data["fx"].toDouble(), ctx->saved_data["fy"].toDouble(), -// ctx->saved_data["cx"].toDouble(), ctx->saved_data["cy"].toDouble(), -// ctx->saved_data["imgHeight"].toInt(), ctx->saved_data["imgWidth"].toInt(), -// cov3d, radii, -// conics, v_xys, v_depths, v_conics); -// torch::Tensor none; + auto t = project_gaussians_backward_tensor(ctx->saved_data["numPoints"].toInt(), + means, scales, ctx->saved_data["globScale"].toDouble(), + quats, viewMat, projMat, + ctx->saved_data["fx"].toDouble(), ctx->saved_data["fy"].toDouble(), + ctx->saved_data["cx"].toDouble(), ctx->saved_data["cy"].toDouble(), + ctx->saved_data["imgHeight"].toInt(), ctx->saved_data["imgWidth"].toInt(), + cov3d, radii, + conics, v_xys, v_depths, v_conics); + torch::Tensor none; -// return {std::get<2>(t), // v_mean -// std::get<3>(t), // v_scale -// none, // globScale -// std::get<4>(t), // v_quat -// none, // viewMat -// none, // projMat -// none, // fx -// none, // fy -// none, // cx -// none, // cy -// none, // imgHeight -// none, // imgWidth -// none, // tileBounds -// none // clipThresh -// }; -// } + return {std::get<2>(t), // v_mean + std::get<3>(t), // v_scale + none, // globScale + std::get<4>(t), // v_quat + none, // viewMat + none, // projMat + none, // fx + none, // fy + none, // cx + none, // cy + none, // imgHeight + none, // imgWidth + none, // tileBounds + none // clipThresh + }; +} variable_list ProjectGaussiansCPU::Apply( torch::Tensor means, diff --git a/project_gaussians.hpp b/project_gaussians.hpp index eb41247..b6c7e6c 100644 --- a/project_gaussians.hpp +++ b/project_gaussians.hpp @@ -6,25 +6,25 @@ using namespace torch::autograd; -// class ProjectGaussians : public Function{ -// public: -// static variable_list forward(AutogradContext *ctx, -// torch::Tensor means, -// torch::Tensor scales, -// float globScale, -// torch::Tensor quats, -// torch::Tensor viewMat, -// torch::Tensor projMat, -// float fx, -// float fy, -// float cx, -// float cy, -// int imgHeight, -// int imgWidth, -// TileBounds tileBounds, -// float clipThresh = 0.01); -// static tensor_list backward(AutogradContext *ctx, tensor_list grad_outputs); -// }; +class ProjectGaussians : public Function{ +public: + static variable_list forward(AutogradContext *ctx, + torch::Tensor means, + torch::Tensor scales, + float globScale, + torch::Tensor quats, + torch::Tensor viewMat, + torch::Tensor projMat, + float fx, + float fy, + float cx, + float cy, + int imgHeight, + int imgWidth, + TileBounds tileBounds, + float clipThresh = 0.01); + static tensor_list backward(AutogradContext *ctx, tensor_list grad_outputs); +}; class ProjectGaussiansCPU{ public: diff --git a/rasterize_gaussians.cpp b/rasterize_gaussians.cpp index f1058f1..976d28a 100644 --- a/rasterize_gaussians.cpp +++ b/rasterize_gaussians.cpp @@ -37,110 +37,116 @@ std::tuple block = std::make_tuple(BLOCK_X, BLOCK_Y, 1); + std::tuple imgSize = std::make_tuple(imgWidth, imgHeight, 1); -// int numPoints = xys.size(0); - -// TileBounds tileBounds = std::make_tuple( -// (imgWidth + BLOCK_X - 1) / BLOCK_X, -// (imgHeight + BLOCK_Y - 1) / BLOCK_Y, -// 1 -// ); -// std::tuple block = std::make_tuple(BLOCK_X, BLOCK_Y, 1); -// std::tuple imgSize = std::make_tuple(imgWidth, imgHeight, 1); + torch::Tensor cumTilesHit = torch::cumsum(numTilesHit, 0, torch::kInt32); + int numIntersects = cumTilesHit[cumTilesHit.size(0) - 1].item(); + + auto b = binAndSortGaussians(numPoints, numIntersects, xys, depths, radii, cumTilesHit, tileBounds); + torch::Tensor gaussianIdsSorted = std::get<3>(b); + torch::Tensor tileBins = std::get<4>(b); + + auto t = rasterize_forward_tensor(tileBounds, block, imgSize, + gaussianIdsSorted, + tileBins, + xys, + conics, + colors, + opacity, + background); + // Final image + torch::Tensor outImg = std::get<0>(t); -// torch::Tensor cumTilesHit = torch::cumsum(numTilesHit, 0, torch::kInt32); -// int numIntersects = cumTilesHit[cumTilesHit.size(0) - 1].item(); - -// auto b = binAndSortGaussians(numPoints, numIntersects, xys, depths, radii, cumTilesHit, tileBounds); -// torch::Tensor gaussianIdsSorted = std::get<3>(b); -// torch::Tensor tileBins = std::get<4>(b); - -// auto t = rasterize_forward_tensor(tileBounds, block, imgSize, -// gaussianIdsSorted, -// tileBins, -// xys, -// conics, -// colors, -// opacity, -// background); -// // Final image -// torch::Tensor outImg = std::get<0>(t); - -// // Map of alpha-inverse (1 - finalTs = alpha) -// torch::Tensor finalTs = std::get<1>(t); - -// // Map of tile bin IDs -// torch::Tensor finalIdx = std::get<2>(t); - -// ctx->saved_data["imgWidth"] = imgWidth; -// ctx->saved_data["imgHeight"] = imgHeight; -// ctx->save_for_backward({ gaussianIdsSorted, tileBins, xys, conics, colors, opacity, background, finalTs, finalIdx }); + cv::Mat image = tensorToImage(outImg.detach().cpu()); + cv::cvtColor(image, image, cv::COLOR_RGB2BGR); + cv::imwrite("testcuda.png", image); + std::cout << "WROTE! " << imgWidth << "x" << imgHeight; + exit(1); + + // Map of alpha-inverse (1 - finalTs = alpha) + torch::Tensor finalTs = std::get<1>(t); + + // Map of tile bin IDs + torch::Tensor finalIdx = std::get<2>(t); + + ctx->saved_data["imgWidth"] = imgWidth; + ctx->saved_data["imgHeight"] = imgHeight; + ctx->save_for_backward({ gaussianIdsSorted, tileBins, xys, conics, colors, opacity, background, finalTs, finalIdx }); -// return outImg; -// } - -// tensor_list RasterizeGaussians::backward(AutogradContext *ctx, tensor_list grad_outputs) { -// torch::Tensor v_outImg = grad_outputs[0]; -// int imgHeight = ctx->saved_data["imgHeight"].toInt(); -// int imgWidth = ctx->saved_data["imgWidth"].toInt(); - -// variable_list saved = ctx->get_saved_variables(); -// torch::Tensor gaussianIdsSorted = saved[0]; -// torch::Tensor tileBins = saved[1]; -// torch::Tensor xys = saved[2]; -// torch::Tensor conics = saved[3]; -// torch::Tensor colors = saved[4]; -// torch::Tensor opacity = saved[5]; -// torch::Tensor background = saved[6]; -// torch::Tensor finalTs = saved[7]; -// torch::Tensor finalIdx = saved[8]; - -// // torch::Tensor v_outAlpha = torch::zeros({imgHeight, imgWidth}, torch::TensorOptions().device(v_outImg.get_device()); -// torch::Tensor v_outAlpha = torch::zeros_like(v_outImg.index({"...", 0})); + return outImg; +} + +tensor_list RasterizeGaussians::backward(AutogradContext *ctx, tensor_list grad_outputs) { + torch::Tensor v_outImg = grad_outputs[0]; + int imgHeight = ctx->saved_data["imgHeight"].toInt(); + int imgWidth = ctx->saved_data["imgWidth"].toInt(); + + variable_list saved = ctx->get_saved_variables(); + torch::Tensor gaussianIdsSorted = saved[0]; + torch::Tensor tileBins = saved[1]; + torch::Tensor xys = saved[2]; + torch::Tensor conics = saved[3]; + torch::Tensor colors = saved[4]; + torch::Tensor opacity = saved[5]; + torch::Tensor background = saved[6]; + torch::Tensor finalTs = saved[7]; + torch::Tensor finalIdx = saved[8]; + + // torch::Tensor v_outAlpha = torch::zeros({imgHeight, imgWidth}, torch::TensorOptions().device(v_outImg.get_device()); + torch::Tensor v_outAlpha = torch::zeros_like(v_outImg.index({"...", 0})); -// auto t = rasterize_backward_tensor(imgHeight, imgWidth, -// gaussianIdsSorted, -// tileBins, -// xys, -// conics, -// colors, -// opacity, -// background, -// finalTs, -// finalIdx, -// v_outImg, -// v_outAlpha); - -// torch::Tensor v_xy = std::get<0>(t); -// torch::Tensor v_conic = std::get<1>(t); -// torch::Tensor v_colors = std::get<2>(t); -// torch::Tensor v_opacity = std::get<3>(t); -// torch::Tensor none; - -// return { v_xy, -// none, // depths -// none, // radii -// v_conic, -// none, // numTilesHit -// v_colors, -// v_opacity, -// none, // imgHeight -// none, // imgWidth -// none // background -// }; -// } + auto t = rasterize_backward_tensor(imgHeight, imgWidth, + gaussianIdsSorted, + tileBins, + xys, + conics, + colors, + opacity, + background, + finalTs, + finalIdx, + v_outImg, + v_outAlpha); + + torch::Tensor v_xy = std::get<0>(t); + torch::Tensor v_conic = std::get<1>(t); + torch::Tensor v_colors = std::get<2>(t); + torch::Tensor v_opacity = std::get<3>(t); + torch::Tensor none; + + return { v_xy, + none, // depths + none, // radii + v_conic, + none, // numTilesHit + v_colors, + v_opacity, + none, // imgHeight + none, // imgWidth + none // background + }; +} torch::Tensor RasterizeGaussiansCPU::forward(AutogradContext *ctx, @@ -173,7 +179,7 @@ torch::Tensor RasterizeGaussiansCPU::forward(AutogradContext *ctx, cv::Mat image = tensorToImage(outImg.detach().cpu()); cv::cvtColor(image, image, cv::COLOR_RGB2BGR); cv::imwrite("test.png", image); - std::cout << "WROTE"; + std::cout << "WROTE " << imgWidth << "x" << imgHeight; exit(1); // Map of alpha-inverse (1 - finalTs = alpha) diff --git a/rasterize_gaussians.hpp b/rasterize_gaussians.hpp index 4d8229a..aa21a0b 100644 --- a/rasterize_gaussians.hpp +++ b/rasterize_gaussians.hpp @@ -17,21 +17,21 @@ std::tuple{ -// public: -// static torch::Tensor forward(AutogradContext *ctx, -// torch::Tensor xys, -// torch::Tensor depths, -// torch::Tensor radii, -// torch::Tensor conics, -// torch::Tensor numTilesHit, -// torch::Tensor colors, -// torch::Tensor opacity, -// int imgHeight, -// int imgWidth, -// torch::Tensor background); -// static tensor_list backward(AutogradContext *ctx, tensor_list grad_outputs); -// }; +class RasterizeGaussians : public Function{ +public: + static torch::Tensor forward(AutogradContext *ctx, + torch::Tensor xys, + torch::Tensor depths, + torch::Tensor radii, + torch::Tensor conics, + torch::Tensor numTilesHit, + torch::Tensor colors, + torch::Tensor opacity, + int imgHeight, + int imgWidth, + torch::Tensor background); + static tensor_list backward(AutogradContext *ctx, tensor_list grad_outputs); +}; class RasterizeGaussiansCPU : public Function{ public: diff --git a/simple_trainer.cpp b/simple_trainer.cpp index 495bd3e..70261f6 100644 --- a/simple_trainer.cpp +++ b/simple_trainer.cpp @@ -4,12 +4,10 @@ #include #ifdef USE_HIP #include -#else +#elif defined(USE_CUDA) #include #endif -#include - #include #include #include @@ -24,8 +22,8 @@ using namespace torch::indexing; int main(int argc, char **argv){ - int width = 128, - height = 128; + int width = 256, + height = 256; int numPoints = 100000; int iterations = 1000; float learningRate = 0.01; @@ -66,16 +64,36 @@ int main(int argc, char **argv){ #endif torch::manual_seed(0); + // TODO: remove // Random points, scales and colors - torch::Tensor means = 2.0 * (torch::rand({numPoints, 3}, device) - 0.5); // Positions [-1, 1] - torch::Tensor scales = torch::rand({numPoints, 3}, device); - torch::Tensor rgbs = torch::rand({numPoints, 3}, device); + torch::Tensor means = 2.0 * (torch::rand({numPoints, 3}, torch::kCPU) - 0.5); // Positions [-1, 1] + torch::Tensor scales = torch::rand({numPoints, 3}, torch::kCPU); + torch::Tensor rgbs = torch::rand({numPoints, 3}, torch::kCPU); // Random rotations (quaternions) // quats = ( sqrt(1-u) sin(2πv), sqrt(1-u) cos(2πv), sqrt(u) sin(2πw), sqrt(u) cos(2πw)) - torch::Tensor u = torch::rand({numPoints, 1}, device); - torch::Tensor v = torch::rand({numPoints, 1}, device); - torch::Tensor w = torch::rand({numPoints, 1}, device); + torch::Tensor u = torch::rand({numPoints, 1}, torch::kCPU); + torch::Tensor v = torch::rand({numPoints, 1}, torch::kCPU); + torch::Tensor w = torch::rand({numPoints, 1}, torch::kCPU); + + means = means.to(device); + scales = scales.to(device); + rgbs = rgbs.to(device); + u = u.to(device); + v = v.to(device); + w = w.to(device); + + // TODO: uncomment + // // Random points, scales and colors + // torch::Tensor means = 2.0 * (torch::rand({numPoints, 3}, device) - 0.5); // Positions [-1, 1] + // torch::Tensor scales = torch::rand({numPoints, 3}, device); + // torch::Tensor rgbs = torch::rand({numPoints, 3}, device); + + // // Random rotations (quaternions) + // // quats = ( sqrt(1-u) sin(2πv), sqrt(1-u) cos(2πv), sqrt(u) sin(2πw), sqrt(u) cos(2πw)) + // torch::Tensor u = torch::rand({numPoints, 1}, device); + // torch::Tensor v = torch::rand({numPoints, 1}, device); + // torch::Tensor w = torch::rand({numPoints, 1}, device); torch::Tensor quats = torch::cat({ torch::sqrt(1.0 - u) * torch::sin(2.0 * PI * v), torch::sqrt(1.0 - u) * torch::cos(2.0 * PI * v), @@ -103,31 +121,11 @@ int main(int argc, char **argv){ torch::optim::Adam optimizer({rgbs, means, scales, opacities, quats}, learningRate); torch::nn::MSELoss mseLoss; + torch::Tensor outImg; for (size_t i = 0; i < iterations; i++){ - // auto p = ProjectGaussians::Apply(means, scales, 1, - // quats, viewMat, viewMat, - // focal, focal, - // width / 2, - // height / 2, - // height, - // width, - // tileBounds); - - // torch::Tensor outImg = RasterizeGaussians::apply( - // p[0], // xys - // p[1], // depths - // p[2], // radii, - // p[3], // conics - // p[4], // numTilesHit - // torch::sigmoid(rgbs), - // torch::sigmoid(opacities), - // p[6], // cov2d - // height, - // width, - // background); - - auto p = ProjectGaussiansCPU::Apply(means, scales, 1, + if (device == torch::kCPU){ + auto p = ProjectGaussiansCPU::Apply(means, scales, 1, quats, viewMat, viewMat, focal, focal, width / 2, @@ -135,18 +133,40 @@ int main(int argc, char **argv){ height, width); - torch::Tensor outImg = RasterizeGaussiansCPU::apply( - p[0], // xys - p[1], // radii, - p[2], // conics - torch::sigmoid(rgbs), - torch::sigmoid(opacities), - p[3], // cov2d - p[4], // camDepths - height, - width, - background); - + torch::Tensor outImg = RasterizeGaussiansCPU::apply( + p[0], // xys + p[1], // radii, + p[2], // conics + torch::sigmoid(rgbs), + torch::sigmoid(opacities), + p[3], // cov2d + p[4], // camDepths + height, + width, + background); + }else{ + auto p = ProjectGaussians::apply(means, scales, 1, + quats, viewMat, viewMat, + focal, focal, + width / 2, + height / 2, + height, + width, + tileBounds); + + torch::Tensor outImg = RasterizeGaussians::apply( + p[0], // xys + p[1], // depths + p[2], // radii, + p[3], // conics + p[4], // numTilesHit + torch::sigmoid(rgbs), + torch::sigmoid(opacities), + height, + width, + background); + } + outImg.requires_grad_(); torch::Tensor loss = mseLoss(outImg, gtImage); optimizer.zero_grad(); diff --git a/vendor/gsplat-cpu/bindings.h b/vendor/gsplat-cpu/bindings.h index 5f90386..6b16b6e 100644 --- a/vendor/gsplat-cpu/bindings.h +++ b/vendor/gsplat-cpu/bindings.h @@ -6,22 +6,6 @@ #include #include -torch::Tensor compute_sh_forward_tensor( - unsigned num_points, - unsigned degree, - unsigned degrees_to_use, - torch::Tensor &viewdirs, - torch::Tensor &coeffs -); - -torch::Tensor compute_sh_backward_tensor( - unsigned num_points, - unsigned degree, - unsigned degrees_to_use, - torch::Tensor &viewdirs, - torch::Tensor &v_colors -); - std::tuple< torch::Tensor, torch::Tensor, @@ -45,50 +29,6 @@ project_gaussians_forward_tensor_cpu( const float clip_thresh ); -std::tuple< - torch::Tensor, - torch::Tensor, - torch::Tensor, - torch::Tensor, - torch::Tensor> -project_gaussians_backward_tensor( - const int num_points, - torch::Tensor &means3d, - torch::Tensor &scales, - const float glob_scale, - torch::Tensor &quats, - torch::Tensor &viewmat, - torch::Tensor &projmat, - const float fx, - const float fy, - const float cx, - const float cy, - const unsigned img_height, - const unsigned img_width, - torch::Tensor &cov3d, - torch::Tensor &radii, - torch::Tensor &conics, - torch::Tensor &v_xy, - torch::Tensor &v_depth, - torch::Tensor &v_conic -); - - -std::tuple map_gaussian_to_intersects_tensor( - const int num_points, - const int num_intersects, - const torch::Tensor &xys, - const torch::Tensor &depths, - const torch::Tensor &radii, - const torch::Tensor &cum_tiles_hit, - const std::tuple tile_bounds -); - -torch::Tensor get_tile_bin_edges_tensor( - int num_intersects, - const torch::Tensor &isect_ids_sorted -); - std::tuple< torch::Tensor, torch::Tensor, @@ -112,7 +52,7 @@ std:: torch::Tensor, // dL_dcolors torch::Tensor // dL_dopacity > - rasterize_backward_tensor( + rasterize_backward_tensor_cpu( const unsigned img_height, const unsigned img_width, const torch::Tensor &gaussians_ids_sorted, diff --git a/vendor/gsplat-cpu/gsplat_cpu.cpp b/vendor/gsplat-cpu/gsplat_cpu.cpp index 893a1d6..20273a7 100644 --- a/vendor/gsplat-cpu/gsplat_cpu.cpp +++ b/vendor/gsplat-cpu/gsplat_cpu.cpp @@ -37,44 +37,6 @@ torch::Tensor quatToRotMat(const torch::Tensor &quat){ } -std::tuple getTileBbox(const torch::Tensor &pixCenter, const torch::Tensor &pixRadius, const std::tuple &tileBounds){ - torch::Tensor tileSize = torch::tensor({BLOCK_X, BLOCK_Y}, torch::TensorOptions().dtype(torch::kFloat32).device(pixCenter.device())); - torch::Tensor tileCenter = pixCenter / tileSize; - torch::Tensor tileRadius = pixRadius.index({"...", None}) / tileSize; - torch::Tensor topLeft = (tileCenter - tileRadius).to(torch::kInt32); - torch::Tensor bottomRight = (tileCenter + tileRadius).to(torch::kInt32) + 1; - torch::Tensor tileMin = torch::stack({ - torch::clamp(topLeft.index({"...", 0}), 0, std::get<0>(tileBounds)), - torch::clamp(topLeft.index({"...", 1}), 0, std::get<1>(tileBounds)) - }, -1); - torch::Tensor tileMax = torch::stack({ - torch::clamp(bottomRight.index({"...", 0}), 0, std::get<0>(tileBounds)), - torch::clamp(bottomRight.index({"...", 1}), 0, std::get<1>(tileBounds)) - }, -1); - - return std::make_tuple(tileMin, tileMax); -} - -torch::Tensor compute_sh_forward_tensor( - unsigned num_points, - unsigned degree, - unsigned degrees_to_use, - torch::Tensor &viewdirs, - torch::Tensor &coeffs -){ - return torch::Tensor(); -} - -torch::Tensor compute_sh_backward_tensor( - unsigned num_points, - unsigned degree, - unsigned degrees_to_use, - torch::Tensor &viewdirs, - torch::Tensor &v_colors -){ - return torch::Tensor(); -} - std::tuple< torch::Tensor, torch::Tensor, @@ -172,116 +134,6 @@ project_gaussians_forward_tensor_cpu( return std::make_tuple(xys, radii, conic, cov2d, camDepths); } -std::tuple< - torch::Tensor, - torch::Tensor, - torch::Tensor, - torch::Tensor, - torch::Tensor> -project_gaussians_backward_tensor( - const int num_points, - torch::Tensor &means3d, - torch::Tensor &scales, - const float glob_scale, - torch::Tensor &quats, - torch::Tensor &viewmat, - torch::Tensor &projmat, - const float fx, - const float fy, - const float cx, - const float cy, - const unsigned img_height, - const unsigned img_width, - torch::Tensor &cov3d, - torch::Tensor &radii, - torch::Tensor &conics, - torch::Tensor &v_xy, - torch::Tensor &v_depth, - torch::Tensor &v_conic -){ - return std::make_tuple(torch::Tensor(), torch::Tensor(), torch::Tensor(), torch::Tensor(), torch::Tensor()); -} - - -std::tuple map_gaussian_to_intersects_tensor( - const int num_points, - const int num_intersects, - const torch::Tensor &xys, - const torch::Tensor &depths, - const torch::Tensor &radii, - const torch::Tensor &cum_tiles_hit, - const std::tuple tile_bounds -){ - torch::Device device = xys.device(); - int numIntersects = cum_tiles_hit[-1].item(); - torch::Tensor isectIds = torch::zeros(numIntersects, torch::TensorOptions().dtype(torch::kInt64).device(device)); - torch::Tensor gaussianIds = torch::zeros(numIntersects, torch::TensorOptions().dtype(torch::kInt32).device(device)); - for (int idx = 0; idx < num_points; idx++){ - if (radii[idx].item() <= 0.0f) break; - - auto bbox = getTileBbox(xys[idx], radii[idx], tile_bounds); - torch::Tensor tileMin = std::get<0>(bbox); - torch::Tensor tileMax = std::get<1>(bbox); - int curIdx; - - if (idx == 0){ - curIdx = 0; - }else{ - curIdx = cum_tiles_hit[idx - 1].item(); - } - - float depth = depths[idx].item(); - int32_t depthIdN = *(reinterpret_cast(&depth)); - - int iStart = tileMin[1].item(); - int iEnd = tileMax[1].item(); - int jStart = tileMin[0].item(); - int jEnd = tileMax[0].item(); - int b = std::get<0>(tile_bounds); - - for (int i = iStart; i < iEnd; i++){ - for (int j = jStart; j < jEnd; j++){ - int64_t tileId = i * b + j; - isectIds[curIdx] = static_cast(tileId << 32) | depthIdN; - gaussianIds[curIdx] = idx; - curIdx += 1; - } - } - } - - return std::make_tuple(isectIds, gaussianIds); -} - -torch::Tensor get_tile_bin_edges_tensor( - int num_intersects, - const torch::Tensor &isect_ids_sorted -){ - torch::Tensor tileBins = torch::zeros({num_intersects, 2}, torch::TensorOptions().dtype(torch::kInt32).device(isect_ids_sorted.device())); - - for (int idx = 0; idx < num_intersects; idx++){ - int32_t curTileIdx = static_cast(isect_ids_sorted[idx].item() >> 32); - - if (idx == 0){ - tileBins[curTileIdx][0] = 0; - continue; - } - - if (idx == num_intersects - 1){ - tileBins[curTileIdx][1] = num_intersects; - break; - } - - int32_t prevTileIdx = static_cast(isect_ids_sorted[idx - 1].item() >> 32); - - if (curTileIdx != prevTileIdx){ - tileBins[prevTileIdx][1] = idx; - tileBins[curTileIdx][0] = idx; - } - } - - return tileBins; -} - std::tuple< torch::Tensor, torch::Tensor, @@ -309,10 +161,6 @@ std::tuple< return pDepths[a] < pDepths[b]; }); - std::cout << pDepths[0] << std::endl; - - std::cout << pDepths[100]; - torch::Device device = xys.device(); torch::Tensor outImg = torch::zeros({width, height, channels}, torch::TensorOptions().dtype(torch::kFloat32).device(device)); @@ -389,121 +237,6 @@ std::tuple< } return std::make_tuple(outImg, finalTs, finalIdx); - - -/* - int minx = 99999; - int miny = 99999; - int maxx = 0; - int maxy = 0; - for (int i = 0; i < width; i++){ - std::cout << i << std::endl; - for (int j = 0; j < height; j++){ - float T = 1.0f; - torch::Tensor ji = torch::tensor({j, i}, torch::TensorOptions().dtype(torch::kFloat32).device(device)); - - int idx = 0; - for (; idx < 1; idx++){ - torch::Tensor gaussianId = gaussian_ids_sorted[idx]; - torch::Tensor conic = conics[gaussianId]; - torch::Tensor center = xys[gaussianId]; - torch::Tensor delta = center - ji; - - torch::Tensor sigma = ( - 0.5f - * (conic[0] * delta[0] * delta[0] + conic[2] * delta[1] * delta[1]) - + conic[1] * delta[0] * delta[1] - ); - - if (sigma.item() < 0.0f) continue; - - float alpha = (std::min)(0.999f, (opacities[gaussianId] * torch::exp(-sigma)).item()); - - if (alpha < 1.0f / 255.0f) continue; - - float nextT = T * (1.0f - alpha); - - if (nextT <= 1e-4f){ - idx -= 1; - break; - } - - float vis = alpha * T; - // outImg[i][j] = torch::tensor({1.0f, 1.0f, 1.0f}); - outImg[i][j] += vis * colors[gaussianId]; - - maxx = (std::max)(i, maxx); - maxy = (std::max)(j, maxy); - minx = (std::min)(i, minx); - miny = (std::min)(j, miny); - - - T = nextT; - } - - finalTs[i][j] = T; - finalIdx[i][j] = idx; - outImg[i][j] += T * background; - } - } - - std::cout << "[" << minx << ", " << miny << "], [" << maxx << ", " << maxy << "]" << std::endl; - - return std::make_tuple(outImg, finalTs, finalIdx); - -*/ -/* - int blockX = std::get<0>(block); - int blockY = std::get<1>(block); - int tileBoundsX = std::get<0>(tile_bounds); - - for (int i = 0; i < width; i++){ - for (int j = 0; j < height; j++){ - int tileId = (i / blockX) * tileBoundsX + (j / blockY); - int tileBinStart = tile_bins[tileId][0].item(); - int tileBinEnd = tile_bins[tileId][1].item(); - float T = 1.0f; - torch::Tensor ji = torch::tensor({j, i}, torch::TensorOptions().dtype(torch::kFloat32).device(device)); - - int idx = tileBinStart; - for (; idx < tileBinEnd; idx++){ - torch::Tensor gaussianId = gaussian_ids_sorted[idx]; - torch::Tensor conic = conics[gaussianId]; - torch::Tensor center = xys[gaussianId]; - torch::Tensor delta = center - ji; -pGaussianIds - torch::Tensor sigma = ( - 0.5f - * (conic[0] * delta[0] * delta[0] + conic[2] * delta[1] * delta[1]) - + conic[1] * delta[0] * delta[1] - ); - - if (sigma.item() < 0.0f) continue; - - float alpha = (std::min)(0.999f, (opacities[gaussianId] * torch::exp(-sigma)).item()); - - if (alpha < 1.0f / 255.0f) continue; - - float nextT = T * (1.0f - alpha); - - if (nextT <= 1e-4f){ - idx -= 1; - break; - } - - float vis = alpha * T; - outImg[i][j] += vis * colors[gaussianId]; - T = nextT; - } - - finalTs[i][j] = T; - finalIdx[i][j] = idx; - outImg[i][j] += T * background; - } - } - - return std::make_tuple(outImg, finalTs, finalIdx); -*/ } @@ -514,7 +247,7 @@ std:: torch::Tensor, // dL_dcolors torch::Tensor // dL_dopacity > - rasterize_backward_tensor( + rasterize_backward_tensor_cpu( const unsigned img_height, const unsigned img_width, const torch::Tensor &gaussians_ids_sorted, From 0d54e998029484819b9128b3b08b46bfa8b2480d Mon Sep 17 00:00:00 2001 From: Piero Toffanin Date: Mon, 18 Mar 2024 16:38:19 +0000 Subject: [PATCH 12/19] CPU backward pass rasterization working --- rasterize_gaussians.cpp | 122 +++++++++++++++------------- simple_trainer.cpp | 6 +- vendor/gsplat-cpu/bindings.h | 8 +- vendor/gsplat-cpu/gsplat_cpu.cpp | 134 +++++++++++++++++++++++++++++-- 4 files changed, 198 insertions(+), 72 deletions(-) diff --git a/rasterize_gaussians.cpp b/rasterize_gaussians.cpp index 976d28a..7c613a7 100644 --- a/rasterize_gaussians.cpp +++ b/rasterize_gaussians.cpp @@ -78,11 +78,11 @@ torch::Tensor RasterizeGaussians::forward(AutogradContext *ctx, // Final image torch::Tensor outImg = std::get<0>(t); - cv::Mat image = tensorToImage(outImg.detach().cpu()); - cv::cvtColor(image, image, cv::COLOR_RGB2BGR); - cv::imwrite("testcuda.png", image); - std::cout << "WROTE! " << imgWidth << "x" << imgHeight; - exit(1); + // cv::Mat image = tensorToImage(outImg.detach().cpu()); + // cv::cvtColor(image, image, cv::COLOR_RGB2BGR); + // cv::imwrite("testcuda.png", image); + // std::cout << "WROTE! " << imgWidth << "x" << imgHeight; + // exit(1); // Map of alpha-inverse (1 - finalTs = alpha) torch::Tensor finalTs = std::get<1>(t); @@ -135,6 +135,9 @@ tensor_list RasterizeGaussians::backward(AutogradContext *ctx, tensor_list grad_ torch::Tensor v_opacity = std::get<3>(t); torch::Tensor none; + std::cout << "CUDA " << v_xy[0] << v_conic[0] << v_colors[0] << v_opacity[0] << std::endl; + exit(1); + return { v_xy, none, // depths none, // radii @@ -176,11 +179,11 @@ torch::Tensor RasterizeGaussiansCPU::forward(AutogradContext *ctx, // Final image torch::Tensor outImg = std::get<0>(t); - cv::Mat image = tensorToImage(outImg.detach().cpu()); - cv::cvtColor(image, image, cv::COLOR_RGB2BGR); - cv::imwrite("test.png", image); - std::cout << "WROTE " << imgWidth << "x" << imgHeight; - exit(1); + // cv::Mat image = tensorToImage(outImg.detach().cpu()); + // cv::cvtColor(image, image, cv::COLOR_RGB2BGR); + // cv::imwrite("test.png", image); + // std::cout << "WROTE " << imgWidth << "x" << imgHeight; + // exit(1); // Map of alpha-inverse (1 - finalTs = alpha) torch::Tensor finalTs = std::get<1>(t); @@ -190,60 +193,63 @@ torch::Tensor RasterizeGaussiansCPU::forward(AutogradContext *ctx, ctx->saved_data["imgWidth"] = imgWidth; ctx->saved_data["imgHeight"] = imgHeight; - ctx->save_for_backward({ xys, conics, colors, opacity, background, finalTs, finalIdx }); + ctx->save_for_backward({ xys, conics, colors, opacity, background, cov2d, camDepths, finalTs, finalIdx }); return outImg; } tensor_list RasterizeGaussiansCPU::backward(AutogradContext *ctx, tensor_list grad_outputs) { - // torch::Tensor v_outImg = grad_outputs[0]; - // int imgHeight = ctx->saved_data["imgHeight"].toInt(); - // int imgWidth = ctx->saved_data["imgWidth"].toInt(); - - // variable_list saved = ctx->get_saved_variables(); - // torch::Tensor gaussianIdsSorted = saved[0]; - // torch::Tensor tileBins = saved[1]; - // torch::Tensor xys = saved[2]; - // torch::Tensor conics = saved[3]; - // torch::Tensor colors = saved[4]; - // torch::Tensor opacity = saved[5]; - // torch::Tensor background = saved[6]; - // torch::Tensor finalTs = saved[7]; - // torch::Tensor finalIdx = saved[8]; - - // // torch::Tensor v_outAlpha = torch::zeros({imgHeight, imgWidth}, torch::TensorOptions().device(v_outImg.get_device()); - // torch::Tensor v_outAlpha = torch::zeros_like(v_outImg.index({"...", 0})); + torch::Tensor v_outImg = grad_outputs[0]; + int imgHeight = ctx->saved_data["imgHeight"].toInt(); + int imgWidth = ctx->saved_data["imgWidth"].toInt(); + + variable_list saved = ctx->get_saved_variables(); + torch::Tensor xys = saved[0]; + torch::Tensor conics = saved[1]; + torch::Tensor colors = saved[2]; + torch::Tensor opacity = saved[3]; + torch::Tensor background = saved[4]; + torch::Tensor cov2d = saved[5]; + torch::Tensor camDepths = saved[6]; + torch::Tensor finalTs = saved[7]; + torch::Tensor finalIdx = saved[8]; + + // torch::Tensor v_outAlpha = torch::zeros({imgHeight, imgWidth}, torch::TensorOptions().device(v_outImg.get_device()); + torch::Tensor v_outAlpha = torch::zeros_like(v_outImg.index({"...", 0})); - // auto t = rasterize_backward_tensor(imgHeight, imgWidth, - // gaussianIdsSorted, - // tileBins, - // xys, - // conics, - // colors, - // opacity, - // background, - // finalTs, - // finalIdx, - // v_outImg, - // v_outAlpha); - - // torch::Tensor v_xy = std::get<0>(t); - // torch::Tensor v_conic = std::get<1>(t); - // torch::Tensor v_colors = std::get<2>(t); - // torch::Tensor v_opacity = std::get<3>(t); - // torch::Tensor none; - - // return { v_xy, - // none, // depths - // none, // radii - // v_conic, - // none, // numTilesHit - // v_colors, - // v_opacity, - // none, // imgHeight - // none, // imgWidth - // none // background - // }; + auto t = rasterize_backward_tensor_cpu(imgHeight, imgWidth, + xys, + conics, + colors, + opacity, + background, + cov2d, + camDepths, + finalTs, + finalIdx, + v_outImg, + v_outAlpha); + + torch::Tensor v_xy = std::get<0>(t); + torch::Tensor v_conic = std::get<1>(t); + torch::Tensor v_colors = std::get<2>(t); + torch::Tensor v_opacity = std::get<3>(t); + torch::Tensor none; + + std::cout << "CPU " << v_xy[0] << v_conic[0] << v_colors[0] << v_opacity[0] << std::endl; + exit(1); + + return { v_xy, + none, // depths + none, // radii + v_conic, + none, // numTilesHit + v_colors, + v_opacity, + none, // imgHeight + none, // imgWidth + none // background + }; } diff --git a/simple_trainer.cpp b/simple_trainer.cpp index 70261f6..8494f9f 100644 --- a/simple_trainer.cpp +++ b/simple_trainer.cpp @@ -24,7 +24,7 @@ using namespace torch::indexing; int main(int argc, char **argv){ int width = 256, height = 256; - int numPoints = 100000; + int numPoints = 32; //100000; int iterations = 1000; float learningRate = 0.01; @@ -133,7 +133,7 @@ int main(int argc, char **argv){ height, width); - torch::Tensor outImg = RasterizeGaussiansCPU::apply( + outImg = RasterizeGaussiansCPU::apply( p[0], // xys p[1], // radii, p[2], // conics @@ -154,7 +154,7 @@ int main(int argc, char **argv){ width, tileBounds); - torch::Tensor outImg = RasterizeGaussians::apply( + outImg = RasterizeGaussians::apply( p[0], // xys p[1], // depths p[2], // radii, diff --git a/vendor/gsplat-cpu/bindings.h b/vendor/gsplat-cpu/bindings.h index 6b16b6e..c09776e 100644 --- a/vendor/gsplat-cpu/bindings.h +++ b/vendor/gsplat-cpu/bindings.h @@ -53,15 +53,15 @@ std:: torch::Tensor // dL_dopacity > rasterize_backward_tensor_cpu( - const unsigned img_height, - const unsigned img_width, - const torch::Tensor &gaussians_ids_sorted, - const torch::Tensor &tile_bins, + const int height, + const int width, const torch::Tensor &xys, const torch::Tensor &conics, const torch::Tensor &colors, const torch::Tensor &opacities, const torch::Tensor &background, + const torch::Tensor &cov2d, + const torch::Tensor &camDepths, const torch::Tensor &final_Ts, const torch::Tensor &final_idx, const torch::Tensor &v_output, // dL_dout_color diff --git a/vendor/gsplat-cpu/gsplat_cpu.cpp b/vendor/gsplat-cpu/gsplat_cpu.cpp index 20273a7..bacf7aa 100644 --- a/vendor/gsplat-cpu/gsplat_cpu.cpp +++ b/vendor/gsplat-cpu/gsplat_cpu.cpp @@ -1,5 +1,5 @@ -// Originally based on https://github.dev/nerfstudio-project/gsplat -// This implementation is licensed under the AGPLv3 +// Originally started from https://github.dev/nerfstudio-project/gsplat +// This implementation has been substantially changed and is licensed under the AGPLv3 #include "bindings.h" #include "../gsplat/config.h" @@ -248,19 +248,139 @@ std:: torch::Tensor // dL_dopacity > rasterize_backward_tensor_cpu( - const unsigned img_height, - const unsigned img_width, - const torch::Tensor &gaussians_ids_sorted, - const torch::Tensor &tile_bins, + const int height, + const int width, const torch::Tensor &xys, const torch::Tensor &conics, const torch::Tensor &colors, const torch::Tensor &opacities, const torch::Tensor &background, + const torch::Tensor &cov2d, + const torch::Tensor &camDepths, const torch::Tensor &final_Ts, const torch::Tensor &final_idx, const torch::Tensor &v_output, // dL_dout_color const torch::Tensor &v_output_alpha ){ - return std::make_tuple(torch::Tensor(), torch::Tensor(), torch::Tensor(), torch::Tensor()); + int numPoints = xys.size(0); + int channels = colors.size(1); + torch::Device device = xys.device(); + + torch::Tensor v_xy = torch::zeros({numPoints, 2}, torch::TensorOptions().dtype(torch::kFloat32).device(device)); + torch::Tensor v_conic = torch::zeros({numPoints, 3}, torch::TensorOptions().dtype(torch::kFloat32).device(device)); + torch::Tensor v_colors = torch::zeros({numPoints, channels}, torch::TensorOptions().dtype(torch::kFloat32).device(device)); + torch::Tensor v_opacity = torch::zeros({numPoints, 1}, torch::TensorOptions().dtype(torch::kFloat32).device(device)); + + float *pv_xy = static_cast(v_xy.data_ptr()); + float *pv_conic = static_cast(v_conic.data_ptr()); + float *pv_colors = static_cast(v_colors.data_ptr()); + float *pv_opacity = static_cast(v_opacity.data_ptr()); + + torch::Tensor buffer = torch::zeros({width, height, 3}, torch::TensorOptions().dtype(torch::kFloat32).device(device)); + + std::vector< size_t > gIndices( numPoints ); + + float *pDepths = static_cast(camDepths.data_ptr()); + std::iota( gIndices.begin(), gIndices.end(), 0 ); + std::sort(gIndices.begin(), gIndices.end(), [&pDepths](int a, int b){ + return pDepths[a] < pDepths[b]; + }); + + torch::Tensor sqCov2dX = 3.0f * torch::sqrt(cov2d.index({"...", 0, 0})); + torch::Tensor sqCov2dY = 3.0f * torch::sqrt(cov2d.index({"...", 1, 1})); + + float *pConics = static_cast(conics.data_ptr()); + float *pCenters = static_cast(xys.data_ptr()); + float *pSqCov2dX = static_cast(sqCov2dX.data_ptr()); + float *pSqCov2dY = static_cast(sqCov2dY.data_ptr()); + float *pOpacities = static_cast(opacities.data_ptr()); + + float *pColors = static_cast(colors.data_ptr()); + float *pBuffer = static_cast(buffer.data_ptr()); + float *pv_output = static_cast(v_output.data_ptr()); + float *pv_outputAlpha = static_cast(v_output_alpha.data_ptr()); + + float bgX = background[0].item(); + float bgY = background[1].item(); + float bgZ = background[2].item(); + + torch::Tensor T = final_Ts.clone(); + float *pT = static_cast(T.data_ptr()); + float *pFinalTs = static_cast(final_Ts.data_ptr()); + + const float alphaThresh = 1.0f / 255.0f; + int idx = numPoints - 1; + for (; idx >= 0; idx--){ + int32_t gaussianId = gIndices[idx]; + + float A = pConics[gaussianId * 3 + 0]; + float B = pConics[gaussianId * 3 + 1]; + float C = pConics[gaussianId * 3 + 2]; + + float gX = pCenters[gaussianId * 2 + 0]; + float gY = pCenters[gaussianId * 2 + 1]; + + float sqx = pSqCov2dX[gaussianId]; + float sqy = pSqCov2dY[gaussianId]; + + int minx = (std::max)(0, static_cast(std::floor(gY - sqy)) - 2); + int maxx = (std::min)(width, static_cast(std::ceil(gY + sqy)) + 2); + int miny = (std::max)(0, static_cast(std::floor(gX - sqx)) - 2); + int maxy = (std::min)(height, static_cast(std::ceil(gX + sqx)) + 2); + + for (int i = minx; i < maxx; i++){ + for (int j = miny; j < maxy; j++){ + float xCam = gX - j; + float yCam = gY - i; + float sigma = ( + 0.5f + * (A * xCam * xCam + C * yCam * yCam) + + B * xCam * yCam + ); + + if (sigma < 0.0f) continue; + float vis = std::exp(-sigma); + float alpha = (std::min)(0.999f, pOpacities[gaussianId] * vis); + if (alpha < alphaThresh) continue; + + size_t pixIdx = (i * height + j); + + float ra = 1.0f / (1.0f - alpha); + pT[pixIdx] *= ra; + float T = pT[pixIdx]; + float TFinal = pFinalTs[pixIdx]; + + float fac = alpha * T; + float v_alpha = 0.0f; + pv_colors[gaussianId * 3 + 0] += fac * pv_output[pixIdx * 3 + 0]; + pv_colors[gaussianId * 3 + 1] += fac * pv_output[pixIdx * 3 + 1]; + pv_colors[gaussianId * 3 + 2] += fac * pv_output[pixIdx * 3 + 2]; + + v_alpha += (pColors[gaussianId * 3 + 0] * T - pBuffer[pixIdx * 3 + 0] * ra) * pv_output[pixIdx * 3 + 0]; + v_alpha += (pColors[gaussianId * 3 + 1] * T - pBuffer[pixIdx * 3 + 1] * ra) * pv_output[pixIdx * 3 + 1]; + v_alpha += (pColors[gaussianId * 3 + 2] * T - pBuffer[pixIdx * 3 + 2] * ra) * pv_output[pixIdx * 3 + 2]; + v_alpha += (TFinal * ra * pv_outputAlpha[pixIdx]); + + v_alpha += -TFinal * ra * bgX * pv_output[pixIdx * 3 + 0]; + v_alpha += -TFinal * ra * bgY * pv_output[pixIdx * 3 + 1]; + v_alpha += -TFinal * ra * bgZ * pv_output[pixIdx * 3 + 2]; + + pBuffer[pixIdx * 3 + 0] += pColors[gaussianId * 3 + 0] * fac; + pBuffer[pixIdx * 3 + 1] += pColors[gaussianId * 3 + 1] * fac; + pBuffer[pixIdx * 3 + 2] += pColors[gaussianId * 3 + 2] * fac; + + float v_sigma = -pOpacities[gaussianId] * vis * v_alpha; + pv_conic[gaussianId * 3 + 0] += 0.5f * v_sigma * xCam * xCam; + pv_conic[gaussianId * 3 + 1] += 0.5f * v_sigma * xCam * yCam; + pv_conic[gaussianId * 3 + 2] += 0.5f * v_sigma * yCam * yCam; + + pv_xy[gaussianId * 2 + 0] += v_sigma * (A * xCam + B * yCam); + pv_xy[gaussianId * 2 + 1] += v_sigma * (B * xCam + C * yCam); + + pv_opacity[gaussianId] += vis * v_alpha; + } + } + } + + return std::make_tuple(v_xy, v_conic, v_colors, v_opacity); } \ No newline at end of file From 3be2500bbca99d873fd93d420ec8c9a181a3a338 Mon Sep 17 00:00:00 2001 From: Piero Toffanin Date: Tue, 19 Mar 2024 05:05:06 +0000 Subject: [PATCH 13/19] Fix width/height dimensions --- opensplat.cpp | 2 +- project_gaussians.cpp | 4 +- rasterize_gaussians.cpp | 55 ++++++++++++++++++--------- simple_trainer.cpp | 65 +++++++++++++++++++------------- vendor/gsplat-cpu/gsplat_cpu.cpp | 60 ++++++++++++++++++----------- 5 files changed, 118 insertions(+), 68 deletions(-) diff --git a/opensplat.cpp b/opensplat.cpp index 22eb8a6..e184059 100644 --- a/opensplat.cpp +++ b/opensplat.cpp @@ -37,7 +37,7 @@ int main(int argc, char *argv[]){ ("h,help", "Print usage") ; options.parse_positional({ "input" }); - options.positional_help("[nerfstudio project path]"); + options.positional_help("[colmap or nerfstudio project path]"); cxxopts::ParseResult result; try { result = options.parse(argc, argv); diff --git a/project_gaussians.cpp b/project_gaussians.cpp index 895ee82..9d41697 100644 --- a/project_gaussians.cpp +++ b/project_gaussians.cpp @@ -29,7 +29,7 @@ variable_list ProjectGaussians::forward(AutogradContext *ctx, torch::Tensor radii = std::get<3>(t); torch::Tensor conics = std::get<4>(t); torch::Tensor numTilesHit = std::get<5>(t); - + ctx->saved_data["imgHeight"] = imgHeight; ctx->saved_data["imgWidth"] = imgWidth; ctx->saved_data["numPoints"] = numPoints; @@ -115,6 +115,6 @@ variable_list ProjectGaussiansCPU::Apply( torch::Tensor conics = std::get<2>(t); torch::Tensor cov2d = std::get<3>(t); torch::Tensor camDepths = std::get<4>(t); - + return { xys, radii, conics, cov2d, camDepths }; } \ No newline at end of file diff --git a/rasterize_gaussians.cpp b/rasterize_gaussians.cpp index 7c613a7..5858f9d 100644 --- a/rasterize_gaussians.cpp +++ b/rasterize_gaussians.cpp @@ -77,16 +77,18 @@ torch::Tensor RasterizeGaussians::forward(AutogradContext *ctx, background); // Final image torch::Tensor outImg = std::get<0>(t); - - // cv::Mat image = tensorToImage(outImg.detach().cpu()); - // cv::cvtColor(image, image, cv::COLOR_RGB2BGR); - // cv::imwrite("testcuda.png", image); - // std::cout << "WROTE! " << imgWidth << "x" << imgHeight; - // exit(1); + cv::Mat image = tensorToImage(outImg.detach().cpu()); + cv::cvtColor(image, image, cv::COLOR_RGB2BGR); + cv::imwrite("cudatest.png", image); + std::cout << "WROTE " << imgWidth << "x" << imgHeight; + // Map of alpha-inverse (1 - finalTs = alpha) torch::Tensor finalTs = std::get<1>(t); + std::cout << finalTs << std::endl; + exit(1); + // Map of tile bin IDs torch::Tensor finalIdx = std::get<2>(t); @@ -116,6 +118,9 @@ tensor_list RasterizeGaussians::backward(AutogradContext *ctx, tensor_list grad_ // torch::Tensor v_outAlpha = torch::zeros({imgHeight, imgWidth}, torch::TensorOptions().device(v_outImg.get_device()); torch::Tensor v_outAlpha = torch::zeros_like(v_outImg.index({"...", 0})); + // std::cout << xys[0] << finalTs[0] << std::endl; + // exit(1); + auto t = rasterize_backward_tensor(imgHeight, imgWidth, gaussianIdsSorted, tileBins, @@ -135,8 +140,12 @@ tensor_list RasterizeGaussians::backward(AutogradContext *ctx, tensor_list grad_ torch::Tensor v_opacity = std::get<3>(t); torch::Tensor none; - std::cout << "CUDA " << v_xy[0] << v_conic[0] << v_colors[0] << v_opacity[0] << std::endl; - exit(1); + // for (size_t i = 0; i < v_xy.size(0); i++){ + // if (v_xy[i][0].item() != 0){ + // std::cout << "CUDA " << i << " " << v_xy[i] << v_conic[i] << v_colors[i] << v_opacity[i] << std::endl; + // exit(1); + // } + // } return { v_xy, none, // depths @@ -179,16 +188,19 @@ torch::Tensor RasterizeGaussiansCPU::forward(AutogradContext *ctx, // Final image torch::Tensor outImg = std::get<0>(t); - // cv::Mat image = tensorToImage(outImg.detach().cpu()); - // cv::cvtColor(image, image, cv::COLOR_RGB2BGR); - // cv::imwrite("test.png", image); - // std::cout << "WROTE " << imgWidth << "x" << imgHeight; - // exit(1); + cv::Mat image = tensorToImage(outImg.detach().cpu()); + cv::cvtColor(image, image, cv::COLOR_RGB2BGR); + cv::imwrite("test.png", image); + std::cout << "WROTE " << imgWidth << "x" << imgHeight; // Map of alpha-inverse (1 - finalTs = alpha) torch::Tensor finalTs = std::get<1>(t); + std::cout << finalTs << std::endl; + exit(1); + // Map of gaussian IDs + // TODO: remove finalIdx (not needed) torch::Tensor finalIdx = std::get<2>(t); ctx->saved_data["imgWidth"] = imgWidth; @@ -217,6 +229,9 @@ tensor_list RasterizeGaussiansCPU::backward(AutogradContext *ctx, tensor_list gr // torch::Tensor v_outAlpha = torch::zeros({imgHeight, imgWidth}, torch::TensorOptions().device(v_outImg.get_device()); torch::Tensor v_outAlpha = torch::zeros_like(v_outImg.index({"...", 0})); + // std::cout << xys[0] << finalTs[0] << std::endl; + // exit(1); + auto t = rasterize_backward_tensor_cpu(imgHeight, imgWidth, xys, conics, @@ -230,22 +245,28 @@ tensor_list RasterizeGaussiansCPU::backward(AutogradContext *ctx, tensor_list gr v_outImg, v_outAlpha); + torch::Tensor v_xy = std::get<0>(t); torch::Tensor v_conic = std::get<1>(t); torch::Tensor v_colors = std::get<2>(t); torch::Tensor v_opacity = std::get<3>(t); torch::Tensor none; - std::cout << "CPU " << v_xy[0] << v_conic[0] << v_colors[0] << v_opacity[0] << std::endl; - exit(1); + // for (size_t i = 0; i < v_xy.size(0); i++){ + // if (v_xy[i][0].item() != 0){ + // size_t i = 11; + // std::cout << "CPU " << i << " " << v_xy[i] << v_conic[i] << v_colors[i] << v_opacity[i] << std::endl; + // exit(1); + // } + // }exit(1); return { v_xy, - none, // depths none, // radii v_conic, - none, // numTilesHit v_colors, v_opacity, + none, // cov2d + none, // camDepths none, // imgHeight none, // imgWidth none // background diff --git a/simple_trainer.cpp b/simple_trainer.cpp index 8494f9f..637e16e 100644 --- a/simple_trainer.cpp +++ b/simple_trainer.cpp @@ -16,20 +16,46 @@ #include "rasterize_gaussians.hpp" #include "constants.hpp" #include "cv_utils.hpp" +#include "vendor/cxxopts.hpp" using namespace torch::indexing; +int main(int argc, char **argv){ + cxxopts::Options options("simple_trainer", "Test program for gsplat execution"); + options.add_options() + ("cpu", "Force CPU execution") + ("width", "Test image width", cxxopts::value()->default_value("256")) + ("height", "Test image height", cxxopts::value()->default_value("256")) + ("iters", "Number of iterations", cxxopts::value()->default_value("1000")) + ("points", "Number of gaussians", cxxopts::value()->default_value("100000")) + ("lr", "Learning rate", cxxopts::value()->default_value("0.01")) + ("render", "Save rendered images to folder", cxxopts::value()->default_value("")) + ("h,help", "Print usage") + ; + cxxopts::ParseResult result; + try { + result = options.parse(argc, argv); + } + catch (const std::exception &e) { + std::cerr << e.what() << std::endl; + std::cerr << options.help() << std::endl; + return EXIT_FAILURE; + } + if (result.count("help")) { + std::cout << options.help() << std::endl; + return EXIT_SUCCESS; + } -int main(int argc, char **argv){ - int width = 256, - height = 256; - int numPoints = 32; //100000; - int iterations = 1000; - float learningRate = 0.01; + int width = result["width"].as(), + height = result["height"].as(); + int numPoints = result["points"].as(); + int iterations = result["iters"].as(); + float learningRate = result["lr"].as(); + std::string render = result["render"].as(); torch::Device device = torch::kCPU; - if (torch::cuda::is_available() && !(argc == 2 && std::string(argv[1]) == "--cpu")){ + if (torch::cuda::is_available() && result.count("cpu") == 0){ std::cout << "Using CUDA" << std::endl; device = torch::kCUDA; }else{ @@ -55,16 +81,12 @@ int main(int argc, char **argv){ (height + BLOCK_Y - 1) / BLOCK_Y, 1); - // torch::Tensor imgSize = torch::tensor({width, height, 1}, device); - // torch::Tensor block = torch::tensor({BLOCK_X, BLOCK_Y, 1}, device); - // Init gaussians #ifdef USE_CUDA torch::cuda::manual_seed_all(0); #endif torch::manual_seed(0); - // TODO: remove // Random points, scales and colors torch::Tensor means = 2.0 * (torch::rand({numPoints, 3}, torch::kCPU) - 0.5); // Positions [-1, 1] torch::Tensor scales = torch::rand({numPoints, 3}, torch::kCPU); @@ -83,17 +105,6 @@ int main(int argc, char **argv){ v = v.to(device); w = w.to(device); - // TODO: uncomment - // // Random points, scales and colors - // torch::Tensor means = 2.0 * (torch::rand({numPoints, 3}, device) - 0.5); // Positions [-1, 1] - // torch::Tensor scales = torch::rand({numPoints, 3}, device); - // torch::Tensor rgbs = torch::rand({numPoints, 3}, device); - - // // Random rotations (quaternions) - // // quats = ( sqrt(1-u) sin(2πv), sqrt(1-u) cos(2πv), sqrt(u) sin(2πw), sqrt(u) cos(2πw)) - // torch::Tensor u = torch::rand({numPoints, 1}, device); - // torch::Tensor v = torch::rand({numPoints, 1}, device); - // torch::Tensor w = torch::rand({numPoints, 1}, device); torch::Tensor quats = torch::cat({ torch::sqrt(1.0 - u) * torch::sin(2.0 * PI * v), torch::sqrt(1.0 - u) * torch::cos(2.0 * PI * v), @@ -174,9 +185,11 @@ int main(int argc, char **argv){ optimizer.step(); std::cout << "Iteration " << std::to_string(i + 1) << "/" << std::to_string(iterations) << " Loss: " << loss.item() << std::endl; - - cv::Mat image = tensorToImage(outImg.detach().cpu()); - cv::cvtColor(image, image, cv::COLOR_RGB2BGR); - cv::imwrite("render/" + std::to_string(i + 1) + ".png", image); + + if (!render.empty()){ + cv::Mat image = tensorToImage(outImg.detach().cpu()); + cv::cvtColor(image, image, cv::COLOR_RGB2BGR); + cv::imwrite(render + "/" + std::to_string(i + 1) + ".png", image); + } } } \ No newline at end of file diff --git a/vendor/gsplat-cpu/gsplat_cpu.cpp b/vendor/gsplat-cpu/gsplat_cpu.cpp index bacf7aa..b24c212 100644 --- a/vendor/gsplat-cpu/gsplat_cpu.cpp +++ b/vendor/gsplat-cpu/gsplat_cpu.cpp @@ -6,7 +6,7 @@ #include #include -#include +#include #include using namespace torch::indexing; @@ -59,8 +59,8 @@ project_gaussians_forward_tensor_cpu( const unsigned img_width, const float clip_thresh ){ - float fovx = 0.5f * static_cast(img_width) / fx; - float fovy = 0.5f * static_cast(img_height) / fy; + float fovx = 0.5f * static_cast(img_height) / fx; + float fovy = 0.5f * static_cast(img_width) / fy; // TODO: no need to recompute W,p,t below (they are the same) @@ -124,8 +124,8 @@ project_gaussians_forward_tensor_cpu( pHom = torch::einsum("...ij,...j->...i", {projmat, pHom}); torch::Tensor rw = 1.0f / torch::clamp_min(pHom.index({"...", 3}), eps); torch::Tensor pProj = pHom.index({"...", Slice(None, 3)}) * rw.index({"...", None}); - torch::Tensor u = 0.5f * ((pProj.index({"...", 0}) + 1.0f) * static_cast(img_height) - 1.0f); - torch::Tensor v = 0.5f * ((pProj.index({"...", 1}) + 1.0f) * static_cast(img_width) - 1.0f); + torch::Tensor u = 0.5f * ((pProj.index({"...", 0}) + 1.0f) * static_cast(img_width) - 1.0f); + torch::Tensor v = 0.5f * ((pProj.index({"...", 1}) + 1.0f) * static_cast(img_height) - 1.0f); torch::Tensor xys = torch::stack({u, v}, -1); // center torch::Tensor radii = radius.to(torch::kInt32); @@ -163,9 +163,9 @@ std::tuple< torch::Device device = xys.device(); - torch::Tensor outImg = torch::zeros({width, height, channels}, torch::TensorOptions().dtype(torch::kFloat32).device(device)); - torch::Tensor finalTs = torch::ones({width, height, channels}, torch::TensorOptions().dtype(torch::kFloat32).device(device)); - torch::Tensor finalIdx = torch::zeros({width, height, channels}, torch::TensorOptions().dtype(torch::kInt32).device(device)); + torch::Tensor outImg = torch::zeros({height, width, channels}, torch::TensorOptions().dtype(torch::kFloat32).device(device)); + torch::Tensor finalTs = torch::ones({height, width}, torch::TensorOptions().dtype(torch::kFloat32).device(device)); + torch::Tensor finalIdx = torch::zeros({height, width}, torch::TensorOptions().dtype(torch::kInt32).device(device)); torch::Tensor sqCov2dX = 3.0f * torch::sqrt(cov2d.index({"...", 0, 0})); torch::Tensor sqCov2dY = 3.0f * torch::sqrt(cov2d.index({"...", 1, 1})); @@ -178,6 +178,7 @@ std::tuple< float *pOutImg = static_cast(outImg.data_ptr()); float *pFinalTs = static_cast(finalTs.data_ptr()); + int32_t *pFinalIdx = static_cast(finalIdx.data_ptr()); float *pColors = static_cast(colors.data_ptr()); @@ -186,9 +187,8 @@ std::tuple< float bgZ = background[2].item(); const float alphaThresh = 1.0f / 255.0f; - float T = 1.0f; - int idx = 0; - for (; idx < numPoints; idx++){ + + for (int idx = 0; idx < numPoints; idx++){ int32_t gaussianId = gIndices[idx]; float A = pConics[gaussianId * 3 + 0]; @@ -202,9 +202,9 @@ std::tuple< float sqy = pSqCov2dY[gaussianId]; int minx = (std::max)(0, static_cast(std::floor(gY - sqy)) - 2); - int maxx = (std::min)(width, static_cast(std::ceil(gY + sqy)) + 2); + int maxx = (std::min)(height, static_cast(std::ceil(gY + sqy)) + 2); int miny = (std::max)(0, static_cast(std::floor(gX - sqx)) - 2); - int maxy = (std::min)(height, static_cast(std::ceil(gX + sqx)) + 2); + int maxy = (std::min)(width, static_cast(std::ceil(gX + sqx)) + 2); for (int i = minx; i < maxx; i++){ for (int j = miny; j < maxy; j++){ @@ -220,15 +220,18 @@ std::tuple< float alpha = (std::min)(0.999f, (pOpacities[gaussianId] * std::exp(-sigma))); if (alpha < alphaThresh) continue; - size_t pixIdx = (i * height + j); + size_t pixIdx = (i * width + j); float T = pFinalTs[pixIdx]; float nextT = T * (1.0f - alpha); + if (nextT <= 1e-4f) { // this pixel is done + continue; + } - float alphaT = alpha * T; + float vis = alpha * T; - pOutImg[pixIdx * 3 + 0] += alphaT * (pColors[gaussianId * 3 + 0] + bgX); - pOutImg[pixIdx * 3 + 1] += alphaT * (pColors[gaussianId * 3 + 1] + bgY); - pOutImg[pixIdx * 3 + 2] += alphaT * (pColors[gaussianId * 3 + 2] + bgZ); + pOutImg[pixIdx * 3 + 0] += vis * pColors[gaussianId * 3 + 0]; + pOutImg[pixIdx * 3 + 1] += vis * pColors[gaussianId * 3 + 1]; + pOutImg[pixIdx * 3 + 2] += vis * pColors[gaussianId * 3 + 2]; pFinalTs[pixIdx] = nextT; pFinalIdx[pixIdx] = idx; @@ -236,6 +239,18 @@ std::tuple< } } + // Background + for (int i = 0; i < width; i++){ + for (int j = 0; j < height; j++){ + size_t pixIdx = (i * width + j); + float T = pFinalTs[pixIdx]; + + pOutImg[pixIdx * 3 + 0] += T * bgX; + pOutImg[pixIdx * 3 + 1] += T * bgY; + pOutImg[pixIdx * 3 + 2] += T * bgZ; + } + } + return std::make_tuple(outImg, finalTs, finalIdx); } @@ -262,6 +277,8 @@ std:: const torch::Tensor &v_output, // dL_dout_color const torch::Tensor &v_output_alpha ){ + torch::NoGradGuard noGrad; + int numPoints = xys.size(0); int channels = colors.size(1); torch::Device device = xys.device(); @@ -276,7 +293,7 @@ std:: float *pv_colors = static_cast(v_colors.data_ptr()); float *pv_opacity = static_cast(v_opacity.data_ptr()); - torch::Tensor buffer = torch::zeros({width, height, 3}, torch::TensorOptions().dtype(torch::kFloat32).device(device)); + torch::Tensor buffer = torch::zeros({height, width, 3}, torch::TensorOptions().dtype(torch::kFloat32).device(device)); std::vector< size_t > gIndices( numPoints ); @@ -309,8 +326,7 @@ std:: float *pFinalTs = static_cast(final_Ts.data_ptr()); const float alphaThresh = 1.0f / 255.0f; - int idx = numPoints - 1; - for (; idx >= 0; idx--){ + for (int idx = numPoints - 1; idx >= 0; idx--){ int32_t gaussianId = gIndices[idx]; float A = pConics[gaussianId * 3 + 0]; @@ -343,7 +359,7 @@ std:: float alpha = (std::min)(0.999f, pOpacities[gaussianId] * vis); if (alpha < alphaThresh) continue; - size_t pixIdx = (i * height + j); + size_t pixIdx = (i * width + j); // TODO!!! CHECK!!! float ra = 1.0f / (1.0f - alpha); pT[pixIdx] *= ra; From c840bf2394181e1995dd9008cc96d1a7f1a9a265 Mon Sep 17 00:00:00 2001 From: Piero Toffanin Date: Wed, 20 Mar 2024 14:27:52 +0000 Subject: [PATCH 14/19] Backward pass working! --- rasterize_gaussians.cpp | 44 +++------ simple_trainer.cpp | 7 +- vendor/gsplat-cpu/bindings.h | 5 +- vendor/gsplat-cpu/gsplat_cpu.cpp | 155 +++++++++++++++++++++++-------- vendor/gsplat/backward.cu | 6 ++ vendor/gsplat/bindings.cu | 1 + 6 files changed, 148 insertions(+), 70 deletions(-) diff --git a/rasterize_gaussians.cpp b/rasterize_gaussians.cpp index 5858f9d..f00d6c6 100644 --- a/rasterize_gaussians.cpp +++ b/rasterize_gaussians.cpp @@ -83,12 +83,8 @@ torch::Tensor RasterizeGaussians::forward(AutogradContext *ctx, cv::imwrite("cudatest.png", image); std::cout << "WROTE " << imgWidth << "x" << imgHeight; - // Map of alpha-inverse (1 - finalTs = alpha) torch::Tensor finalTs = std::get<1>(t); - std::cout << finalTs << std::endl; - exit(1); - // Map of tile bin IDs torch::Tensor finalIdx = std::get<2>(t); @@ -118,9 +114,6 @@ tensor_list RasterizeGaussians::backward(AutogradContext *ctx, tensor_list grad_ // torch::Tensor v_outAlpha = torch::zeros({imgHeight, imgWidth}, torch::TensorOptions().device(v_outImg.get_device()); torch::Tensor v_outAlpha = torch::zeros_like(v_outImg.index({"...", 0})); - // std::cout << xys[0] << finalTs[0] << std::endl; - // exit(1); - auto t = rasterize_backward_tensor(imgHeight, imgWidth, gaussianIdsSorted, tileBins, @@ -140,12 +133,8 @@ tensor_list RasterizeGaussians::backward(AutogradContext *ctx, tensor_list grad_ torch::Tensor v_opacity = std::get<3>(t); torch::Tensor none; - // for (size_t i = 0; i < v_xy.size(0); i++){ - // if (v_xy[i][0].item() != 0){ - // std::cout << "CUDA " << i << " " << v_xy[i] << v_conic[i] << v_colors[i] << v_opacity[i] << std::endl; - // exit(1); - // } - // } + std::cout << v_colors << v_xy; + exit(1); return { v_xy, none, // depths @@ -188,23 +177,22 @@ torch::Tensor RasterizeGaussiansCPU::forward(AutogradContext *ctx, // Final image torch::Tensor outImg = std::get<0>(t); - cv::Mat image = tensorToImage(outImg.detach().cpu()); - cv::cvtColor(image, image, cv::COLOR_RGB2BGR); - cv::imwrite("test.png", image); - std::cout << "WROTE " << imgWidth << "x" << imgHeight; + // cv::Mat image = tensorToImage(outImg.detach().cpu()); + // cv::cvtColor(image, image, cv::COLOR_RGB2BGR); + // cv::imwrite("test.png", image); + // std::cout << "WROTE " << imgWidth << "x" << imgHeight; - // Map of alpha-inverse (1 - finalTs = alpha) torch::Tensor finalTs = std::get<1>(t); - std::cout << finalTs << std::endl; - exit(1); - // Map of gaussian IDs // TODO: remove finalIdx (not needed) torch::Tensor finalIdx = std::get<2>(t); + std::vector *pxgid = std::get<3>(t); + ctx->saved_data["imgWidth"] = imgWidth; ctx->saved_data["imgHeight"] = imgHeight; + ctx->saved_data["pxgid"] = reinterpret_cast(pxgid); ctx->save_for_backward({ xys, conics, colors, opacity, background, cov2d, camDepths, finalTs, finalIdx }); return outImg; @@ -214,6 +202,7 @@ tensor_list RasterizeGaussiansCPU::backward(AutogradContext *ctx, tensor_list gr torch::Tensor v_outImg = grad_outputs[0]; int imgHeight = ctx->saved_data["imgHeight"].toInt(); int imgWidth = ctx->saved_data["imgWidth"].toInt(); + const std::vector *pxgid = reinterpret_cast *>(ctx->saved_data["pxgid"].toInt()); variable_list saved = ctx->get_saved_variables(); torch::Tensor xys = saved[0]; @@ -229,9 +218,6 @@ tensor_list RasterizeGaussiansCPU::backward(AutogradContext *ctx, tensor_list gr // torch::Tensor v_outAlpha = torch::zeros({imgHeight, imgWidth}, torch::TensorOptions().device(v_outImg.get_device()); torch::Tensor v_outAlpha = torch::zeros_like(v_outImg.index({"...", 0})); - // std::cout << xys[0] << finalTs[0] << std::endl; - // exit(1); - auto t = rasterize_backward_tensor_cpu(imgHeight, imgWidth, xys, conics, @@ -242,6 +228,7 @@ tensor_list RasterizeGaussiansCPU::backward(AutogradContext *ctx, tensor_list gr camDepths, finalTs, finalIdx, + pxgid, v_outImg, v_outAlpha); @@ -252,13 +239,8 @@ tensor_list RasterizeGaussiansCPU::backward(AutogradContext *ctx, tensor_list gr torch::Tensor v_opacity = std::get<3>(t); torch::Tensor none; - // for (size_t i = 0; i < v_xy.size(0); i++){ - // if (v_xy[i][0].item() != 0){ - // size_t i = 11; - // std::cout << "CPU " << i << " " << v_xy[i] << v_conic[i] << v_colors[i] << v_opacity[i] << std::endl; - // exit(1); - // } - // }exit(1); + // std::cout << v_colors << v_xy; + // exit(1); return { v_xy, none, // radii diff --git a/simple_trainer.cpp b/simple_trainer.cpp index 637e16e..336f7a9 100644 --- a/simple_trainer.cpp +++ b/simple_trainer.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #ifdef USE_HIP @@ -19,6 +20,7 @@ #include "vendor/cxxopts.hpp" using namespace torch::indexing; +namespace fs = std::filesystem; int main(int argc, char **argv){ cxxopts::Options options("simple_trainer", "Test program for gsplat execution"); @@ -53,6 +55,7 @@ int main(int argc, char **argv){ int iterations = result["iters"].as(); float learningRate = result["lr"].as(); std::string render = result["render"].as(); + if (!fs::exists(render)) fs::create_directories(render); torch::Device device = torch::kCPU; if (torch::cuda::is_available() && result.count("cpu") == 0){ @@ -90,6 +93,8 @@ int main(int argc, char **argv){ // Random points, scales and colors torch::Tensor means = 2.0 * (torch::rand({numPoints, 3}, torch::kCPU) - 0.5); // Positions [-1, 1] torch::Tensor scales = torch::rand({numPoints, 3}, torch::kCPU); + // torch::Tensor means = torch::tensor({{0.5f, 0.5f, -5.0f}, {0.5f, 0.5f, -6.0f}, {0.25f, 0.25f, -4.0f}}, torch::kCPU); + // torch::Tensor scales = torch::tensor({{0.5f, 0.5f, 0.5f}, {1.0f, 1.0f, 1.0f}, {1.0f, 1.0f, 1.0f}}, torch::kCPU); torch::Tensor rgbs = torch::rand({numPoints, 3}, torch::kCPU); // Random rotations (quaternions) @@ -189,7 +194,7 @@ int main(int argc, char **argv){ if (!render.empty()){ cv::Mat image = tensorToImage(outImg.detach().cpu()); cv::cvtColor(image, image, cv::COLOR_RGB2BGR); - cv::imwrite(render + "/" + std::to_string(i + 1) + ".png", image); + cv::imwrite((fs::path(render) / (std::to_string(i + 1) + ".png")).string(), image); } } } \ No newline at end of file diff --git a/vendor/gsplat-cpu/bindings.h b/vendor/gsplat-cpu/bindings.h index c09776e..3f507e1 100644 --- a/vendor/gsplat-cpu/bindings.h +++ b/vendor/gsplat-cpu/bindings.h @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -32,7 +33,8 @@ project_gaussians_forward_tensor_cpu( std::tuple< torch::Tensor, torch::Tensor, - torch::Tensor + torch::Tensor, + std::vector * > rasterize_forward_tensor_cpu( const int width, const int height, @@ -64,6 +66,7 @@ std:: const torch::Tensor &camDepths, const torch::Tensor &final_Ts, const torch::Tensor &final_idx, + const std::vector *pxgid, const torch::Tensor &v_output, // dL_dout_color const torch::Tensor &v_output_alpha ); diff --git a/vendor/gsplat-cpu/gsplat_cpu.cpp b/vendor/gsplat-cpu/gsplat_cpu.cpp index b24c212..23330e8 100644 --- a/vendor/gsplat-cpu/gsplat_cpu.cpp +++ b/vendor/gsplat-cpu/gsplat_cpu.cpp @@ -137,7 +137,8 @@ project_gaussians_forward_tensor_cpu( std::tuple< torch::Tensor, torch::Tensor, - torch::Tensor + torch::Tensor, + std::vector * > rasterize_forward_tensor_cpu( const int width, const int height, @@ -154,6 +155,7 @@ std::tuple< int channels = colors.size(1); int numPoints = xys.size(0); float *pDepths = static_cast(camDepths.data_ptr()); + std::vector *pxgid = new std::vector[width * height]; std::vector< size_t > gIndices( numPoints ); std::iota( gIndices.begin(), gIndices.end(), 0 ); @@ -166,6 +168,7 @@ std::tuple< torch::Tensor outImg = torch::zeros({height, width, channels}, torch::TensorOptions().dtype(torch::kFloat32).device(device)); torch::Tensor finalTs = torch::ones({height, width}, torch::TensorOptions().dtype(torch::kFloat32).device(device)); torch::Tensor finalIdx = torch::zeros({height, width}, torch::TensorOptions().dtype(torch::kInt32).device(device)); + torch::Tensor done = torch::zeros({height, width}, torch::TensorOptions().dtype(torch::kBool).device(device)); torch::Tensor sqCov2dX = 3.0f * torch::sqrt(cov2d.index({"...", 0, 0})); torch::Tensor sqCov2dY = 3.0f * torch::sqrt(cov2d.index({"...", 1, 1})); @@ -178,7 +181,8 @@ std::tuple< float *pOutImg = static_cast(outImg.data_ptr()); float *pFinalTs = static_cast(finalTs.data_ptr()); - + bool *pDone = static_cast(done.data_ptr()); + int32_t *pFinalIdx = static_cast(finalIdx.data_ptr()); float *pColors = static_cast(colors.data_ptr()); @@ -208,6 +212,9 @@ std::tuple< for (int i = minx; i < maxx; i++){ for (int j = miny; j < maxy; j++){ + size_t pixIdx = (i * width + j); + if (pDone[pixIdx]) continue; + float xCam = gX - j; float yCam = gY - i; float sigma = ( @@ -220,10 +227,10 @@ std::tuple< float alpha = (std::min)(0.999f, (pOpacities[gaussianId] * std::exp(-sigma))); if (alpha < alphaThresh) continue; - size_t pixIdx = (i * width + j); float T = pFinalTs[pixIdx]; float nextT = T * (1.0f - alpha); if (nextT <= 1e-4f) { // this pixel is done + pDone[pixIdx] = true; continue; } @@ -234,7 +241,8 @@ std::tuple< pOutImg[pixIdx * 3 + 2] += vis * pColors[gaussianId * 3 + 2]; pFinalTs[pixIdx] = nextT; - pFinalIdx[pixIdx] = idx; + pFinalIdx[pixIdx] = gaussianId; + pxgid[pixIdx].push_back(gaussianId); } } } @@ -248,10 +256,12 @@ std::tuple< pOutImg[pixIdx * 3 + 0] += T * bgX; pOutImg[pixIdx * 3 + 1] += T * bgY; pOutImg[pixIdx * 3 + 2] += T * bgZ; + + std::reverse(pxgid[pixIdx].begin(), pxgid[pixIdx].end()); } } - return std::make_tuple(outImg, finalTs, finalIdx); + return std::make_tuple(outImg, finalTs, finalIdx, pxgid); } @@ -274,6 +284,7 @@ std:: const torch::Tensor &camDepths, const torch::Tensor &final_Ts, const torch::Tensor &final_idx, + const std::vector *pxgid, const torch::Tensor &v_output, // dL_dout_color const torch::Tensor &v_output_alpha ){ @@ -292,9 +303,27 @@ std:: float *pv_conic = static_cast(v_conic.data_ptr()); float *pv_colors = static_cast(v_colors.data_ptr()); float *pv_opacity = static_cast(v_opacity.data_ptr()); + + // torch::Tensor buffer = torch::zeros({height, width, 3}, torch::TensorOptions().dtype(torch::kFloat32).device(device)); + + float *pColors = static_cast(colors.data_ptr()); + // float *pBuffer = static_cast(buffer.data_ptr()); + float *pv_output = static_cast(v_output.data_ptr()); + float *pv_outputAlpha = static_cast(v_output_alpha.data_ptr()); + float *pConics = static_cast(conics.data_ptr()); + float *pCenters = static_cast(xys.data_ptr()); + float *pOpacities = static_cast(opacities.data_ptr()); - torch::Tensor buffer = torch::zeros({height, width, 3}, torch::TensorOptions().dtype(torch::kFloat32).device(device)); + float bgX = background[0].item(); + float bgY = background[1].item(); + float bgZ = background[2].item(); + + // torch::Tensor Ts = final_Ts.clone(); + // float *pTs = static_cast(Ts.data_ptr()); + float *pFinalTs = static_cast(final_Ts.data_ptr()); + const float alphaThresh = 1.0f / 255.0f; +/* std::vector< size_t > gIndices( numPoints ); float *pDepths = static_cast(camDepths.data_ptr()); @@ -306,24 +335,9 @@ std:: torch::Tensor sqCov2dX = 3.0f * torch::sqrt(cov2d.index({"...", 0, 0})); torch::Tensor sqCov2dY = 3.0f * torch::sqrt(cov2d.index({"...", 1, 1})); - float *pConics = static_cast(conics.data_ptr()); - float *pCenters = static_cast(xys.data_ptr()); + float *pSqCov2dX = static_cast(sqCov2dX.data_ptr()); float *pSqCov2dY = static_cast(sqCov2dY.data_ptr()); - float *pOpacities = static_cast(opacities.data_ptr()); - - float *pColors = static_cast(colors.data_ptr()); - float *pBuffer = static_cast(buffer.data_ptr()); - float *pv_output = static_cast(v_output.data_ptr()); - float *pv_outputAlpha = static_cast(v_output_alpha.data_ptr()); - - float bgX = background[0].item(); - float bgY = background[1].item(); - float bgZ = background[2].item(); - - torch::Tensor T = final_Ts.clone(); - float *pT = static_cast(T.data_ptr()); - float *pFinalTs = static_cast(final_Ts.data_ptr()); const float alphaThresh = 1.0f / 255.0f; for (int idx = numPoints - 1; idx >= 0; idx--){ @@ -340,12 +354,14 @@ std:: float sqy = pSqCov2dY[gaussianId]; int minx = (std::max)(0, static_cast(std::floor(gY - sqy)) - 2); - int maxx = (std::min)(width, static_cast(std::ceil(gY + sqy)) + 2); + int maxx = (std::min)(height, static_cast(std::ceil(gY + sqy)) + 2); int miny = (std::max)(0, static_cast(std::floor(gX - sqx)) - 2); - int maxy = (std::min)(height, static_cast(std::ceil(gX + sqx)) + 2); + int maxy = (std::min)(width, static_cast(std::ceil(gX + sqx)) + 2); for (int i = minx; i < maxx; i++){ for (int j = miny; j < maxy; j++){ + size_t pixIdx = (i * width + j); + float xCam = gX - j; float yCam = gY - i; float sigma = ( @@ -356,30 +372,31 @@ std:: if (sigma < 0.0f) continue; float vis = std::exp(-sigma); - float alpha = (std::min)(0.999f, pOpacities[gaussianId] * vis); + float alpha = (std::min)(0.99f, pOpacities[gaussianId] * vis); if (alpha < alphaThresh) continue; - size_t pixIdx = (i * width + j); // TODO!!! CHECK!!! float ra = 1.0f / (1.0f - alpha); - pT[pixIdx] *= ra; - float T = pT[pixIdx]; - float TFinal = pFinalTs[pixIdx]; + float T = pTs[pixIdx]; + T *= ra; + pTs[pixIdx] = T; + + float Tfinal = pFinalTs[pixIdx]; float fac = alpha * T; - float v_alpha = 0.0f; + pv_colors[gaussianId * 3 + 0] += fac * pv_output[pixIdx * 3 + 0]; pv_colors[gaussianId * 3 + 1] += fac * pv_output[pixIdx * 3 + 1]; pv_colors[gaussianId * 3 + 2] += fac * pv_output[pixIdx * 3 + 2]; - v_alpha += (pColors[gaussianId * 3 + 0] * T - pBuffer[pixIdx * 3 + 0] * ra) * pv_output[pixIdx * 3 + 0]; - v_alpha += (pColors[gaussianId * 3 + 1] * T - pBuffer[pixIdx * 3 + 1] * ra) * pv_output[pixIdx * 3 + 1]; - v_alpha += (pColors[gaussianId * 3 + 2] * T - pBuffer[pixIdx * 3 + 2] * ra) * pv_output[pixIdx * 3 + 2]; - v_alpha += (TFinal * ra * pv_outputAlpha[pixIdx]); + float v_alpha = ((pColors[gaussianId * 3 + 0] * T - pBuffer[pixIdx * 3 + 0] * ra) * pv_output[pixIdx * 3 + 0]) + + ((pColors[gaussianId * 3 + 1] * T - pBuffer[pixIdx * 3 + 1] * ra) * pv_output[pixIdx * 3 + 1]) + + ((pColors[gaussianId * 3 + 2] * T - pBuffer[pixIdx * 3 + 2] * ra) * pv_output[pixIdx * 3 + 2]) + + (Tfinal * ra * pv_outputAlpha[pixIdx]) + - v_alpha += -TFinal * ra * bgX * pv_output[pixIdx * 3 + 0]; - v_alpha += -TFinal * ra * bgY * pv_output[pixIdx * 3 + 1]; - v_alpha += -TFinal * ra * bgZ * pv_output[pixIdx * 3 + 2]; + (-Tfinal * ra * bgX * pv_output[pixIdx * 3 + 0]) + + (-Tfinal * ra * bgY * pv_output[pixIdx * 3 + 1]) + + (-Tfinal * ra * bgZ * pv_output[pixIdx * 3 + 2]); pBuffer[pixIdx * 3 + 0] += pColors[gaussianId * 3 + 0] * fac; pBuffer[pixIdx * 3 + 1] += pColors[gaussianId * 3 + 1] * fac; @@ -393,6 +410,70 @@ std:: pv_xy[gaussianId * 2 + 0] += v_sigma * (A * xCam + B * yCam); pv_xy[gaussianId * 2 + 1] += v_sigma * (B * xCam + C * yCam); + pv_opacity[gaussianId] += vis * v_alpha; + } + } + }*/ + + for (int j = 0; j < width; j++){ + for (int i = 0; i < height; i++){ + size_t pixIdx = (i * width + j); + float Tfinal = pFinalTs[pixIdx]; + float T = Tfinal; + float buffer[3] = {0.0f, 0.0f, 0.0f}; + + for (const int32_t &gaussianId : pxgid[pixIdx]){ + float A = pConics[gaussianId * 3 + 0]; + float B = pConics[gaussianId * 3 + 1]; + float C = pConics[gaussianId * 3 + 2]; + + float gX = pCenters[gaussianId * 2 + 0]; + float gY = pCenters[gaussianId * 2 + 1]; + + float xCam = gX - j; + float yCam = gY - i; + float sigma = ( + 0.5f + * (A * xCam * xCam + C * yCam * yCam) + + B * xCam * yCam + ); + + if (sigma < 0.0f) continue; + float vis = std::exp(-sigma); + float alpha = (std::min)(0.99f, pOpacities[gaussianId] * vis); + if (alpha < alphaThresh) continue; + + float ra = 1.0f / (1.0f - alpha); + T *= ra; + float fac = alpha * T; + + pv_colors[gaussianId * 3 + 0] += fac * pv_output[pixIdx * 3 + 0]; + pv_colors[gaussianId * 3 + 1] += fac * pv_output[pixIdx * 3 + 1]; + pv_colors[gaussianId * 3 + 2] += fac * pv_output[pixIdx * 3 + 2]; + + float v_alpha = ((pColors[gaussianId * 3 + 0] * T - buffer[0] * ra) * pv_output[pixIdx * 3 + 0]) + + ((pColors[gaussianId * 3 + 1] * T - buffer[1] * ra) * pv_output[pixIdx * 3 + 1]) + + ((pColors[gaussianId * 3 + 2] * T - buffer[2] * ra) * pv_output[pixIdx * 3 + 2]) + + (Tfinal * ra * pv_outputAlpha[pixIdx]) + + + (-Tfinal * ra * bgX * pv_output[pixIdx * 3 + 0]) + + (-Tfinal * ra * bgY * pv_output[pixIdx * 3 + 1]) + + (-Tfinal * ra * bgZ * pv_output[pixIdx * 3 + 2]); + + buffer[0] += pColors[gaussianId * 3 + 0] * fac; + buffer[1] += pColors[gaussianId * 3 + 1] * fac; + buffer[2] += pColors[gaussianId * 3 + 2] * fac; + + float v_sigma = -pOpacities[gaussianId] * vis * v_alpha; + pv_conic[gaussianId * 3 + 0] += 0.5f * v_sigma * xCam * xCam; + pv_conic[gaussianId * 3 + 1] += 0.5f * v_sigma * xCam * yCam; + pv_conic[gaussianId * 3 + 2] += 0.5f * v_sigma * yCam * yCam; + + // std::cout << v_sigma << std::endl; + + pv_xy[gaussianId * 2 + 0] += v_sigma * (A * xCam + B * yCam); + pv_xy[gaussianId * 2 + 1] += v_sigma * (B * xCam + C * yCam); + pv_opacity[gaussianId] += vis * v_alpha; } } diff --git a/vendor/gsplat/backward.cu b/vendor/gsplat/backward.cu index c338dba..c0e8f1a 100644 --- a/vendor/gsplat/backward.cu +++ b/vendor/gsplat/backward.cu @@ -10,6 +10,8 @@ #include #endif +#include // TODO REMOVE + namespace cg = cooperative_groups; __global__ void nd_rasterize_backward_kernel( @@ -299,6 +301,10 @@ __global__ void rasterize_backward_kernel( // compute the current T for this gaussian float ra = 1.f / (1.f - alpha); T *= ra; + // if (pix_id == 4999){ + // printf("%f \n", T); + // } + // update v_rgb for this gaussian const float fac = alpha * T; float v_alpha = 0.f; diff --git a/vendor/gsplat/bindings.cu b/vendor/gsplat/bindings.cu index 80d581c..8bdbaca 100644 --- a/vendor/gsplat/bindings.cu +++ b/vendor/gsplat/bindings.cu @@ -628,6 +628,7 @@ std:: (float3 *)v_colors.contiguous().data_ptr(), v_opacity.contiguous().data_ptr() ); + cudaDeviceSynchronize(); // TODO REMOVE return std::make_tuple(v_xy, v_conic, v_colors, v_opacity); } \ No newline at end of file From c0ba9dac8b1c89d0cc0cab4bd2a822e5c636d57f Mon Sep 17 00:00:00 2001 From: Piero Toffanin Date: Wed, 20 Mar 2024 16:03:40 +0000 Subject: [PATCH 15/19] Started opensplat CPU integration --- gsplat.hpp | 2 + model.cpp | 122 ++++++++++++++++++-------- opensplat.cpp | 2 - rasterize_gaussians.cpp | 40 ++------- simple_trainer.cpp | 45 +++++----- vendor/gsplat-cpu/bindings.h | 9 +- vendor/gsplat-cpu/gsplat_cpu.cpp | 145 ++++--------------------------- vendor/gsplat/backward.cu | 5 -- 8 files changed, 142 insertions(+), 228 deletions(-) diff --git a/gsplat.hpp b/gsplat.hpp index fab3520..db0368d 100644 --- a/gsplat.hpp +++ b/gsplat.hpp @@ -1,3 +1,5 @@ +#include "vendor/gsplat/config.h" + #if defined(USE_HIP) || defined(USE_CUDA) #include "vendor/gsplat/bindings.h" #endif diff --git a/model.cpp b/model.cpp index ca615c8..3f36ff7 100644 --- a/model.cpp +++ b/model.cpp @@ -74,31 +74,62 @@ torch::Tensor Model::forward(Camera& cam, int step){ float fovY = 2.0f * std::atan(height / (2.0f * fy)); torch::Tensor projMat = projectionMatrix(0.001f, 1000.0f, fovX, fovY, device); - - TileBounds tileBounds = std::make_tuple((width + BLOCK_X - 1) / BLOCK_X, - (height + BLOCK_Y - 1) / BLOCK_Y, - 1); - torch::Tensor colors = torch::cat({featuresDc.index({Slice(), None, Slice()}), featuresRest}, 1); - auto p = ProjectGaussians::apply(means, - torch::exp(scales), - 1, - quats / quats.norm(2, {-1}, true), - viewMat, - torch::matmul(projMat, viewMat), - fx, - fy, - cx, - cy, - height, - width, - tileBounds); - xys = p[0]; - torch::Tensor depths = p[1]; - radii = p[2]; - torch::Tensor conics = p[3]; - torch::Tensor numTilesHit = p[4]; + torch::Tensor conics; + torch::Tensor depths; // GPU-only + torch::Tensor numTilesHit; // GPU-only + torch::Tensor cov2d; // CPU-only + torch::Tensor camDepths; // CPU-only + torch::Tensor rgb; + + if (device == torch::kCPU){ + auto p = ProjectGaussiansCPU::Apply(means, + torch::exp(scales), + 1, + quats / quats.norm(2, {-1}, true), + viewMat, + torch::matmul(projMat, viewMat), + fx, + fy, + cx, + cy, + height, + width); + xys = p[0]; + radii = p[1]; + conics = p[2]; + cov2d = p[3]; + camDepths = p[4]; + }else{ + #if defined(USE_HIP) || defined(USE_CUDA) + + TileBounds tileBounds = std::make_tuple((width + BLOCK_X - 1) / BLOCK_X, + (height + BLOCK_Y - 1) / BLOCK_Y, + 1); + auto p = ProjectGaussians::apply(means, + torch::exp(scales), + 1, + quats / quats.norm(2, {-1}, true), + viewMat, + torch::matmul(projMat, viewMat), + fx, + fy, + cx, + cy, + height, + width, + tileBounds); + + xys = p[0]; + depths = p[1]; + radii = p[2]; + conics = p[3]; + numTilesHit = p[4]; + #else + throw std::runtime_error("GPU support not built"); + #endif + } if (radii.sum().item() == 0.0f) @@ -110,22 +141,39 @@ torch::Tensor Model::forward(Camera& cam, int step){ torch::Tensor viewDirs = means.detach() - T.transpose(0, 1).to(device); viewDirs = viewDirs / viewDirs.norm(2, {-1}, true); int degreesToUse = (std::min)(step / shDegreeInterval, shDegree); + std::cout << degreesToUse; + exit(1); torch::Tensor rgbs = SphericalHarmonics::apply(degreesToUse, viewDirs, colors); - rgbs = torch::clamp_min(rgbs + 0.5f, 0.0f); + rgbs = torch::clamp_min(rgbs + 0.5f, 0.0f); + + if (device == torch::kCPU){ + rgb = RasterizeGaussiansCPU::apply( + xys, + radii, + conics, + rgbs, + torch::sigmoid(opacities), + cov2d, + camDepths, + height, + width, + backgroundColor); + }else{ + #if defined(USE_HIP) || defined(USE_CUDA) + rgb = RasterizeGaussians::apply( + xys, + depths, + radii, + conics, + numTilesHit, + rgbs, + torch::sigmoid(opacities), + height, + width, + backgroundColor); + #endif + } - - torch::Tensor rgb = RasterizeGaussians::apply( - xys, - depths, - radii, - conics, - numTilesHit, - rgbs, // TODO: why not sigmod? - torch::sigmoid(opacities), - height, - width, - backgroundColor); - rgb = torch::clamp_max(rgb, 1.0f); return rgb; diff --git a/opensplat.cpp b/opensplat.cpp index e184059..4273a3d 100644 --- a/opensplat.cpp +++ b/opensplat.cpp @@ -90,8 +90,6 @@ int main(int argc, char *argv[]){ cam.loadImage(downScaleFactor); } - - // Withhold a validation camera if necessary auto t = inputData.getCameras(validate, valImage); std::vector cams = std::get<0>(t); diff --git a/rasterize_gaussians.cpp b/rasterize_gaussians.cpp index f00d6c6..40d0b12 100644 --- a/rasterize_gaussians.cpp +++ b/rasterize_gaussians.cpp @@ -1,8 +1,5 @@ #include "rasterize_gaussians.hpp" #include "gsplat.hpp" -#include "vendor/gsplat/config.h" - -#include "cv_utils.hpp" // TODO REMOVE std::tuple(t); - cv::Mat image = tensorToImage(outImg.detach().cpu()); - cv::cvtColor(image, image, cv::COLOR_RGB2BGR); - cv::imwrite("cudatest.png", image); - std::cout << "WROTE " << imgWidth << "x" << imgHeight; - torch::Tensor finalTs = std::get<1>(t); // Map of tile bin IDs @@ -111,7 +103,6 @@ tensor_list RasterizeGaussians::backward(AutogradContext *ctx, tensor_list grad_ torch::Tensor finalTs = saved[7]; torch::Tensor finalIdx = saved[8]; - // torch::Tensor v_outAlpha = torch::zeros({imgHeight, imgWidth}, torch::TensorOptions().device(v_outImg.get_device()); torch::Tensor v_outAlpha = torch::zeros_like(v_outImg.index({"...", 0})); auto t = rasterize_backward_tensor(imgHeight, imgWidth, @@ -133,9 +124,6 @@ tensor_list RasterizeGaussians::backward(AutogradContext *ctx, tensor_list grad_ torch::Tensor v_opacity = std::get<3>(t); torch::Tensor none; - std::cout << v_colors << v_xy; - exit(1); - return { v_xy, none, // depths none, // radii @@ -177,23 +165,13 @@ torch::Tensor RasterizeGaussiansCPU::forward(AutogradContext *ctx, // Final image torch::Tensor outImg = std::get<0>(t); - // cv::Mat image = tensorToImage(outImg.detach().cpu()); - // cv::cvtColor(image, image, cv::COLOR_RGB2BGR); - // cv::imwrite("test.png", image); - // std::cout << "WROTE " << imgWidth << "x" << imgHeight; - torch::Tensor finalTs = std::get<1>(t); - - // Map of gaussian IDs - // TODO: remove finalIdx (not needed) - torch::Tensor finalIdx = std::get<2>(t); - - std::vector *pxgid = std::get<3>(t); + std::vector *px2gid = std::get<2>(t); ctx->saved_data["imgWidth"] = imgWidth; ctx->saved_data["imgHeight"] = imgHeight; - ctx->saved_data["pxgid"] = reinterpret_cast(pxgid); - ctx->save_for_backward({ xys, conics, colors, opacity, background, cov2d, camDepths, finalTs, finalIdx }); + ctx->saved_data["px2gid"] = reinterpret_cast(px2gid); + ctx->save_for_backward({ xys, conics, colors, opacity, background, cov2d, camDepths, finalTs }); return outImg; } @@ -202,7 +180,7 @@ tensor_list RasterizeGaussiansCPU::backward(AutogradContext *ctx, tensor_list gr torch::Tensor v_outImg = grad_outputs[0]; int imgHeight = ctx->saved_data["imgHeight"].toInt(); int imgWidth = ctx->saved_data["imgWidth"].toInt(); - const std::vector *pxgid = reinterpret_cast *>(ctx->saved_data["pxgid"].toInt()); + const std::vector *px2gid = reinterpret_cast *>(ctx->saved_data["px2gid"].toInt()); variable_list saved = ctx->get_saved_variables(); torch::Tensor xys = saved[0]; @@ -213,9 +191,7 @@ tensor_list RasterizeGaussiansCPU::backward(AutogradContext *ctx, tensor_list gr torch::Tensor cov2d = saved[5]; torch::Tensor camDepths = saved[6]; torch::Tensor finalTs = saved[7]; - torch::Tensor finalIdx = saved[8]; - // torch::Tensor v_outAlpha = torch::zeros({imgHeight, imgWidth}, torch::TensorOptions().device(v_outImg.get_device()); torch::Tensor v_outAlpha = torch::zeros_like(v_outImg.index({"...", 0})); auto t = rasterize_backward_tensor_cpu(imgHeight, imgWidth, @@ -227,11 +203,12 @@ tensor_list RasterizeGaussiansCPU::backward(AutogradContext *ctx, tensor_list gr cov2d, camDepths, finalTs, - finalIdx, - pxgid, + px2gid, v_outImg, v_outAlpha); + delete[] px2gid; + torch::Tensor v_xy = std::get<0>(t); torch::Tensor v_conic = std::get<1>(t); @@ -239,9 +216,6 @@ tensor_list RasterizeGaussiansCPU::backward(AutogradContext *ctx, tensor_list gr torch::Tensor v_opacity = std::get<3>(t); torch::Tensor none; - // std::cout << v_colors << v_xy; - // exit(1); - return { v_xy, none, // radii v_conic, diff --git a/simple_trainer.cpp b/simple_trainer.cpp index 336f7a9..c11ed5b 100644 --- a/simple_trainer.cpp +++ b/simple_trainer.cpp @@ -12,7 +12,6 @@ #include #include #include -#include "vendor/gsplat/config.h" #include "project_gaussians.hpp" #include "rasterize_gaussians.hpp" #include "constants.hpp" @@ -161,26 +160,30 @@ int main(int argc, char **argv){ width, background); }else{ - auto p = ProjectGaussians::apply(means, scales, 1, - quats, viewMat, viewMat, - focal, focal, - width / 2, - height / 2, - height, - width, - tileBounds); - - outImg = RasterizeGaussians::apply( - p[0], // xys - p[1], // depths - p[2], // radii, - p[3], // conics - p[4], // numTilesHit - torch::sigmoid(rgbs), - torch::sigmoid(opacities), - height, - width, - background); + #if defined(USE_HIP) || defined(USE_CUDA) + auto p = ProjectGaussians::apply(means, scales, 1, + quats, viewMat, viewMat, + focal, focal, + width / 2, + height / 2, + height, + width, + tileBounds); + + outImg = RasterizeGaussians::apply( + p[0], // xys + p[1], // depths + p[2], // radii, + p[3], // conics + p[4], // numTilesHit + torch::sigmoid(rgbs), + torch::sigmoid(opacities), + height, + width, + background); + #else + throw std::runtime_error("GPU support not built"); + #endif } outImg.requires_grad_(); diff --git a/vendor/gsplat-cpu/bindings.h b/vendor/gsplat-cpu/bindings.h index 3f507e1..af0c593 100644 --- a/vendor/gsplat-cpu/bindings.h +++ b/vendor/gsplat-cpu/bindings.h @@ -1,4 +1,7 @@ -// Originally based on https://github.dev/nerfstudio-project/gsplat +// Originally based on https://github.com/nerfstudio-project/gsplat +// This implementation has been substantially changed and optimized +// Licensed under the AGPLv3 +// Piero Toffanin - 2024 #include #include @@ -31,7 +34,6 @@ project_gaussians_forward_tensor_cpu( ); std::tuple< - torch::Tensor, torch::Tensor, torch::Tensor, std::vector * @@ -65,8 +67,7 @@ std:: const torch::Tensor &cov2d, const torch::Tensor &camDepths, const torch::Tensor &final_Ts, - const torch::Tensor &final_idx, - const std::vector *pxgid, + const std::vector *px2gid, const torch::Tensor &v_output, // dL_dout_color const torch::Tensor &v_output_alpha ); diff --git a/vendor/gsplat-cpu/gsplat_cpu.cpp b/vendor/gsplat-cpu/gsplat_cpu.cpp index 23330e8..8b39fec 100644 --- a/vendor/gsplat-cpu/gsplat_cpu.cpp +++ b/vendor/gsplat-cpu/gsplat_cpu.cpp @@ -1,5 +1,7 @@ -// Originally started from https://github.dev/nerfstudio-project/gsplat -// This implementation has been substantially changed and is licensed under the AGPLv3 +// Originally started from https://github.com/nerfstudio-project/gsplat +// This implementation has been substantially changed and optimized +// Licensed under the AGPLv3 +// Piero Toffanin - 2024 #include "bindings.h" #include "../gsplat/config.h" @@ -11,7 +13,7 @@ using namespace torch::indexing; -torch::Tensor quatToRotMat(const torch::Tensor &quat){ +torch::Tensor quatToRot(const torch::Tensor &quat){ auto u = torch::unbind(torch::nn::functional::normalize(quat, torch::nn::functional::NormalizeFuncOptions().dim(-1)), -1); torch::Tensor w = u[0]; torch::Tensor x = u[1]; @@ -62,8 +64,6 @@ project_gaussians_forward_tensor_cpu( float fovx = 0.5f * static_cast(img_height) / fx; float fovy = 0.5f * static_cast(img_width) / fy; - // TODO: no need to recompute W,p,t below (they are the same) - // clip_near_plane torch::Tensor Rclip = viewmat.index({"...", Slice(None, 3), Slice(None, 3)}); torch::Tensor Tclip = viewmat.index({"...", Slice(None, 3), 3}); @@ -71,22 +71,18 @@ project_gaussians_forward_tensor_cpu( // torch::Tensor isClose = pView.index({"...", 2}) < clip_thresh; // scale_rot_to_cov3d - torch::Tensor R = quatToRotMat(quats); + torch::Tensor R = quatToRot(quats); torch::Tensor M = R * glob_scale * scales.index({"...", None, Slice()}); torch::Tensor cov3d = torch::matmul(M, M.transpose(-1, -2)); // project_cov3d_ewa - torch::Tensor W = viewmat.index({"...", Slice(None, 3), Slice(None, 3)}); - torch::Tensor p = viewmat.index({"...", Slice(None, 3), 3}); - torch::Tensor t = torch::matmul(W, means3d.index({"...", None})).index({"...", 0}) + p; - torch::Tensor limX = 1.3f * torch::tensor({fovx}, means3d.device()); torch::Tensor limY = 1.3f * torch::tensor({fovy}, means3d.device()); - torch::Tensor minLimX = t.index({"...", 2}) * torch::min(limX, torch::max(-limX, t.index({"...", 0}) / t.index({"...", 2}))); - torch::Tensor minLimY = t.index({"...", 2}) * torch::min(limY, torch::max(-limY, t.index({"...", 1}) / t.index({"...", 2}))); + torch::Tensor minLimX = pView.index({"...", 2}) * torch::min(limX, torch::max(-limX, pView.index({"...", 0}) / pView.index({"...", 2}))); + torch::Tensor minLimY = pView.index({"...", 2}) * torch::min(limY, torch::max(-limY, pView.index({"...", 1}) / pView.index({"...", 2}))); - t = torch::cat({minLimX.index({"...", None}), minLimY.index({"...", None}), t.index({"...", 2, None})}, -1); + torch::Tensor t = torch::cat({minLimX.index({"...", None}), minLimY.index({"...", None}), pView.index({"...", 2, None})}, -1); torch::Tensor rz = 1.0f / t.index({"...", 2}); torch::Tensor rz2 = rz.pow(2); @@ -95,7 +91,7 @@ project_gaussians_forward_tensor_cpu( torch::stack({torch::zeros_like(rz), fy * rz, -fy * t.index({"...", 1}) * rz2}, -1) }, -2); - torch::Tensor T = torch::matmul(J, W); + torch::Tensor T = torch::matmul(J, Rclip); torch::Tensor cov2d = torch::matmul(T, torch::matmul(cov3d, T.transpose(-1, -2))); // Add blur along axes @@ -135,7 +131,6 @@ project_gaussians_forward_tensor_cpu( } std::tuple< - torch::Tensor, torch::Tensor, torch::Tensor, std::vector * @@ -155,7 +150,7 @@ std::tuple< int channels = colors.size(1); int numPoints = xys.size(0); float *pDepths = static_cast(camDepths.data_ptr()); - std::vector *pxgid = new std::vector[width * height]; + std::vector *px2gid = new std::vector[width * height]; std::vector< size_t > gIndices( numPoints ); std::iota( gIndices.begin(), gIndices.end(), 0 ); @@ -167,7 +162,6 @@ std::tuple< torch::Tensor outImg = torch::zeros({height, width, channels}, torch::TensorOptions().dtype(torch::kFloat32).device(device)); torch::Tensor finalTs = torch::ones({height, width}, torch::TensorOptions().dtype(torch::kFloat32).device(device)); - torch::Tensor finalIdx = torch::zeros({height, width}, torch::TensorOptions().dtype(torch::kInt32).device(device)); torch::Tensor done = torch::zeros({height, width}, torch::TensorOptions().dtype(torch::kBool).device(device)); torch::Tensor sqCov2dX = 3.0f * torch::sqrt(cov2d.index({"...", 0, 0})); @@ -183,7 +177,6 @@ std::tuple< float *pFinalTs = static_cast(finalTs.data_ptr()); bool *pDone = static_cast(done.data_ptr()); - int32_t *pFinalIdx = static_cast(finalIdx.data_ptr()); float *pColors = static_cast(colors.data_ptr()); float bgX = background[0].item(); @@ -209,7 +202,7 @@ std::tuple< int maxx = (std::min)(height, static_cast(std::ceil(gY + sqy)) + 2); int miny = (std::max)(0, static_cast(std::floor(gX - sqx)) - 2); int maxy = (std::min)(width, static_cast(std::ceil(gX + sqx)) + 2); - + for (int i = minx; i < maxx; i++){ for (int j = miny; j < maxy; j++){ size_t pixIdx = (i * width + j); @@ -241,8 +234,7 @@ std::tuple< pOutImg[pixIdx * 3 + 2] += vis * pColors[gaussianId * 3 + 2]; pFinalTs[pixIdx] = nextT; - pFinalIdx[pixIdx] = gaussianId; - pxgid[pixIdx].push_back(gaussianId); + px2gid[pixIdx].push_back(gaussianId); } } } @@ -257,11 +249,11 @@ std::tuple< pOutImg[pixIdx * 3 + 1] += T * bgY; pOutImg[pixIdx * 3 + 2] += T * bgZ; - std::reverse(pxgid[pixIdx].begin(), pxgid[pixIdx].end()); + std::reverse(px2gid[pixIdx].begin(), px2gid[pixIdx].end()); } } - return std::make_tuple(outImg, finalTs, finalIdx, pxgid); + return std::make_tuple(outImg, finalTs, px2gid); } @@ -283,8 +275,7 @@ std:: const torch::Tensor &cov2d, const torch::Tensor &camDepths, const torch::Tensor &final_Ts, - const torch::Tensor &final_idx, - const std::vector *pxgid, + const std::vector *px2gid, const torch::Tensor &v_output, // dL_dout_color const torch::Tensor &v_output_alpha ){ @@ -304,10 +295,7 @@ std:: float *pv_colors = static_cast(v_colors.data_ptr()); float *pv_opacity = static_cast(v_opacity.data_ptr()); - // torch::Tensor buffer = torch::zeros({height, width, 3}, torch::TensorOptions().dtype(torch::kFloat32).device(device)); - float *pColors = static_cast(colors.data_ptr()); - // float *pBuffer = static_cast(buffer.data_ptr()); float *pv_output = static_cast(v_output.data_ptr()); float *pv_outputAlpha = static_cast(v_output_alpha.data_ptr()); float *pConics = static_cast(conics.data_ptr()); @@ -318,111 +306,18 @@ std:: float bgY = background[1].item(); float bgZ = background[2].item(); - // torch::Tensor Ts = final_Ts.clone(); - // float *pTs = static_cast(Ts.data_ptr()); float *pFinalTs = static_cast(final_Ts.data_ptr()); const float alphaThresh = 1.0f / 255.0f; -/* - std::vector< size_t > gIndices( numPoints ); - - float *pDepths = static_cast(camDepths.data_ptr()); - std::iota( gIndices.begin(), gIndices.end(), 0 ); - std::sort(gIndices.begin(), gIndices.end(), [&pDepths](int a, int b){ - return pDepths[a] < pDepths[b]; - }); - - torch::Tensor sqCov2dX = 3.0f * torch::sqrt(cov2d.index({"...", 0, 0})); - torch::Tensor sqCov2dY = 3.0f * torch::sqrt(cov2d.index({"...", 1, 1})); - - - float *pSqCov2dX = static_cast(sqCov2dX.data_ptr()); - float *pSqCov2dY = static_cast(sqCov2dY.data_ptr()); - - const float alphaThresh = 1.0f / 255.0f; - for (int idx = numPoints - 1; idx >= 0; idx--){ - int32_t gaussianId = gIndices[idx]; - - float A = pConics[gaussianId * 3 + 0]; - float B = pConics[gaussianId * 3 + 1]; - float C = pConics[gaussianId * 3 + 2]; - - float gX = pCenters[gaussianId * 2 + 0]; - float gY = pCenters[gaussianId * 2 + 1]; - - float sqx = pSqCov2dX[gaussianId]; - float sqy = pSqCov2dY[gaussianId]; - - int minx = (std::max)(0, static_cast(std::floor(gY - sqy)) - 2); - int maxx = (std::min)(height, static_cast(std::ceil(gY + sqy)) + 2); - int miny = (std::max)(0, static_cast(std::floor(gX - sqx)) - 2); - int maxy = (std::min)(width, static_cast(std::ceil(gX + sqx)) + 2); - - for (int i = minx; i < maxx; i++){ - for (int j = miny; j < maxy; j++){ - size_t pixIdx = (i * width + j); - - float xCam = gX - j; - float yCam = gY - i; - float sigma = ( - 0.5f - * (A * xCam * xCam + C * yCam * yCam) - + B * xCam * yCam - ); - - if (sigma < 0.0f) continue; - float vis = std::exp(-sigma); - float alpha = (std::min)(0.99f, pOpacities[gaussianId] * vis); - if (alpha < alphaThresh) continue; - - - float ra = 1.0f / (1.0f - alpha); - float T = pTs[pixIdx]; - T *= ra; - pTs[pixIdx] = T; - - float Tfinal = pFinalTs[pixIdx]; - float fac = alpha * T; - - pv_colors[gaussianId * 3 + 0] += fac * pv_output[pixIdx * 3 + 0]; - pv_colors[gaussianId * 3 + 1] += fac * pv_output[pixIdx * 3 + 1]; - pv_colors[gaussianId * 3 + 2] += fac * pv_output[pixIdx * 3 + 2]; - - float v_alpha = ((pColors[gaussianId * 3 + 0] * T - pBuffer[pixIdx * 3 + 0] * ra) * pv_output[pixIdx * 3 + 0]) + - ((pColors[gaussianId * 3 + 1] * T - pBuffer[pixIdx * 3 + 1] * ra) * pv_output[pixIdx * 3 + 1]) + - ((pColors[gaussianId * 3 + 2] * T - pBuffer[pixIdx * 3 + 2] * ra) * pv_output[pixIdx * 3 + 2]) + - (Tfinal * ra * pv_outputAlpha[pixIdx]) + - - (-Tfinal * ra * bgX * pv_output[pixIdx * 3 + 0]) + - (-Tfinal * ra * bgY * pv_output[pixIdx * 3 + 1]) + - (-Tfinal * ra * bgZ * pv_output[pixIdx * 3 + 2]); - - pBuffer[pixIdx * 3 + 0] += pColors[gaussianId * 3 + 0] * fac; - pBuffer[pixIdx * 3 + 1] += pColors[gaussianId * 3 + 1] * fac; - pBuffer[pixIdx * 3 + 2] += pColors[gaussianId * 3 + 2] * fac; - - float v_sigma = -pOpacities[gaussianId] * vis * v_alpha; - pv_conic[gaussianId * 3 + 0] += 0.5f * v_sigma * xCam * xCam; - pv_conic[gaussianId * 3 + 1] += 0.5f * v_sigma * xCam * yCam; - pv_conic[gaussianId * 3 + 2] += 0.5f * v_sigma * yCam * yCam; - - pv_xy[gaussianId * 2 + 0] += v_sigma * (A * xCam + B * yCam); - pv_xy[gaussianId * 2 + 1] += v_sigma * (B * xCam + C * yCam); - - pv_opacity[gaussianId] += vis * v_alpha; - } - } - }*/ - - for (int j = 0; j < width; j++){ - for (int i = 0; i < height; i++){ + for (int i = 0; i < height; i++){ + for (int j = 0; j < width; j++){ size_t pixIdx = (i * width + j); float Tfinal = pFinalTs[pixIdx]; float T = Tfinal; float buffer[3] = {0.0f, 0.0f, 0.0f}; - for (const int32_t &gaussianId : pxgid[pixIdx]){ + for (const int32_t &gaussianId : px2gid[pixIdx]){ float A = pConics[gaussianId * 3 + 0]; float B = pConics[gaussianId * 3 + 1]; float C = pConics[gaussianId * 3 + 2]; @@ -469,8 +364,6 @@ std:: pv_conic[gaussianId * 3 + 1] += 0.5f * v_sigma * xCam * yCam; pv_conic[gaussianId * 3 + 2] += 0.5f * v_sigma * yCam * yCam; - // std::cout << v_sigma << std::endl; - pv_xy[gaussianId * 2 + 0] += v_sigma * (A * xCam + B * yCam); pv_xy[gaussianId * 2 + 1] += v_sigma * (B * xCam + C * yCam); diff --git a/vendor/gsplat/backward.cu b/vendor/gsplat/backward.cu index c0e8f1a..5f85cd1 100644 --- a/vendor/gsplat/backward.cu +++ b/vendor/gsplat/backward.cu @@ -10,8 +10,6 @@ #include #endif -#include // TODO REMOVE - namespace cg = cooperative_groups; __global__ void nd_rasterize_backward_kernel( @@ -301,9 +299,6 @@ __global__ void rasterize_backward_kernel( // compute the current T for this gaussian float ra = 1.f / (1.f - alpha); T *= ra; - // if (pix_id == 4999){ - // printf("%f \n", T); - // } // update v_rgb for this gaussian const float fac = alpha * T; From b46fd3aab06672096b350b3e21cf26ef7fa9e151 Mon Sep 17 00:00:00 2001 From: Piero Toffanin Date: Wed, 20 Mar 2024 18:35:06 +0000 Subject: [PATCH 16/19] opensplat CPU integration completed --- gsplat.hpp | 7 +- model.cpp | 18 ++++-- opensplat.cpp | 3 +- project_gaussians.cpp | 7 +- project_gaussians.hpp | 7 +- rasterize_gaussians.cpp | 3 + rasterize_gaussians.hpp | 4 ++ simple_trainer.cpp | 4 +- spherical_harmonics.cpp | 30 ++++----- spherical_harmonics.hpp | 13 +++- vendor/gsplat-cpu/bindings.h | 10 +++ vendor/gsplat-cpu/gsplat_cpu.cpp | 107 +++++++++++++++++++++++++++++++ 12 files changed, 184 insertions(+), 29 deletions(-) diff --git a/gsplat.hpp b/gsplat.hpp index db0368d..d427655 100644 --- a/gsplat.hpp +++ b/gsplat.hpp @@ -1,7 +1,12 @@ +#ifndef GSPLAT_H +#define GSPLAT_H + #include "vendor/gsplat/config.h" #if defined(USE_HIP) || defined(USE_CUDA) #include "vendor/gsplat/bindings.h" #endif -#include "vendor/gsplat-cpu/bindings.h" \ No newline at end of file +#include "vendor/gsplat-cpu/bindings.h" + +#endif \ No newline at end of file diff --git a/model.cpp b/model.cpp index 3f36ff7..ebf56f0 100644 --- a/model.cpp +++ b/model.cpp @@ -84,7 +84,7 @@ torch::Tensor Model::forward(Camera& cam, int step){ torch::Tensor rgb; if (device == torch::kCPU){ - auto p = ProjectGaussiansCPU::Apply(means, + auto p = ProjectGaussiansCPU::apply(means, torch::exp(scales), 1, quats / quats.norm(2, {-1}, true), @@ -141,11 +141,21 @@ torch::Tensor Model::forward(Camera& cam, int step){ torch::Tensor viewDirs = means.detach() - T.transpose(0, 1).to(device); viewDirs = viewDirs / viewDirs.norm(2, {-1}, true); int degreesToUse = (std::min)(step / shDegreeInterval, shDegree); - std::cout << degreesToUse; - exit(1); - torch::Tensor rgbs = SphericalHarmonics::apply(degreesToUse, viewDirs, colors); + torch::Tensor rgbs; + + std::cerr << "HERE"; + if (device == torch::kCPU){ + rgbs = SphericalHarmonicsCPU::apply(degreesToUse, viewDirs, colors); + }else{ + #if defined(USE_HIP) || defined(USE_CUDA) + rgbs = SphericalHarmonics::apply(degreesToUse, viewDirs, colors); + #endif + } + + std::cerr << "THERE" << camDepths; rgbs = torch::clamp_min(rgbs + 0.5f, 0.0f); + if (device == torch::kCPU){ rgb = RasterizeGaussiansCPU::apply( xys, diff --git a/opensplat.cpp b/opensplat.cpp index 4273a3d..f21c03d 100644 --- a/opensplat.cpp +++ b/opensplat.cpp @@ -17,6 +17,7 @@ int main(int argc, char *argv[]){ ("s,save-every", "Save output scene every these many steps (set to -1 to disable)", cxxopts::value()->default_value("-1")) ("val", "Withhold a camera shot for validating the scene loss") ("val-image", "Filename of the image to withhold for validating scene loss", cxxopts::value()->default_value("random")) + ("cpu", "Force CPU execution") ("n,num-iters", "Number of iterations to run", cxxopts::value()->default_value("30000")) ("d,downscale-factor", "Scale input images by this factor.", cxxopts::value()->default_value("1")) @@ -77,7 +78,7 @@ int main(int argc, char *argv[]){ torch::Device device = torch::kCPU; - if (torch::cuda::is_available()) { + if (torch::cuda::is_available() && result.count("cpu") == 0) { std::cout << "Using CUDA" << std::endl; device = torch::kCUDA; }else{ diff --git a/project_gaussians.cpp b/project_gaussians.cpp index 9d41697..d57e1d6 100644 --- a/project_gaussians.cpp +++ b/project_gaussians.cpp @@ -1,5 +1,6 @@ #include "project_gaussians.hpp" -#include "gsplat.hpp" + +#if defined(USE_HIP) || defined(USE_CUDA) variable_list ProjectGaussians::forward(AutogradContext *ctx, torch::Tensor means, @@ -88,7 +89,9 @@ tensor_list ProjectGaussians::backward(AutogradContext *ctx, tensor_list grad_ou }; } -variable_list ProjectGaussiansCPU::Apply( +#endif + +variable_list ProjectGaussiansCPU::apply( torch::Tensor means, torch::Tensor scales, float globScale, diff --git a/project_gaussians.hpp b/project_gaussians.hpp index b6c7e6c..8891d22 100644 --- a/project_gaussians.hpp +++ b/project_gaussians.hpp @@ -3,9 +3,12 @@ #include #include "tile_bounds.hpp" +#include "gsplat.hpp" using namespace torch::autograd; +#if defined(USE_HIP) || defined(USE_CUDA) + class ProjectGaussians : public Function{ public: static variable_list forward(AutogradContext *ctx, @@ -26,9 +29,11 @@ class ProjectGaussians : public Function{ static tensor_list backward(AutogradContext *ctx, tensor_list grad_outputs); }; +#endif + class ProjectGaussiansCPU{ public: - static variable_list Apply( + static variable_list apply( torch::Tensor means, torch::Tensor scales, float globScale, diff --git a/rasterize_gaussians.cpp b/rasterize_gaussians.cpp index 40d0b12..9023b9b 100644 --- a/rasterize_gaussians.cpp +++ b/rasterize_gaussians.cpp @@ -34,6 +34,8 @@ std::tuple{ public: static torch::Tensor forward(AutogradContext *ctx, @@ -33,6 +35,8 @@ class RasterizeGaussians : public Function{ static tensor_list backward(AutogradContext *ctx, tensor_list grad_outputs); }; +#endif + class RasterizeGaussiansCPU : public Function{ public: static torch::Tensor forward(AutogradContext *ctx, diff --git a/simple_trainer.cpp b/simple_trainer.cpp index c11ed5b..c15a0f6 100644 --- a/simple_trainer.cpp +++ b/simple_trainer.cpp @@ -54,7 +54,7 @@ int main(int argc, char **argv){ int iterations = result["iters"].as(); float learningRate = result["lr"].as(); std::string render = result["render"].as(); - if (!fs::exists(render)) fs::create_directories(render); + if (!render.empty() && !fs::exists(render)) fs::create_directories(render); torch::Device device = torch::kCPU; if (torch::cuda::is_available() && result.count("cpu") == 0){ @@ -140,7 +140,7 @@ int main(int argc, char **argv){ for (size_t i = 0; i < iterations; i++){ if (device == torch::kCPU){ - auto p = ProjectGaussiansCPU::Apply(means, scales, 1, + auto p = ProjectGaussiansCPU::apply(means, scales, 1, quats, viewMat, viewMat, focal, focal, width / 2, diff --git a/spherical_harmonics.cpp b/spherical_harmonics.cpp index c6d4036..6581188 100644 --- a/spherical_harmonics.cpp +++ b/spherical_harmonics.cpp @@ -1,20 +1,4 @@ #include "spherical_harmonics.hpp" -#include "gsplat.hpp" - -int numShBases(int degree){ - switch(degree){ - case 0: - return 1; - case 1: - return 4; - case 2: - return 9; - case 3: - return 16; - default: - return 25; - } -} int degFromSh(int numBases){ switch(numBases){ @@ -37,6 +21,8 @@ torch::Tensor rgb2sh(const torch::Tensor &rgb){ return (rgb - 0.5) / C0; } +#if defined(USE_HIP) || defined(USE_CUDA) + torch::Tensor SphericalHarmonics::forward(AutogradContext *ctx, int degreesToUse, torch::Tensor viewDirs, @@ -67,5 +53,15 @@ tensor_list SphericalHarmonics::backward(AutogradContext *ctx, tensor_list grad_ none, compute_sh_backward_tensor(numPoints, degree, degreesToUse, viewDirs, v_colors) }; - +} + +#endif + +torch::Tensor SphericalHarmonicsCPU::apply(int degreesToUse, + torch::Tensor viewDirs, + torch::Tensor coeffs){ + long long numPoints = coeffs.size(0); + int degree = degFromSh(coeffs.size(-2)); + + return compute_sh_forward_tensor_cpu(numPoints, degree, degreesToUse, viewDirs, coeffs); } \ No newline at end of file diff --git a/spherical_harmonics.hpp b/spherical_harmonics.hpp index a84762d..2ebe1d1 100644 --- a/spherical_harmonics.hpp +++ b/spherical_harmonics.hpp @@ -2,13 +2,15 @@ #define SPHERICAL_HARMONICS_H #include +#include "gsplat.hpp" using namespace torch::autograd; -int numShBases(int degree); int degFromSh(int numBases); torch::Tensor rgb2sh(const torch::Tensor &rgb); +#if defined(USE_HIP) || defined(USE_CUDA) + class SphericalHarmonics : public Function{ public: static torch::Tensor forward(AutogradContext *ctx, @@ -18,4 +20,13 @@ class SphericalHarmonics : public Function{ static tensor_list backward(AutogradContext *ctx, tensor_list grad_outputs); }; +#endif + +class SphericalHarmonicsCPU{ +public: + static torch::Tensor apply(int degreesToUse, + torch::Tensor viewDirs, + torch::Tensor coeffs); +}; + #endif \ No newline at end of file diff --git a/vendor/gsplat-cpu/bindings.h b/vendor/gsplat-cpu/bindings.h index af0c593..bfcce48 100644 --- a/vendor/gsplat-cpu/bindings.h +++ b/vendor/gsplat-cpu/bindings.h @@ -71,3 +71,13 @@ std:: const torch::Tensor &v_output, // dL_dout_color const torch::Tensor &v_output_alpha ); + +int numShBases(int degree); + +torch::Tensor compute_sh_forward_tensor_cpu( + const int num_points, + const int degree, + const int degrees_to_use, + const torch::Tensor &viewdirs, + const torch::Tensor &coeffs +); \ No newline at end of file diff --git a/vendor/gsplat-cpu/gsplat_cpu.cpp b/vendor/gsplat-cpu/gsplat_cpu.cpp index 8b39fec..0fff7d3 100644 --- a/vendor/gsplat-cpu/gsplat_cpu.cpp +++ b/vendor/gsplat-cpu/gsplat_cpu.cpp @@ -373,4 +373,111 @@ std:: } return std::make_tuple(v_xy, v_conic, v_colors, v_opacity); +} + + +const float SH_C0 = 0.28209479177387814f; +const float SH_C1 = 0.4886025119029199f; +const float SH_C2[] = { + 1.0925484305920792f, + -1.0925484305920792f, + 0.31539156525252005f, + -1.0925484305920792f, + 0.5462742152960396f +}; +const float SH_C3[] = { + -0.5900435899266435f, + 2.890611442640554f, + -0.4570457994644658f, + 0.3731763325901154f, + -0.4570457994644658f, + 1.445305721320277f, + -0.5900435899266435f +}; +const float SH_C4[] = { + 2.5033429417967046f, + -1.7701307697799304f, + 0.9461746957575601f, + -0.6690465435572892f, + 0.10578554691520431f, + -0.6690465435572892f, + 0.47308734787878004f, + -1.7701307697799304f, + 0.6258357354491761f +}; + +int numShBases(int degree){ + switch(degree){ + case 0: + return 1; + case 1: + return 4; + case 2: + return 9; + case 3: + return 16; + default: + return 25; + } +} + +torch::Tensor compute_sh_forward_tensor_cpu( + const int num_points, + const int degree, + const int degrees_to_use, + const torch::Tensor &viewdirs, + const torch::Tensor &coeffs +) { + const int numChannels = 3; + unsigned numBases = numShBases(degrees_to_use); + + torch::Tensor result = torch::zeros({viewdirs.size(0), numBases}, torch::TensorOptions().dtype(torch::kFloat32).device(viewdirs.device())); + + result.index_put_({"...", 0}, SH_C0); + if (numBases > 1){ + std::vector xyz = viewdirs.unbind(-1); + torch::Tensor x = xyz[0]; + torch::Tensor y = xyz[1]; + torch::Tensor z = xyz[2]; + + if (numBases > 4){ + torch::Tensor xx = x * x; + torch::Tensor yy = y * y; + torch::Tensor zz = z * z; + torch::Tensor xy = x * y; + torch::Tensor yz = y * z; + torch::Tensor xz = x * z; + + result.index_put_({"...", 4}, SH_C2[0] * xy); + result.index_put_({"...", 5}, SH_C2[1] * yz); + result.index_put_({"...", 6}, SH_C2[2] * (2.0f * zz - xx - yy)); + result.index_put_({"...", 7}, SH_C2[3] * xz); + result.index_put_({"...", 8}, SH_C2[4] * (xx - yy)); + + if (numBases > 9){ + result.index_put_({"...", 9}, SH_C3[0] * y * (3 * xx - yy)); + result.index_put_({"...", 10}, SH_C3[1] * xy * z); + result.index_put_({"...", 11}, SH_C3[2] * y * (4 * zz - xx - yy)); + result.index_put_({"...", 12}, SH_C3[3] * z * (2 * zz - 3 * xx - 3 * yy)); + result.index_put_({"...", 13}, SH_C3[4] * x * (4 * zz - xx - yy) ); + result.index_put_({"...", 14}, SH_C3[5] * z * (xx - yy)); + result.index_put_({"...", 15}, SH_C3[6] * x * (xx - 3 * yy)); + + if (numBases > 16){ + result.index_put_({"...", 16}, SH_C4[0] * xy * (xx - yy)); + result.index_put_({"...", 17}, SH_C4[1] * yz * (3 * xx - yy)); + result.index_put_({"...", 18}, SH_C4[2] * xy * (7 * zz - 1)); + result.index_put_({"...", 19}, SH_C4[3] * yz * (7 * zz - 3)); + result.index_put_({"...", 20}, SH_C4[4] * (zz * (35 * zz - 30) + 3)); + result.index_put_({"...", 21}, SH_C4[5] * xz * (7 * zz - 3)); + result.index_put_({"...", 22}, SH_C4[6] * (xx - yy) * (7 * zz - 1)); + result.index_put_({"...", 23}, SH_C4[7] * xz * (xx - 3 * yy)); + result.index_put_({"...", 24}, SH_C4[8] * (xx * (xx - 3 * yy) - yy * (3 * xx - yy))); + + } + } + } + } + + return (result.index({"...", None}) * coeffs).sum(-2); } \ No newline at end of file From 17211cc044e470bfcebfcd6b30f7c08d4bdf21e0 Mon Sep 17 00:00:00 2001 From: Piero Toffanin Date: Wed, 20 Mar 2024 19:06:37 +0000 Subject: [PATCH 17/19] width/height mismatch fix, cleanup --- model.cpp | 4 +--- opensplat.cpp | 4 +++- simple_trainer.cpp | 4 ++-- vendor/gsplat-cpu/gsplat_cpu.cpp | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/model.cpp b/model.cpp index ebf56f0..ae02d2b 100644 --- a/model.cpp +++ b/model.cpp @@ -127,7 +127,7 @@ torch::Tensor Model::forward(Camera& cam, int step){ conics = p[3]; numTilesHit = p[4]; #else - throw std::runtime_error("GPU support not built"); + throw std::runtime_error("GPU support not built, use --cpu"); #endif } @@ -143,7 +143,6 @@ torch::Tensor Model::forward(Camera& cam, int step){ int degreesToUse = (std::min)(step / shDegreeInterval, shDegree); torch::Tensor rgbs; - std::cerr << "HERE"; if (device == torch::kCPU){ rgbs = SphericalHarmonicsCPU::apply(degreesToUse, viewDirs, colors); }else{ @@ -152,7 +151,6 @@ torch::Tensor Model::forward(Camera& cam, int step){ #endif } - std::cerr << "THERE" << camDepths; rgbs = torch::clamp_min(rgbs + 0.5f, 0.0f); diff --git a/opensplat.cpp b/opensplat.cpp index f21c03d..e0c410d 100644 --- a/opensplat.cpp +++ b/opensplat.cpp @@ -77,10 +77,12 @@ int main(int argc, char *argv[]){ const float splitScreenSize = result["split-screen-size"].as(); torch::Device device = torch::kCPU; + int displayStep = 1; if (torch::cuda::is_available() && result.count("cpu") == 0) { std::cout << "Using CUDA" << std::endl; device = torch::kCUDA; + displayStep = 10; }else{ std::cout << "Using CPU" << std::endl; } @@ -120,7 +122,7 @@ int main(int argc, char *argv[]){ torch::Tensor mainLoss = model.mainLoss(rgb, gt, ssimWeight); mainLoss.backward(); - if (step % 10 == 0) std::cout << "Step " << step << ": " << mainLoss.item() << std::endl; + if (step % displayStep == 0) std::cout << "Step " << step << ": " << mainLoss.item() << std::endl; model.optimizersStep(); model.schedulersStep(step); diff --git a/simple_trainer.cpp b/simple_trainer.cpp index c15a0f6..2da1704 100644 --- a/simple_trainer.cpp +++ b/simple_trainer.cpp @@ -147,7 +147,7 @@ int main(int argc, char **argv){ height / 2, height, width); - + outImg = RasterizeGaussiansCPU::apply( p[0], // xys p[1], // radii, @@ -182,7 +182,7 @@ int main(int argc, char **argv){ width, background); #else - throw std::runtime_error("GPU support not built"); + throw std::runtime_error("GPU support not built, use --cpu"); #endif } diff --git a/vendor/gsplat-cpu/gsplat_cpu.cpp b/vendor/gsplat-cpu/gsplat_cpu.cpp index 0fff7d3..250b394 100644 --- a/vendor/gsplat-cpu/gsplat_cpu.cpp +++ b/vendor/gsplat-cpu/gsplat_cpu.cpp @@ -240,8 +240,8 @@ std::tuple< } // Background - for (int i = 0; i < width; i++){ - for (int j = 0; j < height; j++){ + for (int i = 0; i < height; i++){ + for (int j = 0; j < width; j++){ size_t pixIdx = (i * width + j); float T = pFinalTs[pixIdx]; From 19343685083128c0147c302784faf80df5c2112d Mon Sep 17 00:00:00 2001 From: Piero Toffanin Date: Wed, 20 Mar 2024 15:20:44 -0400 Subject: [PATCH 18/19] Cleanup --- model.cpp | 13 ++++++++----- vendor/gsplat/backward.cu | 1 - vendor/gsplat/bindings.cu | 1 - 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/model.cpp b/model.cpp index bc391ae..86a6d2f 100644 --- a/model.cpp +++ b/model.cpp @@ -448,11 +448,14 @@ void Model::afterTrain(int step){ xysGradNorm = torch::Tensor(); visCounts = torch::Tensor(); max2DSize = torch::Tensor(); -#ifdef USE_HIP - c10::hip::HIPCachingAllocator::emptyCache(); -#elif defined(USE_CUDA) - c10::cuda::CUDACachingAllocator::emptyCache(); -#endif + + if (device != torch::kCPU){ + #ifdef USE_HIP + c10::hip::HIPCachingAllocator::emptyCache(); + #elif defined(USE_CUDA) + c10::cuda::CUDACachingAllocator::emptyCache(); + #endif + } } } diff --git a/vendor/gsplat/backward.cu b/vendor/gsplat/backward.cu index 5f85cd1..c338dba 100644 --- a/vendor/gsplat/backward.cu +++ b/vendor/gsplat/backward.cu @@ -299,7 +299,6 @@ __global__ void rasterize_backward_kernel( // compute the current T for this gaussian float ra = 1.f / (1.f - alpha); T *= ra; - // update v_rgb for this gaussian const float fac = alpha * T; float v_alpha = 0.f; diff --git a/vendor/gsplat/bindings.cu b/vendor/gsplat/bindings.cu index 8bdbaca..80d581c 100644 --- a/vendor/gsplat/bindings.cu +++ b/vendor/gsplat/bindings.cu @@ -628,7 +628,6 @@ std:: (float3 *)v_colors.contiguous().data_ptr(), v_opacity.contiguous().data_ptr() ); - cudaDeviceSynchronize(); // TODO REMOVE return std::make_tuple(v_xy, v_conic, v_colors, v_opacity); } \ No newline at end of file From 4457ff7686a27dc448e1dbdb8d52461f88cbbb45 Mon Sep 17 00:00:00 2001 From: Piero Toffanin Date: Wed, 20 Mar 2024 15:34:06 -0400 Subject: [PATCH 19/19] Update README --- README.md | 57 +++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 43 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 3d8d43c..a882108 100644 --- a/README.md +++ b/README.md @@ -6,15 +6,37 @@ A free and open source implementation of 3D gaussian splatting written in C++, f OpenSplat takes camera poses + sparse points in [COLMAP](https://colmap.github.io/) or [nerfstudio](https://docs.nerf.studio/quickstart/custom_dataset.html) project format and computes a [scene file](https://drive.google.com/file/d/1w-CBxyWNXF3omA8B_IeOsRmSJel3iwyr/view?usp=sharing) (.ply) that can be later imported for viewing, editing and rendering in other [software](https://github.com/MrNeRF/awesome-3D-gaussian-splatting?tab=readme-ov-file#open-source-implementations). +Graphics card recommended, but not required! OpenSplat runs the fastest on NVIDIA and AMD GPUs, but can also run entirely on CPU power (~100x slower). + Commercial use allowed and encouraged under the terms of the [AGPLv3](https://www.tldrlegal.com/license/gnu-affero-general-public-license-v3-agpl-3-0). ✅ -## Build (CUDA) +## Build Requirements: - * **CUDA**: Make sure you have the CUDA compiler (`nvcc`) in your PATH and that `nvidia-smi` is working. https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html - * **libtorch**: Visit https://pytorch.org/get-started/locally/ and select your OS, for package select "LibTorch". Make sure to match your version of CUDA if you want to leverage GPU support in libtorch. * **OpenCV**: `sudo apt install libopencv-dev` should do it. + * **libtorch**: See instructions below + +### CPU + + **libtorch**: Visit https://pytorch.org/get-started/locally/ and select your OS, for package select "LibTorch". For compute platform you can select "CPU". + + Then: + + ```bash + git clone https://github.com/pierotofy/OpenSplat OpenSplat + cd OpenSplat + mkdir build && cd build + cmake -DCMAKE_PREFIX_PATH=/path/to/libtorch/ .. && make -j$(nproc) + ``` + +### CUDA + +Additional requirement: + + * **CUDA**: Make sure you have the CUDA compiler (`nvcc`) in your PATH and that `nvidia-smi` is working. https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html + + **libtorch**: Visit https://pytorch.org/get-started/locally/ and select your OS, for package select "LibTorch". Make sure to match your version of CUDA if you want to leverage GPU support in libtorch. Then: @@ -27,12 +49,13 @@ Requirements: The software has been tested on Ubuntu 20.04 and Windows. With some changes it could run on macOS (help us by opening a PR?). -## Build (ROCm via HIP) -Requirements: +### ROCm via HIP + +Additional requirement: * **ROCm**: Make sure you have the ROCm installed at /opt/rocm. https://rocm.docs.amd.com/projects/install-on-linux/en/latest/tutorial/quick-start.html -* **libtorch**: Visit https://pytorch.org/get-started/locally/ and select your OS, for package select "LibTorch". Make sure to match your version of ROCm (5.7) if you want to leverage AMD GPU support in libtorch. -* **OpenCV**: `sudo apt install libopencv-dev` should do it. + +**libtorch**: Visit https://pytorch.org/get-started/locally/ and select your OS, for package select "LibTorch". Make sure to match your version of ROCm (5.7) if you want to leverage AMD GPU support in libtorch. Then: @@ -44,13 +67,18 @@ Then: cmake -DCMAKE_PREFIX_PATH=/path/to/libtorch/ -DGPU_RUNTIME="HIP" -DHIP_ROOT_DIR=/opt/rocm -DOPENSPLAT_BUILD_SIMPLE_TRAINER=ON .. make ``` + In addition, you can leverage Jinja to build the project - ``` - cmake -GNinja -DCMAKE_PREFIX_PATH=/path/to/libtorch/ -DGPU_RUNTIME="HIP" -DHIP_ROOT_DIR=/opt/rocm -DOPENSPLAT_BUILD_SIMPLE_TRAINER=ON .. - jinja - ``` -## Docker Build (CUDA) +```bash +cmake -GNinja -DCMAKE_PREFIX_PATH=/path/to/libtorch/ -DGPU_RUNTIME="HIP" -DHIP_ROOT_DIR=/opt/rocm -DOPENSPLAT_BUILD_SIMPLE_TRAINER=ON .. +jinja +``` + +## Docker Build + +### CUDA + Navigate to the root directory of OpenSplat repo that has Dockerfile and run the following command to build the Docker image: ```bash @@ -70,7 +98,8 @@ docker build \ --build-arg CMAKE_BUILD_TYPE=Release . ``` -## Docker Build (ROCm via HIP) +### ROCm via HIP + Navigate to the root directory of OpenSplat repo that has Dockerfile and run the following command to build the Docker image: ```bash docker build \ @@ -138,11 +167,11 @@ cd /code/build We recently released OpenSplat, so there's lots of work to do. * Support for running on AMD cards (more testing needed) - * Support for running on CPU-only * Improve speed / reduce memory usage * Distributed computation using multiple machines * Real-time training viewer output * Compressed scene outputs + * Automatic filtering * Your ideas? https://github.com/pierotofy/OpenSplat/issues?q=is%3Aopen+is%3Aissue+label%3Aenhancement