Propagate updates from internal branch (1eb506)

VIDILabs · Dec 18, 2023 · 404deb7 · 404deb7
1 parent 5e03f08
commit 404deb7
Show file tree

Hide file tree

Showing 16 changed files with 1,506 additions and 181 deletions.
diff --git a/apps/main_app.cpp b/apps/main_app.cpp
@@ -532,13 +532,40 @@ main(int ac, const char** av)
 
   ovr::Scene scene;
   if (ac < 2) {
-    scene = create_example_scene();
-    scene.camera = { /*from*/ vec3f(0.f, 0.f, -1200.f),
-                     /* at */ vec3f(0.f, 0.f, 0.f),
-                     /* up */ vec3f(0.f, 1.f, 0.f) };
+    // scene = create_example_scene();
+    // scene.camera = { /*from*/ vec3f(0.f, 0.f, -1200.f),
+    //                  /* at */ vec3f(0.f, 0.f, 0.f),
+    //                  /* up */ vec3f(0.f, 1.f, 0.f) };
+    throw std::runtime_error("no scene file specified");
   }
   else {
     scene = ovr::scene::create_scene(std::string(av[1]));
+
+    // TODO hack for testing isosurface rendering
+
+    // const int32_t volume_raw_id = scene.instances[0].models[0].volume_model.volume_texture;
+    // scene.instances[0].models[0].volume_model.volume_texture = volume_raw_id;
+
+    // ovr::scene::Texture volume_tfn;
+    // volume_tfn.type = ovr::scene::Texture::TRANSFER_FUNCTION_TEXTURE;
+    // volume_tfn.transfer_function.transfer_function = scene.instances[0].models[0].volume_model.transfer_function;
+    // volume_tfn.transfer_function.volume_texture = volume_raw_id;
+    // scene.textures.push_back(volume_tfn);
+    // const int32_t volume_tfn_id = scene.textures.size() - 1;
+
+    // ovr::scene::Material volume_mtl;
+    // volume_mtl.type = ovr::scene::Material::OBJ_MATERIAL;
+    // volume_mtl.obj.map_kd = volume_tfn_id;
+    // scene.materials.push_back(volume_mtl);
+    // const int32_t volume_mtl_id = scene.materials.size() - 1;
+
+    // ovr::scene::Model model;
+    // model.type = ovr::scene::Model::GEOMETRIC_MODEL;
+    // model.geometry_model.geometry.type = ovr::scene::Geometry::ISOSURFACE_GEOMETRY;
+    // model.geometry_model.geometry.isosurfaces.volume_texture = volume_raw_id;
+    // model.geometry_model.geometry.isosurfaces.isovalues = { 0.5f };
+    // model.geometry_model.mtl = volume_mtl_id;
+    // scene.instances[0].models[0] = model;
   }
 
   MainWindow::FrameLayer layer;  

diff --git a/ovr/common/cuda/cuda_buffer.h b/ovr/common/cuda/cuda_buffer.h
@@ -94,7 +94,7 @@ struct CUDABuffer {
 
     CUDA_CHECK(cudaMallocAsync((void**)&d_ptr, sizeInBytes, stream));
 
-    util::total_n_bytes_allocated() += sizeInBytes;
+    util::tot_nbytes_allocated() += sizeInBytes;
 
 #ifdef CUDA_BUFFER_VERBOSE_MEMORY_ALLOCS
 		printf("[mem] CUDABuffer alloc %s\n", util::prettyBytes(sizeInBytes).c_str());
@@ -108,7 +108,7 @@ struct CUDABuffer {
   {
     if (owned_data && d_ptr) {
       CUDA_CHECK(cudaFreeAsync(d_ptr, stream));
-      util::total_n_bytes_allocated() -= sizeInBytes;
+      util::tot_nbytes_allocated() -= sizeInBytes;
 #ifdef CUDA_BUFFER_VERBOSE_MEMORY_ALLOCS
       printf("[mem] CUDABuffer free %s\n", util::prettyBytes(sizeInBytes).c_str());
 #endif
@@ -312,7 +312,7 @@ createCudaArray3D(void* dataPtr, const int3& dims)
   // allocate 3D CUDA array
   cudaChannelFormatDesc channel_desc = cudaCreateChannelDesc<Type>();
   CUDA_CHECK(cudaMalloc3DArray(&dataArr, &channel_desc, make_cudaExtent(dims.x, dims.y, dims.z)));
-  util::total_n_bytes_allocated() += (size_t)dims.x * dims.y * dims.z * sizeof(Type);
+  util::tot_nbytes_allocated() += (size_t)dims.x * dims.y * dims.z * sizeof(Type);
 #ifdef CUDA_BUFFER_VERBOSE_MEMORY_ALLOCS
   printf("[mem] 3DTex %s\n", util::prettyBytes((size_t)dims.x * dims.y * dims.z * sizeof(Type)).c_str());
 #endif
@@ -339,7 +339,7 @@ createCudaArray1D(const void* dataPtr, const size_t& size)
   // Allocate actually a 2D CUDA array of shape N x 1
   cudaChannelFormatDesc channel_desc = cudaCreateChannelDesc<Type>();
   CUDA_CHECK(cudaMallocArray(&dataArr, &channel_desc, size, 1));
-  util::total_n_bytes_allocated() += size * sizeof(Type);
+  util::tot_nbytes_allocated() += size * sizeof(Type);
 #ifdef CUDA_BUFFER_VERBOSE_MEMORY_ALLOCS
   printf("[mem] 3DTex %s\n", util::prettyBytes(size * sizeof(Type)).c_str());
 #endif
@@ -370,7 +370,7 @@ allocateCudaArray1D(const void* dataPtr, const size_t& size)
   // Allocate a 1D CUDA array of size N
   cudaChannelFormatDesc channel_desc = cudaCreateChannelDesc<Type>();
   CUDA_CHECK(cudaMallocArray(&dataArr, &channel_desc, size));
-  util::total_n_bytes_allocated() += size * sizeof(Type);
+  util::tot_nbytes_allocated() += size * sizeof(Type);
 #ifdef CUDA_BUFFER_VERBOSE_MEMORY_ALLOCS
   printf("[mem] Array1D %s\n", util::prettyBytes(size * sizeof(Type)).c_str());
 #endif

diff --git a/ovr/common/cuda/cuda_misc.h b/ovr/common/cuda/cuda_misc.h
@@ -93,6 +93,9 @@
 #define OPTIX_CHECK_NOEXCEPT(call) \
   do {                             \
     OptixResult res = call;        \
+    if (res != OPTIX_SUCCESS) {                                                                \
+      fprintf(stderr, "OptiX call (%s) failed with %d (line %d)\n", #call, res, __LINE__);     \
+    }                                                                                          \
   } while (0)
 
 
@@ -400,14 +403,101 @@ static void getUsedGPUMemory(unsigned long long* out)
   *out = total-free;
 }
 
-inline std::atomic<size_t>& total_n_bytes_allocated() 
+inline std::atomic<size_t>& tot_nbytes_allocated() noexcept { static std::atomic<size_t> s{0}; return s; }
+inline std::atomic<size_t>& max_nbytes_allocated() noexcept { static std::atomic<size_t> s{0}; return s; }
+inline void update_max_nbytes_allocated(const size_t& value) noexcept
 {
-	static std::atomic<size_t> s_total_n_bytes_allocated{0};
-	return s_total_n_bytes_allocated;
+  auto& maximum_value = max_nbytes_allocated();
+  size_t prev_value = maximum_value;
+  while(prev_value < value && !maximum_value.compare_exchange_weak(prev_value, value)) {}
+}
+
+}
+
+inline cudaError_t cudaTrackedFree(void *devPtr, size_t size) {
+  size_t tot = ::util::tot_nbytes_allocated().fetch_sub(size);
+#ifdef VNR_VERBOSE_MEMORY_ALLOCS
+  printf("[mem] Linear -%s\n", util::prettyBytes(size).c_str());
+#endif
+  ::util::update_max_nbytes_allocated(tot);
+  return cudaFree(devPtr);
 }
 
+template<typename T>
+inline cudaError_t cudaTrackedMalloc(T **devPtr, size_t size) {
+  size_t tot = ::util::tot_nbytes_allocated().fetch_add(size);
+#ifdef VNR_VERBOSE_MEMORY_ALLOCS
+  printf("[mem] Linear +%s\n", util::prettyBytes(size).c_str());
+#endif
+  ::util::update_max_nbytes_allocated(tot);
+  return cudaMalloc(devPtr, size);
 }
 
+inline cudaError_t cudaTrackedFreeAsync(void *devPtr, size_t size, cudaStream_t stream) {
+  size_t tot = ::util::tot_nbytes_allocated().fetch_sub(size);
+#ifdef VNR_VERBOSE_MEMORY_ALLOCS
+  printf("[mem] Linear -%s\n", util::prettyBytes(size).c_str());
+#endif
+  ::util::update_max_nbytes_allocated(tot);
+  return cudaFreeAsync(devPtr, stream);
+}
+
+template<typename T>
+inline cudaError_t cudaTrackedMallocAsync(T **devPtr, size_t size, cudaStream_t stream) {
+  size_t tot = ::util::tot_nbytes_allocated().fetch_add(size);
+#ifdef VNR_VERBOSE_MEMORY_ALLOCS
+  printf("[mem] Linear +%s\n", util::prettyBytes(size).c_str());
+#endif
+  ::util::update_max_nbytes_allocated(tot);
+  return cudaMallocAsync(devPtr, size, stream);
+}
+
+inline cudaError_t CUDARTAPI cudaTrackedFreeArray(cudaArray_t array) {
+  // Gets info about the specified cudaArray.
+  cudaChannelFormatDesc desc;
+  cudaExtent extent;
+  unsigned int flags;
+  CUDA_CHECK(cudaArrayGetInfo(&desc, &extent, &flags, array));
+  // Computes the size of the array
+  size_t size = (desc.x+desc.y+desc.z+desc.w);
+  if (extent.width == 0 && extent.height == 0 && extent.depth == 0) {
+    size = 0;
+  }
+  else {
+    if (extent.width)  size *= (size_t)extent.width;
+    if (extent.height) size *= (size_t)extent.height;
+    if (extent.depth)  size *= (size_t)extent.depth;
+  }
+  size_t tot = ::util::tot_nbytes_allocated().fetch_sub(size);
+#ifdef VNR_VERBOSE_MEMORY_ALLOCS
+  printf("[mem] Array -%s\n", util::prettyBytes(size).c_str());
+#endif
+  ::util::update_max_nbytes_allocated(tot);
+  return cudaFreeArray(array);
+}
+
+inline cudaError_t cudaTrackedMallocArray(cudaArray_t *array, const struct cudaChannelFormatDesc *desc, size_t width, size_t height = 0, unsigned int flags = 0) {
+  size_t size = (size_t)width * (desc->x+desc->y+desc->z+desc->w);
+  if (height) size *= (size_t)height;
+  size_t tot = ::util::tot_nbytes_allocated().fetch_add(size);
+#ifdef VNR_VERBOSE_MEMORY_ALLOCS
+  printf("[mem] Array +%s\n", util::prettyBytes(size).c_str());
+#endif
+  ::util::update_max_nbytes_allocated(tot);
+  return cudaMallocArray(array, desc, width, height, flags);
+}
+
+inline cudaError_t cudaTrackedMalloc3DArray(cudaArray_t *array, const struct cudaChannelFormatDesc* desc, struct cudaExtent extent, unsigned int flags = 0) {
+  size_t size = (size_t)extent.width * (size_t)extent.height * (size_t)extent.depth * (desc->x+desc->y+desc->z+desc->w);
+  size_t tot = ::util::tot_nbytes_allocated().fetch_add(size);
+#ifdef VNR_VERBOSE_MEMORY_ALLOCS
+  printf("[mem] 3D Array +%s\n", util::prettyBytes(size).c_str());
+#endif
+  ::util::update_max_nbytes_allocated(tot);
+  return cudaMalloc3DArray(array, desc, extent, flags);
+}
+
+
 namespace ovr {
 namespace misc = ::util;
 }
diff --git a/ovr/devices/optix7/device_impl.cpp b/ovr/devices/optix7/device_impl.cpp
@@ -281,34 +281,24 @@ DeviceOptix7::Impl::mapframe(FrameBufferData* fb)
 }
 
 void
-DeviceOptix7::Impl::buildScene(Scene& scene)
+DeviceOptix7::Impl::buildScene(const Scene& scene)
 {
-  assert(scene.instances.size() == 1);
-  assert(scene.instances[0].models.size() == 1);
-  assert(scene.instances[0].models[0].type == scene::Model::VOLUMETRIC_MODEL);
-  assert(scene.instances[0].models[0].volume_model.volume.type == scene::Volume::STRUCTURED_REGULAR_VOLUME);
-
-  // if (!scene.lights.empty())
-  //   params.light_directional_pos = scene.lights[0].position;
-
-  auto& scene_tfn = scene.instances[0].models[0].volume_model.transfer_function;
-  auto& scene_volume = scene.instances[0].models[0].volume_model.volume.structured_regular;
+  auto& scene_volume = parse_single_volume_scene(scene, scene::Volume::STRUCTURED_REGULAR_VOLUME).structured_regular;
 
   vec3f scale = scene_volume.grid_spacing * vec3f(scene_volume.data->dims);
   vec3f translate = scene_volume.grid_origin;
 
-  std::cout << "scale " << scale.x << " " << scale.y << " " << scale.z << std::endl;
-  std::cout << "translate " << translate.x << " " << translate.y << " " << translate.z << std::endl;
+  // std::cout << "scale " << scale.x << " " << scale.y << " " << scale.z << std::endl;
+  // std::cout << "translate " << translate.x << " " << translate.y << " " << translate.z << std::endl;
 
   // TODO support other parameters //
   auto v = StructuredRegularVolume();
   v.matrix = affine3f::translate(translate) * affine3f::scale(scale);
-
   v.load_from_array3d_scalar(scene_volume.data);
-  v.set_transfer_function(scene_tfn.color, scene_tfn.opacity, scene_tfn.value_range);
   v.set_sampling_rate(scene.volume_sampling_rate);
-
   volumes.emplace_back(std::move(v));
+
+  commit();
 }
 
 vec3i

diff --git a/ovr/devices/optix7/device_impl.h b/ovr/devices/optix7/device_impl.h
@@ -50,7 +50,7 @@ struct DeviceOptix7::Impl {
   void buildSBT();
 
   /*! constructs the scene data */
-  void buildScene(Scene& scene);
+  void buildScene(const Scene& scene);
 
   /*! build the instance acceleration structure */
   OptixTraversableHandle buildTLAS();

diff --git a/ovr/devices/optix7/render/method_optix.cpp b/ovr/devices/optix7/render/method_optix.cpp
@@ -0,0 +1,89 @@
+//. ======================================================================== //
+//.                                                                          //
+//. Copyright 2019-2022 Qi Wu                                                //
+//.                                                                          //
+//. Licensed under the MIT License                                           //
+//.                                                                          //
+//. ======================================================================== //
+//. ======================================================================== //
+//. Copyright 2018-2019 Ingo Wald                                            //
+//.                                                                          //
+//. Licensed under the Apache License, Version 2.0 (the "License");          //
+//. you may not use this file except in compliance with the License.         //
+//. You may obtain a copy of the License at                                  //
+//.                                                                          //
+//.     http://www.apache.org/licenses/LICENSE-2.0                           //
+//.                                                                          //
+//. Unless required by applicable law or agreed to in writing, software      //
+//. distributed under the License is distributed on an "AS IS" BASIS,        //
+//. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
+//. See the License for the specific language governing permissions and      //
+//. limitations under the License.                                           //
+//. ======================================================================== //
+
+#include "method_optix.h"
+
+namespace vnr {
+
+extern "C" char embedded_ptx_code_optix[];
+
+// ------------------------------------------------------------------
+//
+// ------------------------------------------------------------------
+
+MethodOptiX::MethodOptiX() : OptixProgram(embedded_ptx_code_optix, RAY_TYPE_COUNT)
+{
+  OptixProgram::HitGroupShaders s;
+
+  shader_raygen = "__raygen__default";
+
+  shader_misses.push_back("__miss__radiance");
+  shader_misses.push_back("__miss__shadow");
+
+  /* objects */
+  {
+    OptixProgram::ObjectGroup group;
+    group.type = VOLUME_STRUCTURED_REGULAR;
+
+    s.shader_CH = "__closesthit__volume_radiance";
+    s.shader_AH = "__anyhit__volume_radiance";
+    s.shader_IS = "__intersection__volume";
+    group.hitgroup.push_back(s);
+
+    s.shader_CH = "__closesthit__volume_shadow";
+    s.shader_AH = "__anyhit__volume_shadow";
+    s.shader_IS = "__intersection__volume";
+    group.hitgroup.push_back(s);
+
+    shader_objects.push_back(group);
+  }
+
+  params_buffer.resize(sizeof(DefaultUserData), /*stream=*/0);
+}
+
+void
+MethodOptiX::render(cudaStream_t stream, const LaunchParams& _params, ShadingMode mode)
+{
+  DefaultUserData params = _params;
+  {
+    params.mode = mode;
+    params.traversable = ias[0].traversable;
+    params.geometry_traversable = ias[1].traversable;
+  }
+
+  params_buffer.upload_async(&params, 1, stream);
+
+  OPTIX_CHECK(optixLaunch(/*! pipeline we're launching launch: */
+                          pipeline.handle,
+                          stream,
+                          /*! parameters and SBT */
+                          params_buffer.d_pointer(),
+                          params_buffer.sizeInBytes,
+                          &sbt,
+                          /*! dimensions of the launch: */
+                          params.frame.size.x,
+                          params.frame.size.y,
+                          1));
+}
+
+} // namespace ovr