diff --git a/CMakeLists.txt b/CMakeLists.txt index d0481c4e..6698cd8e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,6 +12,10 @@ project(anari_library_visionaray LANGUAGES C CXX) include(GNUInstallDirs) +# build external libraries + +add_subdirectory(external) + # note: we're often tracking the most recent changes from ANARI-SDK # (on branch "next_release") find_package(anari 0.10.0 REQUIRED) @@ -33,9 +37,6 @@ endif() option(ANARI_VISIONARAY_ENABLE_NANOVDB "Enable NanoVDB spatial field type" OFF) set(nanovdb ${ANARI_VISIONARAY_ENABLE_NANOVDB}) -if (nanovdb) - find_package(OpenVDB COMPONENTS nanovdb REQUIRED) -endif() anari_generate_queries( NAME visionaray @@ -130,6 +131,8 @@ target_sources(${PROJECT_NAME} PRIVATE if (nanovdb) target_sources(${PROJECT_NAME} PRIVATE scene/volume/spatial_field/NanoVDBField.cpp) target_compile_definitions(${PROJECT_NAME} PRIVATE WITH_NANOVDB=1) + target_link_libraries(${PROJECT_NAME} PRIVATE + $) endif() include(GenerateExportHeader) @@ -137,7 +140,7 @@ generate_export_header(${PROJECT_NAME} EXPORT_MACRO_NAME "VISIONARAY_DEVICE_INTERFACE" ) -target_link_libraries(${PROJECT_NAME} +target_link_libraries(${PROJECT_NAME} PUBLIC visionaray::visionaray anari::anari anari::helium) target_include_directories(${PROJECT_NAME} PUBLIC @@ -173,7 +176,13 @@ if (cuda) target_compile_definitions(${PROJECT_NAME}_cuda PRIVATE WITH_CUDA=1) if (nanovdb) target_sources(${PROJECT_NAME}_cuda PRIVATE - scene/volume/spatial_field/NanoVDBField.cpp) + scene/volume/spatial_field/NanoVDBField.cu) + set_source_files_properties( + scene/volume/spatial_field/NanoVDBField.cu + PROPERTIES COMPILE_FLAGS "--extended-lambda --expt-relaxed-constexpr" + ) + target_link_libraries(${PROJECT_NAME}_cuda PRIVATE + $) target_compile_definitions(${PROJECT_NAME}_cuda PRIVATE WITH_NANOVDB=1) endif() @@ -181,11 +190,11 @@ if (cuda) EXPORT_MACRO_NAME "VISIONARAY_DEVICE_INTERFACE" ) - target_link_libraries(${PROJECT_NAME}_cuda + target_link_libraries(${PROJECT_NAME}_cuda PUBLIC visionaray::visionaray anari::anari anari::helium) if (TARGET CUDA::cudart) - target_link_libraries(${PROJECT_NAME}_cuda CUDA::cudart) + target_link_libraries(${PROJECT_NAME}_cuda PUBLIC CUDA::cudart) endif() target_include_directories(${PROJECT_NAME}_cuda PUBLIC @@ -205,9 +214,7 @@ if (hip) ) target_compile_definitions(${PROJECT_NAME}_hip PRIVATE WITH_HIP=1) if (nanovdb) - target_sources(${PROJECT_NAME}_hip PRIVATE - scene/volume/spatial_field/NanoVDBField.cpp) - target_compile_definitions(${PROJECT_NAME}_hip PRIVATE WITH_NANOVDB=1) + message(WARNING "No VDB support with HIP") endif() target_link_libraries(${PROJECT_NAME}_hip diff --git a/external/nanovdb/CMakeLists.txt b/external/nanovdb/CMakeLists.txt new file mode 100644 index 00000000..dae56ed2 --- /dev/null +++ b/external/nanovdb/CMakeLists.txt @@ -0,0 +1,3 @@ +project(vsnray_nanovdb LANGUAGES CXX CUDA) +add_library(${PROJECT_NAME} INTERFACE) +target_include_directories(${PROJECT_NAME} INTERFACE ${CMAKE_CURRENT_LIST_DIR}/..) diff --git a/external/nanovdb/CNanoVDB.h b/external/nanovdb/CNanoVDB.h new file mode 100644 index 00000000..c714f94d --- /dev/null +++ b/external/nanovdb/CNanoVDB.h @@ -0,0 +1,715 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +// +// Simple C-wrapper for the nanovdb structure +// Meant for systems where you lack a C++ compiler. 
+// +#ifndef __CNANOVDB__ +#define __CNANOVDB__ + +#define CNANOVDB_DATA_ALIGNMENT 32 +#define CNANOVDB_ALIGNMENT_PADDING(x, n) (-(x) & ((n)-1)) + +#define USE_SINGLE_ROOT_KEY + +#ifdef __OPENCL_VERSION__ + +#define CNANOVDB_GLOBAL __global +#define RESTRICT restrict + +// OpenCL doesn't define these basic types: +typedef unsigned long uint64_t; +typedef long int64_t; +typedef unsigned int uint32_t; +typedef int int32_t; +typedef short int16_t; +typedef unsigned short uint16_t; +typedef unsigned char uint8_t; + +#else + +#define CNANOVDB_GLOBAL +#define RESTRICT __restrict + +#endif + + +enum cnanovdb_GridType +{ + cnanovdb_GridType_Unknown = 0, + cnanovdb_GridType_Float = 1, + cnanovdb_GridType_Double = 2, + cnanovdb_GridType_Int16 = 3, + cnanovdb_GridType_Int32 = 4, + cnanovdb_GridType_Int64 = 5, + cnanovdb_GridType_Vec3f = 6, + cnanovdb_GridType_Vec3d = 7, + cnanovdb_GridType_Mask = 8, + cnanovdb_GridType_FP16 = 9, + cnanovdb_GridType_End = 10 +}; + +#define ROOT_LEVEL 3 + +#define DEFINEMASK_int(LOG2DIM, SIZE) \ +typedef struct \ +{ \ + uint64_t mWords[SIZE >> 6]; \ +} cnanovdb_mask##LOG2DIM; \ +\ +static void cnanovdb_mask##LOG2DIM##_clear(CNANOVDB_GLOBAL cnanovdb_mask##LOG2DIM *RESTRICT mask) \ +{ for (uint32_t i = 0; i < (SIZE >> 6); i++) mask->mWords[i] = 0; } \ +\ +static bool cnanovdb_mask##LOG2DIM##_isOn(const CNANOVDB_GLOBAL cnanovdb_mask##LOG2DIM *RESTRICT mask, uint32_t n) \ +{ return 0 != (mask->mWords[n >> 6] & (((uint64_t)(1)) << (n & 63))); } \ +/**/ + +#define DEFINEMASK(LOG2DIM) \ + DEFINEMASK_int(LOG2DIM, (1U << (3*LOG2DIM))) + +#define INSTANTIATE(LOG2DIM) \ + DEFINEMASK(LOG2DIM) + +INSTANTIATE(3) +INSTANTIATE(4) +INSTANTIATE(5) + +typedef struct +{ + float mMatF[9]; // r,c = 3*r + c + float mInvMatF[9]; // r,c = 3*r + c + float mVecF[3]; + float mTaperF; + double mMatD[9]; // r,c = 3*r + c + double mInvMatD[9]; // r,c = 3*r + c + double mVecD[3]; + double mTaperD; +} cnanovdb_map; + +typedef struct +{ + float mVec[3]; +} cnanovdb_Vec3F; + +typedef struct +{ + int32_t mVec[3]; +} cnanovdb_coord; + +static int +cnanovdb_coord_compare(const CNANOVDB_GLOBAL cnanovdb_coord *a, const cnanovdb_coord *b) +{ + if (a->mVec[0] < b->mVec[0]) + return -1; + if (a->mVec[0] > b->mVec[0]) + return 1; + if (a->mVec[1] < b->mVec[1]) + return -1; + if (a->mVec[1] > b->mVec[1]) + return 1; + if (a->mVec[2] < b->mVec[2]) + return -1; + if (a->mVec[2] > b->mVec[2]) + return 1; + return 0; +} + +#ifdef USE_SINGLE_ROOT_KEY +static uint64_t +cnanovdb_coord_to_key(const cnanovdb_coord *RESTRICT ijk) +{ + // Define to workaround a bug with 64-bit shifts in the AMD OpenCL compiler. 
+#if defined(AVOID_64BIT_SHIFT) + uint2 key = (uint2)( ((uint32_t)ijk->mVec[2]) >> 12, 0) | + (uint2)((((uint32_t)ijk->mVec[1]) >> 12) << 21, + ((uint32_t)ijk->mVec[1]) >> 23) | + (uint2)(0, (((uint32_t)ijk->mVec[0]) >> 12) << 10); + return *(uint64_t *)&key; +#else + return ((uint64_t) (((uint32_t)ijk->mVec[2]) >> 12)) | + (((uint64_t) (((uint32_t)ijk->mVec[1]) >> 12)) << 21) | + (((uint64_t) (((uint32_t)ijk->mVec[0]) >> 12)) << 42); +#endif +} +#else +static void +cnanovdb_coord_to_key(cnanovdb_coord *RESTRICT key, const cnanovdb_coord *RESTRICT ijk) +{ + key->mVec[0] = ijk->mVec[0] & ~((1u << 12) - 1u); + key->mVec[1] = ijk->mVec[1] & ~((1u << 12) - 1u); + key->mVec[2] = ijk->mVec[2] & ~((1u << 12) - 1u); +} +#endif + +static void +cnanovdb_map_apply(cnanovdb_Vec3F *dst, const CNANOVDB_GLOBAL cnanovdb_map *RESTRICT map, const cnanovdb_Vec3F *src) +{ + float sx = src->mVec[0]; + float sy = src->mVec[1]; + float sz = src->mVec[2]; + dst->mVec[0] = sx * map->mMatF[0] + sy * map->mMatF[1] + sz * map->mMatF[2] + map->mVecF[0]; + dst->mVec[1] = sx * map->mMatF[3] + sy * map->mMatF[4] + sz * map->mMatF[5] + map->mVecF[1]; + dst->mVec[2] = sx * map->mMatF[6] + sy * map->mMatF[7] + sz * map->mMatF[8] + map->mVecF[2]; +} + +static void +cnanovdb_map_applyInverse(cnanovdb_Vec3F *dst, const CNANOVDB_GLOBAL cnanovdb_map *RESTRICT map, const cnanovdb_Vec3F *src) +{ + float sx = src->mVec[0] - map->mVecF[0]; + float sy = src->mVec[1] - map->mVecF[1]; + float sz = src->mVec[2] - map->mVecF[2]; + dst->mVec[0] = sx * map->mInvMatF[0] + sy * map->mInvMatF[1] + sz * map->mInvMatF[2]; + dst->mVec[1] = sx * map->mInvMatF[3] + sy * map->mInvMatF[4] + sz * map->mInvMatF[5]; + dst->mVec[2] = sx * map->mInvMatF[6] + sy * map->mInvMatF[7] + sz * map->mInvMatF[8]; +} + +static void +cnanovdb_map_applyJacobi(cnanovdb_Vec3F *dst, const CNANOVDB_GLOBAL cnanovdb_map *RESTRICT map, const cnanovdb_Vec3F *src) +{ + float sx = src->mVec[0]; + float sy = src->mVec[1]; + float sz = src->mVec[2]; + dst->mVec[0] = sx * map->mMatF[0] + sy * map->mMatF[1] + sz * map->mMatF[2]; + dst->mVec[1] = sx * map->mMatF[3] + sy * map->mMatF[4] + sz * map->mMatF[5]; + dst->mVec[2] = sx * map->mMatF[6] + sy * map->mMatF[7] + sz * map->mMatF[8]; +} + +static void +cnanovdb_map_applyInverseJacobi(cnanovdb_Vec3F *dst, const CNANOVDB_GLOBAL cnanovdb_map *RESTRICT map, const cnanovdb_Vec3F *src) +{ + float sx = src->mVec[0]; + float sy = src->mVec[1]; + float sz = src->mVec[2]; + dst->mVec[0] = sx * map->mInvMatF[0] + sy * map->mInvMatF[1] + sz * map->mInvMatF[2]; + dst->mVec[1] = sx * map->mInvMatF[3] + sy * map->mInvMatF[4] + sz * map->mInvMatF[5]; + dst->mVec[2] = sx * map->mInvMatF[6] + sy * map->mInvMatF[7] + sz * map->mInvMatF[8]; +} + +static void +cnanovdb_map_applyIJT(cnanovdb_Vec3F *dst, const CNANOVDB_GLOBAL cnanovdb_map *RESTRICT map, const cnanovdb_Vec3F *src) +{ + float sx = src->mVec[0]; + float sy = src->mVec[1]; + float sz = src->mVec[2]; + dst->mVec[0] = sx * map->mInvMatF[0] + sy * map->mInvMatF[3] + sz * map->mInvMatF[6]; + dst->mVec[1] = sx * map->mInvMatF[1] + sy * map->mInvMatF[4] + sz * map->mInvMatF[7]; + dst->mVec[2] = sx * map->mInvMatF[2] + sy * map->mInvMatF[5] + sz * map->mInvMatF[8]; +} + +typedef struct +{ + int64_t mByteOffset; // byte offset to the blind data, relative to the GridData. + uint64_t mElementCount; // number of elements, e.g. point count + uint32_t mFlags; // flags + uint32_t mSemantic; // semantic meaning of the data. 
+ uint32_t mDataClass; // 4 bytes + uint32_t mDataType; // 4 bytes + char mName[256]; + uint8_t _reserved[CNANOVDB_ALIGNMENT_PADDING(sizeof(int64_t)+sizeof(uint64_t)+2*sizeof(uint32_t)+2*sizeof(uint32_t)+256*sizeof(char), CNANOVDB_DATA_ALIGNMENT)]; +} cnanovdb_gridblindmetadata; + +typedef struct +{ + uint64_t mMagic; // 8B magic to validate it is valid grid data. + uint64_t mChecksum; // 8B. Checksum of grid buffer. + uint32_t mVersion;// 4B. compacted major.minor.path version number. + uint32_t mFlags; // 4B. flags for grid. + uint32_t mGridIndex;// 4B. Index of this grid in the buffer + uint32_t mGridCount; // 4B. Total number of grids in the buffer + uint64_t mGridSize; // 8B. byte count of this entire grid occupied in the buffer. + char mGridName[256]; // 256B + cnanovdb_map mMap; // 264B. affine transformation between index and world space in both single and double precision + double mBBox[6]; // 48B. floating-point bounds of active values in WORLD SPACE + double mVoxelSize[3]; // 24B. size of a voxel in world units + uint32_t mGridClass; // 4B. + uint32_t mGridType; // 4B. + uint64_t mBlindMetadataOffset; // 8B. offset of GridBlindMetaData structures. + int32_t mBlindMetadataCount; // 4B. count of GridBlindMetaData structures. + uint32_t _reserved[CNANOVDB_ALIGNMENT_PADDING(8 + 8 + 4 + 4 + 4 + 4 + 8 + 256 + 24 + 24 + sizeof(cnanovdb_map) + 24 + 4 + 4 + 8 + 4, CNANOVDB_DATA_ALIGNMENT) / 4]; +} cnanovdb_griddata; + +static void +cnanovdb_griddata_worldToIndex(cnanovdb_Vec3F *dst, const CNANOVDB_GLOBAL cnanovdb_griddata *RESTRICT grid, const cnanovdb_Vec3F *src) +{ + cnanovdb_map_applyInverse(dst, &grid->mMap, src); +} + +static void +cnanovdb_griddata_indexToWorld(cnanovdb_Vec3F *dst, const CNANOVDB_GLOBAL cnanovdb_griddata *RESTRICT grid, const cnanovdb_Vec3F *src) +{ + cnanovdb_map_apply(dst, &grid->mMap, src); +} + +static void +cnanovdb_griddata_worldToIndexDir(cnanovdb_Vec3F *dst, const CNANOVDB_GLOBAL cnanovdb_griddata *RESTRICT grid, const cnanovdb_Vec3F *src) +{ + cnanovdb_map_applyInverseJacobi(dst, &grid->mMap, src); +} + +static void +cnanovdb_griddata_indexToWorldDir(cnanovdb_Vec3F *dst, const CNANOVDB_GLOBAL cnanovdb_griddata *RESTRICT grid, const cnanovdb_Vec3F *src) +{ + cnanovdb_map_applyJacobi(dst, &grid->mMap, src); +} + +static void +cnanovdb_griddata_applyIJT(cnanovdb_Vec3F *dst, const CNANOVDB_GLOBAL cnanovdb_griddata *RESTRICT grid, const cnanovdb_Vec3F *src) +{ + cnanovdb_map_applyIJT(dst, &grid->mMap, src); +} + +typedef struct +{ + uint64_t mNodeOffset[ROOT_LEVEL + 1]; + uint32_t mNodeCount[ROOT_LEVEL]; + uint32_t mTileCount[ROOT_LEVEL]; + uint64_t mVoxelCount; + uint8_t _reserved[CNANOVDB_ALIGNMENT_PADDING(4*sizeof(uint64_t)+(3+3)*sizeof(uint32_t)+sizeof(uint64_t), CNANOVDB_DATA_ALIGNMENT)]; +} cnanovdb_treedata; + +static const CNANOVDB_GLOBAL cnanovdb_treedata * +cnanovdb_griddata_tree(const CNANOVDB_GLOBAL cnanovdb_griddata *RESTRICT griddata) +{ + return (const CNANOVDB_GLOBAL cnanovdb_treedata *)(griddata + 1); +} + +#define CREATE_TILEENTRY(VALUETYPE, SUFFIX) \ +typedef union \ +{ \ + VALUETYPE value; \ + uint64_t child; \ +} cnanovdb_tileentry##SUFFIX; \ +/**/ + +typedef struct +{ + cnanovdb_coord mKey; + const CNANOVDB_GLOBAL void *mNode[4]; +} cnanovdb_readaccessor; + + +static void +cnanovdb_readaccessor_insert(cnanovdb_readaccessor *RESTRICT acc, int childlevel, const CNANOVDB_GLOBAL void *RESTRICT node, const cnanovdb_coord *RESTRICT ijk) +{ + acc->mNode[childlevel] = node; + acc->mKey.mVec[0] = ijk->mVec[0]; + acc->mKey.mVec[1] = ijk->mVec[1]; + 
acc->mKey.mVec[2] = ijk->mVec[2]; +} + +#define CREATE_LEAF_NODE_int(LEVEL, LOG2DIM, CHILDTOTAL, TOTAL, MASK, VALUETYPE, STATSTYPE, SUFFIX) \ +typedef struct \ +{ \ + cnanovdb_coord mBBox_min; \ + uint8_t mBBoxDif[3]; \ + uint8_t mFlags; \ + cnanovdb_mask##LOG2DIM mValueMask; \ + VALUETYPE mMinimum; \ + VALUETYPE mMaximum; \ + STATSTYPE mAverage; \ + STATSTYPE mStdDevi; \ + uint32_t _reserved[ CNANOVDB_ALIGNMENT_PADDING(sizeof(cnanovdb_mask##LOG2DIM)+2*sizeof(VALUETYPE)+2*sizeof(STATSTYPE)+sizeof(cnanovdb_coord)+sizeof(uint8_t[3])+sizeof(uint8_t), CNANOVDB_DATA_ALIGNMENT)/4]; \ + VALUETYPE mVoxels[1u << (3*LOG2DIM)]; \ +} cnanovdb_node##LEVEL##SUFFIX; \ +\ +static uint32_t \ +cnanovdb_node##LEVEL##SUFFIX##_CoordToOffset(const cnanovdb_coord *RESTRICT ijk) \ +{ \ + return ( ( ( ijk->mVec[0] & MASK ) >> CHILDTOTAL ) << ( 2 * LOG2DIM ) ) + \ + ( ( ( ijk->mVec[1] & MASK ) >> CHILDTOTAL ) << ( LOG2DIM ) ) + \ + ( ( ijk->mVec[2] & MASK ) >> CHILDTOTAL ); \ +} \ +\ +static VALUETYPE \ +cnanovdb_node##LEVEL##SUFFIX##_getValue(const CNANOVDB_GLOBAL cnanovdb_node##LEVEL##SUFFIX *RESTRICT node, const cnanovdb_coord *RESTRICT ijk) \ +{ \ + uint32_t n = cnanovdb_node##LEVEL##SUFFIX##_CoordToOffset(ijk); \ + return node->mVoxels[n]; \ +} \ +\ +static VALUETYPE \ +cnanovdb_node##LEVEL##SUFFIX##_getValueAndCache(const CNANOVDB_GLOBAL cnanovdb_node##LEVEL##SUFFIX *RESTRICT node, const cnanovdb_coord *RESTRICT ijk, cnanovdb_readaccessor *RESTRICT /* DO NOT REMOVE: Required for C99 compliance */ acc) \ +{ \ + (void)(acc); \ + uint32_t n = cnanovdb_node##LEVEL##SUFFIX##_CoordToOffset(ijk); \ + return node->mVoxels[n]; \ +} \ +\ +static bool \ +cnanovdb_node##LEVEL##SUFFIX##_isActive(const CNANOVDB_GLOBAL cnanovdb_node##LEVEL##SUFFIX *RESTRICT node, const cnanovdb_coord *RESTRICT ijk) \ +{ \ + uint32_t n = cnanovdb_node##LEVEL##SUFFIX##_CoordToOffset(ijk); \ + if (cnanovdb_mask##LOG2DIM##_isOn(&node->mValueMask, n)) \ + return true; \ + return false; \ +} \ +\ +static bool \ +cnanovdb_node##LEVEL##SUFFIX##_isActiveAndCache(const CNANOVDB_GLOBAL cnanovdb_node##LEVEL##SUFFIX *RESTRICT node, const cnanovdb_coord *RESTRICT ijk, cnanovdb_readaccessor *RESTRICT /* DO NOT REMOVE: Required for C99 compliance */ acc) \ +{ \ + (void)(acc); \ + uint32_t n = cnanovdb_node##LEVEL##SUFFIX##_CoordToOffset(ijk); \ + if (cnanovdb_mask##LOG2DIM##_isOn(&node->mValueMask, n)) \ + return true; \ + return false; \ +} \ +\ +static const CNANOVDB_GLOBAL cnanovdb_node##LEVEL##SUFFIX * \ +cnanovdb_tree_getNode##LEVEL##SUFFIX(const CNANOVDB_GLOBAL cnanovdb_treedata *RESTRICT tree, uint64_t i) \ +{ \ + const CNANOVDB_GLOBAL cnanovdb_node##LEVEL##SUFFIX *basenode = (const CNANOVDB_GLOBAL cnanovdb_node##LEVEL##SUFFIX *)((CNANOVDB_GLOBAL uint8_t *)(tree) + tree->mNodeOffset[LEVEL]); \ + return basenode + i; \ +} \ +\ +/**/ + +#define CREATE_LEAF_NODE(LEVEL, LOG2DIM, TOTAL, VALUETYPE, STATSTYPE, SUFFIX) \ +CREATE_LEAF_NODE_int(LEVEL, LOG2DIM, (TOTAL-LOG2DIM), TOTAL, ((1u << TOTAL) - 1u), VALUETYPE, STATSTYPE, SUFFIX) + +#define CREATE_INTERNAL_NODE_int(CHILDLEVEL, LEVEL, LOG2DIM, CHILDTOTAL, TOTAL, MASK, VALUETYPE, STATSTYPE, SUFFIX) \ +typedef struct \ +{ \ + cnanovdb_coord mBBox_min, mBBox_max; \ + int32_t mOffset; \ + uint32_t mFlags; \ + cnanovdb_mask##LOG2DIM mValueMask, mChildMask; \ + VALUETYPE mMinimum, mMaximum; \ + STATSTYPE mAverage, mStdDevi; \ + uint8_t _reserved[CNANOVDB_ALIGNMENT_PADDING(sizeof(cnanovdb_mask##LOG2DIM)+sizeof(VALUETYPE)*2+sizeof(STATSTYPE)*2+sizeof(cnanovdb_coord)*2+sizeof(int32_t)+sizeof(uint32_t), 
CNANOVDB_DATA_ALIGNMENT)]; \ + cnanovdb_tileentry##SUFFIX mTable[1u << (3*LOG2DIM)]; \ +} cnanovdb_node##LEVEL##SUFFIX; \ +\ +static uint32_t \ +cnanovdb_node##LEVEL##SUFFIX##_CoordToOffset(const cnanovdb_coord *RESTRICT ijk) \ +{ \ + return ( ( ( ijk->mVec[0] & MASK ) >> CHILDTOTAL ) << ( 2 * LOG2DIM ) ) + \ + ( ( ( ijk->mVec[1] & MASK ) >> CHILDTOTAL ) << ( LOG2DIM ) ) + \ + ( ( ijk->mVec[2] & MASK ) >> CHILDTOTAL ); \ +} \ +\ +static const CNANOVDB_GLOBAL cnanovdb_node##CHILDLEVEL##SUFFIX * \ +cnanovdb_node##LEVEL##SUFFIX##_getChild(const CNANOVDB_GLOBAL cnanovdb_node##LEVEL##SUFFIX *RESTRICT node, uint32_t n) \ +{ \ + const CNANOVDB_GLOBAL cnanovdb_node##CHILDLEVEL##SUFFIX *childnode = (const CNANOVDB_GLOBAL cnanovdb_node##CHILDLEVEL##SUFFIX *)( ((CNANOVDB_GLOBAL uint8_t *)node) + node->mTable[n].child); \ + return childnode; \ +} \ +\ +static VALUETYPE \ +cnanovdb_node##LEVEL##SUFFIX##_getValue(const CNANOVDB_GLOBAL cnanovdb_node##LEVEL##SUFFIX *RESTRICT node, const cnanovdb_coord *RESTRICT ijk) \ +{ \ + uint32_t n = cnanovdb_node##LEVEL##SUFFIX##_CoordToOffset(ijk); \ + if (cnanovdb_mask##LOG2DIM##_isOn(&node->mChildMask, n)) \ + { \ + const CNANOVDB_GLOBAL cnanovdb_node##CHILDLEVEL##SUFFIX *child = cnanovdb_node##LEVEL##SUFFIX##_getChild(node, n); \ + return cnanovdb_node##CHILDLEVEL##SUFFIX##_getValue(child, ijk); \ + } \ + return node->mTable[n].value; \ +} \ +\ +static VALUETYPE \ +cnanovdb_node##LEVEL##SUFFIX##_getValueAndCache(const CNANOVDB_GLOBAL cnanovdb_node##LEVEL##SUFFIX *RESTRICT node, const cnanovdb_coord *RESTRICT ijk, cnanovdb_readaccessor *RESTRICT acc) \ +{ \ + uint32_t n = cnanovdb_node##LEVEL##SUFFIX##_CoordToOffset(ijk); \ + if (cnanovdb_mask##LOG2DIM##_isOn(&node->mChildMask, n)) \ + { \ + const CNANOVDB_GLOBAL cnanovdb_node##CHILDLEVEL##SUFFIX *child = cnanovdb_node##LEVEL##SUFFIX##_getChild(node, n); \ + cnanovdb_readaccessor_insert(acc, CHILDLEVEL, child, ijk); \ + return cnanovdb_node##CHILDLEVEL##SUFFIX##_getValueAndCache(child, ijk, acc); \ + } \ + return node->mTable[n].value; \ +} \ +\ +static bool \ +cnanovdb_node##LEVEL##SUFFIX##_isActive(const CNANOVDB_GLOBAL cnanovdb_node##LEVEL##SUFFIX *RESTRICT node, const cnanovdb_coord *RESTRICT ijk) \ +{ \ + uint32_t n = cnanovdb_node##LEVEL##SUFFIX##_CoordToOffset(ijk); \ + if (cnanovdb_mask##LOG2DIM##_isOn(&node->mChildMask, n)) \ + { \ + const CNANOVDB_GLOBAL cnanovdb_node##CHILDLEVEL##SUFFIX *child = cnanovdb_node##LEVEL##SUFFIX##_getChild(node, n); \ + return cnanovdb_node##CHILDLEVEL##SUFFIX##_isActive(child, ijk); \ + } \ + return cnanovdb_mask##LOG2DIM##_isOn(&node->mValueMask, n) ? true : false; \ +} \ +\ +static bool \ +cnanovdb_node##LEVEL##SUFFIX##_isActiveAndCache(const CNANOVDB_GLOBAL cnanovdb_node##LEVEL##SUFFIX *RESTRICT node, const cnanovdb_coord *RESTRICT ijk, cnanovdb_readaccessor *RESTRICT acc) \ +{ \ + uint32_t n = cnanovdb_node##LEVEL##SUFFIX##_CoordToOffset(ijk); \ + if (cnanovdb_mask##LOG2DIM##_isOn(&node->mChildMask, n)) \ + { \ + const CNANOVDB_GLOBAL cnanovdb_node##CHILDLEVEL##SUFFIX *child = cnanovdb_node##LEVEL##SUFFIX##_getChild(node, n); \ + cnanovdb_readaccessor_insert(acc, CHILDLEVEL, child, ijk); \ + return cnanovdb_node##CHILDLEVEL##SUFFIX##_isActiveAndCache(child, ijk, acc); \ + } \ + return cnanovdb_mask##LOG2DIM##_isOn(&node->mValueMask, n) ? 
true : false; \ +} \ +\ +static const CNANOVDB_GLOBAL cnanovdb_node##LEVEL##SUFFIX * \ +cnanovdb_tree_getNode##LEVEL##SUFFIX(const CNANOVDB_GLOBAL cnanovdb_treedata *RESTRICT tree, uint64_t i) \ +{ \ + const CNANOVDB_GLOBAL cnanovdb_node##LEVEL##SUFFIX *basenode = (const CNANOVDB_GLOBAL cnanovdb_node##LEVEL##SUFFIX *)((CNANOVDB_GLOBAL uint8_t *)(tree) + tree->mNodeOffset[LEVEL]); \ + return basenode + i; \ +} \ +\ +/**/ + +#define CREATE_INTERNAL_NODE(CHILDLEVEL, LEVEL, LOG2DIM, TOTAL, VALUETYPE, STATSTYPE, SUFFIX) \ +CREATE_INTERNAL_NODE_int(CHILDLEVEL, LEVEL, LOG2DIM, (TOTAL-LOG2DIM), TOTAL, ((1u << TOTAL) - 1u), VALUETYPE, STATSTYPE, SUFFIX) + + +#ifdef USE_SINGLE_ROOT_KEY +#define DEFINE_KEY(KEY) \ + uint64_t KEY; +#define KEYSIZE sizeof(uint64_t) + +#define KEYSEARCH(SUFFIX) \ + uint64_t key; \ + key = cnanovdb_coord_to_key(ijk); \ +\ + for (int i = low; i < high; i++) \ + { \ + const CNANOVDB_GLOBAL cnanovdb_rootdata_tile##SUFFIX *tile = tiles + i; \ + if (tile->key == key) \ + return tile; \ + } \ +/**/ +#else +#define DEFINE_KEY(KEY) \ + cnanovdb_coord KEY; +#define KEYSIZE sizeof(cnanovdb_coord) +#define KEYSEARCH(SUFFIX) \ + cnanovdb_coord key; \ + cnanovdb_coord_to_key(&key, ijk); \ + \ + while (low != high) \ + { \ + int32_t mid = low + (( high - low ) >> 1 ); \ + const CNANOVDB_GLOBAL cnanovdb_rootdata_tile##SUFFIX *tile = tiles + mid; \ + \ + int keycmp = cnanovdb_coord_compare(&tile->key, &key); \ + if (keycmp == 0) \ + { \ + return tile; \ + } \ + \ + if (keycmp < 0) \ + low = mid + 1; \ + else \ + high = mid; \ + } \ +/**/ +#endif + + +#define CREATE_ROOTDATA(VALUETYPE, STATSTYPE, SUFFIX) \ +typedef struct \ +{ \ + DEFINE_KEY(key); \ + int64_t child; \ + uint32_t state; \ + VALUETYPE value; \ + uint8_t _reserved[CNANOVDB_ALIGNMENT_PADDING(sizeof(KEYSIZE)+sizeof(VALUETYPE)+sizeof(int64_t)+sizeof(uint32_t), CNANOVDB_DATA_ALIGNMENT)]; \ +} cnanovdb_rootdata_tile##SUFFIX; \ + \ +typedef struct \ +{ \ + cnanovdb_coord mBBox_min, mBBox_max; \ + uint32_t mTableSize; \ + VALUETYPE mBackground; \ + VALUETYPE mMinimum, mMaximum; \ + STATSTYPE mAverage, mStdDevi; \ + uint32_t _reserved[CNANOVDB_ALIGNMENT_PADDING(sizeof(cnanovdb_coord)*2+sizeof(uint32_t)+sizeof(VALUETYPE)*3+sizeof(STATSTYPE)*2, CNANOVDB_DATA_ALIGNMENT)/4]; \ +} cnanovdb_rootdata##SUFFIX; \ + \ +static const CNANOVDB_GLOBAL cnanovdb_rootdata##SUFFIX * \ +cnanovdb_treedata_root##SUFFIX(const CNANOVDB_GLOBAL cnanovdb_treedata *RESTRICT treedata) \ +{ \ + return (const CNANOVDB_GLOBAL cnanovdb_rootdata##SUFFIX *) ((const CNANOVDB_GLOBAL uint8_t *)(treedata) + treedata->mNodeOffset[ROOT_LEVEL]); \ +} \ + \ +static const CNANOVDB_GLOBAL cnanovdb_rootdata_tile##SUFFIX * \ +cnanovdb_rootdata##SUFFIX##_getTile(const CNANOVDB_GLOBAL cnanovdb_rootdata##SUFFIX *RESTRICT rootdata, uint32_t n) \ +{ \ + const CNANOVDB_GLOBAL cnanovdb_rootdata_tile##SUFFIX *basetile = (const CNANOVDB_GLOBAL cnanovdb_rootdata_tile##SUFFIX *) (rootdata + 1); \ + return basetile + n; \ +} \ + \ +static const CNANOVDB_GLOBAL cnanovdb_node2##SUFFIX * \ +cnanovdb_rootdata##SUFFIX##_getChild(const CNANOVDB_GLOBAL cnanovdb_rootdata##SUFFIX *RESTRICT rootdata, const CNANOVDB_GLOBAL cnanovdb_rootdata_tile##SUFFIX *RESTRICT tile) \ +{ \ + CNANOVDB_GLOBAL cnanovdb_node2##SUFFIX *basenode = (CNANOVDB_GLOBAL cnanovdb_node2##SUFFIX *) (((CNANOVDB_GLOBAL uint8_t *) rootdata) + tile->child); \ + return basenode; \ +} \ + \ +static const CNANOVDB_GLOBAL cnanovdb_rootdata_tile##SUFFIX * \ +cnanovdb_rootdata##SUFFIX##_findTile(const CNANOVDB_GLOBAL 
cnanovdb_rootdata##SUFFIX *RESTRICT rootdata, const cnanovdb_coord *RESTRICT ijk) \ +{ \ + int32_t low = 0, high = rootdata->mTableSize; \ + const CNANOVDB_GLOBAL cnanovdb_rootdata_tile##SUFFIX *tiles = cnanovdb_rootdata##SUFFIX##_getTile(rootdata, 0); \ + \ + KEYSEARCH(SUFFIX) \ + return 0; \ +} \ + \ +static VALUETYPE \ +cnanovdb_rootdata##SUFFIX##_getValue(const CNANOVDB_GLOBAL cnanovdb_rootdata##SUFFIX *RESTRICT rootdata, const cnanovdb_coord *RESTRICT ijk) \ +{ \ + const CNANOVDB_GLOBAL cnanovdb_rootdata_tile##SUFFIX *tile = cnanovdb_rootdata##SUFFIX##_findTile(rootdata, ijk); \ + if (!tile) \ + return rootdata->mBackground; \ + if (tile->child == 0) \ + return tile->value; \ + return cnanovdb_node2##SUFFIX##_getValue( cnanovdb_rootdata##SUFFIX##_getChild(rootdata, tile), ijk ); \ +} \ + \ +static VALUETYPE \ +cnanovdb_rootdata##SUFFIX##_getValueAndCache(const CNANOVDB_GLOBAL cnanovdb_rootdata##SUFFIX *RESTRICT rootdata, const cnanovdb_coord *RESTRICT ijk, cnanovdb_readaccessor *RESTRICT acc) \ +{ \ + const CNANOVDB_GLOBAL cnanovdb_rootdata_tile##SUFFIX *tile = cnanovdb_rootdata##SUFFIX##_findTile(rootdata, ijk); \ + if (!tile) \ + return rootdata->mBackground; \ + if (tile->child == 0) \ + return tile->value; \ + const CNANOVDB_GLOBAL cnanovdb_node2##SUFFIX *child = cnanovdb_rootdata##SUFFIX##_getChild(rootdata, tile); \ + cnanovdb_readaccessor_insert(acc, 2, child, ijk); \ + return cnanovdb_node2##SUFFIX##_getValueAndCache( child, ijk, acc ); \ +} \ +\ +static bool \ +cnanovdb_rootdata##SUFFIX##_isActive(const CNANOVDB_GLOBAL cnanovdb_rootdata##SUFFIX *RESTRICT rootdata, const cnanovdb_coord *RESTRICT ijk) \ +{ \ + const CNANOVDB_GLOBAL cnanovdb_rootdata_tile##SUFFIX *tile = cnanovdb_rootdata##SUFFIX##_findTile(rootdata, ijk); \ + if (!tile) \ + return false; \ + if (tile->child == 0) \ + return tile->state; \ + return cnanovdb_node2##SUFFIX##_isActive( cnanovdb_rootdata##SUFFIX##_getChild(rootdata, tile), ijk ); \ +} \ + \ +static bool \ +cnanovdb_rootdata##SUFFIX##_isActiveAndCache(const CNANOVDB_GLOBAL cnanovdb_rootdata##SUFFIX *RESTRICT rootdata, const cnanovdb_coord *RESTRICT ijk, cnanovdb_readaccessor *RESTRICT acc) \ +{ \ + const CNANOVDB_GLOBAL cnanovdb_rootdata_tile##SUFFIX *tile = cnanovdb_rootdata##SUFFIX##_findTile(rootdata, ijk); \ + if (!tile) \ + return false; \ + if (tile->child == 0) \ + return tile->state; \ + const CNANOVDB_GLOBAL cnanovdb_node2##SUFFIX *child = cnanovdb_rootdata##SUFFIX##_getChild(rootdata, tile); \ + cnanovdb_readaccessor_insert(acc, 2, child, ijk); \ + return cnanovdb_node2##SUFFIX##_isActiveAndCache( child, ijk, acc ); \ +} \ +/**/ + + +inline void +cnanovdb_readaccessor_init(cnanovdb_readaccessor *RESTRICT acc, + const CNANOVDB_GLOBAL void /*cnanovdb_rootdata* */ *RESTRICT rootdata) +{ + acc->mNode[0] = acc->mNode[1] = acc->mNode[2] = 0; + acc->mNode[3] = rootdata; +} + +#define DEFINE_ISCACHED(LEVEL, MASK) \ +inline bool \ +cnanovdb_readaccessor_isCached##LEVEL(cnanovdb_readaccessor *RESTRICT acc, int32_t dirty) \ +{ \ + if (!acc->mNode[LEVEL]) \ + return false; \ + if (dirty & ~MASK) \ + { \ + acc->mNode[LEVEL] = 0; \ + return false; \ + } \ + return true; \ +} \ +/**/ + +DEFINE_ISCACHED(0, ((1u << 3) - 1u) ) +DEFINE_ISCACHED(1, ((1u << 7) - 1u) ) +DEFINE_ISCACHED(2, ((1u << 12) - 1u) ) + +inline int32_t +cnanovdb_readaccessor_computeDirty(const cnanovdb_readaccessor *RESTRICT acc, const cnanovdb_coord *RESTRICT ijk) +{ + return (ijk->mVec[0] ^ acc->mKey.mVec[0]) | + (ijk->mVec[1] ^ acc->mKey.mVec[1]) | + (ijk->mVec[2] ^ acc->mKey.mVec[2]); 
+} + +#define CREATE_ACCESSOR(VALUETYPE, SUFFIX) \ +inline VALUETYPE \ +cnanovdb_readaccessor_getValue##SUFFIX(cnanovdb_readaccessor *RESTRICT acc, const cnanovdb_coord *RESTRICT ijk) \ +{ \ + int32_t dirty = cnanovdb_readaccessor_computeDirty(acc, ijk); \ + \ + if (cnanovdb_readaccessor_isCached0(acc, dirty)) \ + return cnanovdb_node0##SUFFIX##_getValue( ((CNANOVDB_GLOBAL cnanovdb_node0##SUFFIX *) acc->mNode[0]), ijk); \ + if (cnanovdb_readaccessor_isCached1(acc, dirty)) \ + return cnanovdb_node1##SUFFIX##_getValueAndCache( ((CNANOVDB_GLOBAL cnanovdb_node1##SUFFIX *) acc->mNode[1]), ijk, acc); \ + if (cnanovdb_readaccessor_isCached2(acc, dirty)) \ + return cnanovdb_node2##SUFFIX##_getValueAndCache( ((CNANOVDB_GLOBAL cnanovdb_node2##SUFFIX *) acc->mNode[2]), ijk, acc); \ + \ + return cnanovdb_rootdata##SUFFIX##_getValueAndCache( ((CNANOVDB_GLOBAL cnanovdb_rootdata##SUFFIX *)acc->mNode[3]), ijk, acc); \ +} \ +\ +inline bool \ +cnanovdb_readaccessor_isActive##SUFFIX(cnanovdb_readaccessor *RESTRICT acc, const cnanovdb_coord *RESTRICT ijk) \ +{ \ + int32_t dirty = cnanovdb_readaccessor_computeDirty(acc, ijk); \ + \ + if (cnanovdb_readaccessor_isCached0(acc, dirty)) \ + return cnanovdb_node0##SUFFIX##_isActive( ((CNANOVDB_GLOBAL cnanovdb_node0##SUFFIX *) acc->mNode[0]), ijk); \ + if (cnanovdb_readaccessor_isCached1(acc, dirty)) \ + return cnanovdb_node1##SUFFIX##_isActiveAndCache( ((CNANOVDB_GLOBAL cnanovdb_node1##SUFFIX *) acc->mNode[1]), ijk, acc); \ + if (cnanovdb_readaccessor_isCached2(acc, dirty)) \ + return cnanovdb_node2##SUFFIX##_isActiveAndCache( ((CNANOVDB_GLOBAL cnanovdb_node2##SUFFIX *) acc->mNode[2]), ijk, acc); \ + \ + return cnanovdb_rootdata##SUFFIX##_isActiveAndCache( ((CNANOVDB_GLOBAL cnanovdb_rootdata##SUFFIX *)acc->mNode[3]), ijk, acc); \ +} \ +/**/ + + +#define CREATE_GRIDTYPE(VALUETYPE, STATSTYPE, SUFFIX) \ +CREATE_TILEENTRY(VALUETYPE, SUFFIX) \ +CREATE_LEAF_NODE(0, 3, 3, VALUETYPE, STATSTYPE, SUFFIX) \ +CREATE_INTERNAL_NODE(0, 1, 4, 7, VALUETYPE, STATSTYPE, SUFFIX) \ +CREATE_INTERNAL_NODE(1, 2, 5, 12, VALUETYPE, STATSTYPE, SUFFIX) \ +CREATE_ROOTDATA(VALUETYPE, STATSTYPE, SUFFIX) \ +CREATE_ACCESSOR(VALUETYPE, SUFFIX) \ +/**/ + +CREATE_GRIDTYPE(float, float, F) +CREATE_GRIDTYPE(cnanovdb_Vec3F, float, F3) + +static int +cnanovdb_griddata_valid(const CNANOVDB_GLOBAL cnanovdb_griddata *RESTRICT grid) +{ + if (!grid) + return 0; + if (grid->mMagic != 0x304244566f6e614eUL && grid->mMagic != 0x314244566f6e614eUL) + return 0; + return 1; +} + +static int +cnanovdb_griddata_validF(const CNANOVDB_GLOBAL cnanovdb_griddata *RESTRICT grid) +{ + if (!cnanovdb_griddata_valid(grid)) + return 0; + if (grid->mGridType != cnanovdb_GridType_Float) + return 0; + return 1; +} + +static int +cnanovdb_griddata_validF3(const CNANOVDB_GLOBAL cnanovdb_griddata *RESTRICT grid) +{ + if (!cnanovdb_griddata_valid(grid)) + return 0; + if (grid->mGridType != cnanovdb_GridType_Vec3f) + return 0; + return 1; +} + +#endif diff --git a/external/nanovdb/GridHandle.h b/external/nanovdb/GridHandle.h new file mode 100644 index 00000000..05e49204 --- /dev/null +++ b/external/nanovdb/GridHandle.h @@ -0,0 +1,493 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +/*! + \file nanovdb/GridHandle.h + + \author Ken Museth + + \date January 8, 2020 + + \brief Defines GridHandle, which manages a host, and possibly a device, + memory buffer containing one or more NanoVDB grids. 
+*/ + +#ifndef NANOVDB_GRID_HANDLE_H_HAS_BEEN_INCLUDED +#define NANOVDB_GRID_HANDLE_H_HAS_BEEN_INCLUDED + +#include // for std::ifstream +#include // for std::cerr/cout +#include +#include + +#include // for toGridType +#include +#include // for updateGridCount + +namespace nanovdb { + +// --------------------------> GridHandle <------------------------------------ + +struct GridHandleMetaData {uint64_t offset, size; GridType gridType;}; + +/// @brief This class serves to manage a buffer containing one or more NanoVDB Grids. +/// +/// @note It is important to note that this class does NOT depend on OpenVDB. +template +class GridHandle +{ + std::vector mMetaData; + BufferT mBuffer; + + template + static T* no_const(const T* ptr) { return const_cast(ptr); } + +public: + using BufferType = BufferT; + + /// @brief Move constructor from a host buffer + /// @param buffer buffer containing one or more NanoGrids that will be moved into this GridHandle + /// @throw Will throw and error with the buffer does not contain a valid NanoGrid! + template::hasDeviceDual, int>::type = 0> + GridHandle(T&& buffer); + + /// @brief Move constructor from a dual host-device buffer + /// @param buffer buffer containing one or more NanoGrids that will be moved into this GridHandle + /// @throw Will throw and error with the buffer does not contain a valid NanoGrid! + template::hasDeviceDual, int>::type = 0> + GridHandle(T&& buffer); + + /// @brief Constructs an empty GridHandle + GridHandle() = default; + + /// @brief Disallow copy-construction + GridHandle(const GridHandle&) = delete; + + /// @brief Move copy-constructor + GridHandle(GridHandle&& other) noexcept { + mBuffer = std::move(other.mBuffer); + mMetaData = std::move(other.mMetaData); + } + + /// @brief clear this GridHandle to an empty handle + void reset() { + mBuffer.clear(); + mMetaData.clear(); + } + + /// @brief Disallow copy assignment operation + GridHandle& operator=(const GridHandle&) = delete; + + /// @brief Move copy assignment operation + GridHandle& operator=(GridHandle&& other) noexcept { + mBuffer = std::move(other.mBuffer); + mMetaData = std::move(other.mMetaData); + return *this; + } + + /// @brief Performs a deep copy of the GridHandle, possibly templated on a different buffer type + /// @tparam OtherBufferT Buffer type of the deep copy + /// @param buffer optional buffer used for allocation + /// @return A new handle of the specified buffer type that contains a deep copy of the current handle + template + GridHandle copy(const OtherBufferT& buffer = OtherBufferT()) const; + + /// @brief Return a reference to the buffer + BufferT& buffer() { return mBuffer; } + + /// @brief Return a const reference to the buffer + const BufferT& buffer() const { return mBuffer; } + + /// @brief Returns a non-const pointer to the data. + /// @warning Note that the return pointer can be NULL if the GridHandle was not initialized + void* data() { return mBuffer.data(); } + + /// @brief Returns a const pointer to the data. + /// @warning Note that the return pointer can be NULL if the GridHandle was not initialized + const void* data() const { return mBuffer.data(); } + + template + typename util::enable_if::hasDeviceDual, const void*>::type + deviceData() const { return mBuffer.deviceData(); } + template + typename util::enable_if::hasDeviceDual, void*>::type + deviceData() { return mBuffer.deviceData(); } + + /// @brief Returns the size in bytes of the raw memory buffer managed by this GridHandle. 
+ uint64_t size() const { return mBuffer.size(); } + + //@{ + /// @brief Return true if this handle is empty, i.e. has no allocated memory + bool empty() const { return this->size() == 0; } + bool isEmpty() const { return this->size() == 0; } + //@} + + /// @brief Return true if this handle contains any grids + operator bool() const { return !this->empty(); } + + /// @brief Returns a const host pointer to the @a n'th NanoVDB grid encoded in this GridHandle. + /// @tparam ValueT Value type of the grid point to be returned + /// @param n Index of the (host) grid pointer to be returned + /// @warning Note that the return pointer can be NULL if the GridHandle has no host grid, @a n is invalid + /// or if the template parameter does not match the specified grid! + template + const NanoGrid* grid(uint32_t n = 0) const; + + /// @brief Returns a host pointer to the @a n'th NanoVDB grid encoded in this GridHandle. + /// @tparam ValueT Value type of the grid point to be returned + /// @param n Index of the (host) grid pointer to be returned + /// @warning Note that the return pointer can be NULL if the GridHandle has no host grid, @a n is invalid + /// or if the template parameter does not match the specified grid! + template + NanoGrid* grid(uint32_t n = 0) {return const_cast*>(static_cast(this)->template grid(n));} + + /// @brief Return a const pointer to the @a n'th grid encoded in this GridHandle on the device, e.g. GPU + /// @tparam ValueT Value type of the grid point to be returned + /// @param n Index of the (device) grid pointer to be returned + /// @warning Note that the return pointer can be NULL if the GridHandle has no device grid, @a n is invalid, + /// or if the template parameter does not match the specified grid. + template + typename util::enable_if::hasDeviceDual, const NanoGrid*>::type + deviceGrid(uint32_t n=0) const; + + /// @brief Return a const pointer to the @a n'th grid encoded in this GridHandle on the device, e.g. GPU + /// @tparam ValueT Value type of the grid point to be returned + /// @param n Index of the grid pointer to be returned + /// @param verbose if non-zero error messages will be printed in case something failed + /// @warning Note that the return pointer can be NULL if the GridHandle was not initialized, @a n is invalid, + /// or if the template parameter does not match the specified grid. + template + typename util::enable_if::hasDeviceDual, NanoGrid*>::type + deviceGrid(uint32_t n=0){return const_cast*>(static_cast(this)->template deviceGrid(n));} + + /// @brief Upload the grid to the device, e.g. from CPU to GPU + /// @note This method is only available if the buffer supports devices + template + typename util::enable_if::hasDeviceDual, void>::type + deviceUpload(void* stream = nullptr, bool sync = true) { mBuffer.deviceUpload(stream, sync); } + + /// @brief Download the grid from the device, e.g. from GPU to CPU + /// @note This method is only available if the buffer supports devices + template + typename util::enable_if::hasDeviceDual, void>::type + deviceDownload(void* stream = nullptr, bool sync = true) { mBuffer.deviceDownload(stream, sync); } + + /// @brief Check if the buffer in this handle has any padding, i.e. if the buffer is larger than the combined size of all its grids + /// @return true if the combined size of all grids is smaller than the buffer size + bool isPadded() const {return mMetaData.empty() ?
false : mMetaData.back().offset + mMetaData.back().size != mBuffer.size();} + + /// @brief Return the total number of grids contained in this buffer + uint32_t gridCount() const {return static_cast(mMetaData.size());} + + /// @brief Return the grid size of the @a n'th grid in this GridHandle + /// @param n index of the grid (assumed to be less than gridCount()) + /// @return Return the byte size of the specified grid + uint64_t gridSize(uint32_t n = 0) const {return mMetaData[n].size; } + + /// @brief Return the GridType of the @a n'th grid in this GridHandle + /// @param n index of the grid (assumed to be less than gridCount()) + /// @return Return the GridType of the specified grid + GridType gridType(uint32_t n = 0) const {return mMetaData[n].gridType; } + + /// @brief Access to the GridData of the n'th grid in the current handle + /// @param n zero-based ID of the grid + /// @return Const pointer to the n'th GridData in the current handle + const GridData* gridData(uint32_t n = 0) const; + + /// @brief Returns a const point to the @a n'th grid meta data + /// @param n zero-based ID of the grid + /// @warning Note that the return pointer can be NULL if the GridHandle was not initialized + const GridMetaData* gridMetaData(uint32_t n = 0) const; + + /// @brief Write a specific grid in this buffer to an output stream + /// @param os output stream that the buffer will be written to + /// @param n zero-based index of the grid to be written to stream + void write(std::ostream& os, uint32_t n) const { + if (const GridData* data = this->gridData(n)) { + os.write((const char*)data, data->mGridSize); + } else { + throw std::runtime_error("GridHandle does not contain a #" + std::to_string(n) + " grid"); + } + } + + /// @brief Write the entire grid buffer to an output stream + /// @param os output stream that the buffer will be written to + void write(std::ostream& os) const { + for (uint32_t n=0; ngridCount(); ++n) this->write(os, n); + } + + /// @brief Write this entire grid buffer to a file + /// @param fileName string name of the output file + void write(const std::string &fileName) const { + std::ofstream os(fileName, std::ios::out | std::ios::binary | std::ios::trunc); + if (!os.is_open()) throw std::ios_base::failure("Unable to open file named \"" + fileName + "\" for output"); + this->write(os); + } + + /// @brief Write a specific grid to file + /// @param fileName string name of the output file + /// @param n zero-based index of the grid to be written to file + void write(const std::string &fileName, uint32_t n) const { + std::ofstream os(fileName, std::ios::out | std::ios::binary | std::ios::trunc); + if (!os.is_open()) throw std::ios_base::failure("Unable to open file named \"" + fileName + "\" for output"); + this->write(os, n); + } + + /// @brief Read an entire raw grid buffer from an input stream + /// @param is input stream containing a raw grid buffer + /// @param pool optional pool from which to allocate the new grid buffer + /// @throw Will throw a std::logic_error if the stream does not contain a valid raw grid + void read(std::istream& is, const BufferT& pool = BufferT()); + + /// @brief Read a specific grid from an input stream containing a raw grid buffer + /// @param is input stream containing a raw grid buffer + /// @param n zero-based index of the grid to be read + /// @param pool optional pool from which to allocate the new grid buffer + /// @throw Will throw a std::logic_error if the stream does not contain a valid raw grid + void read(std::istream& is, uint32_t n, const 
BufferT& pool = BufferT()); + + /// @brief Read a specific grid from an input stream containing a raw grid buffer + /// @param is input stream containing a raw grid buffer + /// @param gridName string name of the grid to be read + /// @param pool optional pool from which to allocate the new grid buffer + /// @throw Will throw a std::logic_error if the stream does not contain a valid raw grid with the specified name + void read(std::istream& is, const std::string &gridName, const BufferT& pool = BufferT()); + + /// @brief Read a raw grid buffer from a file + /// @param filename string name of the input file containing a raw grid buffer + /// @param pool optional pool from which to allocate the new grid buffer + void read(const std::string &fileName, const BufferT& pool = BufferT()) { + std::ifstream is(fileName, std::ios::in | std::ios::binary); + if (!is.is_open()) throw std::ios_base::failure("Unable to open file named \"" + fileName + "\" for input"); + this->read(is, pool); + } + + /// @brief Read a specific grid from a file containing a raw grid buffer + /// @param filename string name of the input file containing a raw grid buffer + /// @param n zero-based index of the grid to be read + /// @param pool optional pool from which to allocate the new grid buffer + /// @throw Will throw a std::ios_base::failure if the file does not exist and a + /// std::logic_error if the file does not contain a valid raw grid + void read(const std::string &fileName, uint32_t n, const BufferT& pool = BufferT()) { + std::ifstream is(fileName, std::ios::in | std::ios::binary); + if (!is.is_open()) throw std::ios_base::failure("Unable to open file named \"" + fileName + "\" for input"); + this->read(is, n, pool); + } + + /// @brief Read a specific grid from a file containing a raw grid buffer + /// @param filename string name of the input file containing a raw grid buffer + /// @param gridName string name of the grid to be read + /// @param pool optional pool from which to allocate the new grid buffer + /// @throw Will throw a std::ios_base::failure if the file does not exist and a + /// std::logic_error if the file does not contain a valid raw grid with the specified name + void read(const std::string &fileName, const std::string &gridName, const BufferT& pool = BufferT()) { + std::ifstream is(fileName, std::ios::in | std::ios::binary); + if (!is.is_open()) throw std::ios_base::failure("Unable to open file named \"" + fileName + "\" for input"); + this->read(is, gridName, pool); + } +}; // GridHandle + +// --------------------------> Implementation of private methods in GridHandle <------------------------------------ + +template +inline const GridData* GridHandle::gridData(uint32_t n) const +{ + const void *data = this->data(); + if (data == nullptr || n >= mMetaData.size()) return nullptr; + return util::PtrAdd(data, mMetaData[n].offset); +}// const GridData* GridHandle::gridData(uint32_t n) const + +template +inline const GridMetaData* GridHandle::gridMetaData(uint32_t n) const +{ + const auto *data = this->data(); + if (data == nullptr || n >= mMetaData.size()) return nullptr; + return util::PtrAdd(data, mMetaData[n].offset); +}// const GridMetaData* GridHandle::gridMetaData(uint32_t n) const + +inline __hostdev__ void cpyGridHandleMeta(const GridData *data, GridHandleMetaData *meta) +{ + uint64_t offset = 0; + for (auto *p=meta, *q=p+data->mGridCount; p!=q; ++p) { + *p = {offset, data->mGridSize, data->mGridType}; + offset += p->size; + data = util::PtrAdd(data, p->size); + } +}// void
cpyGridHandleMeta(const GridData *data, GridHandleMetaData *meta) + +template +template::hasDeviceDual, int>::type> +GridHandle::GridHandle(T&& buffer) +{ + static_assert(util::is_same::value, "Expected U==BufferT"); + mBuffer = std::move(buffer); + if (auto *data = reinterpret_cast(mBuffer.data())) { + if (!data->isValid()) throw std::runtime_error("GridHandle was constructed with an invalid host buffer"); + mMetaData.resize(data->mGridCount); + cpyGridHandleMeta(data, mMetaData.data()); + } +}// GridHandle::GridHandle(T&& buffer) + +template +template +inline GridHandle GridHandle::copy(const OtherBufferT& other) const +{ + if (mBuffer.isEmpty()) return GridHandle();// return an empty handle + auto buffer = OtherBufferT::create(mBuffer.size(), &other); + std::memcpy(buffer.data(), mBuffer.data(), mBuffer.size());// deep copy of buffer + return GridHandle(std::move(buffer)); +}// GridHandle GridHandle::copy(const OtherBufferT& other) const + +template +template +inline const NanoGrid* GridHandle::grid(uint32_t n) const +{ + const void *data = mBuffer.data(); + if (data == nullptr || n >= mMetaData.size() || mMetaData[n].gridType != toGridType()) return nullptr; + return util::PtrAdd>(data, mMetaData[n].offset); +}// const NanoGrid* GridHandle::grid(uint32_t n) const + +template +template +inline typename util::enable_if::hasDeviceDual, const NanoGrid*>::type +GridHandle::deviceGrid(uint32_t n) const +{ + const void *data = mBuffer.deviceData(); + if (data == nullptr || n >= mMetaData.size() || mMetaData[n].gridType != toGridType()) return nullptr; + return util::PtrAdd>(data, mMetaData[n].offset); +}// GridHandle::deviceGrid(uint32_t n) cons + +template +void GridHandle::read(std::istream& is, const BufferT& pool) +{ + GridData data; + is.read((char*)&data, sizeof(GridData)); + if (data.isValid()) { + uint64_t size = data.mGridSize, sum = 0u; + while(data.mGridIndex + 1u < data.mGridCount) {// loop over remaining raw grids in stream + is.seekg(data.mGridSize - sizeof(GridData), std::ios::cur);// skip grid + is.read((char*)&data, sizeof(GridData)); + sum += data.mGridSize; + } + auto buffer = BufferT::create(size + sum, &pool); + is.seekg(-int64_t(sum + sizeof(GridData)), std::ios::cur);// rewind to start + is.read((char*)(buffer.data()), buffer.size()); + *this = GridHandle(std::move(buffer)); + } else { + is.seekg(-sizeof(GridData), std::ios::cur);// rewind + throw std::logic_error("This stream does not contain a valid raw grid buffer"); + } +}// void GridHandle::read(std::istream& is, const BufferT& pool) + +template +void GridHandle::read(std::istream& is, uint32_t n, const BufferT& pool) +{ + GridData data; + is.read((char*)&data, sizeof(GridData)); + if (data.isValid()) { + if (n>=data.mGridCount) throw std::runtime_error("stream does not contain a #" + std::to_string(n) + " grid"); + while(data.mGridIndex != n) { + is.seekg(data.mGridSize - sizeof(GridData), std::ios::cur);// skip grid + is.read((char*)&data, sizeof(GridData)); + } + auto buffer = BufferT::create(data.mGridSize, &pool); + is.seekg(-sizeof(GridData), std::ios::cur);// rewind + is.read((char*)(buffer.data()), data.mGridSize); + tools::updateGridCount((GridData*)buffer.data(), 0u, 1u); + *this = GridHandle(std::move(buffer)); + } else { + is.seekg(-sizeof(GridData), std::ios::cur);// rewind sizeof(GridData) bytes to undo initial read + throw std::logic_error("This file does not contain a valid raw buffer"); + } +}// void GridHandle::read(std::istream& is, uint32_t n, const BufferT& pool) + +template +void 
GridHandle::read(std::istream& is, const std::string &gridName, const BufferT& pool) +{ + static const std::streamsize byteSize = sizeof(GridData); + GridData data; + is.read((char*)&data, byteSize); + is.seekg(-byteSize, std::ios::cur);// rewind + if (data.isValid()) { + uint32_t n = 0; + while(data.mGridName != gridName && n++ < data.mGridCount) { + is.seekg(data.mGridSize, std::ios::cur);// skip grid + is.read((char*)&data, byteSize);// read sizeof(GridData) bytes + is.seekg(-byteSize, std::ios::cur);// rewind + } + if (n>data.mGridCount) throw std::runtime_error("No raw grid named \""+gridName+"\""); + auto buffer = BufferT::create(data.mGridSize, &pool); + is.read((char*)(buffer.data()), data.mGridSize); + tools::updateGridCount((GridData*)buffer.data(), 0u, 1u); + *this = GridHandle(std::move(buffer)); + } else { + throw std::logic_error("This file does not contain a valid raw buffer"); + } +}// void GridHandle::read(std::istream& is, const std::string &gridName n, const BufferT& pool) + +// --------------------------> free-standing functions <------------------------------------ + +/// @brief Split all grids in a single GridHandle into a vector of multiple GridHandles each with a single grid +/// @tparam BufferT Type of the input and output grid buffers +/// @param handle GridHandle with grids that will be slip into individual GridHandles +/// @param pool optional pool used for allocation of output GridHandle +/// @return Vector of GridHandles each containing a single grid +template class VectorT = std::vector> +inline VectorT> +splitGrids(const GridHandle &handle, const BufferT* other = nullptr) +{ + using HandleT = GridHandle; + const void *ptr = handle.data(); + if (ptr == nullptr) return VectorT(); + VectorT handles(handle.gridCount()); + for (auto &h : handles) { + const GridData *src = reinterpret_cast(ptr); + NANOVDB_ASSERT(src->isValid()); + auto buffer = BufferT::create(src->mGridSize, other); + GridData *dst = reinterpret_cast(buffer.data()); + std::memcpy(dst, src, src->mGridSize); + tools::updateGridCount(dst, 0u, 1u); + h = HandleT(std::move(buffer)); + ptr = util::PtrAdd(ptr, src->mGridSize); + } + return std::move(handles); +}// splitGrids + +/// @brief Combines (or merges) multiple GridHandles into a single GridHandle containing all grids +/// @tparam BufferT Type of the input and output grid buffers +/// @param handles Vector of GridHandles to be combined +/// @param pool optional pool used for allocation of output GridHandle +/// @return single GridHandle containing all input grids +template class VectorT> +inline GridHandle +mergeGrids(const VectorT> &handles, const BufferT* pool = nullptr) +{ + uint64_t size = 0u; + uint32_t counter = 0u, gridCount = 0u; + for (auto &h : handles) { + gridCount += h.gridCount(); + for (uint32_t n=0; n(dst); + NANOVDB_ASSERT(data->isValid()); + tools::updateGridCount(data, counter++, gridCount); + dst = util::PtrAdd(dst, data->mGridSize); + src = util::PtrAdd(src, data->mGridSize); + } + } + return GridHandle(std::move(buffer)); +}// mergeGrids + +} // namespace nanovdb + +#if defined(__CUDACC__) +#include +#endif// defined(__CUDACC__) + +#endif // NANOVDB_GRID_HANDLE_H_HAS_BEEN_INCLUDED diff --git a/external/nanovdb/HostBuffer.h b/external/nanovdb/HostBuffer.h new file mode 100644 index 00000000..70c9ce0f --- /dev/null +++ b/external/nanovdb/HostBuffer.h @@ -0,0 +1,590 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +/*! 
+ @file nanovdb/HostBuffer.h + + @date April 20, 2021 + + @brief HostBuffer - a buffer that contains a shared or private bump + pool to either externally or internally managed host memory. + + @details This HostBuffer can be used in multiple ways, most of which are + demonstrated in the examples below. Memory in the pool can + be managed or unmanged (e.g. internal or external) and can + be shared between multiple buffers or belong to a single buffer. + + Example that uses HostBuffer::create inside io::readGrids to create a + full self-managed buffer, i.e. not shared and without padding, per grid in the file. + @code + auto handles = nanovdb::io::readGrids("file.nvdb"); + @endcode + + Example that uses HostBuffer::createFull. Assuming you have a raw pointer + to a NanoVDB grid of unknown type, this examples shows how to create its + GridHandle which can be used to enquire about the grid type and meta data. + @code + void *data;// pointer to a NanoVDB grid of unknown type + uint64_t size;// byte size of NanoVDB grid of unknown type + auto buffer = nanovdb::HostBuffer::createFull(size, data); + nanovdb::GridHandle<> gridHandle(std::move(buffer)); + @endcode + + Example that uses HostBuffer::createPool for internally managed host memory. + Suppose you want to read multiple grids in multiple files, but reuse the same + fixed sized memory buffer to both avoid memory fragmentation as well as + exceeding the fixed memory ceiling! + @code + auto pool = nanovdb::HostBuffer::createPool(1 << 30);// 1 GB memory pool + std::vector> frames;// vector of grid names + for (int i=0; i array(new char[size + NANOVDB_DATA_ALIGNMENT]);// scoped pool of 1 GB with padding + void *buffer = nanovdb::alignPtr(array.get());// 32B aligned buffer + auto pool = nanovdb::HostBuffer::createPool(poolSize, buffer); + auto handles = nanovdb::io::readGrids("file.nvdb", 0, pool); + @endcode +*/ + +#ifndef NANOVDB_HOSTBUFFER_H_HAS_BEEN_INCLUDED +#define NANOVDB_HOSTBUFFER_H_HAS_BEEN_INCLUDED + +#include // for NANOVDB_DATA_ALIGNMENT; +#include // for types like int32_t etc +#include // for fprintf +#include // for std::malloc/std::realloc/std::free +#include // for std::make_shared +#include // for std::mutex +#include // for std::unordered_set +#include // for assert +#include // for std::stringstream +#include // for memcpy + +#define checkPtr(ptr, msg) \ + { \ + ptrAssert((ptr), (msg), __FILE__, __LINE__); \ + } + +namespace nanovdb { + +template +struct BufferTraits +{ + static constexpr bool hasDeviceDual = false; +}; + +// ----------------------------> HostBuffer <-------------------------------------- + +/// @brief This is a buffer that contains a shared or private pool +/// to either externally or internally managed host memory. +/// +/// @note Terminology: +/// Pool: 0 = buffer.size() < buffer.poolSize() +/// Buffer: 0 < buffer.size() < buffer.poolSize() +/// Full: 0 < buffer.size() = buffer.poolSize() +/// Empty: 0 = buffer.size() = buffer.poolSize() +class HostBuffer +{ + struct Pool;// forward declaration of private pool struct + std::shared_ptr mPool; + uint64_t mSize; // total number of bytes for the NanoVDB grid. + void* mData; // raw buffer for the NanoVDB grid. 
+ +#if defined(DEBUG) || defined(_DEBUG) + static inline void ptrAssert(void* ptr, const char* msg, const char* file, int line, bool abort = true) + { + if (ptr == nullptr) { + fprintf(stderr, "NULL pointer error: %s %s %d\n", msg, file, line); + if (abort) + exit(1); + } + if (uint64_t(ptr) % NANOVDB_DATA_ALIGNMENT) { + fprintf(stderr, "Alignment pointer error: %s %s %d\n", msg, file, line); + if (abort) + exit(1); + } + } +#else + static inline void ptrAssert(void*, const char*, const char*, int, bool = true) + { + } +#endif + +public: + /// @brief Return a full buffer or an empty buffer + HostBuffer(uint64_t bufferSize = 0); + + /// @brief Move copy-constructor + HostBuffer(HostBuffer&& other); + + /// @brief Custom descructor + ~HostBuffer() { this->clear(); } + + /// @brief Move copy assignment operation + HostBuffer& operator=(HostBuffer&& other); + + /// @brief Disallow copy-construction + HostBuffer(const HostBuffer&) = delete; + + /// @brief Disallow copy assignment operation + HostBuffer& operator=(const HostBuffer&) = delete; + + /// @brief Return a pool buffer which satisfies: buffer.size == 0, + /// buffer.poolSize() == poolSize, and buffer.data() == nullptr. + /// If data==nullptr, memory for the pool will be allocated. + /// + /// @throw If poolSize is zero. + static HostBuffer createPool(uint64_t poolSize, void *data = nullptr); + + /// @brief Return a full buffer which satisfies: buffer.size == bufferSize, + /// buffer.poolSize() == bufferSize, and buffer.data() == data. + /// If data==nullptr, memory for the pool will be allocated. + /// + /// @throw If bufferSize is zero. + static HostBuffer createFull(uint64_t bufferSize, void *data = nullptr); + + /// @brief Return a buffer with @c bufferSize bytes managed by + /// the specified memory @c pool. If none is provided, i.e. + /// @c pool == nullptr or @c pool->poolSize() == 0, one is + /// created with size @c bufferSize, i.e. a full buffer is returned. + /// + /// @throw If the specified @c pool has insufficient memory for + /// the requested buffer size. + static HostBuffer create(uint64_t bufferSize, const HostBuffer* pool = nullptr); + + /// @brief Initialize as a full buffer with the specified size. If data is NULL + /// the memory is internally allocated. + void init(uint64_t bufferSize, void *data = nullptr); + + //@{ + /// @brief Retuns a pointer to the raw memory buffer managed by this allocator. + /// + /// @warning Note that the pointer can be NULL if the allocator was not initialized! + const void* data() const { return mData; } + void* data() { return mData; } + //@} + + //@{ + /// @brief Returns the size in bytes associated with this buffer. + uint64_t bufferSize() const { return mSize; } + uint64_t size() const { return this->bufferSize(); } + //@} + + /// @brief Returns the size in bytes of the memory pool shared with this instance. + uint64_t poolSize() const; + + /// @brief Return true if memory is managed (using std::malloc and std:free) by the + /// shared pool in this buffer. Else memory is assumed to be managed externally. + bool isManaged() const; + + //@{ + /// @brief Returns true if this buffer has no memory associated with it + bool isEmpty() const { return !mPool || mSize == 0 || mData == nullptr; } + bool empty() const { return this->isEmpty(); } + //@} + + /// @brief Return true if this is a pool, i.e. an empty buffer with a nonempty + /// internal pool, i.e. 
this->size() == 0 and this->poolSize() != 0 + bool isPool() const { return mSize == 0 && this->poolSize() > 0; } + + /// @brief Return true if the pool exists, is nonempty but has no more available memory + bool isFull() const; + + /// @brief Clear this buffer so it is empty. + void clear(); + + /// @brief Clears all existing buffers that are registered against the memory pool + /// and resets the pool so it can be reused to create new buffers. + /// + /// @throw If this instance is not empty or contains no pool. + /// + /// @warning This method is not thread-safe! + void reset(); + + /// @brief Total number of bytes from the pool currently in use by buffers + uint64_t poolUsage() const; + + /// @brief resize the pool size. It will attempt to resize the existing + /// memory block, but if that fails a deep copy is performed. + /// If @c data is not NULL it will be used as new externally + /// managed memory for the pool. All registered buffers are + /// updated so GridHandle::grid might return a new address (if + /// deep copy was performed). + /// + /// @note This method can be use to resize the memory pool and even + /// change it from internally to externally managed memory or vice versa. + /// + /// @throw if @c poolSize is less than this->poolUsage() the used memory + /// or allocations fail. + void resizePool(uint64_t poolSize, void *data = nullptr); + +}; // HostBuffer class + +// --------------------------> Implementation of HostBuffer::Pool <------------------------------------ + +// This is private struct of HostBuffer so you can safely ignore the API +struct HostBuffer::Pool +{ + using HashTableT = std::unordered_set; + std::mutex mMutex; // mutex for updating mRegister and mFree + HashTableT mRegister; + void *mData, *mFree; + uint64_t mSize, mPadding; + bool mManaged; + + /// @brief External memory ctor + Pool(uint64_t size = 0, void* data = nullptr) + : mData(data) + , mFree(mData) + , mSize(size) + , mPadding(0) + , mManaged(data == nullptr) + { + if (mManaged) { + mData = Pool::alloc(mSize); + if (mData == nullptr) throw std::runtime_error("Pool::Pool malloc failed"); + } + mPadding = alignmentPadding(mData); + if (!mManaged && mPadding != 0) { + throw std::runtime_error("Pool::Pool: external memory buffer is not aligned to " + + std::to_string(NANOVDB_DATA_ALIGNMENT) + + " bytes.\nHint: use nanovdb::alignPtr or std::aligned_alloc (C++17 only)"); + } + mFree = util::PtrAdd(mData, mPadding); + } + + /// @brief Custom destructor + ~Pool() + { + assert(mRegister.empty()); + if (mManaged) std::free(mData); + } + + /// @brief Disallow copy-construction + Pool(const Pool&) = delete; + + /// @brief Disallow move-construction + Pool(const Pool&&) = delete; + + /// @brief Disallow copy assignment operation + Pool& operator=(const Pool&) = delete; + + /// @brief Disallow move assignment operation + Pool& operator=(const Pool&&) = delete; + + /// @brief Return the total number of bytes used from this Pool by buffers + uint64_t usage() const { return util::PtrDiff(mFree, mData) - mPadding; } + + /// @brief Allocate a buffer of the specified size and add it to the register + void add(HostBuffer* buffer, uint64_t size) + { + void *alignedFree = util::PtrAdd(mFree, alignmentPadding(mFree)); + + if (util::PtrAdd(alignedFree, size) > util::PtrAdd(mData, mPadding + mSize)) { + std::stringstream ss; + ss << "HostBuffer::Pool: insufficient memory\n" + << "\tA buffer requested " << size << " bytes with " << NANOVDB_DATA_ALIGNMENT + << "-bytes alignment from a pool with " + << mSize << " bytes 
of which\n\t" << (util::PtrDiff(alignedFree, mData) - mPadding) + << " bytes are used by " << mRegister.size() << " other buffer(s). " + << "Pool is " << (mManaged ? "internally" : "externally") << " managed.\n"; + //std::cerr << ss.str(); + throw std::runtime_error(ss.str()); + } + buffer->mSize = size; + const std::lock_guard lock(mMutex); + mRegister.insert(buffer); + buffer->mData = alignedFree; + mFree = util::PtrAdd(alignedFree, size); + } + + /// @brief Remove the specified buffer from the register + void remove(HostBuffer *buffer) + { + const std::lock_guard lock(mMutex); + mRegister.erase(buffer); + } + + /// @brief Replaces buffer1 with buffer2 in the register + void replace(HostBuffer *buffer1, HostBuffer *buffer2) + { + const std::lock_guard lock(mMutex); + mRegister.erase( buffer1); + mRegister.insert(buffer2); + } + + /// @brief Reset the register and all its buffers + void reset() + { + for (HostBuffer *buffer : mRegister) { + buffer->mPool.reset(); + buffer->mSize = 0; + buffer->mData = nullptr; + } + mRegister.clear(); + mFree = util::PtrAdd(mData, mPadding); + } + + /// @brief Resize this Pool and update registered buffers as needed. If data is no NULL + /// it is used as externally managed memory. + void resize(uint64_t size, void *data = nullptr) + { + const uint64_t memUsage = this->usage(); + + const bool managed = (data == nullptr); + + if (!managed && alignmentPadding(data) != 0) { + throw std::runtime_error("Pool::resize: external memory buffer is not aligned to " + + std::to_string(NANOVDB_DATA_ALIGNMENT) + " bytes"); + } + + if (memUsage > size) { + throw std::runtime_error("Pool::resize: insufficient memory"); + } + + uint64_t padding = 0; + if (mManaged && managed && size != mSize) { // managed -> managed + padding = mPadding; + data = Pool::realloc(mData, memUsage, size, padding); // performs both copy and free of mData + } else if (!mManaged && managed) { // un-managed -> managed + data = Pool::alloc(size); + padding = alignmentPadding(data); + } + + if (data == nullptr) { + throw std::runtime_error("Pool::resize: allocation failed"); + } else if (data != mData) { + void* paddedData = util::PtrAdd(data, padding); + + if (!(mManaged && managed)) { // no need to copy if managed -> managed + memcpy(paddedData, util::PtrAdd(mData, mPadding), memUsage); + } + + for (HostBuffer* buffer : mRegister) { // update registered buffers + //buffer->mData = paddedData + ptrdiff_t(buffer->mData - (mData + mPadding)); + buffer->mData = util::PtrAdd(paddedData, util::PtrDiff(buffer->mData, util::PtrAdd(mData, mPadding))); + } + mFree = util::PtrAdd(paddedData, memUsage); // update the free pointer + if (mManaged && !managed) {// only free if managed -> un-managed + std::free(mData); + } + + mData = data; + mPadding = padding; + } + mSize = size; + mManaged = managed; + } + /// @brief Return true is all the memory in this pool is in use. + bool isFull() const + { + assert(mFree <= util::PtrAdd(mData, mPadding + mSize)); + return mSize > 0 ? 
mFree == util::PtrAdd(mData, mPadding + mSize) : false; + } + +private: + + static void* alloc(uint64_t size) + { +//#if (__cplusplus >= 201703L) +// return std::aligned_alloc(NANOVDB_DATA_ALIGNMENT, size);//C++17 or newer +//#else + // make sure we alloc enough space to align the result + return std::malloc(size + NANOVDB_DATA_ALIGNMENT); +//#endif + } + + static void* realloc(void* const origData, + uint64_t origSize, + uint64_t desiredSize, + uint64_t& padding) + { + // make sure we alloc enough space to align the result + void* data = std::realloc(origData, desiredSize + NANOVDB_DATA_ALIGNMENT); + + if (data != nullptr && data != origData) { + uint64_t newPadding = alignmentPadding(data); + // Number of padding bytes may have changed -- move data if that's the case + if (newPadding != padding) { + // Realloc should not happen when shrinking down buffer, but let's be safe + std::memmove(util::PtrAdd(data, newPadding), + util::PtrAdd(data, padding), + math::Min(origSize, desiredSize)); + padding = newPadding; + } + } + + return data; + } + +};// struct HostBuffer::Pool + +// --------------------------> Implementation of HostBuffer <------------------------------------ + +inline HostBuffer::HostBuffer(uint64_t size) : mPool(nullptr), mSize(size), mData(nullptr) +{ + if (size>0) { + mPool = std::make_shared(size); + mData = mPool->mFree; + mPool->mRegister.insert(this); + mPool->mFree = util::PtrAdd(mPool->mFree, size); + } +} + +inline HostBuffer::HostBuffer(HostBuffer&& other) : mPool(other.mPool), mSize(other.mSize), mData(other.mData) +{ + if (mPool && mSize != 0) { + mPool->replace(&other, this); + } + other.mPool.reset(); + other.mSize = 0; + other.mData = nullptr; +} + +inline void HostBuffer::init(uint64_t bufferSize, void *data) +{ + if (bufferSize == 0) { + throw std::runtime_error("HostBuffer: invalid buffer size"); + } + if (mPool) { + mPool.reset(); + } + if (!mPool || mPool->mSize != bufferSize) { + mPool = std::make_shared(bufferSize, data); + } + mPool->add(this, bufferSize); +} + +inline HostBuffer& HostBuffer::operator=(HostBuffer&& other) +{ + if (mPool) { + mPool->remove(this); + } + mPool = other.mPool; + mSize = other.mSize; + mData = other.mData; + if (mPool && mSize != 0) { + mPool->replace(&other, this); + } + other.mPool.reset(); + other.mSize = 0; + other.mData = nullptr; + return *this; +} + +inline uint64_t HostBuffer::poolSize() const +{ + return mPool ? mPool->mSize : 0u; +} + +inline uint64_t HostBuffer::poolUsage() const +{ + return mPool ? mPool->usage(): 0u; +} + +inline bool HostBuffer::isManaged() const +{ + return mPool ? mPool->mManaged : false; +} + +inline bool HostBuffer::isFull() const +{ + return mPool ? 
mPool->isFull() : false; +} + +inline HostBuffer HostBuffer::createPool(uint64_t poolSize, void *data) +{ + if (poolSize == 0) { + throw std::runtime_error("HostBuffer: invalid pool size"); + } + HostBuffer buffer; + buffer.mPool = std::make_shared(poolSize, data); + // note the buffer is NOT registered by its pool since it is not using its memory + buffer.mSize = 0; + buffer.mData = nullptr; + return buffer; +} + +inline HostBuffer HostBuffer::createFull(uint64_t bufferSize, void *data) +{ + if (bufferSize == 0) { + throw std::runtime_error("HostBuffer: invalid buffer size"); + } + HostBuffer buffer; + buffer.mPool = std::make_shared(bufferSize, data); + buffer.mPool->add(&buffer, bufferSize); + return buffer; +} + +inline HostBuffer HostBuffer::create(uint64_t bufferSize, const HostBuffer* pool) +{ + HostBuffer buffer; + if (pool == nullptr || !pool->mPool) { + buffer.mPool = std::make_shared(bufferSize); + } else { + buffer.mPool = pool->mPool; + } + buffer.mPool->add(&buffer, bufferSize); + return buffer; +} + +inline void HostBuffer::clear() +{ + if (mPool) {// remove self from the buffer register in the pool + mPool->remove(this); + } + mPool.reset(); + mSize = 0; + mData = nullptr; +} + +inline void HostBuffer::reset() +{ + if (this->size()>0) { + throw std::runtime_error("HostBuffer: only empty buffers can call reset"); + } + if (!mPool) { + throw std::runtime_error("HostBuffer: this buffer contains no pool to reset"); + } + mPool->reset(); +} + +inline void HostBuffer::resizePool(uint64_t size, void *data) +{ + if (!mPool) { + throw std::runtime_error("HostBuffer: this buffer contains no pool to resize"); + } + mPool->resize(size, data); +} + +} // namespace nanovdb + +#endif // end of NANOVDB_HOSTBUFFER_H_HAS_BEEN_INCLUDED diff --git a/external/nanovdb/NanoVDB.h b/external/nanovdb/NanoVDB.h new file mode 100644 index 00000000..36b412b0 --- /dev/null +++ b/external/nanovdb/NanoVDB.h @@ -0,0 +1,6624 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +/*! + \file nanovdb/NanoVDB.h + + \author Ken Museth + + \date January 8, 2020 + + \brief Implements a light-weight self-contained VDB data-structure in a + single file! In other words, this is a significantly watered-down + version of the OpenVDB implementation, with few dependencies - so + a one-stop-shop for a minimalistic VDB data structure that run on + most platforms! + + \note It is important to note that NanoVDB (by design) is a read-only + sparse GPU (and CPU) friendly data structure intended for applications + like rendering and collision detection. As such it obviously lacks + a lot of the functionality and features of OpenVDB grids. NanoVDB + is essentially a compact linearized (or serialized) representation of + an OpenVDB tree with getValue methods only. For best performance use + the ReadAccessor::getValue method as opposed to the Tree::getValue + method. Note that since a ReadAccessor caches previous access patterns + it is by design not thread-safe, so use one instantiation per thread + (it is very light-weight). Also, it is not safe to copy accessors between + the GPU and CPU! In fact, client code should only interface + with the API of the Grid class (all other nodes of the NanoVDB data + structure can safely be ignored by most client codes)! + + + \warning NanoVDB grids can only be constructed via tools like createNanoGrid + or the GridBuilder. This explains why none of the grid nodes defined below + have public constructors or destructors. 
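+
+    A minimal, illustrative read example, assuming a float grid obtained elsewhere from a
+    nanovdb::GridHandle (e.g. via GridHandle::grid<float>()):
+    \code
+        const nanovdb::NanoGrid<float>* grid = handle.grid<float>();   // handle is a nanovdb::GridHandle<>
+        auto acc = grid->getAccessor();                                 // light-weight; use one instance per thread
+        const float v = acc.getValue(nanovdb::Coord(1, 2, 3));         // accelerated random access
+    \endcode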
+ + \details Please see the following paper for more details on the data structure: + K. Museth, “VDB: High-Resolution Sparse Volumes with Dynamic Topology”, + ACM Transactions on Graphics 32(3), 2013, which can be found here: + http://www.museth.org/Ken/Publications_files/Museth_TOG13.pdf + + NanoVDB was first published there: https://dl.acm.org/doi/fullHtml/10.1145/3450623.3464653 + + + Overview: This file implements the following fundamental class that when combined + forms the backbone of the VDB tree data structure: + + Coord- a signed integer coordinate + Vec3 - a 3D vector + Vec4 - a 4D vector + BBox - a bounding box + Mask - a bitmask essential to the non-root tree nodes + Map - an affine coordinate transformation + Grid - contains a Tree and a map for world<->index transformations. Use + this class as the main API with client code! + Tree - contains a RootNode and getValue methods that should only be used for debugging + RootNode - the top-level node of the VDB data structure + InternalNode - the internal nodes of the VDB data structure + LeafNode - the lowest level tree nodes that encode voxel values and state + ReadAccessor - implements accelerated random access operations + + Semantics: A VDB data structure encodes values and (binary) states associated with + signed integer coordinates. Values encoded at the leaf node level are + denoted voxel values, and values associated with other tree nodes are referred + to as tile values, which by design cover a larger coordinate index domain. + + + Memory layout: + + It's important to emphasize that all the grid data (defined below) are explicitly 32 byte + aligned, which implies that any memory buffer that contains a NanoVDB grid must also be at + 32 byte aligned. That is, the memory address of the beginning of a buffer (see ascii diagram below) + must be divisible by 32, i.e. uintptr_t(&buffer)%32 == 0! If this is not the case, the C++ standard + says the behaviour is undefined! Normally this is not a concerns on GPUs, because they use 256 byte + aligned allocations, but the same cannot be said about the CPU. + + GridData is always at the very beginning of the buffer immediately followed by TreeData! + The remaining nodes and blind-data are allowed to be scattered throughout the buffer, + though in practice they are arranged as: + + GridData: 672 bytes (e.g. magic, checksum, major, flags, index, count, size, name, map, world bbox, voxel size, class, type, offset, count) + + TreeData: 64 bytes (node counts and byte offsets) + + ... optional padding ... + + RootData: size depends on ValueType (index bbox, voxel count, tile count, min/max/avg/standard deviation) + + Array of: RootData::Tile + + ... optional padding ... + + Array of: Upper InternalNodes of size 32^3: bbox, two bit masks, 32768 tile values, and min/max/avg/standard deviation values + + ... optional padding ... + + Array of: Lower InternalNodes of size 16^3: bbox, two bit masks, 4096 tile values, and min/max/avg/standard deviation values + + ... optional padding ... + + Array of: LeafNodes of size 8^3: bbox, bit masks, 512 voxel values, and min/max/avg/standard deviation values + + + Notation: "]---[" implies it has optional padding, and "][" implies zero padding + + [GridData(672B)][TreeData(64B)]---[RootData][N x Root::Tile]---[InternalData<5>]---[InternalData<4>]---[LeafData<3>]---[BLINDMETA...]---[BLIND0]---[BLIND1]---etc. 
+ ^ ^ ^ ^ ^ ^ + | | | | | | + +-- Start of 32B aligned buffer | | | | +-- Node0::DataType* leafData + GridType::DataType* gridData | | | | + | | | +-- Node1::DataType* lowerData + RootType::DataType* rootData --+ | | + | +-- Node2::DataType* upperData + | + +-- RootType::DataType::Tile* tile + +*/ + +#ifndef NANOVDB_NANOVDB_H_HAS_BEEN_INCLUDED +#define NANOVDB_NANOVDB_H_HAS_BEEN_INCLUDED + +// The following two header files are the only mandatory dependencies +#include // for __hostdev__ and lots of other utility functions +#include // for Coord, BBox, Vec3, Vec4 etc + +// Do not change this value! 32 byte alignment is fixed in NanoVDB +#define NANOVDB_DATA_ALIGNMENT 32 + +// NANOVDB_MAGIC_NUMB is currently used for both grids and files (starting with v32.6.0) +// NANOVDB_MAGIC_GRID will soon be used exclusively for grids (serialized to a single buffer) +// NANOVDB_MAGIC_FILE will soon be used exclusively for files +// NANOVDB_MAGIC_NODE will soon be used exclusively for NodeManager +// NANOVDB_MAGIC_FRAG will soon be used exclusively for a fragmented grid, i.e. a grid that is not serialized +// | : 0 in 30 corresponds to 0 in NanoVDB0 +#define NANOVDB_MAGIC_NUMB 0x304244566f6e614eUL // "NanoVDB0" in hex - little endian (uint64_t) +#define NANOVDB_MAGIC_GRID 0x314244566f6e614eUL // "NanoVDB1" in hex - little endian (uint64_t) +#define NANOVDB_MAGIC_FILE 0x324244566f6e614eUL // "NanoVDB2" in hex - little endian (uint64_t) +#define NANOVDB_MAGIC_NODE 0x334244566f6e614eUL // "NanoVDB3" in hex - little endian (uint64_t) +#define NANOVDB_MAGIC_FRAG 0x344244566f6e614eUL // "NanoVDB4" in hex - little endian (uint64_t) +#define NANOVDB_MAGIC_MASK 0x00FFFFFFFFFFFFFFUL // use this mask to remove the number + +//#define NANOVDB_MAGIC_NUMBER 0x304244566f6e614eUL +//#define NANOVDB_USE_NEW_MAGIC_NUMBERS// used to enable use of the new magic numbers described above + +#define NANOVDB_MAJOR_VERSION_NUMBER 32 // reflects changes to the ABI and hence also the file format +#define NANOVDB_MINOR_VERSION_NUMBER 7 // reflects changes to the API but not ABI +#define NANOVDB_PATCH_VERSION_NUMBER 0 // reflects changes that does not affect the ABI or API + +#define TBB_SUPPRESS_DEPRECATED_MESSAGES 1 + +// This replaces a Coord key at the root level with a single uint64_t +#define NANOVDB_USE_SINGLE_ROOT_KEY + +// This replaces three levels of Coord keys in the ReadAccessor with one Coord +//#define NANOVDB_USE_SINGLE_ACCESSOR_KEY + +// Use this to switch between std::ofstream or FILE implementations +//#define NANOVDB_USE_IOSTREAMS + +// Use this to switch between old and new accessor methods +#define NANOVDB_NEW_ACCESSOR_METHODS + +#define NANOVDB_FPN_BRANCHLESS + +#if !defined(NANOVDB_ALIGN) +#define NANOVDB_ALIGN(n) alignas(n) +#endif // !defined(NANOVDB_ALIGN) + +namespace nanovdb {// ================================================================= + +// --------------------------> Build types <------------------------------------ + +/// @brief Dummy type for a voxel whose value equals an offset into an external value array +class ValueIndex{}; + +/// @brief Dummy type for a voxel whose value equals an offset into an external value array of active values +class ValueOnIndex{}; + +/// @brief Like @c ValueIndex but with a mutable mask +class ValueIndexMask{}; + +/// @brief Like @c ValueOnIndex but with a mutable mask +class ValueOnIndexMask{}; + +/// @brief Dummy type for a voxel whose value equals its binary active state +class ValueMask{}; + +/// @brief Dummy type for a 16 bit floating point values 
(placeholder for IEEE 754 Half) +class Half{}; + +/// @brief Dummy type for a 4bit quantization of float point values +class Fp4{}; + +/// @brief Dummy type for a 8bit quantization of float point values +class Fp8{}; + +/// @brief Dummy type for a 16bit quantization of float point values +class Fp16{}; + +/// @brief Dummy type for a variable bit quantization of floating point values +class FpN{}; + +/// @brief Dummy type for indexing points into voxels +class Point{}; + +// --------------------------> GridType <------------------------------------ + +/// @brief return the number of characters (including null termination) required to convert enum type to a string +template +__hostdev__ inline constexpr uint32_t strlen(){return (uint32_t)EnumT::StrLen - (uint32_t)EnumT::End;} + +/// @brief List of types that are currently supported by NanoVDB +/// +/// @note To expand on this list do: +/// 1) Add the new type between Unknown and End in the enum below +/// 2) Add the new type to OpenToNanoVDB::processGrid that maps OpenVDB types to GridType +/// 3) Verify that the ConvertTrait in NanoToOpenVDB.h works correctly with the new type +/// 4) Add the new type to toGridType (defined below) that maps NanoVDB types to GridType +/// 5) Add the new type to toStr (defined below) +enum class GridType : uint32_t { Unknown = 0, // unknown value type - should rarely be used + Float = 1, // single precision floating point value + Double = 2, // double precision floating point value + Int16 = 3, // half precision signed integer value + Int32 = 4, // single precision signed integer value + Int64 = 5, // double precision signed integer value + Vec3f = 6, // single precision floating 3D vector + Vec3d = 7, // double precision floating 3D vector + Mask = 8, // no value, just the active state + Half = 9, // half precision floating point value (placeholder for IEEE 754 Half) + UInt32 = 10, // single precision unsigned integer value + Boolean = 11, // boolean value, encoded in bit array + RGBA8 = 12, // RGBA packed into 32bit word in reverse-order, i.e. R is lowest byte. 
+ Fp4 = 13, // 4bit quantization of floating point value + Fp8 = 14, // 8bit quantization of floating point value + Fp16 = 15, // 16bit quantization of floating point value + FpN = 16, // variable bit quantization of floating point value + Vec4f = 17, // single precision floating 4D vector + Vec4d = 18, // double precision floating 4D vector + Index = 19, // index into an external array of active and inactive values + OnIndex = 20, // index into an external array of active values + IndexMask = 21, // like Index but with a mutable mask + OnIndexMask = 22, // like OnIndex but with a mutable mask + PointIndex = 23, // voxels encode indices to co-located points + Vec3u8 = 24, // 8bit quantization of floating point 3D vector (only as blind data) + Vec3u16 = 25, // 16bit quantization of floating point 3D vector (only as blind data) + UInt8 = 26, // 8 bit unsigned integer values (eg 0 -> 255 gray scale) + End = 27,// total number of types in this enum (excluding StrLen since it's not a type) + StrLen = End + 12};// this entry is used to determine the minimum size of c-string + +/// @brief Maps a GridType to a c-string +/// @param dst destination string of size 12 or larger +/// @param gridType GridType enum to be mapped to a string +/// @return Retuns a c-string used to describe a GridType +__hostdev__ inline char* toStr(char *dst, GridType gridType) +{ + switch (gridType){ + case GridType::Unknown: return util::strcpy(dst, "?"); + case GridType::Float: return util::strcpy(dst, "float"); + case GridType::Double: return util::strcpy(dst, "double"); + case GridType::Int16: return util::strcpy(dst, "int16"); + case GridType::Int32: return util::strcpy(dst, "int32"); + case GridType::Int64: return util::strcpy(dst, "int64"); + case GridType::Vec3f: return util::strcpy(dst, "Vec3f"); + case GridType::Vec3d: return util::strcpy(dst, "Vec3d"); + case GridType::Mask: return util::strcpy(dst, "Mask"); + case GridType::Half: return util::strcpy(dst, "Half"); + case GridType::UInt32: return util::strcpy(dst, "uint32"); + case GridType::Boolean: return util::strcpy(dst, "bool"); + case GridType::RGBA8: return util::strcpy(dst, "RGBA8"); + case GridType::Fp4: return util::strcpy(dst, "Float4"); + case GridType::Fp8: return util::strcpy(dst, "Float8"); + case GridType::Fp16: return util::strcpy(dst, "Float16"); + case GridType::FpN: return util::strcpy(dst, "FloatN"); + case GridType::Vec4f: return util::strcpy(dst, "Vec4f"); + case GridType::Vec4d: return util::strcpy(dst, "Vec4d"); + case GridType::Index: return util::strcpy(dst, "Index"); + case GridType::OnIndex: return util::strcpy(dst, "OnIndex"); + case GridType::IndexMask: return util::strcpy(dst, "IndexMask"); + case GridType::OnIndexMask: return util::strcpy(dst, "OnIndexMask"); + case GridType::PointIndex: return util::strcpy(dst, "PointIndex"); + case GridType::Vec3u8: return util::strcpy(dst, "Vec3u8"); + case GridType::Vec3u16: return util::strcpy(dst, "Vec3u16"); + case GridType::UInt8: return util::strcpy(dst, "uint8"); + default: return util::strcpy(dst, "End"); + } +} + +// --------------------------> GridClass <------------------------------------ + +/// @brief Classes (superset of OpenVDB) that are currently supported by NanoVDB +enum class GridClass : uint32_t { Unknown = 0, + LevelSet = 1, // narrow band level set, e.g. SDF + FogVolume = 2, // fog volume, e.g. density + Staggered = 3, // staggered MAC grid, e.g. 
velocity + PointIndex = 4, // point index grid + PointData = 5, // point data grid + Topology = 6, // grid with active states only (no values) + VoxelVolume = 7, // volume of geometric cubes, e.g. colors cubes in Minecraft + IndexGrid = 8, // grid whose values are offsets, e.g. into an external array + TensorGrid = 9, // Index grid for indexing learnable tensor features + End = 10,// total number of types in this enum (excluding StrLen since it's not a type) + StrLen = End + 7};// this entry is used to determine the minimum size of c-string + + +/// @brief Retuns a c-string used to describe a GridClass +/// @param dst destination string of size 7 or larger +/// @param gridClass GridClass enum to be converted to a string +__hostdev__ inline char* toStr(char *dst, GridClass gridClass) +{ + switch (gridClass){ + case GridClass::Unknown: return util::strcpy(dst, "?"); + case GridClass::LevelSet: return util::strcpy(dst, "SDF"); + case GridClass::FogVolume: return util::strcpy(dst, "FOG"); + case GridClass::Staggered: return util::strcpy(dst, "MAC"); + case GridClass::PointIndex: return util::strcpy(dst, "PNTIDX"); + case GridClass::PointData: return util::strcpy(dst, "PNTDAT"); + case GridClass::Topology: return util::strcpy(dst, "TOPO"); + case GridClass::VoxelVolume: return util::strcpy(dst, "VOX"); + case GridClass::IndexGrid: return util::strcpy(dst, "INDEX"); + case GridClass::TensorGrid: return util::strcpy(dst, "TENSOR"); + default: return util::strcpy(dst, "END"); + } +} + +// --------------------------> GridFlags <------------------------------------ + +/// @brief Grid flags which indicate what extra information is present in the grid buffer. +enum class GridFlags : uint32_t { + HasLongGridName = 1 << 0, // grid name is longer than 256 characters + HasBBox = 1 << 1, // nodes contain bounding-boxes of active values + HasMinMax = 1 << 2, // nodes contain min/max of active values + HasAverage = 1 << 3, // nodes contain averages of active values + HasStdDeviation = 1 << 4, // nodes contain standard deviations of active values + IsBreadthFirst = 1 << 5, // nodes are typically arranged breadth-first in memory + End = 1 << 6, // use End - 1 as a mask for the 5 lower bit flags + StrLen = End + 23,// this entry is used to determine the minimum size of c-string +}; + +/// @brief Retuns a c-string used to describe a GridFlags +/// @param dst destination string of size 23 or larger +/// @param gridFlags GridFlags enum to be converted to a string +__hostdev__ inline const char* toStr(char *dst, GridFlags gridFlags) +{ + switch (gridFlags){ + case GridFlags::HasLongGridName: return util::strcpy(dst, "has long grid name"); + case GridFlags::HasBBox: return util::strcpy(dst, "has bbox"); + case GridFlags::HasMinMax: return util::strcpy(dst, "has min/max"); + case GridFlags::HasAverage: return util::strcpy(dst, "has average"); + case GridFlags::HasStdDeviation: return util::strcpy(dst, "has standard deviation"); + case GridFlags::IsBreadthFirst: return util::strcpy(dst, "is breadth-first"); + default: return util::strcpy(dst, "end"); + } +} + +// --------------------------> MagicType <------------------------------------ + +/// @brief Enums used to identify magic numbers recognized by NanoVDB +enum class MagicType : uint32_t { Unknown = 0,// first 64 bits are neither of the cases below + OpenVDB = 1,// first 32 bits = 0x56444220UL + NanoVDB = 2,// first 64 bits = NANOVDB_MAGIC_NUMB + NanoGrid = 3,// first 64 bits = NANOVDB_MAGIC_GRID + NanoFile = 4,// first 64 bits = NANOVDB_MAGIC_FILE + NanoNode = 
5,// first 64 bits = NANOVDB_MAGIC_NODE + NanoFrag = 6,// first 64 bits = NANOVDB_MAGIC_FRAG + End = 7, + StrLen = End + 25};// this entry is used to determine the minimum size of c-string + +/// @brief maps 64 bits of magic number to enum +__hostdev__ inline MagicType toMagic(uint64_t magic) +{ + switch (magic){ + case NANOVDB_MAGIC_NUMB: return MagicType::NanoVDB; + case NANOVDB_MAGIC_GRID: return MagicType::NanoGrid; + case NANOVDB_MAGIC_FILE: return MagicType::NanoFile; + case NANOVDB_MAGIC_NODE: return MagicType::NanoNode; + case NANOVDB_MAGIC_FRAG: return MagicType::NanoFrag; + default: return (magic & ~uint32_t(0)) == 0x56444220UL ? MagicType::OpenVDB : MagicType::Unknown; + } +} + +/// @brief print 64-bit magic number to string +/// @param dst destination string of size 25 or larger +/// @param magic 64 bit magic number to be printed +/// @return return destination string @c dst +__hostdev__ inline char* toStr(char *dst, MagicType magic) +{ + switch (magic){ + case MagicType::Unknown: return util::strcpy(dst, "unknown"); + case MagicType::NanoVDB: return util::strcpy(dst, "nanovdb"); + case MagicType::NanoGrid: return util::strcpy(dst, "nanovdb::Grid"); + case MagicType::NanoFile: return util::strcpy(dst, "nanovdb::File"); + case MagicType::NanoNode: return util::strcpy(dst, "nanovdb::NodeManager"); + case MagicType::NanoFrag: return util::strcpy(dst, "fragmented nanovdb::Grid"); + case MagicType::OpenVDB: return util::strcpy(dst, "openvdb"); + default: return util::strcpy(dst, "end"); + } +} + +// --------------------------> PointType enums <------------------------------------ + +// Define the type used when the points are encoded as blind data in the output grid +enum class PointType : uint32_t { Disable = 0,// no point information e.g. when BuildT != Point + PointID = 1,// linear index of type uint32_t to points + World64 = 2,// Vec3d in world space + World32 = 3,// Vec3f in world space + Grid64 = 4,// Vec3d in grid space + Grid32 = 5,// Vec3f in grid space + Voxel32 = 6,// Vec3f in voxel space + Voxel16 = 7,// Vec3u16 in voxel space + Voxel8 = 8,// Vec3u8 in voxel space + Default = 9,// output matches input, i.e. Vec3d or Vec3f in world space + End =10 }; + +// --------------------------> GridBlindData enums <------------------------------------ + +/// @brief Blind-data Classes that are currently supported by NanoVDB +enum class GridBlindDataClass : uint32_t { Unknown = 0, + IndexArray = 1, + AttributeArray = 2, + GridName = 3, + ChannelArray = 4, + End = 5 }; + +/// @brief Blind-data Semantics that are currently understood by NanoVDB +enum class GridBlindDataSemantic : uint32_t { Unknown = 0, + PointPosition = 1, // 3D coordinates in an unknown space + PointColor = 2, + PointNormal = 3, + PointRadius = 4, + PointVelocity = 5, + PointId = 6, + WorldCoords = 7, // 3D coordinates in world space, e.g. (0.056, 0.8, 1,8) + GridCoords = 8, // 3D coordinates in grid space, e.g. (1.2, 4.0, 5.7), aka index-space + VoxelCoords = 9, // 3D coordinates in voxel space, e.g. 
(0.2, 0.0, 0.7) + End = 10 }; + +// --------------------------> BuildTraits <------------------------------------ + +/// @brief Define static boolean tests for template build types +template +struct BuildTraits +{ + // check if T is an index type + static constexpr bool is_index = util::is_same::value; + static constexpr bool is_onindex = util::is_same::value; + static constexpr bool is_offindex = util::is_same::value; + static constexpr bool is_indexmask = util::is_same::value; + // check if T is a compressed float type with fixed bit precision + static constexpr bool is_FpX = util::is_same::value; + // check if T is a compressed float type with fixed or variable bit precision + static constexpr bool is_Fp = util::is_same::value; + // check if T is a POD float type, i.e float or double + static constexpr bool is_float = util::is_floating_point::value; + // check if T is a template specialization of LeafData, i.e. has T mValues[512] + static constexpr bool is_special = is_index || is_Fp || util::is_same::value; +}; // BuildTraits + +// --------------------------> BuildToValueMap <------------------------------------ + +/// @brief Maps one type (e.g. the build types above) to other (actual) types +template +struct BuildToValueMap +{ + using Type = T; + using type = T; +}; + +template<> +struct BuildToValueMap +{ + using Type = uint64_t; + using type = uint64_t; +}; + +template<> +struct BuildToValueMap +{ + using Type = uint64_t; + using type = uint64_t; +}; + +template<> +struct BuildToValueMap +{ + using Type = uint64_t; + using type = uint64_t; +}; + +template<> +struct BuildToValueMap +{ + using Type = uint64_t; + using type = uint64_t; +}; + +template<> +struct BuildToValueMap +{ + using Type = bool; + using type = bool; +}; + +template<> +struct BuildToValueMap +{ + using Type = float; + using type = float; +}; + +template<> +struct BuildToValueMap +{ + using Type = float; + using type = float; +}; + +template<> +struct BuildToValueMap +{ + using Type = float; + using type = float; +}; + +template<> +struct BuildToValueMap +{ + using Type = float; + using type = float; +}; + +template<> +struct BuildToValueMap +{ + using Type = float; + using type = float; +}; + +template<> +struct BuildToValueMap +{ + using Type = uint64_t; + using type = uint64_t; +}; + +// --------------------------> utility functions related to alignment <------------------------------------ + +/// @brief return true if the specified pointer is 32 byte aligned +__hostdev__ inline static bool isAligned(const void* p){return uint64_t(p) % NANOVDB_DATA_ALIGNMENT == 0;} + +/// @brief return the smallest number of bytes that when added to the specified pointer results in a 32 byte aligned pointer. +__hostdev__ inline static uint64_t alignmentPadding(const void* p) +{ + NANOVDB_ASSERT(p); + return (NANOVDB_DATA_ALIGNMENT - (uint64_t(p) % NANOVDB_DATA_ALIGNMENT)) % NANOVDB_DATA_ALIGNMENT; +} + +/// @brief offset the specified pointer so it is 32 byte aligned. Works with both const and non-const pointers. 
+template +__hostdev__ inline static T* alignPtr(T* p){return util::PtrAdd(p, alignmentPadding(p));} + +// --------------------------> isFloatingPoint(GridType) <------------------------------------ + +/// @brief return true if the GridType maps to a floating point type +__hostdev__ inline bool isFloatingPoint(GridType gridType) +{ + return gridType == GridType::Float || + gridType == GridType::Double || + gridType == GridType::Half || + gridType == GridType::Fp4 || + gridType == GridType::Fp8 || + gridType == GridType::Fp16 || + gridType == GridType::FpN; +} + +// --------------------------> isFloatingPointVector(GridType) <------------------------------------ + +/// @brief return true if the GridType maps to a floating point vec3. +__hostdev__ inline bool isFloatingPointVector(GridType gridType) +{ + return gridType == GridType::Vec3f || + gridType == GridType::Vec3d || + gridType == GridType::Vec4f || + gridType == GridType::Vec4d; +} + +// --------------------------> isInteger(GridType) <------------------------------------ + +/// @brief Return true if the GridType maps to a POD integer type. +/// @details These types are used to associate a voxel with a POD integer type +__hostdev__ inline bool isInteger(GridType gridType) +{ + return gridType == GridType::Int16 || + gridType == GridType::Int32 || + gridType == GridType::Int64 || + gridType == GridType::UInt32|| + gridType == GridType::UInt8; +} + +// --------------------------> isIndex(GridType) <------------------------------------ + +/// @brief Return true if the GridType maps to a special index type (not a POD integer type). +/// @details These types are used to index from a voxel into an external array of values, e.g. sidecar or blind data. +__hostdev__ inline bool isIndex(GridType gridType) +{ + return gridType == GridType::Index ||// index both active and inactive values + gridType == GridType::OnIndex ||// index active values only + gridType == GridType::IndexMask ||// as Index, but with an additional mask + gridType == GridType::OnIndexMask;// as OnIndex, but with an additional mask +} + +// --------------------------> isValue(GridType, GridClass) <------------------------------------ + +/// @brief return true if the combination of GridType and GridClass is valid. +__hostdev__ inline bool isValid(GridType gridType, GridClass gridClass) +{ + if (gridClass == GridClass::LevelSet || gridClass == GridClass::FogVolume) { + return isFloatingPoint(gridType); + } else if (gridClass == GridClass::Staggered) { + return isFloatingPointVector(gridType); + } else if (gridClass == GridClass::PointIndex || gridClass == GridClass::PointData) { + return gridType == GridType::PointIndex || gridType == GridType::UInt32; + } else if (gridClass == GridClass::Topology) { + return gridType == GridType::Mask; + } else if (gridClass == GridClass::IndexGrid) { + return isIndex(gridType); + } else if (gridClass == GridClass::VoxelVolume) { + return gridType == GridType::RGBA8 || gridType == GridType::Float || + gridType == GridType::Double || gridType == GridType::Vec3f || + gridType == GridType::Vec3d || gridType == GridType::UInt32 || + gridType == GridType::UInt8; + } + return gridClass < GridClass::End && gridType < GridType::End; // any valid combination +} + +// --------------------------> validation of blind data meta data <------------------------------------ + +/// @brief return true if the combination of GridBlindDataClass, GridBlindDataSemantic and GridType is valid. 
+__hostdev__ inline bool isValid(const GridBlindDataClass& blindClass, + const GridBlindDataSemantic& blindSemantics, + const GridType& blindType) +{ + bool test = false; + switch (blindClass) { + case GridBlindDataClass::IndexArray: + test = (blindSemantics == GridBlindDataSemantic::Unknown || + blindSemantics == GridBlindDataSemantic::PointId) && + isInteger(blindType); + break; + case GridBlindDataClass::AttributeArray: + if (blindSemantics == GridBlindDataSemantic::PointPosition || + blindSemantics == GridBlindDataSemantic::WorldCoords) { + test = blindType == GridType::Vec3f || blindType == GridType::Vec3d; + } else if (blindSemantics == GridBlindDataSemantic::GridCoords) { + test = blindType == GridType::Vec3f; + } else if (blindSemantics == GridBlindDataSemantic::VoxelCoords) { + test = blindType == GridType::Vec3f || blindType == GridType::Vec3u8 || blindType == GridType::Vec3u16; + } else { + test = blindSemantics != GridBlindDataSemantic::PointId; + } + break; + case GridBlindDataClass::GridName: + test = blindSemantics == GridBlindDataSemantic::Unknown && blindType == GridType::Unknown; + break; + default: // captures blindClass == Unknown and ChannelArray + test = blindClass < GridBlindDataClass::End && + blindSemantics < GridBlindDataSemantic::End && + blindType < GridType::End; // any valid combination + break; + } + //if (!test) printf("Invalid combination: GridBlindDataClass=%u, GridBlindDataSemantic=%u, GridType=%u\n",(uint32_t)blindClass, (uint32_t)blindSemantics, (uint32_t)blindType); + return test; +} + +// ----------------------------> Version class <------------------------------------- + +/// @brief Bit-compacted representation of all three version numbers +/// +/// @details major is the top 11 bits, minor is the 11 middle bits and patch is the lower 10 bits +class Version +{ + uint32_t mData; // 11 + 11 + 10 bit packing of major + minor + patch +public: + static constexpr uint32_t End = 0, StrLen = 8;// for strlen() + /// @brief Default constructor + __hostdev__ Version() + : mData(uint32_t(NANOVDB_MAJOR_VERSION_NUMBER) << 21 | + uint32_t(NANOVDB_MINOR_VERSION_NUMBER) << 10 | + uint32_t(NANOVDB_PATCH_VERSION_NUMBER)) + { + } + /// @brief Constructor from a raw uint32_t data representation + __hostdev__ Version(uint32_t data) : mData(data) {} + /// @brief Constructor from major.minor.patch version numbers + __hostdev__ Version(uint32_t major, uint32_t minor, uint32_t patch) + : mData(major << 21 | minor << 10 | patch) + { + NANOVDB_ASSERT(major < (1u << 11)); // max value of major is 2047 + NANOVDB_ASSERT(minor < (1u << 11)); // max value of minor is 2047 + NANOVDB_ASSERT(patch < (1u << 10)); // max value of patch is 1023 + } + __hostdev__ bool operator==(const Version& rhs) const { return mData == rhs.mData; } + __hostdev__ bool operator<( const Version& rhs) const { return mData < rhs.mData; } + __hostdev__ bool operator<=(const Version& rhs) const { return mData <= rhs.mData; } + __hostdev__ bool operator>( const Version& rhs) const { return mData > rhs.mData; } + __hostdev__ bool operator>=(const Version& rhs) const { return mData >= rhs.mData; } + __hostdev__ uint32_t id() const { return mData; } + __hostdev__ uint32_t getMajor() const { return (mData >> 21) & ((1u << 11) - 1); } + __hostdev__ uint32_t getMinor() const { return (mData >> 10) & ((1u << 11) - 1); } + __hostdev__ uint32_t getPatch() const { return mData & ((1u << 10) - 1); } + __hostdev__ bool isCompatible() const { return this->getMajor() == uint32_t(NANOVDB_MAJOR_VERSION_NUMBER); } + /// @brief 
Returns the difference between major version of this instance and NANOVDB_MAJOR_VERSION_NUMBER + /// @return return 0 if the major version equals NANOVDB_MAJOR_VERSION_NUMBER, else a negative age if this + /// instance has a smaller major verion (is older), and a positive age if it is newer, i.e. larger. + __hostdev__ int age() const {return int(this->getMajor()) - int(NANOVDB_MAJOR_VERSION_NUMBER);} +}; // Version + +/// @brief print the verion number to a c-string +/// @param dst destination string of size 8 or more +/// @param v version to be printed +/// @return returns destination string @c dst +__hostdev__ inline char* toStr(char *dst, const Version &v) +{ + return util::sprint(dst, v.getMajor(), ".",v.getMinor(), ".",v.getPatch()); +} + +// ----------------------------> TensorTraits <-------------------------------------- + +template::value || util::is_specialization::value || util::is_same::value) ? 1 : 0> +struct TensorTraits; + +template +struct TensorTraits +{ + static const int Rank = 0; // i.e. scalar + static const bool IsScalar = true; + static const bool IsVector = false; + static const int Size = 1; + using ElementType = T; + static T scalar(const T& s) { return s; } +}; + +template +struct TensorTraits +{ + static const int Rank = 1; // i.e. vector + static const bool IsScalar = false; + static const bool IsVector = true; + static const int Size = T::SIZE; + using ElementType = typename T::ValueType; + static ElementType scalar(const T& v) { return v.length(); } +}; + +// ----------------------------> FloatTraits <-------------------------------------- + +template::ElementType)> +struct FloatTraits +{ + using FloatType = float; +}; + +template +struct FloatTraits +{ + using FloatType = double; +}; + +template<> +struct FloatTraits +{ + using FloatType = bool; +}; + +template<> +struct FloatTraits // size of empty class in C++ is 1 byte and not 0 byte +{ + using FloatType = uint64_t; +}; + +template<> +struct FloatTraits // size of empty class in C++ is 1 byte and not 0 byte +{ + using FloatType = uint64_t; +}; + +template<> +struct FloatTraits // size of empty class in C++ is 1 byte and not 0 byte +{ + using FloatType = uint64_t; +}; + +template<> +struct FloatTraits // size of empty class in C++ is 1 byte and not 0 byte +{ + using FloatType = uint64_t; +}; + +template<> +struct FloatTraits // size of empty class in C++ is 1 byte and not 0 byte +{ + using FloatType = bool; +}; + +template<> +struct FloatTraits // size of empty class in C++ is 1 byte and not 0 byte +{ + using FloatType = double; +}; + +// ----------------------------> mapping BuildType -> GridType <-------------------------------------- + +/// @brief Maps from a templated build type to a GridType enum +template +__hostdev__ inline GridType toGridType() +{ + if constexpr(util::is_same::value) { // resolved at compile-time + return GridType::Float; + } else if constexpr(util::is_same::value) { + return GridType::Double; + } else if constexpr(util::is_same::value) { + return GridType::Int16; + } else if constexpr(util::is_same::value) { + return GridType::Int32; + } else if constexpr(util::is_same::value) { + return GridType::Int64; + } else if constexpr(util::is_same::value) { + return GridType::Vec3f; + } else if constexpr(util::is_same::value) { + return GridType::Vec3d; + } else if constexpr(util::is_same::value) { + return GridType::UInt32; + } else if constexpr(util::is_same::value) { + return GridType::Mask; + } else if constexpr(util::is_same::value) { + return GridType::Half; + } else if 
constexpr(util::is_same::value) { + return GridType::Index; + } else if constexpr(util::is_same::value) { + return GridType::OnIndex; + } else if constexpr(util::is_same::value) { + return GridType::IndexMask; + } else if constexpr(util::is_same::value) { + return GridType::OnIndexMask; + } else if constexpr(util::is_same::value) { + return GridType::Boolean; + } else if constexpr(util::is_same::value) { + return GridType::RGBA8; + } else if constexpr(util::is_same::value) { + return GridType::Fp4; + } else if constexpr(util::is_same::value) { + return GridType::Fp8; + } else if constexpr(util::is_same::value) { + return GridType::Fp16; + } else if constexpr(util::is_same::value) { + return GridType::FpN; + } else if constexpr(util::is_same::value) { + return GridType::Vec4f; + } else if constexpr(util::is_same::value) { + return GridType::Vec4d; + } else if constexpr(util::is_same::value) { + return GridType::PointIndex; + } else if constexpr(util::is_same::value) { + return GridType::Vec3u8; + } else if constexpr(util::is_same::value) { + return GridType::Vec3u16; + } else if constexpr(util::is_same::value) { + return GridType::UInt8; + } + return GridType::Unknown; +}// toGridType + +template +[[deprecated("Use toGridType() instead.")]] +__hostdev__ inline GridType mapToGridType(){return toGridType();} + +// ----------------------------> mapping BuildType -> GridClass <-------------------------------------- + +/// @brief Maps from a templated build type to a GridClass enum +template +__hostdev__ inline GridClass toGridClass(GridClass defaultClass = GridClass::Unknown) +{ + if constexpr(util::is_same::value) { + return GridClass::Topology; + } else if constexpr(BuildTraits::is_index) { + return GridClass::IndexGrid; + } else if constexpr(util::is_same::value) { + return GridClass::VoxelVolume; + } else if constexpr(util::is_same::value) { + return GridClass::PointIndex; + } + return defaultClass; +} + +template +[[deprecated("Use toGridClass() instead.")]] +__hostdev__ inline GridClass mapToGridClass(GridClass defaultClass = GridClass::Unknown) +{ + return toGridClass(); +} + +// ----------------------------> BitFlags <-------------------------------------- + +template +struct BitArray; +template<> +struct BitArray<8> +{ + uint8_t mFlags{0}; +}; +template<> +struct BitArray<16> +{ + uint16_t mFlags{0}; +}; +template<> +struct BitArray<32> +{ + uint32_t mFlags{0}; +}; +template<> +struct BitArray<64> +{ + uint64_t mFlags{0}; +}; + +template +class BitFlags : public BitArray +{ +protected: + using BitArray::mFlags; + +public: + using Type = decltype(mFlags); + BitFlags() {} + BitFlags(Type mask) : BitArray{mask} {} + BitFlags(std::initializer_list list) + { + for (auto bit : list) mFlags |= static_cast(1 << bit); + } + template + BitFlags(std::initializer_list list) + { + for (auto mask : list) mFlags |= static_cast(mask); + } + __hostdev__ Type data() const { return mFlags; } + __hostdev__ Type& data() { return mFlags; } + __hostdev__ void initBit(std::initializer_list list) + { + mFlags = 0u; + for (auto bit : list) mFlags |= static_cast(1 << bit); + } + template + __hostdev__ void initMask(std::initializer_list list) + { + mFlags = 0u; + for (auto mask : list) mFlags |= static_cast(mask); + } + //__hostdev__ Type& data() { return mFlags; } + //__hostdev__ Type data() const { return mFlags; } + __hostdev__ Type getFlags() const { return mFlags & (static_cast(GridFlags::End) - 1u); } // mask out everything except relevant bits + + __hostdev__ void setOn() { mFlags = ~Type(0u); } + 
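+
+    // Illustrative sketch (assumed usage) of the mask helpers in this class, here combined
+    // with the GridFlags enum defined earlier in this file:
+    // @code
+    //     BitFlags<32> flags;
+    //     flags.setMaskOn(GridFlags::HasBBox);                            // turn a single mask on
+    //     flags.setMaskOn({GridFlags::HasMinMax, GridFlags::HasAverage}); // or several at once
+    //     const bool hasBBox = flags.isMaskOn(GridFlags::HasBBox);
+    // @endcode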
__hostdev__ void setOff() { mFlags = Type(0u); } + + __hostdev__ void setBitOn(uint8_t bit) { mFlags |= static_cast(1 << bit); } + __hostdev__ void setBitOff(uint8_t bit) { mFlags &= ~static_cast(1 << bit); } + + __hostdev__ void setBitOn(std::initializer_list list) + { + for (auto bit : list) mFlags |= static_cast(1 << bit); + } + __hostdev__ void setBitOff(std::initializer_list list) + { + for (auto bit : list) mFlags &= ~static_cast(1 << bit); + } + + template + __hostdev__ void setMaskOn(MaskT mask) { mFlags |= static_cast(mask); } + template + __hostdev__ void setMaskOff(MaskT mask) { mFlags &= ~static_cast(mask); } + + template + __hostdev__ void setMaskOn(std::initializer_list list) + { + for (auto mask : list) mFlags |= static_cast(mask); + } + template + __hostdev__ void setMaskOff(std::initializer_list list) + { + for (auto mask : list) mFlags &= ~static_cast(mask); + } + + __hostdev__ void setBit(uint8_t bit, bool on) { on ? this->setBitOn(bit) : this->setBitOff(bit); } + template + __hostdev__ void setMask(MaskT mask, bool on) { on ? this->setMaskOn(mask) : this->setMaskOff(mask); } + + __hostdev__ bool isOn() const { return mFlags == ~Type(0u); } + __hostdev__ bool isOff() const { return mFlags == Type(0u); } + __hostdev__ bool isBitOn(uint8_t bit) const { return 0 != (mFlags & static_cast(1 << bit)); } + __hostdev__ bool isBitOff(uint8_t bit) const { return 0 == (mFlags & static_cast(1 << bit)); } + template + __hostdev__ bool isMaskOn(MaskT mask) const { return 0 != (mFlags & static_cast(mask)); } + template + __hostdev__ bool isMaskOff(MaskT mask) const { return 0 == (mFlags & static_cast(mask)); } + /// @brief return true if any of the masks in the list are on + template + __hostdev__ bool isMaskOn(std::initializer_list list) const + { + for (auto mask : list) { + if (0 != (mFlags & static_cast(mask))) return true; + } + return false; + } + /// @brief return true if any of the masks in the list are off + template + __hostdev__ bool isMaskOff(std::initializer_list list) const + { + for (auto mask : list) { + if (0 == (mFlags & static_cast(mask))) return true; + } + return false; + } + /// @brief required for backwards compatibility + __hostdev__ BitFlags& operator=(Type n) + { + mFlags = n; + return *this; + } +}; // BitFlags + +// ----------------------------> Mask <-------------------------------------- + +/// @brief Bit-mask to encode active states and facilitate sequential iterators +/// and a fast codec for I/O compression. 
+template +class Mask +{ +public: + static constexpr uint32_t SIZE = 1U << (3 * LOG2DIM); // Number of bits in mask + static constexpr uint32_t WORD_COUNT = SIZE >> 6; // Number of 64 bit words + + /// @brief Return the memory footprint in bytes of this Mask + __hostdev__ static size_t memUsage() { return sizeof(Mask); } + + /// @brief Return the number of bits available in this Mask + __hostdev__ static uint32_t bitCount() { return SIZE; } + + /// @brief Return the number of machine words used by this Mask + __hostdev__ static uint32_t wordCount() { return WORD_COUNT; } + + /// @brief Return the total number of set bits in this Mask + __hostdev__ uint32_t countOn() const + { + uint32_t sum = 0; + for (const uint64_t *w = mWords, *q = w + WORD_COUNT; w != q; ++w) + sum += util::countOn(*w); + return sum; + } + + /// @brief Return the number of lower set bits in mask up to but excluding the i'th bit + inline __hostdev__ uint32_t countOn(uint32_t i) const + { + uint32_t n = i >> 6, sum = util::countOn(mWords[n] & ((uint64_t(1) << (i & 63u)) - 1u)); + for (const uint64_t* w = mWords; n--; ++w) + sum += util::countOn(*w); + return sum; + } + + template + class Iterator + { + public: + __hostdev__ Iterator() + : mPos(Mask::SIZE) + , mParent(nullptr) + { + } + __hostdev__ Iterator(uint32_t pos, const Mask* parent) + : mPos(pos) + , mParent(parent) + { + } + Iterator& operator=(const Iterator&) = default; + __hostdev__ uint32_t operator*() const { return mPos; } + __hostdev__ uint32_t pos() const { return mPos; } + __hostdev__ operator bool() const { return mPos != Mask::SIZE; } + __hostdev__ Iterator& operator++() + { + mPos = mParent->findNext(mPos + 1); + return *this; + } + __hostdev__ Iterator operator++(int) + { + auto tmp = *this; + ++(*this); + return tmp; + } + + private: + uint32_t mPos; + const Mask* mParent; + }; // Member class Iterator + + class DenseIterator + { + public: + __hostdev__ DenseIterator(uint32_t pos = Mask::SIZE) + : mPos(pos) + { + } + DenseIterator& operator=(const DenseIterator&) = default; + __hostdev__ uint32_t operator*() const { return mPos; } + __hostdev__ uint32_t pos() const { return mPos; } + __hostdev__ operator bool() const { return mPos != Mask::SIZE; } + __hostdev__ DenseIterator& operator++() + { + ++mPos; + return *this; + } + __hostdev__ DenseIterator operator++(int) + { + auto tmp = *this; + ++mPos; + return tmp; + } + + private: + uint32_t mPos; + }; // Member class DenseIterator + + using OnIterator = Iterator; + using OffIterator = Iterator; + + __hostdev__ OnIterator beginOn() const { return OnIterator(this->findFirst(), this); } + + __hostdev__ OffIterator beginOff() const { return OffIterator(this->findFirst(), this); } + + __hostdev__ DenseIterator beginAll() const { return DenseIterator(0); } + + /// @brief Initialize all bits to zero. + __hostdev__ Mask() + { + for (uint32_t i = 0; i < WORD_COUNT; ++i) + mWords[i] = 0; + } + __hostdev__ Mask(bool on) + { + const uint64_t v = on ? 
~uint64_t(0) : uint64_t(0); + for (uint32_t i = 0; i < WORD_COUNT; ++i) + mWords[i] = v; + } + + /// @brief Copy constructor + __hostdev__ Mask(const Mask& other) + { + for (uint32_t i = 0; i < WORD_COUNT; ++i) + mWords[i] = other.mWords[i]; + } + + /// @brief Return a pointer to the list of words of the bit mask + __hostdev__ uint64_t* words() { return mWords; } + __hostdev__ const uint64_t* words() const { return mWords; } + + /// @brief Assignment operator that works with openvdb::util::NodeMask + template + __hostdev__ typename util::enable_if::value, Mask&>::type operator=(const MaskT& other) + { + static_assert(sizeof(Mask) == sizeof(MaskT), "Mismatching sizeof"); + static_assert(WORD_COUNT == MaskT::WORD_COUNT, "Mismatching word count"); + static_assert(LOG2DIM == MaskT::LOG2DIM, "Mismatching LOG2DIM"); + auto* src = reinterpret_cast(&other); + for (uint64_t *dst = mWords, *end = dst + WORD_COUNT; dst != end; ++dst) + *dst = *src++; + return *this; + } + + //__hostdev__ Mask& operator=(const Mask& other){return *util::memcpy(this, &other);} + Mask& operator=(const Mask&) = default; + + __hostdev__ bool operator==(const Mask& other) const + { + for (uint32_t i = 0; i < WORD_COUNT; ++i) { + if (mWords[i] != other.mWords[i]) + return false; + } + return true; + } + + __hostdev__ bool operator!=(const Mask& other) const { return !((*this) == other); } + + /// @brief Return true if the given bit is set. + __hostdev__ bool isOn(uint32_t n) const { return 0 != (mWords[n >> 6] & (uint64_t(1) << (n & 63))); } + + /// @brief Return true if the given bit is NOT set. + __hostdev__ bool isOff(uint32_t n) const { return 0 == (mWords[n >> 6] & (uint64_t(1) << (n & 63))); } + + /// @brief Return true if all the bits are set in this Mask. + __hostdev__ bool isOn() const + { + for (uint32_t i = 0; i < WORD_COUNT; ++i) + if (mWords[i] != ~uint64_t(0)) + return false; + return true; + } + + /// @brief Return true if none of the bits are set in this Mask. + __hostdev__ bool isOff() const + { + for (uint32_t i = 0; i < WORD_COUNT; ++i) + if (mWords[i] != uint64_t(0)) + return false; + return true; + } + + /// @brief Set the specified bit on. + __hostdev__ void setOn(uint32_t n) { mWords[n >> 6] |= uint64_t(1) << (n & 63); } + /// @brief Set the specified bit off. + __hostdev__ void setOff(uint32_t n) { mWords[n >> 6] &= ~(uint64_t(1) << (n & 63)); } + +#if defined(__CUDACC__) // the following functions only run on the GPU! + __device__ inline void setOnAtomic(uint32_t n) + { + atomicOr(reinterpret_cast(this) + (n >> 6), 1ull << (n & 63)); + } + __device__ inline void setOffAtomic(uint32_t n) + { + atomicAnd(reinterpret_cast(this) + (n >> 6), ~(1ull << (n & 63))); + } + __device__ inline void setAtomic(uint32_t n, bool on) + { + on ? this->setOnAtomic(n) : this->setOffAtomic(n); + } +#endif + /// @brief Set the specified bit on or off. + __hostdev__ void set(uint32_t n, bool on) + { +#if 1 // switch between branchless + auto& word = mWords[n >> 6]; + n &= 63; + word &= ~(uint64_t(1) << n); + word |= uint64_t(on) << n; +#else + on ? this->setOn(n) : this->setOff(n); +#endif + } + + /// @brief Set all bits on + __hostdev__ void setOn() + { + for (uint32_t i = 0; i < WORD_COUNT; ++i)mWords[i] = ~uint64_t(0); + } + + /// @brief Set all bits off + __hostdev__ void setOff() + { + for (uint32_t i = 0; i < WORD_COUNT; ++i) mWords[i] = uint64_t(0); + } + + /// @brief Set all bits off + __hostdev__ void set(bool on) + { + const uint64_t v = on ? 
~uint64_t(0) : uint64_t(0); + for (uint32_t i = 0; i < WORD_COUNT; ++i) mWords[i] = v; + } + /// brief Toggle the state of all bits in the mask + __hostdev__ void toggle() + { + uint32_t n = WORD_COUNT; + for (auto* w = mWords; n--; ++w) *w = ~*w; + } + __hostdev__ void toggle(uint32_t n) { mWords[n >> 6] ^= uint64_t(1) << (n & 63); } + + /// @brief Bitwise intersection + __hostdev__ Mask& operator&=(const Mask& other) + { + uint64_t* w1 = mWords; + const uint64_t* w2 = other.mWords; + for (uint32_t n = WORD_COUNT; n--; ++w1, ++w2) *w1 &= *w2; + return *this; + } + /// @brief Bitwise union + __hostdev__ Mask& operator|=(const Mask& other) + { + uint64_t* w1 = mWords; + const uint64_t* w2 = other.mWords; + for (uint32_t n = WORD_COUNT; n--; ++w1, ++w2) *w1 |= *w2; + return *this; + } + /// @brief Bitwise difference + __hostdev__ Mask& operator-=(const Mask& other) + { + uint64_t* w1 = mWords; + const uint64_t* w2 = other.mWords; + for (uint32_t n = WORD_COUNT; n--; ++w1, ++w2) *w1 &= ~*w2; + return *this; + } + /// @brief Bitwise XOR + __hostdev__ Mask& operator^=(const Mask& other) + { + uint64_t* w1 = mWords; + const uint64_t* w2 = other.mWords; + for (uint32_t n = WORD_COUNT; n--; ++w1, ++w2) *w1 ^= *w2; + return *this; + } + + NANOVDB_HOSTDEV_DISABLE_WARNING + template + __hostdev__ uint32_t findFirst() const + { + uint32_t n = 0u; + const uint64_t* w = mWords; + for (; n < WORD_COUNT && !(ON ? *w : ~*w); ++w, ++n); + return n < WORD_COUNT ? (n << 6) + util::findLowestOn(ON ? *w : ~*w) : SIZE; + } + + NANOVDB_HOSTDEV_DISABLE_WARNING + template + __hostdev__ uint32_t findNext(uint32_t start) const + { + uint32_t n = start >> 6; // initiate + if (n >= WORD_COUNT) return SIZE; // check for out of bounds + uint32_t m = start & 63u; + uint64_t b = ON ? mWords[n] : ~mWords[n]; + if (b & (uint64_t(1u) << m)) return start; // simple case: start is on/off + b &= ~uint64_t(0u) << m; // mask out lower bits + while (!b && ++n < WORD_COUNT) b = ON ? mWords[n] : ~mWords[n]; // find next non-zero word + return b ? (n << 6) + util::findLowestOn(b) : SIZE; // catch last word=0 + } + + NANOVDB_HOSTDEV_DISABLE_WARNING + template + __hostdev__ uint32_t findPrev(uint32_t start) const + { + uint32_t n = start >> 6; // initiate + if (n >= WORD_COUNT) return SIZE; // check for out of bounds + uint32_t m = start & 63u; + uint64_t b = ON ? mWords[n] : ~mWords[n]; + if (b & (uint64_t(1u) << m)) return start; // simple case: start is on/off + b &= (uint64_t(1u) << m) - 1u; // mask out higher bits + while (!b && n) b = ON ? mWords[--n] : ~mWords[--n]; // find previous non-zero word + return b ? (n << 6) + util::findHighestOn(b) : SIZE; // catch first word=0 + } + +private: + uint64_t mWords[WORD_COUNT]; +}; // Mask class + +// ----------------------------> Map <-------------------------------------- + +/// @brief Defines an affine transform and its inverse represented as a 3x3 matrix and a vec3 translation +struct Map +{ // 264B (not 32B aligned!) 
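// Editorial sketch (not part of this patch): the Mask<LOG2DIM> class that closes above packs
// SIZE = 2^(3*LOG2DIM) bits into SIZE/64 words of type uint64_t, and every accessor splits a
// bit index n into a word index (n >> 6) and a bit position (n & 63). The toy_* names below are
// hypothetical, assume uint32_t/uint64_t from <cstdint> as used throughout this header, and
// would live at namespace scope rather than inside Map.
inline bool toy_mask_demo()
{
    constexpr uint32_t LOG2DIM = 3;                  // leaf configuration: 8x8x8 = 512 bits
    constexpr uint32_t SIZE = 1u << (3 * LOG2DIM);   // 512
    uint64_t words[SIZE >> 6] = {};                  // 8 x 64-bit words, all bits off
    const uint32_t n = 200;                          // lives in word 200>>6 = 3, bit 200&63 = 8
    words[n >> 6] |= uint64_t(1) << (n & 63);                                      // Mask::setOn(n)
    const bool on  = 0 != (words[n >> 6] & (uint64_t(1) << (n & 63)));             // Mask::isOn(n)
    const bool off = 0 == (words[(n + 1) >> 6] & (uint64_t(1) << ((n + 1) & 63))); // Mask::isOff(n+1)
    return on && off;                                // true
}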
+ float mMatF[9]; // 9*4B <- 3x3 matrix + float mInvMatF[9]; // 9*4B <- 3x3 matrix + float mVecF[3]; // 3*4B <- translation + float mTaperF; // 4B, placeholder for taper value + double mMatD[9]; // 9*8B <- 3x3 matrix + double mInvMatD[9]; // 9*8B <- 3x3 matrix + double mVecD[3]; // 3*8B <- translation + double mTaperD; // 8B, placeholder for taper value + + /// @brief Default constructor for the identity map + __hostdev__ Map() + : mMatF{ 1.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 1.0f} + , mInvMatF{1.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 1.0f} + , mVecF{0.0f, 0.0f, 0.0f} + , mTaperF{1.0f} + , mMatD{ 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0} + , mInvMatD{1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0} + , mVecD{0.0, 0.0, 0.0} + , mTaperD{1.0} + { + } + __hostdev__ Map(double s, const Vec3d& t = Vec3d(0.0, 0.0, 0.0)) + : mMatF{float(s), 0.0f, 0.0f, 0.0f, float(s), 0.0f, 0.0f, 0.0f, float(s)} + , mInvMatF{1.0f / float(s), 0.0f, 0.0f, 0.0f, 1.0f / float(s), 0.0f, 0.0f, 0.0f, 1.0f / float(s)} + , mVecF{float(t[0]), float(t[1]), float(t[2])} + , mTaperF{1.0f} + , mMatD{s, 0.0, 0.0, 0.0, s, 0.0, 0.0, 0.0, s} + , mInvMatD{1.0 / s, 0.0, 0.0, 0.0, 1.0 / s, 0.0, 0.0, 0.0, 1.0 / s} + , mVecD{t[0], t[1], t[2]} + , mTaperD{1.0} + { + } + + /// @brief Initialize the member data from 3x3 or 4x4 matrices + /// @note This is not _hostdev__ since then MatT=openvdb::Mat4d will produce warnings + template + void set(const MatT& mat, const MatT& invMat, const Vec3T& translate, double taper = 1.0); + + /// @brief Initialize the member data from 4x4 matrices + /// @note The last (4th) row of invMat is actually ignored. + /// This is not _hostdev__ since then Mat4T=openvdb::Mat4d will produce warnings + template + void set(const Mat4T& mat, const Mat4T& invMat, double taper = 1.0) { this->set(mat, invMat, mat[3], taper); } + + template + void set(double scale, const Vec3T& translation, double taper = 1.0); + + /// @brief Apply the forward affine transformation to a vector using 64bit floating point arithmetics. + /// @note Typically this operation is used for the scale, rotation and translation of index -> world mapping + /// @tparam Vec3T Template type of the 3D vector to be mapped + /// @param ijk 3D vector to be mapped - typically floating point index coordinates + /// @return Forward mapping for affine transformation, i.e. (mat x ijk) + translation + template + __hostdev__ Vec3T applyMap(const Vec3T& ijk) const { return math::matMult(mMatD, mVecD, ijk); } + + /// @brief Apply the forward affine transformation to a vector using 32bit floating point arithmetics. + /// @note Typically this operation is used for the scale, rotation and translation of index -> world mapping + /// @tparam Vec3T Template type of the 3D vector to be mapped + /// @param ijk 3D vector to be mapped - typically floating point index coordinates + /// @return Forward mapping for affine transformation, i.e. (mat x ijk) + translation + template + __hostdev__ Vec3T applyMapF(const Vec3T& ijk) const { return math::matMult(mMatF, mVecF, ijk); } + + /// @brief Apply the linear forward 3x3 transformation to an input 3d vector using 64bit floating point arithmetics, + /// e.g. scale and rotation WITHOUT translation. 
+ /// @note Typically this operation is used for scale and rotation from index -> world mapping + /// @tparam Vec3T Template type of the 3D vector to be mapped + /// @param ijk 3D vector to be mapped - typically floating point index coordinates + /// @return linear forward 3x3 mapping of the input vector + template + __hostdev__ Vec3T applyJacobian(const Vec3T& ijk) const { return math::matMult(mMatD, ijk); } + + /// @brief Apply the linear forward 3x3 transformation to an input 3d vector using 32bit floating point arithmetics, + /// e.g. scale and rotation WITHOUT translation. + /// @note Typically this operation is used for scale and rotation from index -> world mapping + /// @tparam Vec3T Template type of the 3D vector to be mapped + /// @param ijk 3D vector to be mapped - typically floating point index coordinates + /// @return linear forward 3x3 mapping of the input vector + template + __hostdev__ Vec3T applyJacobianF(const Vec3T& ijk) const { return math::matMult(mMatF, ijk); } + + /// @brief Apply the inverse affine mapping to a vector using 64bit floating point arithmetics. + /// @note Typically this operation is used for the world -> index mapping + /// @tparam Vec3T Template type of the 3D vector to be mapped + /// @param xyz 3D vector to be mapped - typically floating point world coordinates + /// @return Inverse affine mapping of the input @c xyz i.e. (xyz - translation) x mat^-1 + template + __hostdev__ Vec3T applyInverseMap(const Vec3T& xyz) const + { + return math::matMult(mInvMatD, Vec3T(xyz[0] - mVecD[0], xyz[1] - mVecD[1], xyz[2] - mVecD[2])); + } + + /// @brief Apply the inverse affine mapping to a vector using 32bit floating point arithmetics. + /// @note Typically this operation is used for the world -> index mapping + /// @tparam Vec3T Template type of the 3D vector to be mapped + /// @param xyz 3D vector to be mapped - typically floating point world coordinates + /// @return Inverse affine mapping of the input @c xyz i.e. (xyz - translation) x mat^-1 + template + __hostdev__ Vec3T applyInverseMapF(const Vec3T& xyz) const + { + return math::matMult(mInvMatF, Vec3T(xyz[0] - mVecF[0], xyz[1] - mVecF[1], xyz[2] - mVecF[2])); + } + + /// @brief Apply the linear inverse 3x3 transformation to an input 3d vector using 64bit floating point arithmetics, + /// e.g. inverse scale and inverse rotation WITHOUT translation. + /// @note Typically this operation is used for scale and rotation from world -> index mapping + /// @tparam Vec3T Template type of the 3D vector to be mapped + /// @param ijk 3D vector to be mapped - typically floating point index coordinates + /// @return linear inverse 3x3 mapping of the input vector i.e. xyz x mat^-1 + template + __hostdev__ Vec3T applyInverseJacobian(const Vec3T& xyz) const { return math::matMult(mInvMatD, xyz); } + + /// @brief Apply the linear inverse 3x3 transformation to an input 3d vector using 32bit floating point arithmetics, + /// e.g. inverse scale and inverse rotation WITHOUT translation. + /// @note Typically this operation is used for scale and rotation from world -> index mapping + /// @tparam Vec3T Template type of the 3D vector to be mapped + /// @param ijk 3D vector to be mapped - typically floating point index coordinates + /// @return linear inverse 3x3 mapping of the input vector i.e. 
xyz x mat^-1 + template + __hostdev__ Vec3T applyInverseJacobianF(const Vec3T& xyz) const { return math::matMult(mInvMatF, xyz); } + + /// @brief Apply the transposed inverse 3x3 transformation to an input 3d vector using 64bit floating point arithmetics, + /// e.g. inverse scale and inverse rotation WITHOUT translation. + /// @note Typically this operation is used for scale and rotation from world -> index mapping + /// @tparam Vec3T Template type of the 3D vector to be mapped + /// @param ijk 3D vector to be mapped - typically floating point index coordinates + /// @return linear inverse 3x3 mapping of the input vector i.e. xyz x mat^-1 + template + __hostdev__ Vec3T applyIJT(const Vec3T& xyz) const { return math::matMultT(mInvMatD, xyz); } + template + __hostdev__ Vec3T applyIJTF(const Vec3T& xyz) const { return math::matMultT(mInvMatF, xyz); } + + /// @brief Return a voxels size in each coordinate direction, measured at the origin + __hostdev__ Vec3d getVoxelSize() const { return this->applyMap(Vec3d(1)) - this->applyMap(Vec3d(0)); } +}; // Map + +template +inline void Map::set(const MatT& mat, const MatT& invMat, const Vec3T& translate, double taper) +{ + float * mf = mMatF, *vf = mVecF, *mif = mInvMatF; + double *md = mMatD, *vd = mVecD, *mid = mInvMatD; + mTaperF = static_cast(taper); + mTaperD = taper; + for (int i = 0; i < 3; ++i) { + *vd++ = translate[i]; //translation + *vf++ = static_cast(translate[i]); //translation + for (int j = 0; j < 3; ++j) { + *md++ = mat[j][i]; //transposed + *mid++ = invMat[j][i]; + *mf++ = static_cast(mat[j][i]); //transposed + *mif++ = static_cast(invMat[j][i]); + } + } +} + +template +inline void Map::set(double dx, const Vec3T& trans, double taper) +{ + NANOVDB_ASSERT(dx > 0.0); + const double mat[3][3] = { {dx, 0.0, 0.0}, // row 0 + {0.0, dx, 0.0}, // row 1 + {0.0, 0.0, dx} }; // row 2 + const double idx = 1.0 / dx; + const double invMat[3][3] = { {idx, 0.0, 0.0}, // row 0 + {0.0, idx, 0.0}, // row 1 + {0.0, 0.0, idx} }; // row 2 + this->set(mat, invMat, trans, taper); +} + +// ----------------------------> GridBlindMetaData <-------------------------------------- + +struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridBlindMetaData +{ // 288 bytes + static const int MaxNameSize = 256; // due to NULL termination the maximum length is one less! + int64_t mDataOffset; // byte offset to the blind data, relative to this GridBlindMetaData. + uint64_t mValueCount; // number of blind values, e.g. point count + uint32_t mValueSize;// byte size of each value, e.g. 4 if mDataType=Float and 1 if mDataType=Unknown since that amounts to char + GridBlindDataSemantic mSemantic; // semantic meaning of the data. + GridBlindDataClass mDataClass; // 4 bytes + GridType mDataType; // 4 bytes + char mName[MaxNameSize]; // note this includes the NULL termination + // no padding required for 32 byte alignment + + // disallow copy-construction since methods like blindData and getBlindData uses the this pointer! + GridBlindMetaData(const GridBlindMetaData&) = delete; + + // disallow copy-assignment since methods like blindData and getBlindData uses the this pointer! 
+ const GridBlindMetaData& operator=(const GridBlindMetaData&) = delete; + + __hostdev__ void setBlindData(void* blindData) { mDataOffset = util::PtrDiff(blindData, this); } + + // unsafe + __hostdev__ const void* blindData() const {return util::PtrAdd(this, mDataOffset);} + + /// @brief Get a const pointer to the blind data represented by this meta data + /// @tparam BlindDataT Expected value type of the blind data. + /// @return Returns NULL if mGridType!=toGridType(), else a const point of type BlindDataT. + /// @note Use mDataType=Unknown if BlindDataT is a custom data type unknown to NanoVDB. + template + __hostdev__ const BlindDataT* getBlindData() const + { + //if (mDataType != toGridType()) printf("getBlindData mismatch\n"); + return mDataType == toGridType() ? util::PtrAdd(this, mDataOffset) : nullptr; + } + + /// @brief return true if this meta data has a valid combination of semantic, class and value tags + __hostdev__ bool isValid() const + { + auto check = [&]()->bool{ + switch (mDataType){ + case GridType::Unknown: return mValueSize==1u;// i.e. we encode data as mValueCount chars + case GridType::Float: return mValueSize==4u; + case GridType::Double: return mValueSize==8u; + case GridType::Int16: return mValueSize==2u; + case GridType::Int32: return mValueSize==4u; + case GridType::Int64: return mValueSize==8u; + case GridType::Vec3f: return mValueSize==12u; + case GridType::Vec3d: return mValueSize==24u; + case GridType::Half: return mValueSize==2u; + case GridType::RGBA8: return mValueSize==4u; + case GridType::Fp8: return mValueSize==1u; + case GridType::Fp16: return mValueSize==2u; + case GridType::Vec4f: return mValueSize==16u; + case GridType::Vec4d: return mValueSize==32u; + case GridType::Vec3u8: return mValueSize==3u; + case GridType::Vec3u16: return mValueSize==6u; + default: return true;}// all other combinations are valid + }; + return nanovdb::isValid(mDataClass, mSemantic, mDataType) && check(); + } + + /// @brief return size in bytes of the blind data represented by this blind meta data + /// @note This size includes possible padding for 32 byte alignment. 
The actual amount + /// of bind data is mValueCount * mValueSize + __hostdev__ uint64_t blindDataSize() const + { + return math::AlignUp(mValueCount * mValueSize); + } +}; // GridBlindMetaData + +// ----------------------------> NodeTrait <-------------------------------------- + +/// @brief Struct to derive node type from its level in a given +/// grid, tree or root while preserving constness +template +struct NodeTrait; + +// Partial template specialization of above Node struct +template +struct NodeTrait +{ + static_assert(GridOrTreeOrRootT::RootNodeType::LEVEL == 3, "Tree depth is not supported"); + using Type = typename GridOrTreeOrRootT::LeafNodeType; + using type = typename GridOrTreeOrRootT::LeafNodeType; +}; +template +struct NodeTrait +{ + static_assert(GridOrTreeOrRootT::RootNodeType::LEVEL == 3, "Tree depth is not supported"); + using Type = const typename GridOrTreeOrRootT::LeafNodeType; + using type = const typename GridOrTreeOrRootT::LeafNodeType; +}; + +template +struct NodeTrait +{ + static_assert(GridOrTreeOrRootT::RootNodeType::LEVEL == 3, "Tree depth is not supported"); + using Type = typename GridOrTreeOrRootT::RootNodeType::ChildNodeType::ChildNodeType; + using type = typename GridOrTreeOrRootT::RootNodeType::ChildNodeType::ChildNodeType; +}; +template +struct NodeTrait +{ + static_assert(GridOrTreeOrRootT::RootNodeType::LEVEL == 3, "Tree depth is not supported"); + using Type = const typename GridOrTreeOrRootT::RootNodeType::ChildNodeType::ChildNodeType; + using type = const typename GridOrTreeOrRootT::RootNodeType::ChildNodeType::ChildNodeType; +}; +template +struct NodeTrait +{ + static_assert(GridOrTreeOrRootT::RootNodeType::LEVEL == 3, "Tree depth is not supported"); + using Type = typename GridOrTreeOrRootT::RootNodeType::ChildNodeType; + using type = typename GridOrTreeOrRootT::RootNodeType::ChildNodeType; +}; +template +struct NodeTrait +{ + static_assert(GridOrTreeOrRootT::RootNodeType::LEVEL == 3, "Tree depth is not supported"); + using Type = const typename GridOrTreeOrRootT::RootNodeType::ChildNodeType; + using type = const typename GridOrTreeOrRootT::RootNodeType::ChildNodeType; +}; +template +struct NodeTrait +{ + static_assert(GridOrTreeOrRootT::RootNodeType::LEVEL == 3, "Tree depth is not supported"); + using Type = typename GridOrTreeOrRootT::RootNodeType; + using type = typename GridOrTreeOrRootT::RootNodeType; +}; + +template +struct NodeTrait +{ + static_assert(GridOrTreeOrRootT::RootNodeType::LEVEL == 3, "Tree depth is not supported"); + using Type = const typename GridOrTreeOrRootT::RootNodeType; + using type = const typename GridOrTreeOrRootT::RootNodeType; +}; + +// ----------------------------> Froward decelerations of random access methods <-------------------------------------- + +template +struct GetValue; +template +struct SetValue; +template +struct SetVoxel; +template +struct GetState; +template +struct GetDim; +template +struct GetLeaf; +template +struct ProbeValue; +template +struct GetNodeInfo; + +// ----------------------------> CheckMode <---------------------------------- + +/// @brief List of different modes for computing for a checksum +enum class CheckMode : uint32_t { Disable = 0, // no computation + Empty = 0, + Half = 1, + Partial = 1, // fast but approximate + Default = 1, // defaults to Partial + Full = 2, // slow but accurate + End = 3, // marks the end of the enum list + StrLen = 9 + End}; + +/// @brief Prints CheckMode enum to a c-string +/// @param dst Destination c-string +/// @param mode CheckMode enum to be converted 
to string +/// @return destinations string @c dst +__hostdev__ inline char* toStr(char *dst, CheckMode mode) +{ + switch (mode){ + case CheckMode::Half: return util::strcpy(dst, "half"); + case CheckMode::Full: return util::strcpy(dst, "full"); + default: return util::strcpy(dst, "disabled"); + } +} + +// ----------------------------> Checksum <---------------------------------- + +/// @brief Class that encapsulates two CRC32 checksums, one for the Grid, Tree and Root node meta data +/// and one for the remaining grid nodes. +class Checksum +{ + /// Three types of checksums: + /// 1) Empty: all 64 bits are on (used to signify a disabled or undefined checksum) + /// 2) Half: Upper 32 bits are on and not all of lower 32 bits are on (lower 32 bits checksum head of grid) + /// 3) Full: Not all of the 64 bits are one (lower 32 bits checksum head of grid and upper 32 bits checksum tail of grid) + union { uint32_t mCRC32[2]; uint64_t mCRC64; };// mCRC32[0] is checksum of Grid, Tree and Root, and mCRC32[1] is checksum of nodes + +public: + + static constexpr uint32_t EMPTY32 = ~uint32_t{0}; + static constexpr uint64_t EMPTY64 = ~uint64_t(0); + + /// @brief default constructor initiates checksum to EMPTY + __hostdev__ Checksum() : mCRC64{EMPTY64} {} + + /// @brief Constructor that allows the two 32bit checksums to be initiated explicitly + /// @param head Initial 32bit CRC checksum of grid, tree and root data + /// @param tail Initial 32bit CRC checksum of all the nodes and blind data + __hostdev__ Checksum(uint32_t head, uint32_t tail) : mCRC32{head, tail} {} + + /// @brief + /// @param checksum + /// @param mode + __hostdev__ Checksum(uint64_t checksum, CheckMode mode = CheckMode::Full) : mCRC64{mode == CheckMode::Disable ? EMPTY64 : checksum} + { + if (mode == CheckMode::Partial) mCRC32[1] = EMPTY32; + } + + /// @brief return the 64 bit checksum of this instance + [[deprecated("Use Checksum::data instead.")]] + __hostdev__ uint64_t checksum() const { return mCRC64; } + [[deprecated("Use Checksum::head and Ckecksum::tail instead.")]] + __hostdev__ uint32_t& checksum(int i) {NANOVDB_ASSERT(i==0 || i==1); return mCRC32[i]; } + [[deprecated("Use Checksum::head and Ckecksum::tail instead.")]] + __hostdev__ uint32_t checksum(int i) const {NANOVDB_ASSERT(i==0 || i==1); return mCRC32[i]; } + + __hostdev__ uint64_t full() const { return mCRC64; } + __hostdev__ uint64_t& full() { return mCRC64; } + __hostdev__ uint32_t head() const { return mCRC32[0]; } + __hostdev__ uint32_t& head() { return mCRC32[0]; } + __hostdev__ uint32_t tail() const { return mCRC32[1]; } + __hostdev__ uint32_t& tail() { return mCRC32[1]; } + + /// @brief return true if the 64 bit checksum is partial, i.e. of head only + [[deprecated("Use Checksum::isHalf instead.")]] + __hostdev__ bool isPartial() const { return mCRC32[0] != EMPTY32 && mCRC32[1] == EMPTY32; } + __hostdev__ bool isHalf() const { return mCRC32[0] != EMPTY32 && mCRC32[1] == EMPTY32; } + + /// @brief return true if the 64 bit checksum is fill, i.e. of both had and nodes + __hostdev__ bool isFull() const { return mCRC64 != EMPTY64 && mCRC32[1] != EMPTY32; } + + /// @brief return true if the 64 bit checksum is disables (unset) + __hostdev__ bool isEmpty() const { return mCRC64 == EMPTY64; } + + __hostdev__ void disable() { mCRC64 = EMPTY64; } + + /// @brief return the mode of the 64 bit checksum + __hostdev__ CheckMode mode() const + { + return mCRC64 == EMPTY64 ? CheckMode::Disable : + mCRC32[1] == EMPTY32 ? 
CheckMode::Partial : CheckMode::Full; + } + + /// @brief return true if the checksums are identical + /// @param rhs other Checksum + __hostdev__ bool operator==(const Checksum &rhs) const {return mCRC64 == rhs.mCRC64;} + + /// @brief return true if the checksums are not identical + /// @param rhs other Checksum + __hostdev__ bool operator!=(const Checksum &rhs) const {return mCRC64 != rhs.mCRC64;} +};// Checksum + +/// @brief Maps 64 bit checksum to CheckMode enum +/// @param checksum 64 bit checksum with two CRC32 codes +/// @return CheckMode enum +__hostdev__ inline CheckMode toCheckMode(const Checksum &checksum){return checksum.mode();} + +// ----------------------------> Grid <-------------------------------------- + +/* + The following class and comment is for internal use only + + Memory layout: + + Grid -> 39 x double (world bbox and affine transformation) + Tree -> Root 3 x ValueType + int32_t + N x Tiles (background,min,max,tileCount + tileCount x Tiles) + + N2 upper InternalNodes each with 2 bit masks, N2 tiles, and min/max values + + N1 lower InternalNodes each with 2 bit masks, N1 tiles, and min/max values + + N0 LeafNodes each with a bit mask, N0 ValueTypes and min/max + + Example layout: ("---" implies it has a custom offset, "..." implies zero or more) + [GridData][TreeData]---[RootData][ROOT TILES...]---[InternalData<5>]---[InternalData<4>]---[LeafData<3>]---[BLINDMETA...]---[BLIND0]---[BLIND1]---etc. +*/ + +/// @brief Struct with all the member data of the Grid (useful during serialization of an openvdb grid) +/// +/// @note The transform is assumed to be affine (so linear) and have uniform scale! So frustum transforms +/// and non-uniform scaling are not supported (primarily because they complicate ray-tracing in index space) +/// +/// @note No client code should (or can) interface with this struct so it can safely be ignored! +struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridData +{ // sizeof(GridData) = 672B + static const int MaxNameSize = 256; // due to NULL termination the maximum length is one less + uint64_t mMagic; // 8B (0) magic to validate it is valid grid data. + Checksum mChecksum; // 8B (8). Checksum of grid buffer. + Version mVersion; // 4B (16) major, minor, and patch version numbers + BitFlags<32> mFlags; // 4B (20). flags for grid. + uint32_t mGridIndex; // 4B (24). Index of this grid in the buffer + uint32_t mGridCount; // 4B (28). Total number of grids in the buffer + uint64_t mGridSize; // 8B (32). byte count of this entire grid occupied in the buffer. + char mGridName[MaxNameSize]; // 256B (40) + Map mMap; // 264B (296). affine transformation between index and world space in both single and double precision + Vec3dBBox mWorldBBox; // 48B (560). floating-point AABB of active values in WORLD SPACE (2 x 3 doubles) + Vec3d mVoxelSize; // 24B (608). size of a voxel in world units + GridClass mGridClass; // 4B (632). + GridType mGridType; // 4B (636). + int64_t mBlindMetadataOffset; // 8B (640). offset to beginning of GridBlindMetaData structures that follow this grid. + uint32_t mBlindMetadataCount; // 4B (648). count of GridBlindMetaData structures that follow this grid. 
+ uint32_t mData0; // 4B (652) unused + uint64_t mData1; // 8B (656) is use for the total number of values indexed by an IndexGrid + uint64_t mData2; // 8B (664) padding to 32 B alignment + /// @brief Use this method to initiate most member data + GridData& operator=(const GridData&) = default; + //__hostdev__ GridData& operator=(const GridData& other){return *util::memcpy(this, &other);} + __hostdev__ void init(std::initializer_list list = {GridFlags::IsBreadthFirst}, + uint64_t gridSize = 0u, + const Map& map = Map(), + GridType gridType = GridType::Unknown, + GridClass gridClass = GridClass::Unknown) + { +#ifdef NANOVDB_USE_NEW_MAGIC_NUMBERS + mMagic = NANOVDB_MAGIC_GRID; +#else + mMagic = NANOVDB_MAGIC_NUMB; +#endif + mChecksum.disable();// all 64 bits ON means checksum is disabled + mVersion = Version(); + mFlags.initMask(list); + mGridIndex = 0u; + mGridCount = 1u; + mGridSize = gridSize; + mGridName[0] = '\0'; + mMap = map; + mWorldBBox = Vec3dBBox();// invalid bbox + mVoxelSize = map.getVoxelSize(); + mGridClass = gridClass; + mGridType = gridType; + mBlindMetadataOffset = mGridSize; // i.e. no blind data + mBlindMetadataCount = 0u; // i.e. no blind data + mData0 = 0u; // zero padding + mData1 = 0u; // only used for index and point grids + mData2 = NANOVDB_MAGIC_GRID; // since version 32.6.0 (will change in the future) + } + /// @brief return true if the magic number and the version are both valid + __hostdev__ bool isValid() const { + // Before v32.6.0: toMagic(mMagic) = MagicType::NanoVDB and mData2 was undefined + // For v32.6.0: toMagic(mMagic) = MagicType::NanoVDB and toMagic(mData2) = MagicType::NanoGrid + // After v32.7.X: toMagic(mMagic) = MagicType::NanoGrid and mData2 will again be undefined + const MagicType magic = toMagic(mMagic); + if (magic == MagicType::NanoGrid || toMagic(mData2) == MagicType::NanoGrid) return true; + bool test = magic == MagicType::NanoVDB;// could be GridData or io::FileHeader + if (test) test = mVersion.isCompatible(); + if (test) test = mGridCount > 0u && mGridIndex < mGridCount; + if (test) test = mGridClass < GridClass::End && mGridType < GridType::End; + return test; + } + // Set and unset various bit flags + __hostdev__ void setMinMaxOn(bool on = true) { mFlags.setMask(GridFlags::HasMinMax, on); } + __hostdev__ void setBBoxOn(bool on = true) { mFlags.setMask(GridFlags::HasBBox, on); } + __hostdev__ void setLongGridNameOn(bool on = true) { mFlags.setMask(GridFlags::HasLongGridName, on); } + __hostdev__ void setAverageOn(bool on = true) { mFlags.setMask(GridFlags::HasAverage, on); } + __hostdev__ void setStdDeviationOn(bool on = true) { mFlags.setMask(GridFlags::HasStdDeviation, on); } + __hostdev__ bool setGridName(const char* src) + { + const bool success = (util::strncpy(mGridName, src, MaxNameSize)[MaxNameSize-1] == '\0'); + if (!success) mGridName[MaxNameSize-1] = '\0'; + return success; // returns true if input grid name is NOT longer than MaxNameSize characters + } + // Affine transformations based on double precision + template + __hostdev__ Vec3T applyMap(const Vec3T& xyz) const { return mMap.applyMap(xyz); } // Pos: index -> world + template + __hostdev__ Vec3T applyInverseMap(const Vec3T& xyz) const { return mMap.applyInverseMap(xyz); } // Pos: world -> index + template + __hostdev__ Vec3T applyJacobian(const Vec3T& xyz) const { return mMap.applyJacobian(xyz); } // Dir: index -> world + template + __hostdev__ Vec3T applyInverseJacobian(const Vec3T& xyz) const { return mMap.applyInverseJacobian(xyz); } // Dir: world -> index + 
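// Editorial sketch (not part of this patch): for the uniform-scale Map(s, t) constructed earlier,
// the double-precision wrappers used just above reduce to index->world p_w = s*p_i + t
// (applyMap) and world->index p_i = (p_w - t)/s (applyInverseMap). toy_map_round_trip is a
// hypothetical free function that demonstrates the round trip with plain arithmetic.
inline bool toy_map_round_trip()
{
    const double s = 0.5, t[3] = {1.0, 2.0, 3.0};    // voxel size 0.5, translation (1,2,3)
    const double ijk[3] = {10.0, 20.0, 30.0};        // a point in index space
    bool ok = true;
    for (int i = 0; i < 3; ++i) {
        const double world = s * ijk[i] + t[i];      // applyMap  (index -> world)
        const double index = (world - t[i]) / s;     // applyInverseMap (world -> index)
        ok = ok && index == ijk[i];                  // exact for these power-of-two values
    }
    return ok;                                       // true
}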
template + __hostdev__ Vec3T applyIJT(const Vec3T& xyz) const { return mMap.applyIJT(xyz); } + // Affine transformations based on single precision + template + __hostdev__ Vec3T applyMapF(const Vec3T& xyz) const { return mMap.applyMapF(xyz); } // Pos: index -> world + template + __hostdev__ Vec3T applyInverseMapF(const Vec3T& xyz) const { return mMap.applyInverseMapF(xyz); } // Pos: world -> index + template + __hostdev__ Vec3T applyJacobianF(const Vec3T& xyz) const { return mMap.applyJacobianF(xyz); } // Dir: index -> world + template + __hostdev__ Vec3T applyInverseJacobianF(const Vec3T& xyz) const { return mMap.applyInverseJacobianF(xyz); } // Dir: world -> index + template + __hostdev__ Vec3T applyIJTF(const Vec3T& xyz) const { return mMap.applyIJTF(xyz); } + + // @brief Return a non-const void pointer to the tree + __hostdev__ void* treePtr() { return this + 1; }// TreeData is always right after GridData + + // @brief Return a const void pointer to the tree + __hostdev__ const void* treePtr() const { return this + 1; }// TreeData is always right after GridData + + /// @brief Return a non-const void pointer to the first node at @c LEVEL + /// @tparam LEVEL Level of the node. LEVEL 0 means leaf node and LEVEL 3 means root node + template + __hostdev__ const void* nodePtr() const + { + static_assert(LEVEL >= 0 && LEVEL <= 3, "invalid LEVEL template parameter"); + const void *treeData = this + 1;// TreeData is always right after GridData + const uint64_t nodeOffset = *util::PtrAdd(treeData, 8*LEVEL);// skip LEVEL uint64_t + return nodeOffset ? util::PtrAdd(treeData, nodeOffset) : nullptr; + } + + /// @brief Return a non-const void pointer to the first node at @c LEVEL + /// @tparam LEVEL of the node. LEVEL 0 means leaf node and LEVEL 3 means root node + /// @warning If not nodes exist at @c LEVEL NULL is returned + template + __hostdev__ void* nodePtr() + { + static_assert(LEVEL >= 0 && LEVEL <= 3, "invalid LEVEL template parameter"); + void *treeData = this + 1;// TreeData is always right after GridData + const uint64_t nodeOffset = *util::PtrAdd(treeData, 8*LEVEL);// skip LEVEL uint64_t + return nodeOffset ? util::PtrAdd(treeData, nodeOffset) : nullptr; + } + + /// @brief Return number of nodes at @c LEVEL + /// @tparam Level of the node. LEVEL 0 means leaf node and LEVEL 2 means upper node + template + __hostdev__ uint32_t nodeCount() const + { + static_assert(LEVEL >= 0 && LEVEL < 3, "invalid LEVEL template parameter"); + return *util::PtrAdd(this + 1, 4*(8 + LEVEL));// TreeData is always right after GridData + } + + /// @brief Returns a const reference to the blindMetaData at the specified linear offset. + /// + /// @warning The linear offset is assumed to be in the valid range + __hostdev__ const GridBlindMetaData* blindMetaData(uint32_t n) const + { + NANOVDB_ASSERT(n < mBlindMetadataCount); + return util::PtrAdd(this, mBlindMetadataOffset) + n; + } + + __hostdev__ const char* gridName() const + { + if (mFlags.isMaskOn(GridFlags::HasLongGridName)) {// search for first blind meta data that contains a name + NANOVDB_ASSERT(mBlindMetadataCount > 0); + for (uint32_t i = 0; i < mBlindMetadataCount; ++i) { + const auto* metaData = this->blindMetaData(i);// EXTREMELY important to be a pointer + if (metaData->mDataClass == GridBlindDataClass::GridName) { + NANOVDB_ASSERT(metaData->mDataType == GridType::Unknown); + return metaData->template getBlindData(); + } + } + NANOVDB_ASSERT(false); // should never hit this! 
+ } + return mGridName; + } + + /// @brief Return memory usage in bytes for this class only. + __hostdev__ static uint64_t memUsage() { return sizeof(GridData); } + + /// @brief return AABB of active values in world space + __hostdev__ const Vec3dBBox& worldBBox() const { return mWorldBBox; } + + /// @brief return AABB of active values in index space + __hostdev__ const CoordBBox& indexBBox() const {return *(const CoordBBox*)(this->nodePtr<3>());} + + /// @brief return the root table has size + __hostdev__ uint32_t rootTableSize() const + { + const void *root = this->nodePtr<3>(); + return root ? *util::PtrAdd(root, sizeof(CoordBBox)) : 0u; + } + + /// @brief test if the grid is empty, e.i the root table has size 0 + /// @return true if this grid contains not data whatsoever + __hostdev__ bool isEmpty() const {return this->rootTableSize() == 0u;} + + /// @brief return true if RootData follows TreeData in memory without any extra padding + /// @details TreeData is always following right after GridData, but the same might not be true for RootData + __hostdev__ bool isRootConnected() const { return *(const uint64_t*)((const char*)(this + 1) + 24) == 64u;} +}; // GridData + +// Forward declaration of accelerated random access class +template +class ReadAccessor; + +template +using DefaultReadAccessor = ReadAccessor; + +/// @brief Highest level of the data structure. Contains a tree and a world->index +/// transform (that currently only supports uniform scaling and translation). +/// +/// @note This the API of this class to interface with client code +template +class Grid : public GridData +{ +public: + using TreeType = TreeT; + using RootType = typename TreeT::RootType; + using RootNodeType = RootType; + using UpperNodeType = typename RootNodeType::ChildNodeType; + using LowerNodeType = typename UpperNodeType::ChildNodeType; + using LeafNodeType = typename RootType::LeafNodeType; + using DataType = GridData; + using ValueType = typename TreeT::ValueType; + using BuildType = typename TreeT::BuildType; // in rare cases BuildType != ValueType, e.g. then BuildType = ValueMask and ValueType = bool + using CoordType = typename TreeT::CoordType; + using AccessorType = DefaultReadAccessor; + + /// @brief Disallow constructions, copy and assignment + /// + /// @note Only a Serializer, defined elsewhere, can instantiate this class + Grid(const Grid&) = delete; + Grid& operator=(const Grid&) = delete; + ~Grid() = delete; + + __hostdev__ Version version() const { return DataType::mVersion; } + + __hostdev__ DataType* data() { return reinterpret_cast(this); } + + __hostdev__ const DataType* data() const { return reinterpret_cast(this); } + + /// @brief Return memory usage in bytes for this class only. + //__hostdev__ static uint64_t memUsage() { return sizeof(GridData); } + + /// @brief Return the memory footprint of the entire grid, i.e. 
including all nodes and blind data + __hostdev__ uint64_t gridSize() const { return DataType::mGridSize; } + + /// @brief Return index of this grid in the buffer + __hostdev__ uint32_t gridIndex() const { return DataType::mGridIndex; } + + /// @brief Return total number of grids in the buffer + __hostdev__ uint32_t gridCount() const { return DataType::mGridCount; } + + /// @brief @brief Return the total number of values indexed by this IndexGrid + /// + /// @note This method is only defined for IndexGrid = NanoGrid + template + __hostdev__ typename util::enable_if::is_index, const uint64_t&>::type + valueCount() const { return DataType::mData1; } + + /// @brief @brief Return the total number of points indexed by this PointGrid + /// + /// @note This method is only defined for PointGrid = NanoGrid + template + __hostdev__ typename util::enable_if::value, const uint64_t&>::type + pointCount() const { return DataType::mData1; } + + /// @brief Return a const reference to the tree + __hostdev__ const TreeT& tree() const { return *reinterpret_cast(this->treePtr()); } + + /// @brief Return a non-const reference to the tree + __hostdev__ TreeT& tree() { return *reinterpret_cast(this->treePtr()); } + + /// @brief Return a new instance of a ReadAccessor used to access values in this grid + __hostdev__ AccessorType getAccessor() const { return AccessorType(this->tree().root()); } + + /// @brief Return a const reference to the size of a voxel in world units + __hostdev__ const Vec3d& voxelSize() const { return DataType::mVoxelSize; } + + /// @brief Return a const reference to the Map for this grid + __hostdev__ const Map& map() const { return DataType::mMap; } + + /// @brief world to index space transformation + template + __hostdev__ Vec3T worldToIndex(const Vec3T& xyz) const { return this->applyInverseMap(xyz); } + + /// @brief index to world space transformation + template + __hostdev__ Vec3T indexToWorld(const Vec3T& xyz) const { return this->applyMap(xyz); } + + /// @brief transformation from index space direction to world space direction + /// @warning assumes dir to be normalized + template + __hostdev__ Vec3T indexToWorldDir(const Vec3T& dir) const { return this->applyJacobian(dir); } + + /// @brief transformation from world space direction to index space direction + /// @warning assumes dir to be normalized + template + __hostdev__ Vec3T worldToIndexDir(const Vec3T& dir) const { return this->applyInverseJacobian(dir); } + + /// @brief transform the gradient from index space to world space. + /// @details Applies the inverse jacobian transform map. 
+ template + __hostdev__ Vec3T indexToWorldGrad(const Vec3T& grad) const { return this->applyIJT(grad); } + + /// @brief world to index space transformation + template + __hostdev__ Vec3T worldToIndexF(const Vec3T& xyz) const { return this->applyInverseMapF(xyz); } + + /// @brief index to world space transformation + template + __hostdev__ Vec3T indexToWorldF(const Vec3T& xyz) const { return this->applyMapF(xyz); } + + /// @brief transformation from index space direction to world space direction + /// @warning assumes dir to be normalized + template + __hostdev__ Vec3T indexToWorldDirF(const Vec3T& dir) const { return this->applyJacobianF(dir); } + + /// @brief transformation from world space direction to index space direction + /// @warning assumes dir to be normalized + template + __hostdev__ Vec3T worldToIndexDirF(const Vec3T& dir) const { return this->applyInverseJacobianF(dir); } + + /// @brief Transforms the gradient from index space to world space. + /// @details Applies the inverse jacobian transform map. + template + __hostdev__ Vec3T indexToWorldGradF(const Vec3T& grad) const { return DataType::applyIJTF(grad); } + + /// @brief Computes a AABB of active values in world space + //__hostdev__ const Vec3dBBox& worldBBox() const { return DataType::mWorldBBox; } + + /// @brief Computes a AABB of active values in index space + /// + /// @note This method is returning a floating point bounding box and not a CoordBBox. This makes + /// it more useful for clipping rays. + //__hostdev__ const BBox& indexBBox() const { return this->tree().bbox(); } + + /// @brief Return the total number of active voxels in this tree. + __hostdev__ uint64_t activeVoxelCount() const { return this->tree().activeVoxelCount(); } + + /// @brief Methods related to the classification of this grid + __hostdev__ bool isValid() const { return DataType::isValid(); } + __hostdev__ const GridType& gridType() const { return DataType::mGridType; } + __hostdev__ const GridClass& gridClass() const { return DataType::mGridClass; } + __hostdev__ bool isLevelSet() const { return DataType::mGridClass == GridClass::LevelSet; } + __hostdev__ bool isFogVolume() const { return DataType::mGridClass == GridClass::FogVolume; } + __hostdev__ bool isStaggered() const { return DataType::mGridClass == GridClass::Staggered; } + __hostdev__ bool isPointIndex() const { return DataType::mGridClass == GridClass::PointIndex; } + __hostdev__ bool isGridIndex() const { return DataType::mGridClass == GridClass::IndexGrid; } + __hostdev__ bool isPointData() const { return DataType::mGridClass == GridClass::PointData; } + __hostdev__ bool isMask() const { return DataType::mGridClass == GridClass::Topology; } + __hostdev__ bool isUnknown() const { return DataType::mGridClass == GridClass::Unknown; } + __hostdev__ bool hasMinMax() const { return DataType::mFlags.isMaskOn(GridFlags::HasMinMax); } + __hostdev__ bool hasBBox() const { return DataType::mFlags.isMaskOn(GridFlags::HasBBox); } + __hostdev__ bool hasLongGridName() const { return DataType::mFlags.isMaskOn(GridFlags::HasLongGridName); } + __hostdev__ bool hasAverage() const { return DataType::mFlags.isMaskOn(GridFlags::HasAverage); } + __hostdev__ bool hasStdDeviation() const { return DataType::mFlags.isMaskOn(GridFlags::HasStdDeviation); } + __hostdev__ bool isBreadthFirst() const { return DataType::mFlags.isMaskOn(GridFlags::IsBreadthFirst); } + + /// @brief return true if the specified node type is layed out breadth-first in memory and has a fixed size. 
+ /// This allows for sequential access to the nodes. + template + __hostdev__ bool isSequential() const { return NodeT::FIXED_SIZE && this->isBreadthFirst(); } + + /// @brief return true if the specified node level is layed out breadth-first in memory and has a fixed size. + /// This allows for sequential access to the nodes. + template + __hostdev__ bool isSequential() const { return NodeTrait::type::FIXED_SIZE && this->isBreadthFirst(); } + + /// @brief return true if nodes at all levels can safely be accessed with simple linear offsets + __hostdev__ bool isSequential() const { return UpperNodeType::FIXED_SIZE && LowerNodeType::FIXED_SIZE && LeafNodeType::FIXED_SIZE && this->isBreadthFirst(); } + + /// @brief Return a c-string with the name of this grid + __hostdev__ const char* gridName() const { return DataType::gridName(); } + + /// @brief Return a c-string with the name of this grid, truncated to 255 characters + __hostdev__ const char* shortGridName() const { return DataType::mGridName; } + + /// @brief Return checksum of the grid buffer. + __hostdev__ const Checksum& checksum() const { return DataType::mChecksum; } + + /// @brief Return true if this grid is empty, i.e. contains no values or nodes. + //__hostdev__ bool isEmpty() const { return this->tree().isEmpty(); } + + /// @brief Return the count of blind-data encoded in this grid + __hostdev__ uint32_t blindDataCount() const { return DataType::mBlindMetadataCount; } + + /// @brief Return the index of the first blind data with specified name if found, otherwise -1. + __hostdev__ int findBlindData(const char* name) const; + + /// @brief Return the index of the first blind data with specified semantic if found, otherwise -1. + __hostdev__ int findBlindDataForSemantic(GridBlindDataSemantic semantic) const; + + /// @brief Returns a const pointer to the blindData at the specified linear offset. + /// + /// @warning Pointer might be NULL and the linear offset is assumed to be in the valid range + // this method is deprecated !!!! + [[deprecated("Use Grid::getBlindData() instead.")]] + __hostdev__ const void* blindData(uint32_t n) const + { + printf("\nnanovdb::Grid::blindData is unsafe and hence deprecated! 
Please use nanovdb::Grid::getBlindData instead.\n\n"); + NANOVDB_ASSERT(n < DataType::mBlindMetadataCount); + return this->blindMetaData(n).blindData(); + } + + template + __hostdev__ const BlindDataT* getBlindData(uint32_t n) const + { + if (n >= DataType::mBlindMetadataCount) return nullptr;// index is out of bounds + return this->blindMetaData(n).template getBlindData();// NULL if mismatching BlindDataT + } + + template + __hostdev__ BlindDataT* getBlindData(uint32_t n) + { + if (n >= DataType::mBlindMetadataCount) return nullptr;// index is out of bounds + return const_cast(this->blindMetaData(n).template getBlindData());// NULL if mismatching BlindDataT + } + + __hostdev__ const GridBlindMetaData& blindMetaData(uint32_t n) const { return *DataType::blindMetaData(n); } + +private: + static_assert(sizeof(GridData) % NANOVDB_DATA_ALIGNMENT == 0, "sizeof(GridData) is misaligned"); +}; // Class Grid + +template +__hostdev__ int Grid::findBlindDataForSemantic(GridBlindDataSemantic semantic) const +{ + for (uint32_t i = 0, n = this->blindDataCount(); i < n; ++i) { + if (this->blindMetaData(i).mSemantic == semantic) + return int(i); + } + return -1; +} + +template +__hostdev__ int Grid::findBlindData(const char* name) const +{ + auto test = [&](int n) { + const char* str = this->blindMetaData(n).mName; + for (int i = 0; i < GridBlindMetaData::MaxNameSize; ++i) { + if (name[i] != str[i]) + return false; + if (name[i] == '\0' && str[i] == '\0') + return true; + } + return true; // all len characters matched + }; + for (int i = 0, n = this->blindDataCount(); i < n; ++i) + if (test(i)) + return i; + return -1; +} + +// ----------------------------> Tree <-------------------------------------- + +struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) TreeData +{ // sizeof(TreeData) == 64B + int64_t mNodeOffset[4];// 32B, byte offset from this tree to first leaf, lower, upper and root node. If mNodeCount[N]=0 => mNodeOffset[N]==mNodeOffset[N+1] + uint32_t mNodeCount[3]; // 12B, total number of nodes of type: leaf, lower internal, upper internal + uint32_t mTileCount[3]; // 12B, total number of active tile values at the lower internal, upper internal and root node levels + uint64_t mVoxelCount; // 8B, total number of active voxels in the root and all its child nodes. + // No padding since it's always 32B aligned + //__hostdev__ TreeData& operator=(const TreeData& other){return *util::memcpy(this, &other);} + TreeData& operator=(const TreeData&) = default; + __hostdev__ void setRoot(const void* root) { + NANOVDB_ASSERT(root); + mNodeOffset[3] = util::PtrDiff(root, this); + } + + /// @brief Get a non-const void pointer to the root node (never NULL) + __hostdev__ void* getRoot() { return util::PtrAdd(this, mNodeOffset[3]); } + + /// @brief Get a const void pointer to the root node (never NULL) + __hostdev__ const void* getRoot() const { return util::PtrAdd(this, mNodeOffset[3]); } + + template + __hostdev__ void setFirstNode(const NodeT* node) {mNodeOffset[NodeT::LEVEL] = (node ? util::PtrDiff(node, this) : 0);} + + /// @brief Return true if the root is empty, i.e. has not child nodes or constant tiles + __hostdev__ bool isEmpty() const {return mNodeOffset[3] ? *util::PtrAdd(this, mNodeOffset[3] + sizeof(CoordBBox)) == 0 : true;} + + /// @brief Return the index bounding box of all the active values in this tree, i.e. in all nodes of the tree + __hostdev__ CoordBBox bbox() const {return mNodeOffset[3] ? 
*util::PtrAdd(this, mNodeOffset[3]) : CoordBBox();} + + /// @brief return true if RootData is layout out immediately after TreeData in memory + __hostdev__ bool isRootNext() const {return mNodeOffset[3] ? mNodeOffset[3] == sizeof(TreeData) : false; } +};// TreeData + +// ----------------------------> GridTree <-------------------------------------- + +/// @brief defines a tree type from a grid type while preserving constness +template +struct GridTree +{ + using Type = typename GridT::TreeType; + using type = typename GridT::TreeType; +}; +template +struct GridTree +{ + using Type = const typename GridT::TreeType; + using type = const typename GridT::TreeType; +}; + +// ----------------------------> Tree <-------------------------------------- + +/// @brief VDB Tree, which is a thin wrapper around a RootNode. +template +class Tree : public TreeData +{ + static_assert(RootT::LEVEL == 3, "Tree depth is not supported"); + static_assert(RootT::ChildNodeType::LOG2DIM == 5, "Tree configuration is not supported"); + static_assert(RootT::ChildNodeType::ChildNodeType::LOG2DIM == 4, "Tree configuration is not supported"); + static_assert(RootT::LeafNodeType::LOG2DIM == 3, "Tree configuration is not supported"); + +public: + using DataType = TreeData; + using RootType = RootT; + using RootNodeType = RootT; + using UpperNodeType = typename RootNodeType::ChildNodeType; + using LowerNodeType = typename UpperNodeType::ChildNodeType; + using LeafNodeType = typename RootType::LeafNodeType; + using ValueType = typename RootT::ValueType; + using BuildType = typename RootT::BuildType; // in rare cases BuildType != ValueType, e.g. then BuildType = ValueMask and ValueType = bool + using CoordType = typename RootT::CoordType; + using AccessorType = DefaultReadAccessor; + + using Node3 = RootT; + using Node2 = typename RootT::ChildNodeType; + using Node1 = typename Node2::ChildNodeType; + using Node0 = LeafNodeType; + + /// @brief This class cannot be constructed or deleted + Tree() = delete; + Tree(const Tree&) = delete; + Tree& operator=(const Tree&) = delete; + ~Tree() = delete; + + __hostdev__ DataType* data() { return reinterpret_cast(this); } + + __hostdev__ const DataType* data() const { return reinterpret_cast(this); } + + /// @brief return memory usage in bytes for the class + __hostdev__ static uint64_t memUsage() { return sizeof(DataType); } + + __hostdev__ RootT& root() {return *reinterpret_cast(DataType::getRoot());} + + __hostdev__ const RootT& root() const {return *reinterpret_cast(DataType::getRoot());} + + __hostdev__ AccessorType getAccessor() const { return AccessorType(this->root()); } + + /// @brief Return the value of the given voxel (regardless of state or location in the tree.) + __hostdev__ ValueType getValue(const CoordType& ijk) const { return this->root().getValue(ijk); } + __hostdev__ ValueType getValue(int i, int j, int k) const { return this->root().getValue(CoordType(i, j, k)); } + + /// @brief Return the active state of the given voxel (regardless of state or location in the tree.) + __hostdev__ bool isActive(const CoordType& ijk) const { return this->root().isActive(ijk); } + + /// @brief Return true if this tree is empty, i.e. contains no values or nodes + //__hostdev__ bool isEmpty() const { return this->root().isEmpty(); } + + /// @brief Combines the previous two methods in a single call + __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const { return this->root().probeValue(ijk, v); } + + /// @brief Return a const reference to the background value. 
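// Editorial sketch (not part of this patch): a typical read path through the Tree declared
// above. tree(), getValue, probeValue and getAccessor are the calls declared in this header;
// the accessor's getValue, and the NanoGrid<float>/Coord aliases, are assumed from the rest of
// this header, and toy_tree_read itself is a hypothetical free function.
inline float toy_tree_read(const nanovdb::NanoGrid<float>& grid, const nanovdb::Coord& ijk)
{
    float v = 0.0f;
    const bool active = grid.tree().probeValue(ijk, v);  // value and active state in one call
    auto acc = grid.getAccessor();                        // ReadAccessor caches visited nodes,
                                                          // so prefer it for many nearby lookups
    return active ? v : acc.getValue(ijk);                // both equal grid.tree().getValue(ijk)
}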
+ __hostdev__ const ValueType& background() const { return this->root().background(); } + + /// @brief Sets the extrema values of all the active values in this tree, i.e. in all nodes of the tree + __hostdev__ void extrema(ValueType& min, ValueType& max) const; + + /// @brief Return a const reference to the index bounding box of all the active values in this tree, i.e. in all nodes of the tree + //__hostdev__ const BBox& bbox() const { return this->root().bbox(); } + + /// @brief Return the total number of active voxels in this tree. + __hostdev__ uint64_t activeVoxelCount() const { return DataType::mVoxelCount; } + + /// @brief Return the total number of active tiles at the specified level of the tree. + /// + /// @details level = 1,2,3 corresponds to active tile count in lower internal nodes, upper + /// internal nodes, and the root level. Note active values at the leaf level are + /// referred to as active voxels (see activeVoxelCount defined above). + __hostdev__ const uint32_t& activeTileCount(uint32_t level) const + { + NANOVDB_ASSERT(level > 0 && level <= 3); // 1, 2, or 3 + return DataType::mTileCount[level - 1]; + } + + template + __hostdev__ uint32_t nodeCount() const + { + static_assert(NodeT::LEVEL < 3, "Invalid NodeT"); + return DataType::mNodeCount[NodeT::LEVEL]; + } + + __hostdev__ uint32_t nodeCount(int level) const + { + NANOVDB_ASSERT(level < 3); + return DataType::mNodeCount[level]; + } + + __hostdev__ uint32_t totalNodeCount() const + { + return DataType::mNodeCount[0] + DataType::mNodeCount[1] + DataType::mNodeCount[2]; + } + + /// @brief return a pointer to the first node of the specified type + /// + /// @warning Note it may return NULL if no nodes exist + template + __hostdev__ NodeT* getFirstNode() + { + const int64_t nodeOffset = DataType::mNodeOffset[NodeT::LEVEL]; + return nodeOffset ? util::PtrAdd(this, nodeOffset) : nullptr; + } + + /// @brief return a const pointer to the first node of the specified type + /// + /// @warning Note it may return NULL if no nodes exist + template + __hostdev__ const NodeT* getFirstNode() const + { + const int64_t nodeOffset = DataType::mNodeOffset[NodeT::LEVEL]; + return nodeOffset ? util::PtrAdd(this, nodeOffset) : nullptr; + } + + /// @brief return a pointer to the first node at the specified level + /// + /// @warning Note it may return NULL if no nodes exist + template + __hostdev__ typename NodeTrait::type* getFirstNode() + { + return this->template getFirstNode::type>(); + } + + /// @brief return a const pointer to the first node of the specified level + /// + /// @warning Note it may return NULL if no nodes exist + template + __hostdev__ const typename NodeTrait::type* getFirstNode() const + { + return this->template getFirstNode::type>(); + } + + /// @brief Template specializations of getFirstNode + __hostdev__ LeafNodeType* getFirstLeaf() { return this->getFirstNode(); } + __hostdev__ const LeafNodeType* getFirstLeaf() const { return this->getFirstNode(); } + __hostdev__ typename NodeTrait::type* getFirstLower() { return this->getFirstNode<1>(); } + __hostdev__ const typename NodeTrait::type* getFirstLower() const { return this->getFirstNode<1>(); } + __hostdev__ typename NodeTrait::type* getFirstUpper() { return this->getFirstNode<2>(); } + __hostdev__ const typename NodeTrait::type* getFirstUpper() const { return this->getFirstNode<2>(); } + + template + __hostdev__ auto get(const CoordType& ijk, ArgsT&&... 
args) const + { + return this->root().template get(ijk, args...); + } + + template + __hostdev__ auto set(const CoordType& ijk, ArgsT&&... args) + { + return this->root().template set(ijk, args...); + } + +private: + static_assert(sizeof(DataType) % NANOVDB_DATA_ALIGNMENT == 0, "sizeof(TreeData) is misaligned"); + +}; // Tree class + +template +__hostdev__ void Tree::extrema(ValueType& min, ValueType& max) const +{ + min = this->root().minimum(); + max = this->root().maximum(); +} + +// --------------------------> RootData <------------------------------------ + +/// @brief Struct with all the member data of the RootNode (useful during serialization of an openvdb RootNode) +/// +/// @note No client code should (or can) interface with this struct so it can safely be ignored! +template +struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) RootData +{ + using ValueT = typename ChildT::ValueType; + using BuildT = typename ChildT::BuildType; // in rare cases BuildType != ValueType, e.g. then BuildType = ValueMask and ValueType = bool + using CoordT = typename ChildT::CoordType; + using StatsT = typename ChildT::FloatType; + static constexpr bool FIXED_SIZE = false; + + /// @brief Return a key based on the coordinates of a voxel +#ifdef NANOVDB_USE_SINGLE_ROOT_KEY + using KeyT = uint64_t; + template + __hostdev__ static KeyT CoordToKey(const CoordType& ijk) + { + static_assert(sizeof(CoordT) == sizeof(CoordType), "Mismatching sizeof"); + static_assert(32 - ChildT::TOTAL <= 21, "Cannot use 64 bit root keys"); + return (KeyT(uint32_t(ijk[2]) >> ChildT::TOTAL)) | // z is the lower 21 bits + (KeyT(uint32_t(ijk[1]) >> ChildT::TOTAL) << 21) | // y is the middle 21 bits + (KeyT(uint32_t(ijk[0]) >> ChildT::TOTAL) << 42); // x is the upper 21 bits + } + __hostdev__ static CoordT KeyToCoord(const KeyT& key) + { + static constexpr uint64_t MASK = (1u << 21) - 1; // used to mask out 21 lower bits + return CoordT(((key >> 42) & MASK) << ChildT::TOTAL, // x are the upper 21 bits + ((key >> 21) & MASK) << ChildT::TOTAL, // y are the middle 21 bits + (key & MASK) << ChildT::TOTAL); // z are the lower 21 bits + } +#else + using KeyT = CoordT; + __hostdev__ static KeyT CoordToKey(const CoordT& ijk) { return ijk & ~ChildT::MASK; } + __hostdev__ static CoordT KeyToCoord(const KeyT& key) { return key; } +#endif + math::BBox mBBox; // 24B. AABB of active values in index space. + uint32_t mTableSize; // 4B. number of tiles and child pointers in the root node + + ValueT mBackground; // background value, i.e. value of any unset voxel + ValueT mMinimum; // typically 4B, minimum of all the active values + ValueT mMaximum; // typically 4B, maximum of all the active values + StatsT mAverage; // typically 4B, average of all the active values in this node and its child nodes + StatsT mStdDevi; // typically 4B, standard deviation of all the active values in this node and its child nodes + + /// @brief Return padding of this class in bytes, due to aliasing and 32B alignment + /// + /// @note The extra bytes are not necessarily at the end, but can come from aliasing of individual data members. 
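// Editorial sketch (not part of this patch): how the single 64-bit root key above is packed.
// In the default 5-4-3 tree configuration ChildT::TOTAL == 12 (an upper node spans
// 32*16*8 = 4096 voxels per axis), so the low 12 bits of each coordinate are dropped and the
// remaining bits fit the three 21-bit fields. toy_root_key is a hypothetical name; the shift
// amount is passed in so the sketch does not hard-code that configuration.
inline uint64_t toy_root_key(int32_t x, int32_t y, int32_t z, uint32_t total = 12)
{
    return (uint64_t(uint32_t(z) >> total))       |  // z -> bits  0..20
           (uint64_t(uint32_t(y) >> total) << 21) |  // y -> bits 21..41
           (uint64_t(uint32_t(x) >> total) << 42);   // x -> bits 42..62
}
// e.g. toy_root_key(4096, 0, 8192) == (uint64_t(1) << 42) | 2 : x contributes 1 in the top
// field and z contributes 2 in the bottom field.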
+ __hostdev__ static constexpr uint32_t padding() + { + return sizeof(RootData) - (24 + 4 + 3 * sizeof(ValueT) + 2 * sizeof(StatsT)); + } + + struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) Tile + { + template + __hostdev__ void setChild(const CoordType& k, const void* ptr, const RootData* data) + { + key = CoordToKey(k); + state = false; + child = util::PtrDiff(ptr, data); + } + template + __hostdev__ void setValue(const CoordType& k, bool s, const ValueType& v) + { + key = CoordToKey(k); + state = s; + value = v; + child = 0; + } + __hostdev__ bool isChild() const { return child != 0; } + __hostdev__ bool isValue() const { return child == 0; } + __hostdev__ bool isActive() const { return child == 0 && state; } + __hostdev__ CoordT origin() const { return KeyToCoord(key); } + KeyT key; // NANOVDB_USE_SINGLE_ROOT_KEY ? 8B : 12B + int64_t child; // 8B. signed byte offset from this node to the child node. 0 means it is a constant tile, so use value. + uint32_t state; // 4B. state of tile value + ValueT value; // value of tile (i.e. no child node) + }; // Tile + + /// @brief Returns a non-const reference to the tile at the specified linear offset. + /// + /// @warning The linear offset is assumed to be in the valid range + __hostdev__ const Tile* tile(uint32_t n) const + { + NANOVDB_ASSERT(n < mTableSize); + return reinterpret_cast(this + 1) + n; + } + __hostdev__ Tile* tile(uint32_t n) + { + NANOVDB_ASSERT(n < mTableSize); + return reinterpret_cast(this + 1) + n; + } + + __hostdev__ Tile* probeTile(const CoordT& ijk) + { +#if 1 // switch between linear and binary seach + const auto key = CoordToKey(ijk); + for (Tile *p = reinterpret_cast(this + 1), *q = p + mTableSize; p < q; ++p) + if (p->key == key) + return p; + return nullptr; +#else // do not enable binary search if tiles are not guaranteed to be sorted!!!!!! + int32_t low = 0, high = mTableSize; // low is inclusive and high is exclusive + while (low != high) { + int mid = low + ((high - low) >> 1); + const Tile* tile = &tiles[mid]; + if (tile->key == key) { + return tile; + } else if (tile->key < key) { + low = mid + 1; + } else { + high = mid; + } + } + return nullptr; +#endif + } + + __hostdev__ inline const Tile* probeTile(const CoordT& ijk) const + { + return const_cast(this)->probeTile(ijk); + } + + /// @brief Returns a const reference to the child node in the specified tile. 
+ /// + /// @warning A child node is assumed to exist in the specified tile + __hostdev__ ChildT* getChild(const Tile* tile) + { + NANOVDB_ASSERT(tile->child); + return util::PtrAdd(this, tile->child); + } + __hostdev__ const ChildT* getChild(const Tile* tile) const + { + NANOVDB_ASSERT(tile->child); + return util::PtrAdd(this, tile->child); + } + + __hostdev__ const ValueT& getMin() const { return mMinimum; } + __hostdev__ const ValueT& getMax() const { return mMaximum; } + __hostdev__ const StatsT& average() const { return mAverage; } + __hostdev__ const StatsT& stdDeviation() const { return mStdDevi; } + + __hostdev__ void setMin(const ValueT& v) { mMinimum = v; } + __hostdev__ void setMax(const ValueT& v) { mMaximum = v; } + __hostdev__ void setAvg(const StatsT& v) { mAverage = v; } + __hostdev__ void setDev(const StatsT& v) { mStdDevi = v; } + + /// @brief This class cannot be constructed or deleted + RootData() = delete; + RootData(const RootData&) = delete; + RootData& operator=(const RootData&) = delete; + ~RootData() = delete; +}; // RootData + +// --------------------------> RootNode <------------------------------------ + +/// @brief Top-most node of the VDB tree structure. +template +class RootNode : public RootData +{ +public: + using DataType = RootData; + using ChildNodeType = ChildT; + using RootType = RootNode; // this allows RootNode to behave like a Tree + using RootNodeType = RootType; + using UpperNodeType = ChildT; + using LowerNodeType = typename UpperNodeType::ChildNodeType; + using LeafNodeType = typename ChildT::LeafNodeType; + using ValueType = typename DataType::ValueT; + using FloatType = typename DataType::StatsT; + using BuildType = typename DataType::BuildT; // in rare cases BuildType != ValueType, e.g. then BuildType = ValueMask and ValueType = bool + + using CoordType = typename ChildT::CoordType; + using BBoxType = math::BBox; + using AccessorType = DefaultReadAccessor; + using Tile = typename DataType::Tile; + static constexpr bool FIXED_SIZE = DataType::FIXED_SIZE; + + static constexpr uint32_t LEVEL = 1 + ChildT::LEVEL; // level 0 = leaf + + template + class BaseIter + { + protected: + using DataT = typename util::match_const::type; + using TileT = typename util::match_const::type; + DataT* mData; + uint32_t mPos, mSize; + __hostdev__ BaseIter(DataT* data = nullptr, uint32_t n = 0) + : mData(data) + , mPos(0) + , mSize(n) + { + } + + public: + __hostdev__ operator bool() const { return mPos < mSize; } + __hostdev__ uint32_t pos() const { return mPos; } + __hostdev__ void next() { ++mPos; } + __hostdev__ TileT* tile() const { return mData->tile(mPos); } + __hostdev__ CoordType getOrigin() const + { + NANOVDB_ASSERT(*this); + return this->tile()->origin(); + } + __hostdev__ CoordType getCoord() const + { + NANOVDB_ASSERT(*this); + return this->tile()->origin(); + } + }; // Member class BaseIter + + template + class ChildIter : public BaseIter + { + static_assert(util::is_same::type, RootNode>::value, "Invalid RootT"); + using BaseT = BaseIter; + using NodeT = typename util::match_const::type; + + public: + __hostdev__ ChildIter() + : BaseT() + { + } + __hostdev__ ChildIter(RootT* parent) + : BaseT(parent->data(), parent->tileCount()) + { + NANOVDB_ASSERT(BaseT::mData); + while (*this && !this->tile()->isChild()) + this->next(); + } + __hostdev__ NodeT& operator*() const + { + NANOVDB_ASSERT(*this); + return *BaseT::mData->getChild(this->tile()); + } + __hostdev__ NodeT* operator->() const + { + NANOVDB_ASSERT(*this); + return 
BaseT::mData->getChild(this->tile()); + } + __hostdev__ ChildIter& operator++() + { + NANOVDB_ASSERT(BaseT::mData); + this->next(); + while (*this && this->tile()->isValue()) + this->next(); + return *this; + } + __hostdev__ ChildIter operator++(int) + { + auto tmp = *this; + ++(*this); + return tmp; + } + }; // Member class ChildIter + + using ChildIterator = ChildIter; + using ConstChildIterator = ChildIter; + + __hostdev__ ChildIterator beginChild() { return ChildIterator(this); } + __hostdev__ ConstChildIterator cbeginChild() const { return ConstChildIterator(this); } + + template + class ValueIter : public BaseIter + { + using BaseT = BaseIter; + + public: + __hostdev__ ValueIter() + : BaseT() + { + } + __hostdev__ ValueIter(RootT* parent) + : BaseT(parent->data(), parent->tileCount()) + { + NANOVDB_ASSERT(BaseT::mData); + while (*this && this->tile()->isChild()) + this->next(); + } + __hostdev__ ValueType operator*() const + { + NANOVDB_ASSERT(*this); + return this->tile()->value; + } + __hostdev__ bool isActive() const + { + NANOVDB_ASSERT(*this); + return this->tile()->state; + } + __hostdev__ ValueIter& operator++() + { + NANOVDB_ASSERT(BaseT::mData); + this->next(); + while (*this && this->tile()->isChild()) + this->next(); + return *this; + } + __hostdev__ ValueIter operator++(int) + { + auto tmp = *this; + ++(*this); + return tmp; + } + }; // Member class ValueIter + + using ValueIterator = ValueIter; + using ConstValueIterator = ValueIter; + + __hostdev__ ValueIterator beginValue() { return ValueIterator(this); } + __hostdev__ ConstValueIterator cbeginValueAll() const { return ConstValueIterator(this); } + + template + class ValueOnIter : public BaseIter + { + using BaseT = BaseIter; + + public: + __hostdev__ ValueOnIter() + : BaseT() + { + } + __hostdev__ ValueOnIter(RootT* parent) + : BaseT(parent->data(), parent->tileCount()) + { + NANOVDB_ASSERT(BaseT::mData); + while (*this && !this->tile()->isActive()) + ++BaseT::mPos; + } + __hostdev__ ValueType operator*() const + { + NANOVDB_ASSERT(*this); + return this->tile()->value; + } + __hostdev__ ValueOnIter& operator++() + { + NANOVDB_ASSERT(BaseT::mData); + this->next(); + while (*this && !this->tile()->isActive()) + this->next(); + return *this; + } + __hostdev__ ValueOnIter operator++(int) + { + auto tmp = *this; + ++(*this); + return tmp; + } + }; // Member class ValueOnIter + + using ValueOnIterator = ValueOnIter; + using ConstValueOnIterator = ValueOnIter; + + __hostdev__ ValueOnIterator beginValueOn() { return ValueOnIterator(this); } + __hostdev__ ConstValueOnIterator cbeginValueOn() const { return ConstValueOnIterator(this); } + + template + class DenseIter : public BaseIter + { + using BaseT = BaseIter; + using NodeT = typename util::match_const::type; + + public: + __hostdev__ DenseIter() + : BaseT() + { + } + __hostdev__ DenseIter(RootT* parent) + : BaseT(parent->data(), parent->tileCount()) + { + NANOVDB_ASSERT(BaseT::mData); + } + __hostdev__ NodeT* probeChild(ValueType& value) const + { + NANOVDB_ASSERT(*this); + NodeT* child = nullptr; + auto* t = this->tile(); + if (t->isChild()) { + child = BaseT::mData->getChild(t); + } else { + value = t->value; + } + return child; + } + __hostdev__ bool isValueOn() const + { + NANOVDB_ASSERT(*this); + return this->tile()->state; + } + __hostdev__ DenseIter& operator++() + { + NANOVDB_ASSERT(BaseT::mData); + this->next(); + return *this; + } + __hostdev__ DenseIter operator++(int) + { + auto tmp = *this; + ++(*this); + return tmp; + } + }; // Member class DenseIter + + 
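The ChildIter/ValueIter/ValueOnIter members above all share one pattern: walk the root's tile table linearly and skip entries that fail a predicate (isChild, isValue, isActive). A compact standalone sketch of that skip-ahead iteration over a toy tile array (purely illustrative, not the NanoVDB types):

    #include <cstdint>
    #include <cstdio>

    // A toy root tile: child == 0 means "constant value tile", otherwise it refers
    // to a child node. Mirrors the predicates used by the iterators above.
    struct ToyTile { int64_t child; float value; bool state; };

    // Visit only the tiles satisfying pred, in table order.
    template <typename Pred, typename Visit>
    void forEachTile(const ToyTile* tiles, uint32_t count, Pred pred, Visit visit)
    {
        for (uint32_t pos = 0; pos < count; ++pos)  // "next(): ++mPos"
            if (pred(tiles[pos]))                   // skip tiles that fail the predicate
                visit(pos, tiles[pos]);
    }

    int main()
    {
        const ToyTile tiles[4] = {
            { 64, 0.0f, false}, // child tile
            {  0, 1.5f, true }, // active value tile
            {  0, 0.0f, false}, // inactive value tile
            {128, 0.0f, false}, // child tile
        };
        // equivalent of beginChild(): visits positions 0 and 3
        forEachTile(tiles, 4, [](const ToyTile& t) { return t.child != 0; },
                    [](uint32_t pos, const ToyTile&) { std::printf("child tile at %u\n", pos); });
        // equivalent of beginValueOn(): visits position 1 only
        forEachTile(tiles, 4, [](const ToyTile& t) { return t.child == 0 && t.state; },
                    [](uint32_t pos, const ToyTile& t) { std::printf("active value %.1f at %u\n", t.value, pos); });
        return 0;
    }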
using DenseIterator = DenseIter; + using ConstDenseIterator = DenseIter; + + __hostdev__ DenseIterator beginDense() { return DenseIterator(this); } + __hostdev__ ConstDenseIterator cbeginDense() const { return ConstDenseIterator(this); } + __hostdev__ ConstDenseIterator cbeginChildAll() const { return ConstDenseIterator(this); } + + /// @brief This class cannot be constructed or deleted + RootNode() = delete; + RootNode(const RootNode&) = delete; + RootNode& operator=(const RootNode&) = delete; + ~RootNode() = delete; + + __hostdev__ AccessorType getAccessor() const { return AccessorType(*this); } + + __hostdev__ DataType* data() { return reinterpret_cast(this); } + + __hostdev__ const DataType* data() const { return reinterpret_cast(this); } + + /// @brief Return a const reference to the index bounding box of all the active values in this tree, i.e. in all nodes of the tree + __hostdev__ const BBoxType& bbox() const { return DataType::mBBox; } + + /// @brief Return the total number of active voxels in the root and all its child nodes. + + /// @brief Return a const reference to the background value, i.e. the value associated with + /// any coordinate location that has not been set explicitly. + __hostdev__ const ValueType& background() const { return DataType::mBackground; } + + /// @brief Return the number of tiles encoded in this root node + __hostdev__ const uint32_t& tileCount() const { return DataType::mTableSize; } + __hostdev__ const uint32_t& getTableSize() const { return DataType::mTableSize; } + + /// @brief Return a const reference to the minimum active value encoded in this root node and any of its child nodes + __hostdev__ const ValueType& minimum() const { return DataType::mMinimum; } + + /// @brief Return a const reference to the maximum active value encoded in this root node and any of its child nodes + __hostdev__ const ValueType& maximum() const { return DataType::mMaximum; } + + /// @brief Return a const reference to the average of all the active values encoded in this root node and any of its child nodes + __hostdev__ const FloatType& average() const { return DataType::mAverage; } + + /// @brief Return the variance of all the active values encoded in this root node and any of its child nodes + __hostdev__ FloatType variance() const { return math::Pow2(DataType::mStdDevi); } + + /// @brief Return a const reference to the standard deviation of all the active values encoded in this root node and any of its child nodes + __hostdev__ const FloatType& stdDeviation() const { return DataType::mStdDevi; } + + /// @brief Return the expected memory footprint in bytes with the specified number of tiles + __hostdev__ static uint64_t memUsage(uint32_t tableSize) { return sizeof(RootNode) + tableSize * sizeof(Tile); } + + /// @brief Return the actual memory footprint of this root node + __hostdev__ uint64_t memUsage() const { return sizeof(RootNode) + DataType::mTableSize * sizeof(Tile); } + + /// @brief Return true if this RootNode is empty, i.e. 
contains no values or nodes + __hostdev__ bool isEmpty() const { return DataType::mTableSize == uint32_t(0); } + +#ifdef NANOVDB_NEW_ACCESSOR_METHODS + /// @brief Return the value of the given voxel + __hostdev__ ValueType getValue(const CoordType& ijk) const { return this->template get>(ijk); } + __hostdev__ ValueType getValue(int i, int j, int k) const { return this->template get>(CoordType(i, j, k)); } + __hostdev__ bool isActive(const CoordType& ijk) const { return this->template get>(ijk); } + /// @brief return the state and updates the value of the specified voxel + __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const { return this->template get>(ijk, v); } + __hostdev__ const LeafNodeType* probeLeaf(const CoordType& ijk) const { return this->template get>(ijk); } +#else // NANOVDB_NEW_ACCESSOR_METHODS + + /// @brief Return the value of the given voxel + __hostdev__ ValueType getValue(const CoordType& ijk) const + { + if (const Tile* tile = DataType::probeTile(ijk)) { + return tile->isChild() ? this->getChild(tile)->getValue(ijk) : tile->value; + } + return DataType::mBackground; + } + __hostdev__ ValueType getValue(int i, int j, int k) const { return this->getValue(CoordType(i, j, k)); } + + __hostdev__ bool isActive(const CoordType& ijk) const + { + if (const Tile* tile = DataType::probeTile(ijk)) { + return tile->isChild() ? this->getChild(tile)->isActive(ijk) : tile->state; + } + return false; + } + + __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const + { + if (const Tile* tile = DataType::probeTile(ijk)) { + if (tile->isChild()) { + const auto* child = this->getChild(tile); + return child->probeValue(ijk, v); + } + v = tile->value; + return tile->state; + } + v = DataType::mBackground; + return false; + } + + __hostdev__ const LeafNodeType* probeLeaf(const CoordType& ijk) const + { + const Tile* tile = DataType::probeTile(ijk); + if (tile && tile->isChild()) { + const auto* child = this->getChild(tile); + return child->probeLeaf(ijk); + } + return nullptr; + } + +#endif // NANOVDB_NEW_ACCESSOR_METHODS + + __hostdev__ const ChildNodeType* probeChild(const CoordType& ijk) const + { + const Tile* tile = DataType::probeTile(ijk); + return tile && tile->isChild() ? this->getChild(tile) : nullptr; + } + + __hostdev__ ChildNodeType* probeChild(const CoordType& ijk) + { + const Tile* tile = DataType::probeTile(ijk); + return tile && tile->isChild() ? this->getChild(tile) : nullptr; + } + + template + __hostdev__ auto get(const CoordType& ijk, ArgsT&&... args) const + { + if (const Tile* tile = this->probeTile(ijk)) { + if (tile->isChild()) + return this->getChild(tile)->template get(ijk, args...); + return OpT::get(*tile, args...); + } + return OpT::get(*this, args...); + } + + template + // __hostdev__ auto // occasionally fails with NVCC + __hostdev__ decltype(OpT::set(util::declval(), util::declval()...)) + set(const CoordType& ijk, ArgsT&&... 
args) + { + if (Tile* tile = DataType::probeTile(ijk)) { + if (tile->isChild()) + return this->getChild(tile)->template set(ijk, args...); + return OpT::set(*tile, args...); + } + return OpT::set(*this, args...); + } + +private: + static_assert(sizeof(DataType) % NANOVDB_DATA_ALIGNMENT == 0, "sizeof(RootData) is misaligned"); + static_assert(sizeof(typename DataType::Tile) % NANOVDB_DATA_ALIGNMENT == 0, "sizeof(RootData::Tile) is misaligned"); + + template + friend class ReadAccessor; + + template + friend class Tree; +#ifndef NANOVDB_NEW_ACCESSOR_METHODS + /// @brief Private method to return node information and update a ReadAccessor + template + __hostdev__ typename AccT::NodeInfo getNodeInfoAndCache(const CoordType& ijk, const AccT& acc) const + { + using NodeInfoT = typename AccT::NodeInfo; + if (const Tile* tile = this->probeTile(ijk)) { + if (tile->isChild()) { + const auto* child = this->getChild(tile); + acc.insert(ijk, child); + return child->getNodeInfoAndCache(ijk, acc); + } + return NodeInfoT{LEVEL, ChildT::dim(), tile->value, tile->value, tile->value, 0, tile->origin(), tile->origin() + CoordType(ChildT::DIM)}; + } + return NodeInfoT{LEVEL, ChildT::dim(), this->minimum(), this->maximum(), this->average(), this->stdDeviation(), this->bbox()[0], this->bbox()[1]}; + } + + /// @brief Private method to return a voxel value and update a ReadAccessor + template + __hostdev__ ValueType getValueAndCache(const CoordType& ijk, const AccT& acc) const + { + if (const Tile* tile = this->probeTile(ijk)) { + if (tile->isChild()) { + const auto* child = this->getChild(tile); + acc.insert(ijk, child); + return child->getValueAndCache(ijk, acc); + } + return tile->value; + } + return DataType::mBackground; + } + + template + __hostdev__ bool isActiveAndCache(const CoordType& ijk, const AccT& acc) const + { + const Tile* tile = this->probeTile(ijk); + if (tile && tile->isChild()) { + const auto* child = this->getChild(tile); + acc.insert(ijk, child); + return child->isActiveAndCache(ijk, acc); + } + return false; + } + + template + __hostdev__ bool probeValueAndCache(const CoordType& ijk, ValueType& v, const AccT& acc) const + { + if (const Tile* tile = this->probeTile(ijk)) { + if (tile->isChild()) { + const auto* child = this->getChild(tile); + acc.insert(ijk, child); + return child->probeValueAndCache(ijk, v, acc); + } + v = tile->value; + return tile->state; + } + v = DataType::mBackground; + return false; + } + + template + __hostdev__ const LeafNodeType* probeLeafAndCache(const CoordType& ijk, const AccT& acc) const + { + const Tile* tile = this->probeTile(ijk); + if (tile && tile->isChild()) { + const auto* child = this->getChild(tile); + acc.insert(ijk, child); + return child->probeLeafAndCache(ijk, acc); + } + return nullptr; + } +#endif // NANOVDB_NEW_ACCESSOR_METHODS + + template + __hostdev__ uint32_t getDimAndCache(const CoordType& ijk, const RayT& ray, const AccT& acc) const + { + if (const Tile* tile = this->probeTile(ijk)) { + if (tile->isChild()) { + const auto* child = this->getChild(tile); + acc.insert(ijk, child); + return child->getDimAndCache(ijk, ray, acc); + } + return 1 << ChildT::TOTAL; //tile value + } + return ChildNodeType::dim(); // background + } + + template + //__hostdev__ decltype(OpT::get(util::declval(), util::declval()...)) + __hostdev__ auto + getAndCache(const CoordType& ijk, const AccT& acc, ArgsT&&... 
args) const + { + if (const Tile* tile = this->probeTile(ijk)) { + if (tile->isChild()) { + const ChildT* child = this->getChild(tile); + acc.insert(ijk, child); + return child->template getAndCache(ijk, acc, args...); + } + return OpT::get(*tile, args...); + } + return OpT::get(*this, args...); + } + + template + // __hostdev__ auto // occasionally fails with NVCC + __hostdev__ decltype(OpT::set(util::declval(), util::declval()...)) + setAndCache(const CoordType& ijk, const AccT& acc, ArgsT&&... args) + { + if (Tile* tile = DataType::probeTile(ijk)) { + if (tile->isChild()) { + ChildT* child = this->getChild(tile); + acc.insert(ijk, child); + return child->template setAndCache(ijk, acc, args...); + } + return OpT::set(*tile, args...); + } + return OpT::set(*this, args...); + } + +}; // RootNode class + +// After the RootNode the memory layout is assumed to be the sorted Tiles + +// --------------------------> InternalNode <------------------------------------ + +/// @brief Struct with all the member data of the InternalNode (useful during serialization of an openvdb InternalNode) +/// +/// @note No client code should (or can) interface with this struct so it can safely be ignored! +template +struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) InternalData +{ + using ValueT = typename ChildT::ValueType; + using BuildT = typename ChildT::BuildType; // in rare cases BuildType != ValueType, e.g. then BuildType = ValueMask and ValueType = bool + using StatsT = typename ChildT::FloatType; + using CoordT = typename ChildT::CoordType; + using MaskT = typename ChildT::template MaskType; + static constexpr bool FIXED_SIZE = true; + + union Tile + { + ValueT value; + int64_t child; //signed 64 bit byte offset relative to this InternalData, i.e. child-pointer = Tile::child + this + /// @brief This class cannot be constructed or deleted + Tile() = delete; + Tile(const Tile&) = delete; + Tile& operator=(const Tile&) = delete; + ~Tile() = delete; + }; + + math::BBox mBBox; // 24B. node bounding box. | + uint64_t mFlags; // 8B. node flags. | 32B aligned + MaskT mValueMask; // LOG2DIM(5): 4096B, LOG2DIM(4): 512B | 32B aligned + MaskT mChildMask; // LOG2DIM(5): 4096B, LOG2DIM(4): 512B | 32B aligned + + ValueT mMinimum; // typically 4B + ValueT mMaximum; // typically 4B + StatsT mAverage; // typically 4B, average of all the active values in this node and its child nodes + StatsT mStdDevi; // typically 4B, standard deviation of all the active values in this node and its child nodes + // possible padding, e.g. 28 byte padding when ValueType = bool + + /// @brief Return padding of this class in bytes, due to aliasing and 32B alignment + /// + /// @note The extra bytes are not necessarily at the end, but can come from aliasing of individual data members. + __hostdev__ static constexpr uint32_t padding() + { + return sizeof(InternalData) - (24u + 8u + 2 * (sizeof(MaskT) + sizeof(ValueT) + sizeof(StatsT)) + (1u << (3 * LOG2DIM)) * (sizeof(ValueT) > 8u ? sizeof(ValueT) : 8u)); + } + alignas(32) Tile mTable[1u << (3 * LOG2DIM)]; // sizeof(ValueT) x (16*16*16 or 32*32*32) + + __hostdev__ static uint64_t memUsage() { return sizeof(InternalData); } + + __hostdev__ void setChild(uint32_t n, const void* ptr) + { + NANOVDB_ASSERT(mChildMask.isOn(n)); + mTable[n].child = util::PtrDiff(ptr, this); + } + + template + __hostdev__ void setValue(uint32_t n, const ValueT& v) + { + NANOVDB_ASSERT(!mChildMask.isOn(n)); + mTable[n].value = v; + } + + /// @brief Returns a pointer to the child node at the specifed linear offset. 
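Unlike the root, InternalData has a fixed table of 2^(3*LOG2DIM) tiles and uses mChildMask rather than a zero offset to tell value tiles from child tiles: setChild flips the bit and stores a byte offset in the union, setValue leaves the bit off and stores the value. A small standalone sketch of that mask-plus-union bookkeeping (illustrative only, and much smaller than a real node):

    #include <cstdint>
    #include <cassert>

    // A toy fixed-size node: a bitmask says which slots hold child offsets and
    // which hold plain values, mirroring InternalData::setChild/setValue/getValue.
    struct ToyInternal
    {
        union Tile { float value; int64_t child; };
        uint64_t childMask = 0;   // bit n set -> table[n] holds a child offset
        Tile     table[64] = {};  // 4^3 tiles, far fewer than a real node

        void setValue(uint32_t n, float v)        { childMask &= ~(uint64_t(1) << n); table[n].value = v; }
        void setChild(uint32_t n, int64_t offset) { childMask |=  (uint64_t(1) << n); table[n].child = offset; }
        bool isChild(uint32_t n) const            { return (childMask >> n) & 1u; }
        float   getValue(uint32_t n) const        { assert(!isChild(n)); return table[n].value; }
        int64_t getChildOffset(uint32_t n) const  { assert( isChild(n)); return table[n].child; }
    };

    int main()
    {
        ToyInternal node;
        node.setValue(3, 2.5f);
        node.setChild(7, 4096);  // pretend the child node lives 4096 bytes away

        assert(!node.isChild(3) && node.getValue(3) == 2.5f);
        assert( node.isChild(7) && node.getChildOffset(7) == 4096);
        return 0;
    }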
+ __hostdev__ ChildT* getChild(uint32_t n) + { + NANOVDB_ASSERT(mChildMask.isOn(n)); + return util::PtrAdd(this, mTable[n].child); + } + __hostdev__ const ChildT* getChild(uint32_t n) const + { + NANOVDB_ASSERT(mChildMask.isOn(n)); + return util::PtrAdd(this, mTable[n].child); + } + + __hostdev__ ValueT getValue(uint32_t n) const + { + NANOVDB_ASSERT(mChildMask.isOff(n)); + return mTable[n].value; + } + + __hostdev__ bool isActive(uint32_t n) const + { + NANOVDB_ASSERT(mChildMask.isOff(n)); + return mValueMask.isOn(n); + } + + __hostdev__ bool isChild(uint32_t n) const { return mChildMask.isOn(n); } + + template + __hostdev__ void setOrigin(const T& ijk) { mBBox[0] = ijk; } + + __hostdev__ const ValueT& getMin() const { return mMinimum; } + __hostdev__ const ValueT& getMax() const { return mMaximum; } + __hostdev__ const StatsT& average() const { return mAverage; } + __hostdev__ const StatsT& stdDeviation() const { return mStdDevi; } + +#if defined(__GNUC__) && !defined(__APPLE__) && !defined(__llvm__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wstringop-overflow" +#endif + __hostdev__ void setMin(const ValueT& v) { mMinimum = v; } + __hostdev__ void setMax(const ValueT& v) { mMaximum = v; } + __hostdev__ void setAvg(const StatsT& v) { mAverage = v; } + __hostdev__ void setDev(const StatsT& v) { mStdDevi = v; } +#if defined(__GNUC__) && !defined(__APPLE__) && !defined(__llvm__) +#pragma GCC diagnostic pop +#endif + + /// @brief This class cannot be constructed or deleted + InternalData() = delete; + InternalData(const InternalData&) = delete; + InternalData& operator=(const InternalData&) = delete; + ~InternalData() = delete; +}; // InternalData + +/// @brief Internal nodes of a VDB tree +template +class InternalNode : public InternalData +{ +public: + using DataType = InternalData; + using ValueType = typename DataType::ValueT; + using FloatType = typename DataType::StatsT; + using BuildType = typename DataType::BuildT; // in rare cases BuildType != ValueType, e.g. 
then BuildType = ValueMask and ValueType = bool + using LeafNodeType = typename ChildT::LeafNodeType; + using ChildNodeType = ChildT; + using CoordType = typename ChildT::CoordType; + static constexpr bool FIXED_SIZE = DataType::FIXED_SIZE; + template + using MaskType = typename ChildT::template MaskType; + template + using MaskIterT = typename Mask::template Iterator; + + static constexpr uint32_t LOG2DIM = Log2Dim; + static constexpr uint32_t TOTAL = LOG2DIM + ChildT::TOTAL; // dimension in index space + static constexpr uint32_t DIM = 1u << TOTAL; // number of voxels along each axis of this node + static constexpr uint32_t SIZE = 1u << (3 * LOG2DIM); // number of tile values (or child pointers) + static constexpr uint32_t MASK = (1u << TOTAL) - 1u; + static constexpr uint32_t LEVEL = 1 + ChildT::LEVEL; // level 0 = leaf + static constexpr uint64_t NUM_VALUES = uint64_t(1) << (3 * TOTAL); // total voxel count represented by this node + + /// @brief Visits child nodes of this node only + template + class ChildIter : public MaskIterT + { + static_assert(util::is_same::type, InternalNode>::value, "Invalid ParentT"); + using BaseT = MaskIterT; + using NodeT = typename util::match_const::type; + ParentT* mParent; + + public: + __hostdev__ ChildIter() + : BaseT() + , mParent(nullptr) + { + } + __hostdev__ ChildIter(ParentT* parent) + : BaseT(parent->mChildMask.beginOn()) + , mParent(parent) + { + } + ChildIter& operator=(const ChildIter&) = default; + __hostdev__ NodeT& operator*() const + { + NANOVDB_ASSERT(*this); + return *mParent->getChild(BaseT::pos()); + } + __hostdev__ NodeT* operator->() const + { + NANOVDB_ASSERT(*this); + return mParent->getChild(BaseT::pos()); + } + __hostdev__ CoordType getOrigin() const + { + NANOVDB_ASSERT(*this); + return (*this)->origin(); + } + __hostdev__ CoordType getCoord() const {return this->getOrigin();} + }; // Member class ChildIter + + using ChildIterator = ChildIter; + using ConstChildIterator = ChildIter; + + __hostdev__ ChildIterator beginChild() { return ChildIterator(this); } + __hostdev__ ConstChildIterator cbeginChild() const { return ConstChildIterator(this); } + + /// @brief Visits all tile values in this node, i.e. 
both inactive and active tiles + class ValueIterator : public MaskIterT + { + using BaseT = MaskIterT; + const InternalNode* mParent; + + public: + __hostdev__ ValueIterator() + : BaseT() + , mParent(nullptr) + { + } + __hostdev__ ValueIterator(const InternalNode* parent) + : BaseT(parent->data()->mChildMask.beginOff()) + , mParent(parent) + { + } + ValueIterator& operator=(const ValueIterator&) = default; + __hostdev__ ValueType operator*() const + { + NANOVDB_ASSERT(*this); + return mParent->data()->getValue(BaseT::pos()); + } + __hostdev__ CoordType getOrigin() const + { + NANOVDB_ASSERT(*this); + return mParent->offsetToGlobalCoord(BaseT::pos()); + } + __hostdev__ CoordType getCoord() const {return this->getOrigin();} + __hostdev__ bool isActive() const + { + NANOVDB_ASSERT(*this); + return mParent->data()->isActive(BaseT::mPos); + } + }; // Member class ValueIterator + + __hostdev__ ValueIterator beginValue() const { return ValueIterator(this); } + __hostdev__ ValueIterator cbeginValueAll() const { return ValueIterator(this); } + + /// @brief Visits active tile values of this node only + class ValueOnIterator : public MaskIterT + { + using BaseT = MaskIterT; + const InternalNode* mParent; + + public: + __hostdev__ ValueOnIterator() + : BaseT() + , mParent(nullptr) + { + } + __hostdev__ ValueOnIterator(const InternalNode* parent) + : BaseT(parent->data()->mValueMask.beginOn()) + , mParent(parent) + { + } + ValueOnIterator& operator=(const ValueOnIterator&) = default; + __hostdev__ ValueType operator*() const + { + NANOVDB_ASSERT(*this); + return mParent->data()->getValue(BaseT::pos()); + } + __hostdev__ CoordType getOrigin() const + { + NANOVDB_ASSERT(*this); + return mParent->offsetToGlobalCoord(BaseT::pos()); + } + __hostdev__ CoordType getCoord() const {return this->getOrigin();} + }; // Member class ValueOnIterator + + __hostdev__ ValueOnIterator beginValueOn() const { return ValueOnIterator(this); } + __hostdev__ ValueOnIterator cbeginValueOn() const { return ValueOnIterator(this); } + + /// @brief Visits all tile values and child nodes of this node + class DenseIterator : public Mask::DenseIterator + { + using BaseT = typename Mask::DenseIterator; + const DataType* mParent; + + public: + __hostdev__ DenseIterator() + : BaseT() + , mParent(nullptr) + { + } + __hostdev__ DenseIterator(const InternalNode* parent) + : BaseT(0) + , mParent(parent->data()) + { + } + DenseIterator& operator=(const DenseIterator&) = default; + __hostdev__ const ChildT* probeChild(ValueType& value) const + { + NANOVDB_ASSERT(mParent && bool(*this)); + const ChildT* child = nullptr; + if (mParent->mChildMask.isOn(BaseT::pos())) { + child = mParent->getChild(BaseT::pos()); + } else { + value = mParent->getValue(BaseT::pos()); + } + return child; + } + __hostdev__ bool isValueOn() const + { + NANOVDB_ASSERT(mParent && bool(*this)); + return mParent->isActive(BaseT::pos()); + } + __hostdev__ CoordType getOrigin() const + { + NANOVDB_ASSERT(mParent && bool(*this)); + return mParent->offsetToGlobalCoord(BaseT::pos()); + } + __hostdev__ CoordType getCoord() const {return this->getOrigin();} + }; // Member class DenseIterator + + __hostdev__ DenseIterator beginDense() const { return DenseIterator(this); } + __hostdev__ DenseIterator cbeginChildAll() const { return DenseIterator(this); } // matches openvdb + + /// @brief This class cannot be constructed or deleted + InternalNode() = delete; + InternalNode(const InternalNode&) = delete; + InternalNode& operator=(const InternalNode&) = delete; + ~InternalNode() = 
delete; + + __hostdev__ DataType* data() { return reinterpret_cast(this); } + + __hostdev__ const DataType* data() const { return reinterpret_cast(this); } + + /// @brief Return the dimension, in voxel units, of this internal node (typically 8*16 or 8*16*32) + __hostdev__ static uint32_t dim() { return 1u << TOTAL; } + + /// @brief Return memory usage in bytes for the class + __hostdev__ static size_t memUsage() { return DataType::memUsage(); } + + /// @brief Return a const reference to the bit mask of active voxels in this internal node + __hostdev__ const MaskType& valueMask() const { return DataType::mValueMask; } + __hostdev__ const MaskType& getValueMask() const { return DataType::mValueMask; } + + /// @brief Return a const reference to the bit mask of child nodes in this internal node + __hostdev__ const MaskType& childMask() const { return DataType::mChildMask; } + __hostdev__ const MaskType& getChildMask() const { return DataType::mChildMask; } + + /// @brief Return the origin in index space of this leaf node + __hostdev__ CoordType origin() const { return DataType::mBBox.min() & ~MASK; } + + /// @brief Return a const reference to the minimum active value encoded in this internal node and any of its child nodes + __hostdev__ const ValueType& minimum() const { return this->getMin(); } + + /// @brief Return a const reference to the maximum active value encoded in this internal node and any of its child nodes + __hostdev__ const ValueType& maximum() const { return this->getMax(); } + + /// @brief Return a const reference to the average of all the active values encoded in this internal node and any of its child nodes + __hostdev__ const FloatType& average() const { return DataType::mAverage; } + + /// @brief Return the variance of all the active values encoded in this internal node and any of its child nodes + __hostdev__ FloatType variance() const { return DataType::mStdDevi * DataType::mStdDevi; } + + /// @brief Return a const reference to the standard deviation of all the active values encoded in this internal node and any of its child nodes + __hostdev__ const FloatType& stdDeviation() const { return DataType::mStdDevi; } + + /// @brief Return a const reference to the bounding box in index space of active values in this internal node and any of its child nodes + __hostdev__ const math::BBox& bbox() const { return DataType::mBBox; } + + /// @brief If the first entry in this node's table is a tile, return the tile's value. + /// Otherwise, return the result of calling getFirstValue() on the child. + __hostdev__ ValueType getFirstValue() const + { + return DataType::mChildMask.isOn(0) ? this->getChild(0)->getFirstValue() : DataType::getValue(0); + } + + /// @brief If the last entry in this node's table is a tile, return the tile's value. + /// Otherwise, return the result of calling getLastValue() on the child. + __hostdev__ ValueType getLastValue() const + { + return DataType::mChildMask.isOn(SIZE - 1) ? 
this->getChild(SIZE - 1)->getLastValue() : DataType::getValue(SIZE - 1); + } + +#ifdef NANOVDB_NEW_ACCESSOR_METHODS + /// @brief Return the value of the given voxel + __hostdev__ ValueType getValue(const CoordType& ijk) const { return this->template get>(ijk); } + __hostdev__ bool isActive(const CoordType& ijk) const { return this->template get>(ijk); } + /// @brief return the state and updates the value of the specified voxel + __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const { return this->template get>(ijk, v); } + __hostdev__ const LeafNodeType* probeLeaf(const CoordType& ijk) const { return this->template get>(ijk); } +#else // NANOVDB_NEW_ACCESSOR_METHODS + __hostdev__ ValueType getValue(const CoordType& ijk) const + { + const uint32_t n = CoordToOffset(ijk); + return DataType::mChildMask.isOn(n) ? this->getChild(n)->getValue(ijk) : DataType::getValue(n); + } + __hostdev__ bool isActive(const CoordType& ijk) const + { + const uint32_t n = CoordToOffset(ijk); + return DataType::mChildMask.isOn(n) ? this->getChild(n)->isActive(ijk) : DataType::isActive(n); + } + __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const + { + const uint32_t n = CoordToOffset(ijk); + if (DataType::mChildMask.isOn(n)) + return this->getChild(n)->probeValue(ijk, v); + v = DataType::getValue(n); + return DataType::isActive(n); + } + __hostdev__ const LeafNodeType* probeLeaf(const CoordType& ijk) const + { + const uint32_t n = CoordToOffset(ijk); + if (DataType::mChildMask.isOn(n)) + return this->getChild(n)->probeLeaf(ijk); + return nullptr; + } + +#endif // NANOVDB_NEW_ACCESSOR_METHODS + + __hostdev__ ChildNodeType* probeChild(const CoordType& ijk) + { + const uint32_t n = CoordToOffset(ijk); + return DataType::mChildMask.isOn(n) ? this->getChild(n) : nullptr; + } + __hostdev__ const ChildNodeType* probeChild(const CoordType& ijk) const + { + const uint32_t n = CoordToOffset(ijk); + return DataType::mChildMask.isOn(n) ? this->getChild(n) : nullptr; + } + + /// @brief Return the linear offset corresponding to the given coordinate + __hostdev__ static uint32_t CoordToOffset(const CoordType& ijk) + { + return (((ijk[0] & MASK) >> ChildT::TOTAL) << (2 * LOG2DIM)) | // note, we're using bitwise OR instead of + + (((ijk[1] & MASK) >> ChildT::TOTAL) << (LOG2DIM)) | + ((ijk[2] & MASK) >> ChildT::TOTAL); + } + + /// @return the local coordinate of the n'th tile or child node + __hostdev__ static Coord OffsetToLocalCoord(uint32_t n) + { + NANOVDB_ASSERT(n < SIZE); + const uint32_t m = n & ((1 << 2 * LOG2DIM) - 1); + return Coord(n >> 2 * LOG2DIM, m >> LOG2DIM, m & ((1 << LOG2DIM) - 1)); + } + + /// @brief modifies local coordinates to global coordinates of a tile or child node + __hostdev__ void localToGlobalCoord(Coord& ijk) const + { + ijk <<= ChildT::TOTAL; + ijk += this->origin(); + } + + __hostdev__ Coord offsetToGlobalCoord(uint32_t n) const + { + Coord ijk = InternalNode::OffsetToLocalCoord(n); + this->localToGlobalCoord(ijk); + return ijk; + } + + /// @brief Return true if this node or any of its child nodes contain active values + __hostdev__ bool isActive() const { return DataType::mFlags & uint32_t(2); } + + template + __hostdev__ auto get(const CoordType& ijk, ArgsT&&... 
args) const + { + const uint32_t n = CoordToOffset(ijk); + if (this->isChild(n)) + return this->getChild(n)->template get(ijk, args...); + return OpT::get(*this, n, args...); + } + + template + //__hostdev__ auto // occasionally fails with NVCC + __hostdev__ decltype(OpT::set(util::declval(), util::declval(), util::declval()...)) + set(const CoordType& ijk, ArgsT&&... args) + { + const uint32_t n = CoordToOffset(ijk); + if (this->isChild(n)) + return this->getChild(n)->template set(ijk, args...); + return OpT::set(*this, n, args...); + } + +private: + static_assert(sizeof(DataType) % NANOVDB_DATA_ALIGNMENT == 0, "sizeof(InternalData) is misaligned"); + + template + friend class ReadAccessor; + + template + friend class RootNode; + template + friend class InternalNode; + +#ifndef NANOVDB_NEW_ACCESSOR_METHODS + /// @brief Private read access method used by the ReadAccessor + template + __hostdev__ ValueType getValueAndCache(const CoordType& ijk, const AccT& acc) const + { + const uint32_t n = CoordToOffset(ijk); + if (DataType::mChildMask.isOff(n)) + return DataType::getValue(n); + const ChildT* child = this->getChild(n); + acc.insert(ijk, child); + return child->getValueAndCache(ijk, acc); + } + template + __hostdev__ bool isActiveAndCache(const CoordType& ijk, const AccT& acc) const + { + const uint32_t n = CoordToOffset(ijk); + if (DataType::mChildMask.isOff(n)) + return DataType::isActive(n); + const ChildT* child = this->getChild(n); + acc.insert(ijk, child); + return child->isActiveAndCache(ijk, acc); + } + template + __hostdev__ bool probeValueAndCache(const CoordType& ijk, ValueType& v, const AccT& acc) const + { + const uint32_t n = CoordToOffset(ijk); + if (DataType::mChildMask.isOff(n)) { + v = DataType::getValue(n); + return DataType::isActive(n); + } + const ChildT* child = this->getChild(n); + acc.insert(ijk, child); + return child->probeValueAndCache(ijk, v, acc); + } + template + __hostdev__ const LeafNodeType* probeLeafAndCache(const CoordType& ijk, const AccT& acc) const + { + const uint32_t n = CoordToOffset(ijk); + if (DataType::mChildMask.isOff(n)) + return nullptr; + const ChildT* child = this->getChild(n); + acc.insert(ijk, child); + return child->probeLeafAndCache(ijk, acc); + } + template + __hostdev__ typename AccT::NodeInfo getNodeInfoAndCache(const CoordType& ijk, const AccT& acc) const + { + using NodeInfoT = typename AccT::NodeInfo; + const uint32_t n = CoordToOffset(ijk); + if (DataType::mChildMask.isOff(n)) { + return NodeInfoT{LEVEL, this->dim(), this->minimum(), this->maximum(), this->average(), this->stdDeviation(), this->bbox()[0], this->bbox()[1]}; + } + const ChildT* child = this->getChild(n); + acc.insert(ijk, child); + return child->getNodeInfoAndCache(ijk, acc); + } +#endif // NANOVDB_NEW_ACCESSOR_METHODS + + template + __hostdev__ uint32_t getDimAndCache(const CoordType& ijk, const RayT& ray, const AccT& acc) const + { + if (DataType::mFlags & uint32_t(1u)) + return this->dim(); // skip this node if the 1st bit is set + //if (!ray.intersects( this->bbox() )) return 1<getChild(n); + acc.insert(ijk, child); + return child->getDimAndCache(ijk, ray, acc); + } + return ChildNodeType::dim(); // tile value + } + + template + __hostdev__ auto + //__hostdev__ decltype(OpT::get(util::declval(), util::declval(), util::declval()...)) + getAndCache(const CoordType& ijk, const AccT& acc, ArgsT&&... 
args) const + { + const uint32_t n = CoordToOffset(ijk); + if (DataType::mChildMask.isOff(n)) + return OpT::get(*this, n, args...); + const ChildT* child = this->getChild(n); + acc.insert(ijk, child); + return child->template getAndCache(ijk, acc, args...); + } + + template + //__hostdev__ auto // occasionally fails with NVCC + __hostdev__ decltype(OpT::set(util::declval(), util::declval(), util::declval()...)) + setAndCache(const CoordType& ijk, const AccT& acc, ArgsT&&... args) + { + const uint32_t n = CoordToOffset(ijk); + if (DataType::mChildMask.isOff(n)) + return OpT::set(*this, n, args...); + ChildT* child = this->getChild(n); + acc.insert(ijk, child); + return child->template setAndCache(ijk, acc, args...); + } + +}; // InternalNode class + +// --------------------------> LeafData <------------------------------------ + +/// @brief Stuct with all the member data of the LeafNode (useful during serialization of an openvdb LeafNode) +/// +/// @note No client code should (or can) interface with this struct so it can safely be ignored! +template class MaskT, uint32_t LOG2DIM> +struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData +{ + static_assert(sizeof(CoordT) == sizeof(Coord), "Mismatching sizeof"); + static_assert(sizeof(MaskT) == sizeof(Mask), "Mismatching sizeof"); + using ValueType = ValueT; + using BuildType = ValueT; + using FloatType = typename FloatTraits::FloatType; + using ArrayType = ValueT; // type used for the internal mValue array + static constexpr bool FIXED_SIZE = true; + + CoordT mBBoxMin; // 12B. + uint8_t mBBoxDif[3]; // 3B. + uint8_t mFlags; // 1B. bit0: skip render?, bit1: has bbox?, bit3: unused, bit4: has stats, bits5,6,7: bit-width for FpN + MaskT mValueMask; // LOG2DIM(3): 64B. + + ValueType mMinimum; // typically 4B + ValueType mMaximum; // typically 4B + FloatType mAverage; // typically 4B, average of all the active values in this node and its child nodes + FloatType mStdDevi; // typically 4B, standard deviation of all the active values in this node and its child nodes + alignas(32) ValueType mValues[1u << 3 * LOG2DIM]; + + /// @brief Return padding of this class in bytes, due to aliasing and 32B alignment + /// + /// @note The extra bytes are not necessarily at the end, but can come from aliasing of individual data members. 
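InternalNode::CoordToOffset above masks each axis to the node's extent, drops the child's TOTAL low bits and concatenates the three LOG2DIM-bit indices (x highest, z lowest); OffsetToLocalCoord inverts it. Here is a standalone round-trip sketch with LOG2DIM = 4 and a child TOTAL of 3, the lower-internal-node defaults (names are illustrative):

    #include <cstdint>
    #include <cassert>

    // Mirror InternalNode::CoordToOffset / OffsetToLocalCoord for a node with
    // LOG2DIM = 4 whose children each span 2^CHILD_TOTAL = 8 voxels per axis.
    constexpr uint32_t LOG2DIM     = 4;
    constexpr uint32_t CHILD_TOTAL = 3;
    constexpr uint32_t TOTAL       = LOG2DIM + CHILD_TOTAL; // 7 -> 128 voxels per axis
    constexpr uint32_t MASK        = (1u << TOTAL) - 1u;    // 127

    uint32_t coordToOffset(int32_t i, int32_t j, int32_t k)
    {
        return (((i & MASK) >> CHILD_TOTAL) << (2 * LOG2DIM)) | // x: highest bits
               (((j & MASK) >> CHILD_TOTAL) << LOG2DIM)       | // y: middle bits
                ((k & MASK) >> CHILD_TOTAL);                     // z: lowest bits
    }

    void offsetToLocalCoord(uint32_t n, uint32_t& x, uint32_t& y, uint32_t& z)
    {
        const uint32_t m = n & ((1u << 2 * LOG2DIM) - 1u);
        x = n >> (2 * LOG2DIM);
        y = m >> LOG2DIM;
        z = m & ((1u << LOG2DIM) - 1u);
    }

    int main()
    {
        // voxel (37, 100, 9) inside a 128^3 node falls in child (4, 12, 1)
        const uint32_t n = coordToOffset(37, 100, 9);
        uint32_t x, y, z;
        offsetToLocalCoord(n, x, y, z);
        assert(x == 4 && y == 12 && z == 1);
        return 0;
    }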
+ __hostdev__ static constexpr uint32_t padding() + { + return sizeof(LeafData) - (12 + 3 + 1 + sizeof(MaskT) + 2 * (sizeof(ValueT) + sizeof(FloatType)) + (1u << (3 * LOG2DIM)) * sizeof(ValueT)); + } + __hostdev__ static uint64_t memUsage() { return sizeof(LeafData); } + + __hostdev__ static bool hasStats() { return true; } + + __hostdev__ ValueType getValue(uint32_t i) const { return mValues[i]; } + __hostdev__ void setValueOnly(uint32_t offset, const ValueType& value) { mValues[offset] = value; } + __hostdev__ void setValue(uint32_t offset, const ValueType& value) + { + mValueMask.setOn(offset); + mValues[offset] = value; + } + __hostdev__ void setOn(uint32_t offset) { mValueMask.setOn(offset); } + + __hostdev__ ValueType getMin() const { return mMinimum; } + __hostdev__ ValueType getMax() const { return mMaximum; } + __hostdev__ FloatType getAvg() const { return mAverage; } + __hostdev__ FloatType getDev() const { return mStdDevi; } + + __hostdev__ void setMin(const ValueType& v) { mMinimum = v; } + __hostdev__ void setMax(const ValueType& v) { mMaximum = v; } + __hostdev__ void setAvg(const FloatType& v) { mAverage = v; } + __hostdev__ void setDev(const FloatType& v) { mStdDevi = v; } + + template + __hostdev__ void setOrigin(const T& ijk) { mBBoxMin = ijk; } + + __hostdev__ void fill(const ValueType& v) + { + for (auto *p = mValues, *q = p + 512; p != q; ++p) + *p = v; + } + + /// @brief This class cannot be constructed or deleted + LeafData() = delete; + LeafData(const LeafData&) = delete; + LeafData& operator=(const LeafData&) = delete; + ~LeafData() = delete; +}; // LeafData + +// --------------------------> LeafFnBase <------------------------------------ + +/// @brief Base-class for quantized float leaf nodes +template class MaskT, uint32_t LOG2DIM> +struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafFnBase +{ + static_assert(sizeof(CoordT) == sizeof(Coord), "Mismatching sizeof"); + static_assert(sizeof(MaskT) == sizeof(Mask), "Mismatching sizeof"); + using ValueType = float; + using FloatType = float; + + CoordT mBBoxMin; // 12B. + uint8_t mBBoxDif[3]; // 3B. + uint8_t mFlags; // 1B. bit0: skip render?, bit1: has bbox?, bit3: unused, bit4: has stats, bits5,6,7: bit-width for FpN + MaskT mValueMask; // LOG2DIM(3): 64B. + + float mMinimum; // 4B - minimum of ALL values in this node + float mQuantum; // = (max - min)/15 4B + uint16_t mMin, mMax, mAvg, mDev; // quantized representations of statistics of active values + // no padding since it's always 32B aligned + __hostdev__ static uint64_t memUsage() { return sizeof(LeafFnBase); } + + __hostdev__ static bool hasStats() { return true; } + + /// @brief Return padding of this class in bytes, due to aliasing and 32B alignment + /// + /// @note The extra bytes are not necessarily at the end, but can come from aliasing of individual data members. 
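LeafFnBase keeps one float offset (mMinimum) and one scale (mQuantum = (max - min)/(2^bitWidth - 1)); every per-voxel code and even the node statistics then shrink to small integers that are mapped back as code * mQuantum + mMinimum. A standalone sketch of that encode/decode pair, assuming an 8-bit code purely for illustration:

    #include <cstdint>
    #include <cmath>
    #include <cassert>

    // Linear quantization in the style of LeafFnBase: store (minimum, quantum),
    // encode values as small integer codes, decode as code * quantum + minimum.
    struct Quantizer
    {
        float minimum, quantum;

        void init(float lo, float hi, uint32_t bitWidth)
        {
            minimum = lo;
            quantum = (hi - lo) / float((1u << bitWidth) - 1u);
        }
        uint16_t encode(float v) const { return uint16_t((v - minimum) / quantum + 0.5f); }
        float    decode(uint16_t code) const { return code * quantum + minimum; }
    };

    int main()
    {
        Quantizer q;
        q.init(-1.0f, 3.0f, 8);               // 8-bit codes over [-1, 3]
        const float v = 1.37f;
        const float r = q.decode(q.encode(v));
        // the reconstruction error is bounded by half a quantization step
        assert(std::fabs(r - v) <= 0.5f * q.quantum);
        return 0;
    }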
+ __hostdev__ static constexpr uint32_t padding() + { + return sizeof(LeafFnBase) - (12 + 3 + 1 + sizeof(MaskT) + 2 * 4 + 4 * 2); + } + __hostdev__ void init(float min, float max, uint8_t bitWidth) + { + mMinimum = min; + mQuantum = (max - min) / float((1 << bitWidth) - 1); + } + + __hostdev__ void setOn(uint32_t offset) { mValueMask.setOn(offset); } + + /// @brief return the quantized minimum of the active values in this node + __hostdev__ float getMin() const { return mMin * mQuantum + mMinimum; } + + /// @brief return the quantized maximum of the active values in this node + __hostdev__ float getMax() const { return mMax * mQuantum + mMinimum; } + + /// @brief return the quantized average of the active values in this node + __hostdev__ float getAvg() const { return mAvg * mQuantum + mMinimum; } + /// @brief return the quantized standard deviation of the active values in this node + + /// @note 0 <= StdDev <= max-min or 0 <= StdDev/(max-min) <= 1 + __hostdev__ float getDev() const { return mDev * mQuantum; } + + /// @note min <= X <= max or 0 <= (X-min)/(min-max) <= 1 + __hostdev__ void setMin(float min) { mMin = uint16_t((min - mMinimum) / mQuantum + 0.5f); } + + /// @note min <= X <= max or 0 <= (X-min)/(min-max) <= 1 + __hostdev__ void setMax(float max) { mMax = uint16_t((max - mMinimum) / mQuantum + 0.5f); } + + /// @note min <= avg <= max or 0 <= (avg-min)/(min-max) <= 1 + __hostdev__ void setAvg(float avg) { mAvg = uint16_t((avg - mMinimum) / mQuantum + 0.5f); } + + /// @note 0 <= StdDev <= max-min or 0 <= StdDev/(max-min) <= 1 + __hostdev__ void setDev(float dev) { mDev = uint16_t(dev / mQuantum + 0.5f); } + + template + __hostdev__ void setOrigin(const T& ijk) { mBBoxMin = ijk; } +}; // LeafFnBase + +// --------------------------> LeafData <------------------------------------ + +/// @brief Stuct with all the member data of the LeafNode (useful during serialization of an openvdb LeafNode) +/// +/// @note No client code should (or can) interface with this struct so it can safely be ignored! +template class MaskT, uint32_t LOG2DIM> +struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData + : public LeafFnBase +{ + using BaseT = LeafFnBase; + using BuildType = Fp4; + using ArrayType = uint8_t; // type used for the internal mValue array + static constexpr bool FIXED_SIZE = true; + alignas(32) uint8_t mCode[1u << (3 * LOG2DIM - 1)]; // LeafFnBase is 32B aligned and so is mCode + + __hostdev__ static constexpr uint64_t memUsage() { return sizeof(LeafData); } + __hostdev__ static constexpr uint32_t padding() + { + static_assert(BaseT::padding() == 0, "expected no padding in LeafFnBase"); + return sizeof(LeafData) - sizeof(BaseT) - (1u << (3 * LOG2DIM - 1)); + } + + __hostdev__ static constexpr uint8_t bitWidth() { return 4u; } + __hostdev__ float getValue(uint32_t i) const + { +#if 0 + const uint8_t c = mCode[i>>1]; + return ( (i&1) ? 
c >> 4 : c & uint8_t(15) )*BaseT::mQuantum + BaseT::mMinimum; +#else + return ((mCode[i >> 1] >> ((i & 1) << 2)) & uint8_t(15)) * BaseT::mQuantum + BaseT::mMinimum; +#endif + } + + /// @brief This class cannot be constructed or deleted + LeafData() = delete; + LeafData(const LeafData&) = delete; + LeafData& operator=(const LeafData&) = delete; + ~LeafData() = delete; +}; // LeafData + +// --------------------------> LeafBase <------------------------------------ + +template class MaskT, uint32_t LOG2DIM> +struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData + : public LeafFnBase +{ + using BaseT = LeafFnBase; + using BuildType = Fp8; + using ArrayType = uint8_t; // type used for the internal mValue array + static constexpr bool FIXED_SIZE = true; + alignas(32) uint8_t mCode[1u << 3 * LOG2DIM]; + __hostdev__ static constexpr int64_t memUsage() { return sizeof(LeafData); } + __hostdev__ static constexpr uint32_t padding() + { + static_assert(BaseT::padding() == 0, "expected no padding in LeafFnBase"); + return sizeof(LeafData) - sizeof(BaseT) - (1u << 3 * LOG2DIM); + } + + __hostdev__ static constexpr uint8_t bitWidth() { return 8u; } + __hostdev__ float getValue(uint32_t i) const + { + return mCode[i] * BaseT::mQuantum + BaseT::mMinimum; // code * (max-min)/255 + min + } + /// @brief This class cannot be constructed or deleted + LeafData() = delete; + LeafData(const LeafData&) = delete; + LeafData& operator=(const LeafData&) = delete; + ~LeafData() = delete; +}; // LeafData + +// --------------------------> LeafData <------------------------------------ + +template class MaskT, uint32_t LOG2DIM> +struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData + : public LeafFnBase +{ + using BaseT = LeafFnBase; + using BuildType = Fp16; + using ArrayType = uint16_t; // type used for the internal mValue array + static constexpr bool FIXED_SIZE = true; + alignas(32) uint16_t mCode[1u << 3 * LOG2DIM]; + + __hostdev__ static constexpr uint64_t memUsage() { return sizeof(LeafData); } + __hostdev__ static constexpr uint32_t padding() + { + static_assert(BaseT::padding() == 0, "expected no padding in LeafFnBase"); + return sizeof(LeafData) - sizeof(BaseT) - 2 * (1u << 3 * LOG2DIM); + } + + __hostdev__ static constexpr uint8_t bitWidth() { return 16u; } + __hostdev__ float getValue(uint32_t i) const + { + return mCode[i] * BaseT::mQuantum + BaseT::mMinimum; // code * (max-min)/65535 + min + } + + /// @brief This class cannot be constructed or deleted + LeafData() = delete; + LeafData(const LeafData&) = delete; + LeafData& operator=(const LeafData&) = delete; + ~LeafData() = delete; +}; // LeafData + +// --------------------------> LeafData <------------------------------------ + +template class MaskT, uint32_t LOG2DIM> +struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData + : public LeafFnBase +{ // this class has no additional data members, however every instance is immediately followed by + // bitWidth*64 bytes. 
Since its base class is 32B aligned so are the bitWidth*64 bytes + using BaseT = LeafFnBase; + using BuildType = FpN; + static constexpr bool FIXED_SIZE = false; + __hostdev__ static constexpr uint32_t padding() + { + static_assert(BaseT::padding() == 0, "expected no padding in LeafFnBase"); + return 0; + } + + __hostdev__ uint8_t bitWidth() const { return 1 << (BaseT::mFlags >> 5); } // 4,8,16,32 = 2^(2,3,4,5) + __hostdev__ size_t memUsage() const { return sizeof(*this) + this->bitWidth() * 64; } + __hostdev__ static size_t memUsage(uint32_t bitWidth) { return 96u + bitWidth * 64; } + __hostdev__ float getValue(uint32_t i) const + { +#ifdef NANOVDB_FPN_BRANCHLESS // faster + const int b = BaseT::mFlags >> 5; // b = 0, 1, 2, 3, 4 corresponding to 1, 2, 4, 8, 16 bits +#if 0 // use LUT + uint16_t code = reinterpret_cast(this + 1)[i >> (4 - b)]; + const static uint8_t shift[5] = {15, 7, 3, 1, 0}; + const static uint16_t mask[5] = {1, 3, 15, 255, 65535}; + code >>= (i & shift[b]) << b; + code &= mask[b]; +#else // no LUT + uint32_t code = reinterpret_cast(this + 1)[i >> (5 - b)]; + code >>= (i & ((32 >> b) - 1)) << b; + code &= (1 << (1 << b)) - 1; +#endif +#else // use branched version (slow) + float code; + auto* values = reinterpret_cast(this + 1); + switch (BaseT::mFlags >> 5) { + case 0u: // 1 bit float + code = float((values[i >> 3] >> (i & 7)) & uint8_t(1)); + break; + case 1u: // 2 bits float + code = float((values[i >> 2] >> ((i & 3) << 1)) & uint8_t(3)); + break; + case 2u: // 4 bits float + code = float((values[i >> 1] >> ((i & 1) << 2)) & uint8_t(15)); + break; + case 3u: // 8 bits float + code = float(values[i]); + break; + default: // 16 bits float + code = float(reinterpret_cast(values)[i]); + } +#endif + return float(code) * BaseT::mQuantum + BaseT::mMinimum; // code * (max-min)/UNITS + min + } + + /// @brief This class cannot be constructed or deleted + LeafData() = delete; + LeafData(const LeafData&) = delete; + LeafData& operator=(const LeafData&) = delete; + ~LeafData() = delete; +}; // LeafData + +// --------------------------> LeafData <------------------------------------ + +// Partial template specialization of LeafData with bool +template class MaskT, uint32_t LOG2DIM> +struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData +{ + static_assert(sizeof(CoordT) == sizeof(Coord), "Mismatching sizeof"); + static_assert(sizeof(MaskT) == sizeof(Mask), "Mismatching sizeof"); + using ValueType = bool; + using BuildType = bool; + using FloatType = bool; // dummy value type + using ArrayType = MaskT; // type used for the internal mValue array + static constexpr bool FIXED_SIZE = true; + + CoordT mBBoxMin; // 12B. + uint8_t mBBoxDif[3]; // 3B. + uint8_t mFlags; // 1B. bit0: skip render?, bit1: has bbox?, bit3: unused, bit4: has stats, bits5,6,7: bit-width for FpN + MaskT mValueMask; // LOG2DIM(3): 64B. + MaskT mValues; // LOG2DIM(3): 64B. 
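LeafData<FpN>::getValue above reads the per-leaf bit width from the top three bits of mFlags and extracts codes word-wise: load the 32-bit word holding code i, shift by (i mod codes-per-word) * bits and mask with (1 << bits) - 1. A standalone sketch of that packed-code read/write for a caller-chosen width, shown here with b = 2 (4-bit codes, the same layout Fp4 uses); the function names are illustrative:

    #include <cstdint>
    #include <cstring>
    #include <cassert>

    // Read/write the i-th code in an array of 2^b-bit codes (b = 0..4 -> 1,2,4,8,16 bits),
    // following the branchless "no LUT" path of LeafData<FpN>::getValue above.
    uint32_t getCode(const uint8_t* data, uint32_t i, uint32_t b)
    {
        uint32_t word;
        std::memcpy(&word, data + 4 * (i >> (5 - b)), 4); // 32-bit word holding code i
        word >>= (i & ((32u >> b) - 1u)) << b;            // shift code down to bit 0
        return word & ((1u << (1u << b)) - 1u);           // keep 2^b bits
    }

    void setCode(uint8_t* data, uint32_t i, uint32_t code, uint32_t b)
    {
        uint8_t* p = data + 4 * (i >> (5 - b));
        uint32_t word;
        std::memcpy(&word, p, 4);
        const uint32_t shift = (i & ((32u >> b) - 1u)) << b;
        const uint32_t mask  = (1u << (1u << b)) - 1u;
        word = (word & ~(mask << shift)) | ((code & mask) << shift);
        std::memcpy(p, &word, 4);
    }

    int main()
    {
        uint8_t data[64] = {};           // room for 128 four-bit codes
        const uint32_t b = 2;            // 2^2 = 4-bit codes, i.e. the Fp4 layout
        for (uint32_t i = 0; i < 16; ++i)
            setCode(data, i, i, b);
        assert(getCode(data,  3, b) ==  3);
        assert(getCode(data, 10, b) == 10);
        // a dequantized value would then be code * quantum + minimum, as in getValue above
        return 0;
    }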
+ uint64_t mPadding[2]; // 16B padding to 32B alignment + + __hostdev__ static constexpr uint32_t padding() { return sizeof(LeafData) - 12u - 3u - 1u - 2 * sizeof(MaskT) - 16u; } + __hostdev__ static uint64_t memUsage() { return sizeof(LeafData); } + __hostdev__ static bool hasStats() { return false; } + __hostdev__ bool getValue(uint32_t i) const { return mValues.isOn(i); } + __hostdev__ bool getMin() const { return false; } // dummy + __hostdev__ bool getMax() const { return false; } // dummy + __hostdev__ bool getAvg() const { return false; } // dummy + __hostdev__ bool getDev() const { return false; } // dummy + __hostdev__ void setValue(uint32_t offset, bool v) + { + mValueMask.setOn(offset); + mValues.set(offset, v); + } + __hostdev__ void setOn(uint32_t offset) { mValueMask.setOn(offset); } + __hostdev__ void setMin(const bool&) {} // no-op + __hostdev__ void setMax(const bool&) {} // no-op + __hostdev__ void setAvg(const bool&) {} // no-op + __hostdev__ void setDev(const bool&) {} // no-op + + template + __hostdev__ void setOrigin(const T& ijk) { mBBoxMin = ijk; } + + /// @brief This class cannot be constructed or deleted + LeafData() = delete; + LeafData(const LeafData&) = delete; + LeafData& operator=(const LeafData&) = delete; + ~LeafData() = delete; +}; // LeafData + +// --------------------------> LeafData <------------------------------------ + +// Partial template specialization of LeafData with ValueMask +template class MaskT, uint32_t LOG2DIM> +struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData +{ + static_assert(sizeof(CoordT) == sizeof(Coord), "Mismatching sizeof"); + static_assert(sizeof(MaskT) == sizeof(Mask), "Mismatching sizeof"); + using ValueType = bool; + using BuildType = ValueMask; + using FloatType = bool; // dummy value type + using ArrayType = void; // type used for the internal mValue array - void means missing + static constexpr bool FIXED_SIZE = true; + + CoordT mBBoxMin; // 12B. + uint8_t mBBoxDif[3]; // 3B. + uint8_t mFlags; // 1B. bit0: skip render?, bit1: has bbox?, bit3: unused, bit4: has stats, bits5,6,7: bit-width for FpN + MaskT mValueMask; // LOG2DIM(3): 64B. 
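The bool specialization above needs no value array at all: one 512-bit mask (mValueMask) records which voxels are active and a second (mValues) stores the boolean payloads, so getValue is just a bit test. A tiny standalone sketch of that two-bitset layout (illustrative, fixed at 512 voxels):

    #include <cstdint>
    #include <cassert>

    // A toy bool leaf: one bitset for "is this voxel active", one for its value,
    // mirroring LeafData<bool>::setValue/getValue above.
    struct ToyBoolLeaf
    {
        uint64_t valueMask[8] = {}; // 512 bits: active states
        uint64_t values[8]    = {}; // 512 bits: boolean payloads

        static void setBit(uint64_t* w, uint32_t i, bool on)
        {
            const uint64_t bit = uint64_t(1) << (i & 63u);
            on ? (w[i >> 6] |= bit) : (w[i >> 6] &= ~bit);
        }
        static bool getBit(const uint64_t* w, uint32_t i) { return (w[i >> 6] >> (i & 63u)) & 1u; }

        void setValue(uint32_t i, bool v) { setBit(valueMask, i, true); setBit(values, i, v); }
        bool getValue(uint32_t i) const   { return getBit(values, i); }
        bool isActive(uint32_t i) const   { return getBit(valueMask, i); }
    };

    int main()
    {
        ToyBoolLeaf leaf;
        leaf.setValue(7, true);
        leaf.setValue(300, false);  // active voxel whose value is false
        assert(leaf.isActive(7)   && leaf.getValue(7));
        assert(leaf.isActive(300) && !leaf.getValue(300));
        assert(!leaf.isActive(8));
        return 0;
    }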
+ uint64_t mPadding[2]; // 16B padding to 32B alignment + + __hostdev__ static uint64_t memUsage() { return sizeof(LeafData); } + __hostdev__ static bool hasStats() { return false; } + __hostdev__ static constexpr uint32_t padding() + { + return sizeof(LeafData) - (12u + 3u + 1u + sizeof(MaskT) + 2 * 8u); + } + + __hostdev__ bool getValue(uint32_t i) const { return mValueMask.isOn(i); } + __hostdev__ bool getMin() const { return false; } // dummy + __hostdev__ bool getMax() const { return false; } // dummy + __hostdev__ bool getAvg() const { return false; } // dummy + __hostdev__ bool getDev() const { return false; } // dummy + __hostdev__ void setValue(uint32_t offset, bool) { mValueMask.setOn(offset); } + __hostdev__ void setOn(uint32_t offset) { mValueMask.setOn(offset); } + __hostdev__ void setMin(const ValueType&) {} // no-op + __hostdev__ void setMax(const ValueType&) {} // no-op + __hostdev__ void setAvg(const FloatType&) {} // no-op + __hostdev__ void setDev(const FloatType&) {} // no-op + + template + __hostdev__ void setOrigin(const T& ijk) { mBBoxMin = ijk; } + + /// @brief This class cannot be constructed or deleted + LeafData() = delete; + LeafData(const LeafData&) = delete; + LeafData& operator=(const LeafData&) = delete; + ~LeafData() = delete; +}; // LeafData + +// --------------------------> LeafIndexBase <------------------------------------ + +// Partial template specialization of LeafData with ValueIndex +template class MaskT, uint32_t LOG2DIM> +struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafIndexBase +{ + static_assert(sizeof(CoordT) == sizeof(Coord), "Mismatching sizeof"); + static_assert(sizeof(MaskT) == sizeof(Mask), "Mismatching sizeof"); + using ValueType = uint64_t; + using FloatType = uint64_t; + using ArrayType = void; // type used for the internal mValue array - void means missing + static constexpr bool FIXED_SIZE = true; + + CoordT mBBoxMin; // 12B. + uint8_t mBBoxDif[3]; // 3B. + uint8_t mFlags; // 1B. bit0: skip render?, bit1: has bbox?, bit3: unused, bit4: has stats, bits5,6,7: bit-width for FpN + MaskT mValueMask; // LOG2DIM(3): 64B. 
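LeafIndexBase declares mOffset (the index of this leaf's first value in an external array) and mPrefixSum just below; the ValueOnIndex specialization further down turns a voxel number into a value index by popcounting the active bits below it in its 64-bit mask word and adding the packed 9-bit running count of the earlier words. A standalone sketch of that lookup, with the mask words, prefix sum and offset built by hand (names are illustrative):

    #include <cstdint>
    #include <cassert>

    #if defined(__GNUC__) || defined(__clang__)
    inline uint32_t countOn(uint64_t w) { return uint32_t(__builtin_popcountll(w)); }
    #else
    inline uint32_t countOn(uint64_t w)
    {
        uint32_t n = 0;
        for (; w; w &= w - 1) ++n; // Kernighan popcount fallback
        return n;
    }
    #endif

    // words: 512-bit active mask (8 x 64). prefixSum: packed 9-bit running counts of
    // words 0..6, as in LeafData<ValueOnIndex>. offset: index of this leaf's first
    // active value. Returns 0 for inactive voxels (the background slot).
    uint64_t sparseIndex(const uint64_t words[8], uint64_t prefixSum, uint64_t offset, uint32_t i)
    {
        uint32_t n = i >> 6;
        const uint64_t w = words[n], bit = uint64_t(1) << (i & 63u);
        if (!(w & bit)) return 0;                         // inactive voxel
        uint64_t sum = offset + countOn(w & (bit - 1u));  // active bits below i in this word
        if (n--) sum += (prefixSum >> (9u * n)) & 511u;   // actives in all earlier words
        return sum;
    }

    int main()
    {
        uint64_t words[8] = {};
        words[0] = 0b1011;            // voxels 0, 1, 3 active
        words[2] = uint64_t(1) << 10; // voxel 138 active
        // pack the running active counts of words 0..6, 9 bits per word
        uint64_t prefix = 0;
        uint32_t running = 0;
        for (uint32_t w = 0; w < 7; ++w) {
            running += countOn(words[w]);
            prefix |= uint64_t(running) << (9 * w);
        }

        assert(sparseIndex(words, prefix, 100, 0)   == 100); // 1st active value
        assert(sparseIndex(words, prefix, 100, 3)   == 102); // 3rd active value
        assert(sparseIndex(words, prefix, 100, 138) == 103); // 4th active value
        assert(sparseIndex(words, prefix, 100, 2)   == 0);   // inactive -> background
        return 0;
    }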
+ uint64_t mOffset, mPrefixSum; // 8B offset to first value in this leaf node and 9-bit prefix sum + __hostdev__ static constexpr uint32_t padding() + { + return sizeof(LeafIndexBase) - (12u + 3u + 1u + sizeof(MaskT) + 2 * 8u); + } + __hostdev__ static uint64_t memUsage() { return sizeof(LeafIndexBase); } + __hostdev__ bool hasStats() const { return mFlags & (uint8_t(1) << 4); } + // return the offset to the first value indexed by this leaf node + __hostdev__ const uint64_t& firstOffset() const { return mOffset; } + __hostdev__ void setMin(const ValueType&) {} // no-op + __hostdev__ void setMax(const ValueType&) {} // no-op + __hostdev__ void setAvg(const FloatType&) {} // no-op + __hostdev__ void setDev(const FloatType&) {} // no-op + __hostdev__ void setOn(uint32_t offset) { mValueMask.setOn(offset); } + template + __hostdev__ void setOrigin(const T& ijk) { mBBoxMin = ijk; } + +protected: + /// @brief This class should be used as an abstract class and only constructed or deleted via child classes + LeafIndexBase() = default; + LeafIndexBase(const LeafIndexBase&) = default; + LeafIndexBase& operator=(const LeafIndexBase&) = default; + ~LeafIndexBase() = default; +}; // LeafIndexBase + +// --------------------------> LeafData <------------------------------------ + +// Partial template specialization of LeafData with ValueIndex +template class MaskT, uint32_t LOG2DIM> +struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData + : public LeafIndexBase +{ + using BaseT = LeafIndexBase; + using BuildType = ValueIndex; + // return the total number of values indexed by this leaf node, excluding the optional 4 stats + __hostdev__ static uint32_t valueCount() { return uint32_t(512); } // 8^3 = 2^9 + // return the offset to the last value indexed by this leaf node (disregarding optional stats) + __hostdev__ uint64_t lastOffset() const { return BaseT::mOffset + 511u; } // 2^9 - 1 + // if stats are available, they are always placed after the last voxel value in this leaf node + __hostdev__ uint64_t getMin() const { return this->hasStats() ? BaseT::mOffset + 512u : 0u; } + __hostdev__ uint64_t getMax() const { return this->hasStats() ? BaseT::mOffset + 513u : 0u; } + __hostdev__ uint64_t getAvg() const { return this->hasStats() ? BaseT::mOffset + 514u : 0u; } + __hostdev__ uint64_t getDev() const { return this->hasStats() ? BaseT::mOffset + 515u : 0u; } + __hostdev__ uint64_t getValue(uint32_t i) const { return BaseT::mOffset + i; } // dense leaf node with active and inactive voxels +}; // LeafData + +// --------------------------> LeafData <------------------------------------ + +template class MaskT, uint32_t LOG2DIM> +struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData + : public LeafIndexBase +{ + using BaseT = LeafIndexBase; + using BuildType = ValueOnIndex; + __hostdev__ uint32_t valueCount() const + { + return util::countOn(BaseT::mValueMask.words()[7]) + (BaseT::mPrefixSum >> 54u & 511u); // last 9 bits of mPrefixSum do not account for the last word in mValueMask + } + __hostdev__ uint64_t lastOffset() const { return BaseT::mOffset + this->valueCount() - 1u; } + __hostdev__ uint64_t getMin() const { return this->hasStats() ? this->lastOffset() + 1u : 0u; } + __hostdev__ uint64_t getMax() const { return this->hasStats() ? this->lastOffset() + 2u : 0u; } + __hostdev__ uint64_t getAvg() const { return this->hasStats() ? this->lastOffset() + 3u : 0u; } + __hostdev__ uint64_t getDev() const { return this->hasStats() ? 
this->lastOffset() + 4u : 0u; } + __hostdev__ uint64_t getValue(uint32_t i) const + { + //return mValueMask.isOn(i) ? mOffset + mValueMask.countOn(i) : 0u;// for debugging + uint32_t n = i >> 6; + const uint64_t w = BaseT::mValueMask.words()[n], mask = uint64_t(1) << (i & 63u); + if (!(w & mask)) return uint64_t(0); // if i'th value is inactive return offset to background value + uint64_t sum = BaseT::mOffset + util::countOn(w & (mask - 1u)); + if (n--) sum += BaseT::mPrefixSum >> (9u * n) & 511u; + return sum; + } +}; // LeafData + +// --------------------------> LeafData <------------------------------------ + +template class MaskT, uint32_t LOG2DIM> +struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData + : public LeafData +{ + using BuildType = ValueIndexMask; + MaskT mMask; + __hostdev__ static uint64_t memUsage() { return sizeof(LeafData); } + __hostdev__ bool isMaskOn(uint32_t offset) const { return mMask.isOn(offset); } + __hostdev__ void setMask(uint32_t offset, bool v) { mMask.set(offset, v); } +}; // LeafData + +template class MaskT, uint32_t LOG2DIM> +struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData + : public LeafData +{ + using BuildType = ValueOnIndexMask; + MaskT mMask; + __hostdev__ static uint64_t memUsage() { return sizeof(LeafData); } + __hostdev__ bool isMaskOn(uint32_t offset) const { return mMask.isOn(offset); } + __hostdev__ void setMask(uint32_t offset, bool v) { mMask.set(offset, v); } +}; // LeafData + +// --------------------------> LeafData <------------------------------------ + +template class MaskT, uint32_t LOG2DIM> +struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData +{ + static_assert(sizeof(CoordT) == sizeof(Coord), "Mismatching sizeof"); + static_assert(sizeof(MaskT) == sizeof(Mask), "Mismatching sizeof"); + using ValueType = uint64_t; + using BuildType = Point; + using FloatType = typename FloatTraits::FloatType; + using ArrayType = uint16_t; // type used for the internal mValue array + static constexpr bool FIXED_SIZE = true; + + CoordT mBBoxMin; // 12B. + uint8_t mBBoxDif[3]; // 3B. + uint8_t mFlags; // 1B. bit0: skip render?, bit1: has bbox?, bit3: unused, bit4: has stats, bits5,6,7: bit-width for FpN + MaskT mValueMask; // LOG2DIM(3): 64B. + + uint64_t mOffset; // 8B + uint64_t mPointCount; // 8B + alignas(32) uint16_t mValues[1u << 3 * LOG2DIM]; // 1KB + // no padding + + /// @brief Return padding of this class in bytes, due to aliasing and 32B alignment + /// + /// @note The extra bytes are not necessarily at the end, but can come from aliasing of individual data members. + __hostdev__ static constexpr uint32_t padding() + { + return sizeof(LeafData) - (12u + 3u + 1u + sizeof(MaskT) + 2 * 8u + (1u << 3 * LOG2DIM) * 2u); + } + __hostdev__ static uint64_t memUsage() { return sizeof(LeafData); } + + __hostdev__ uint64_t offset() const { return mOffset; } + __hostdev__ uint64_t pointCount() const { return mPointCount; } + __hostdev__ uint64_t first(uint32_t i) const { return i ? 
uint64_t(mValues[i - 1u]) + mOffset : mOffset; } + __hostdev__ uint64_t last(uint32_t i) const { return uint64_t(mValues[i]) + mOffset; } + __hostdev__ uint64_t getValue(uint32_t i) const { return uint64_t(mValues[i]); } + __hostdev__ void setValueOnly(uint32_t offset, uint16_t value) { mValues[offset] = value; } + __hostdev__ void setValue(uint32_t offset, uint16_t value) + { + mValueMask.setOn(offset); + mValues[offset] = value; + } + __hostdev__ void setOn(uint32_t offset) { mValueMask.setOn(offset); } + + __hostdev__ ValueType getMin() const { return mOffset; } + __hostdev__ ValueType getMax() const { return mPointCount; } + __hostdev__ FloatType getAvg() const { return 0.0f; } + __hostdev__ FloatType getDev() const { return 0.0f; } + + __hostdev__ void setMin(const ValueType&) {} + __hostdev__ void setMax(const ValueType&) {} + __hostdev__ void setAvg(const FloatType&) {} + __hostdev__ void setDev(const FloatType&) {} + + template + __hostdev__ void setOrigin(const T& ijk) { mBBoxMin = ijk; } + + /// @brief This class cannot be constructed or deleted + LeafData() = delete; + LeafData(const LeafData&) = delete; + LeafData& operator=(const LeafData&) = delete; + ~LeafData() = delete; +}; // LeafData + +// --------------------------> LeafNode <------------------------------------ + +/// @brief Leaf nodes of the VDB tree. (defaults to 8x8x8 = 512 voxels) +template class MaskT = Mask, + uint32_t Log2Dim = 3> +class LeafNode : public LeafData +{ +public: + struct ChildNodeType + { + static constexpr uint32_t TOTAL = 0; + static constexpr uint32_t DIM = 1; + __hostdev__ static uint32_t dim() { return 1u; } + }; // Voxel + using LeafNodeType = LeafNode; + using DataType = LeafData; + using ValueType = typename DataType::ValueType; + using FloatType = typename DataType::FloatType; + using BuildType = typename DataType::BuildType; + using CoordType = CoordT; + static constexpr bool FIXED_SIZE = DataType::FIXED_SIZE; + template + using MaskType = MaskT; + template + using MaskIterT = typename Mask::template Iterator; + + /// @brief Visits all active values in a leaf node + class ValueOnIterator : public MaskIterT + { + using BaseT = MaskIterT; + const LeafNode* mParent; + + public: + __hostdev__ ValueOnIterator() + : BaseT() + , mParent(nullptr) + { + } + __hostdev__ ValueOnIterator(const LeafNode* parent) + : BaseT(parent->data()->mValueMask.beginOn()) + , mParent(parent) + { + } + ValueOnIterator& operator=(const ValueOnIterator&) = default; + __hostdev__ ValueType operator*() const + { + NANOVDB_ASSERT(*this); + return mParent->getValue(BaseT::pos()); + } + __hostdev__ CoordT getCoord() const + { + NANOVDB_ASSERT(*this); + return mParent->offsetToGlobalCoord(BaseT::pos()); + } + }; // Member class ValueOnIterator + + __hostdev__ ValueOnIterator beginValueOn() const { return ValueOnIterator(this); } + __hostdev__ ValueOnIterator cbeginValueOn() const { return ValueOnIterator(this); } + + /// @brief Visits all inactive values in a leaf node + class ValueOffIterator : public MaskIterT + { + using BaseT = MaskIterT; + const LeafNode* mParent; + + public: + __hostdev__ ValueOffIterator() + : BaseT() + , mParent(nullptr) + { + } + __hostdev__ ValueOffIterator(const LeafNode* parent) + : BaseT(parent->data()->mValueMask.beginOff()) + , mParent(parent) + { + } + ValueOffIterator& operator=(const ValueOffIterator&) = default; + __hostdev__ ValueType operator*() const + { + NANOVDB_ASSERT(*this); + return mParent->getValue(BaseT::pos()); + } + __hostdev__ CoordT getCoord() const + { + 
NANOVDB_ASSERT(*this); + return mParent->offsetToGlobalCoord(BaseT::pos()); + } + }; // Member class ValueOffIterator + + __hostdev__ ValueOffIterator beginValueOff() const { return ValueOffIterator(this); } + __hostdev__ ValueOffIterator cbeginValueOff() const { return ValueOffIterator(this); } + + /// @brief Visits all values in a leaf node, i.e. both active and inactive values + class ValueIterator + { + const LeafNode* mParent; + uint32_t mPos; + + public: + __hostdev__ ValueIterator() + : mParent(nullptr) + , mPos(1u << 3 * Log2Dim) + { + } + __hostdev__ ValueIterator(const LeafNode* parent) + : mParent(parent) + , mPos(0) + { + NANOVDB_ASSERT(parent); + } + ValueIterator& operator=(const ValueIterator&) = default; + __hostdev__ ValueType operator*() const + { + NANOVDB_ASSERT(*this); + return mParent->getValue(mPos); + } + __hostdev__ CoordT getCoord() const + { + NANOVDB_ASSERT(*this); + return mParent->offsetToGlobalCoord(mPos); + } + __hostdev__ bool isActive() const + { + NANOVDB_ASSERT(*this); + return mParent->isActive(mPos); + } + __hostdev__ operator bool() const { return mPos < (1u << 3 * Log2Dim); } + __hostdev__ ValueIterator& operator++() + { + ++mPos; + return *this; + } + __hostdev__ ValueIterator operator++(int) + { + auto tmp = *this; + ++(*this); + return tmp; + } + }; // Member class ValueIterator + + __hostdev__ ValueIterator beginValue() const { return ValueIterator(this); } + __hostdev__ ValueIterator cbeginValueAll() const { return ValueIterator(this); } + + static_assert(util::is_same::Type>::value, "Mismatching BuildType"); + static constexpr uint32_t LOG2DIM = Log2Dim; + static constexpr uint32_t TOTAL = LOG2DIM; // needed by parent nodes + static constexpr uint32_t DIM = 1u << TOTAL; // number of voxels along each axis of this node + static constexpr uint32_t SIZE = 1u << 3 * LOG2DIM; // total number of voxels represented by this node + static constexpr uint32_t MASK = (1u << LOG2DIM) - 1u; // mask for bit operations + static constexpr uint32_t LEVEL = 0; // level 0 = leaf + static constexpr uint64_t NUM_VALUES = uint64_t(1) << (3 * TOTAL); // total voxel count represented by this node + + __hostdev__ DataType* data() { return reinterpret_cast(this); } + + __hostdev__ const DataType* data() const { return reinterpret_cast(this); } + + /// @brief Return a const reference to the bit mask of active voxels in this leaf node + __hostdev__ const MaskType& valueMask() const { return DataType::mValueMask; } + __hostdev__ const MaskType& getValueMask() const { return DataType::mValueMask; } + + /// @brief Return a const reference to the minimum active value encoded in this leaf node + __hostdev__ ValueType minimum() const { return DataType::getMin(); } + + /// @brief Return a const reference to the maximum active value encoded in this leaf node + __hostdev__ ValueType maximum() const { return DataType::getMax(); } + + /// @brief Return a const reference to the average of all the active values encoded in this leaf node + __hostdev__ FloatType average() const { return DataType::getAvg(); } + + /// @brief Return the variance of all the active values encoded in this leaf node + __hostdev__ FloatType variance() const { return Pow2(DataType::getDev()); } + + /// @brief Return a const reference to the standard deviation of all the active values encoded in this leaf node + __hostdev__ FloatType stdDeviation() const { return DataType::getDev(); } + + __hostdev__ uint8_t flags() const { return DataType::mFlags; } + + /// @brief Return the origin in index space of this leaf 
node + __hostdev__ CoordT origin() const { return DataType::mBBoxMin & ~MASK; } + + /// @brief Compute the local coordinates from a linear offset + /// @param n Linear offset into this nodes dense table + /// @return Local (vs global) 3D coordinates + __hostdev__ static CoordT OffsetToLocalCoord(uint32_t n) + { + NANOVDB_ASSERT(n < SIZE); + const uint32_t m = n & ((1 << 2 * LOG2DIM) - 1); + return CoordT(n >> 2 * LOG2DIM, m >> LOG2DIM, m & MASK); + } + + /// @brief Converts (in place) a local index coordinate to a global index coordinate + __hostdev__ void localToGlobalCoord(Coord& ijk) const { ijk += this->origin(); } + + __hostdev__ CoordT offsetToGlobalCoord(uint32_t n) const + { + return OffsetToLocalCoord(n) + this->origin(); + } + + /// @brief Return the dimension, in index space, of this leaf node (typically 8 as for openvdb leaf nodes!) + __hostdev__ static uint32_t dim() { return 1u << LOG2DIM; } + + /// @brief Return the bounding box in index space of active values in this leaf node + __hostdev__ math::BBox bbox() const + { + math::BBox bbox(DataType::mBBoxMin, DataType::mBBoxMin); + if (this->hasBBox()) { + bbox.max()[0] += DataType::mBBoxDif[0]; + bbox.max()[1] += DataType::mBBoxDif[1]; + bbox.max()[2] += DataType::mBBoxDif[2]; + } else { // very rare case + bbox = math::BBox(); // invalid + } + return bbox; + } + + /// @brief Return the total number of voxels (e.g. values) encoded in this leaf node + __hostdev__ static uint32_t voxelCount() { return 1u << (3 * LOG2DIM); } + + __hostdev__ static uint32_t padding() { return DataType::padding(); } + + /// @brief return memory usage in bytes for the leaf node + __hostdev__ uint64_t memUsage() const { return DataType::memUsage(); } + + /// @brief This class cannot be constructed or deleted + LeafNode() = delete; + LeafNode(const LeafNode&) = delete; + LeafNode& operator=(const LeafNode&) = delete; + ~LeafNode() = delete; + + /// @brief Return the voxel value at the given offset. + __hostdev__ ValueType getValue(uint32_t offset) const { return DataType::getValue(offset); } + + /// @brief Return the voxel value at the given coordinate. + __hostdev__ ValueType getValue(const CoordT& ijk) const { return DataType::getValue(CoordToOffset(ijk)); } + + /// @brief Return the first value in this leaf node. + __hostdev__ ValueType getFirstValue() const { return this->getValue(0); } + /// @brief Return the last value in this leaf node. + __hostdev__ ValueType getLastValue() const { return this->getValue(SIZE - 1); } + + /// @brief Sets the value at the specified location and activate its state. + /// + /// @note This is safe since it does not change the topology of the tree (unlike setValue methods on the other nodes) + __hostdev__ void setValue(const CoordT& ijk, const ValueType& v) { DataType::setValue(CoordToOffset(ijk), v); } + + /// @brief Sets the value at the specified location but leaves its state unchanged. + /// + /// @note This is safe since it does not change the topology of the tree (unlike setValue methods on the other nodes) + __hostdev__ void setValueOnly(uint32_t offset, const ValueType& v) { DataType::setValueOnly(offset, v); } + __hostdev__ void setValueOnly(const CoordT& ijk, const ValueType& v) { DataType::setValueOnly(CoordToOffset(ijk), v); } + + /// @brief Return @c true if the voxel value at the given coordinate is active. 
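+    ///
+    /// @details The coordinate is first linearized with CoordToOffset() (defined below).
+    ///          For the default Log2Dim = 3, i.e. an 8x8x8 node with MASK = 7, this is
+    ///          offset = ((i & 7) << 6) | ((j & 7) << 3) | (k & 7); e.g. ijk = (9,2,5)
+    ///          maps to (1 << 6) | (2 << 3) | 5 = 85.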
+ __hostdev__ bool isActive(const CoordT& ijk) const { return DataType::mValueMask.isOn(CoordToOffset(ijk)); } + __hostdev__ bool isActive(uint32_t n) const { return DataType::mValueMask.isOn(n); } + + /// @brief Return @c true if any of the voxel value are active in this leaf node. + __hostdev__ bool isActive() const + { + //NANOVDB_ASSERT( bool(DataType::mFlags & uint8_t(2)) != DataType::mValueMask.isOff() ); + //return DataType::mFlags & uint8_t(2); + return !DataType::mValueMask.isOff(); + } + + __hostdev__ bool hasBBox() const { return DataType::mFlags & uint8_t(2); } + + /// @brief Return @c true if the voxel value at the given coordinate is active and updates @c v with the value. + __hostdev__ bool probeValue(const CoordT& ijk, ValueType& v) const + { + const uint32_t n = CoordToOffset(ijk); + v = DataType::getValue(n); + return DataType::mValueMask.isOn(n); + } + + __hostdev__ const LeafNode* probeLeaf(const CoordT&) const { return this; } + + /// @brief Return the linear offset corresponding to the given coordinate + __hostdev__ static uint32_t CoordToOffset(const CoordT& ijk) + { + return ((ijk[0] & MASK) << (2 * LOG2DIM)) | ((ijk[1] & MASK) << LOG2DIM) | (ijk[2] & MASK); + } + + /// @brief Updates the local bounding box of active voxels in this node. Return true if bbox was updated. + /// + /// @warning It assumes that the origin and value mask have already been set. + /// + /// @details This method is based on few (intrinsic) bit operations and hence is relatively fast. + /// However, it should only only be called if either the value mask has changed or if the + /// active bounding box is still undefined. e.g. during construction of this node. + __hostdev__ bool updateBBox(); + + template + __hostdev__ auto get(const CoordType& ijk, ArgsT&&... args) const + { + return OpT::get(*this, CoordToOffset(ijk), args...); + } + + template + __hostdev__ auto get(const uint32_t n, ArgsT&&... args) const + { + return OpT::get(*this, n, args...); + } + + template + __hostdev__ auto set(const CoordType& ijk, ArgsT&&... args) + { + return OpT::set(*this, CoordToOffset(ijk), args...); + } + + template + __hostdev__ auto set(const uint32_t n, ArgsT&&... args) + { + return OpT::set(*this, n, args...); + } + +private: + static_assert(sizeof(DataType) % NANOVDB_DATA_ALIGNMENT == 0, "sizeof(LeafData) is misaligned"); + + template + friend class ReadAccessor; + + template + friend class RootNode; + template + friend class InternalNode; + +#ifndef NANOVDB_NEW_ACCESSOR_METHODS + /// @brief Private method to return a voxel value and update a (dummy) ReadAccessor + template + __hostdev__ ValueType getValueAndCache(const CoordT& ijk, const AccT&) const { return this->getValue(ijk); } + + /// @brief Return the node information. 
+ template + __hostdev__ typename AccT::NodeInfo getNodeInfoAndCache(const CoordType& /*ijk*/, const AccT& /*acc*/) const + { + using NodeInfoT = typename AccT::NodeInfo; + return NodeInfoT{LEVEL, this->dim(), this->minimum(), this->maximum(), this->average(), this->stdDeviation(), this->bbox()[0], this->bbox()[1]}; + } + + template + __hostdev__ bool isActiveAndCache(const CoordT& ijk, const AccT&) const { return this->isActive(ijk); } + + template + __hostdev__ bool probeValueAndCache(const CoordT& ijk, ValueType& v, const AccT&) const { return this->probeValue(ijk, v); } + + template + __hostdev__ const LeafNode* probeLeafAndCache(const CoordT&, const AccT&) const { return this; } +#endif + + template + __hostdev__ uint32_t getDimAndCache(const CoordT&, const RayT& /*ray*/, const AccT&) const + { + if (DataType::mFlags & uint8_t(1u)) + return this->dim(); // skip this node if the 1st bit is set + + //if (!ray.intersects( this->bbox() )) return 1 << LOG2DIM; + return ChildNodeType::dim(); + } + + template + __hostdev__ auto + //__hostdev__ decltype(OpT::get(util::declval(), util::declval(), util::declval()...)) + getAndCache(const CoordType& ijk, const AccT&, ArgsT&&... args) const + { + return OpT::get(*this, CoordToOffset(ijk), args...); + } + + template + //__hostdev__ auto // occasionally fails with NVCC + __hostdev__ decltype(OpT::set(util::declval(), util::declval(), util::declval()...)) + setAndCache(const CoordType& ijk, const AccT&, ArgsT&&... args) + { + return OpT::set(*this, CoordToOffset(ijk), args...); + } + +}; // LeafNode class + +// --------------------------> LeafNode::updateBBox <------------------------------------ + +template class MaskT, uint32_t LOG2DIM> +__hostdev__ inline bool LeafNode::updateBBox() +{ + static_assert(LOG2DIM == 3, "LeafNode::updateBBox: only supports LOGDIM = 3!"); + if (DataType::mValueMask.isOff()) { + DataType::mFlags &= ~uint8_t(2); // set 2nd bit off, which indicates that this nodes has no bbox + return false; + } + auto update = [&](uint32_t min, uint32_t max, int axis) { + NANOVDB_ASSERT(min <= max && max < 8); + DataType::mBBoxMin[axis] = (DataType::mBBoxMin[axis] & ~MASK) + int(min); + DataType::mBBoxDif[axis] = uint8_t(max - min); + }; + uint64_t *w = DataType::mValueMask.words(), word64 = *w; + uint32_t Xmin = word64 ? 
0u : 8u, Xmax = Xmin; + for (int i = 1; i < 8; ++i) { // last loop over 8 64 bit words + if (w[i]) { // skip if word has no set bits + word64 |= w[i]; // union 8 x 64 bits words into one 64 bit word + if (Xmin == 8) + Xmin = i; // only set once + Xmax = i; + } + } + NANOVDB_ASSERT(word64); + update(Xmin, Xmax, 0); + update(util::findLowestOn(word64) >> 3, util::findHighestOn(word64) >> 3, 1); + const uint32_t *p = reinterpret_cast(&word64), word32 = p[0] | p[1]; + const uint16_t *q = reinterpret_cast(&word32), word16 = q[0] | q[1]; + const uint8_t *b = reinterpret_cast(&word16), byte = b[0] | b[1]; + NANOVDB_ASSERT(byte); + update(util::findLowestOn(static_cast(byte)), util::findHighestOn(static_cast(byte)), 2); + DataType::mFlags |= uint8_t(2); // set 2nd bit on, which indicates that this nodes has a bbox + return true; +} // LeafNode::updateBBox + +// --------------------------> Template specializations and traits <------------------------------------ + +/// @brief Template specializations to the default configuration used in OpenVDB: +/// Root -> 32^3 -> 16^3 -> 8^3 +template +using NanoLeaf = LeafNode; +template +using NanoLower = InternalNode, 4>; +template +using NanoUpper = InternalNode, 5>; +template +using NanoRoot = RootNode>; +template +using NanoTree = Tree>; +template +using NanoGrid = Grid>; + +/// @brief Trait to map from LEVEL to node type +template +struct NanoNode; + +// Partial template specialization of above Node struct +template +struct NanoNode +{ + using Type = NanoLeaf; + using type = NanoLeaf; +}; +template +struct NanoNode +{ + using Type = NanoLower; + using type = NanoLower; +}; +template +struct NanoNode +{ + using Type = NanoUpper; + using type = NanoUpper; +}; +template +struct NanoNode +{ + using Type = NanoRoot; + using type = NanoRoot; +}; + +using FloatTree = NanoTree; +using Fp4Tree = NanoTree; +using Fp8Tree = NanoTree; +using Fp16Tree = NanoTree; +using FpNTree = NanoTree; +using DoubleTree = NanoTree; +using Int32Tree = NanoTree; +using UInt32Tree = NanoTree; +using Int64Tree = NanoTree; +using Vec3fTree = NanoTree; +using Vec3dTree = NanoTree; +using Vec4fTree = NanoTree; +using Vec4dTree = NanoTree; +using Vec3ITree = NanoTree; +using MaskTree = NanoTree; +using BoolTree = NanoTree; +using IndexTree = NanoTree; +using OnIndexTree = NanoTree; +using IndexMaskTree = NanoTree; +using OnIndexMaskTree = NanoTree; + +using FloatGrid = Grid; +using Fp4Grid = Grid; +using Fp8Grid = Grid; +using Fp16Grid = Grid; +using FpNGrid = Grid; +using DoubleGrid = Grid; +using Int32Grid = Grid; +using UInt32Grid = Grid; +using Int64Grid = Grid; +using Vec3fGrid = Grid; +using Vec3dGrid = Grid; +using Vec4fGrid = Grid; +using Vec4dGrid = Grid; +using Vec3IGrid = Grid; +using MaskGrid = Grid; +using BoolGrid = Grid; +using PointGrid = Grid; +using IndexGrid = Grid; +using OnIndexGrid = Grid; +using IndexMaskGrid = Grid; +using OnIndexMaskGrid = Grid; + +// --------------------------> callNanoGrid <------------------------------------ + +/** +* @brief Below is an example of the struct used for generic programming with callNanoGrid +* @details For an example see "struct Crc32TailOld" in nanovdb/tools/GridChecksum.h or +* "struct IsNanoGridValid" in nanovdb/tools/GridValidator.h +* @code +* struct OpT { + // define these two static functions with non-const GridData +* template +* static auto known( GridData *gridData, args...); +* static auto unknown( GridData *gridData, args...); +* // or alternatively these two static functions with const GridData +* template +* static 
auto known(const GridData *gridData, args...); +* static auto unknown(const GridData *gridData, args...); +* }; +* @endcode +* +* @brief Here is an example of how to use callNanoGrid in client code +* @code +* return callNanoGrid(gridData, args...); +* @endcode +*/ + +/// @brief Use this function, which depends a pointer to GridData, to call +/// other functions that depend on a NanoGrid of a known ValueType. +/// @details This function allows for generic programming by converting GridData +/// to a NanoGrid of the type encoded in GridData::mGridType. +template +auto callNanoGrid(GridDataT *gridData, ArgsT&&... args) +{ + static_assert(util::is_same::value, "Expected gridData to be of type GridData* or const GridData*"); + switch (gridData->mGridType){ + case GridType::Float: + return OpT::template known(gridData, args...); + case GridType::Double: + return OpT::template known(gridData, args...); + case GridType::Int16: + return OpT::template known(gridData, args...); + case GridType::Int32: + return OpT::template known(gridData, args...); + case GridType::Int64: + return OpT::template known(gridData, args...); + case GridType::Vec3f: + return OpT::template known(gridData, args...); + case GridType::Vec3d: + return OpT::template known(gridData, args...); + case GridType::UInt32: + return OpT::template known(gridData, args...); + case GridType::Mask: + return OpT::template known(gridData, args...); + case GridType::Index: + return OpT::template known(gridData, args...); + case GridType::OnIndex: + return OpT::template known(gridData, args...); + case GridType::IndexMask: + return OpT::template known(gridData, args...); + case GridType::OnIndexMask: + return OpT::template known(gridData, args...); + case GridType::Boolean: + return OpT::template known(gridData, args...); + case GridType::RGBA8: + return OpT::template known(gridData, args...); + case GridType::Fp4: + return OpT::template known(gridData, args...); + case GridType::Fp8: + return OpT::template known(gridData, args...); + case GridType::Fp16: + return OpT::template known(gridData, args...); + case GridType::FpN: + return OpT::template known(gridData, args...); + case GridType::Vec4f: + return OpT::template known(gridData, args...); + case GridType::Vec4d: + return OpT::template known(gridData, args...); + case GridType::UInt8: + return OpT::template known(gridData, args...); + default: + return OpT::unknown(gridData, args...); + } +}// callNanoGrid + +// --------------------------> ReadAccessor <------------------------------------ + +/// @brief A read-only value accessor with three levels of node caching. This allows for +/// inverse tree traversal during lookup, which is on average significantly faster +/// than calling the equivalent method on the tree (i.e. top-down traversal). +/// +/// @note By virtue of the fact that a value accessor accelerates random access operations +/// by re-using cached access patterns, this access should be reused for multiple access +/// operations. In other words, never create an instance of this accessor for a single +/// access only. In general avoid single access operations with this accessor, and +/// if that is not possible call the corresponding method on the tree instead. +/// +/// @warning Since this ReadAccessor internally caches raw pointers to the nodes of the tree +/// structure, it is not safe to copy between host and device, or even to share among +/// multiple threads on the same host or device. 
However, it is light-weight so simple +/// instantiate one per thread (on the host and/or device). +/// +/// @details Used to accelerated random access into a VDB tree. Provides on average +/// O(1) random access operations by means of inverse tree traversal, +/// which amortizes the non-const time complexity of the root node. + +template +class ReadAccessor +{ + using GridT = NanoGrid; // grid + using TreeT = NanoTree; // tree + using RootT = NanoRoot; // root node + using LeafT = NanoLeaf; // Leaf node + using FloatType = typename RootT::FloatType; + using CoordValueType = typename RootT::CoordType::ValueType; + + mutable const RootT* mRoot; // 8 bytes (mutable to allow for access methods to be const) +public: + using BuildType = BuildT; + using ValueType = typename RootT::ValueType; + using CoordType = typename RootT::CoordType; + + static const int CacheLevels = 0; +#ifndef NANOVDB_NEW_ACCESSOR_METHODS + struct NodeInfo + { + uint32_t mLevel; // 4B + uint32_t mDim; // 4B + ValueType mMinimum; // typically 4B + ValueType mMaximum; // typically 4B + FloatType mAverage; // typically 4B + FloatType mStdDevi; // typically 4B + CoordType mBBoxMin; // 3*4B + CoordType mBBoxMax; // 3*4B + }; +#endif + /// @brief Constructor from a root node + __hostdev__ ReadAccessor(const RootT& root) + : mRoot{&root} + { + } + + /// @brief Constructor from a grid + __hostdev__ ReadAccessor(const GridT& grid) + : ReadAccessor(grid.tree().root()) + { + } + + /// @brief Constructor from a tree + __hostdev__ ReadAccessor(const TreeT& tree) + : ReadAccessor(tree.root()) + { + } + + /// @brief Reset this access to its initial state, i.e. with an empty cache + /// @node Noop since this template specialization has no cache + __hostdev__ void clear() {} + + __hostdev__ const RootT& root() const { return *mRoot; } + + /// @brief Defaults constructors + ReadAccessor(const ReadAccessor&) = default; + ~ReadAccessor() = default; + ReadAccessor& operator=(const ReadAccessor&) = default; +#ifdef NANOVDB_NEW_ACCESSOR_METHODS + __hostdev__ ValueType getValue(const CoordType& ijk) const + { + return this->template get>(ijk); + } + __hostdev__ ValueType getValue(int i, int j, int k) const { return this->template get>(CoordType(i, j, k)); } + __hostdev__ ValueType operator()(const CoordType& ijk) const { return this->template get>(ijk); } + __hostdev__ ValueType operator()(int i, int j, int k) const { return this->template get>(CoordType(i, j, k)); } + __hostdev__ auto getNodeInfo(const CoordType& ijk) const { return this->template get>(ijk); } + __hostdev__ bool isActive(const CoordType& ijk) const { return this->template get>(ijk); } + __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const { return this->template get>(ijk, v); } + __hostdev__ const LeafT* probeLeaf(const CoordType& ijk) const { return this->template get>(ijk); } +#else // NANOVDB_NEW_ACCESSOR_METHODS + __hostdev__ ValueType getValue(const CoordType& ijk) const + { + return mRoot->getValueAndCache(ijk, *this); + } + __hostdev__ ValueType getValue(int i, int j, int k) const + { + return this->getValue(CoordType(i, j, k)); + } + __hostdev__ ValueType operator()(const CoordType& ijk) const + { + return this->getValue(ijk); + } + __hostdev__ ValueType operator()(int i, int j, int k) const + { + return this->getValue(CoordType(i, j, k)); + } + + __hostdev__ NodeInfo getNodeInfo(const CoordType& ijk) const + { + return mRoot->getNodeInfoAndCache(ijk, *this); + } + + __hostdev__ bool isActive(const CoordType& ijk) const + { + return 
mRoot->isActiveAndCache(ijk, *this); + } + + __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const + { + return mRoot->probeValueAndCache(ijk, v, *this); + } + + __hostdev__ const LeafT* probeLeaf(const CoordType& ijk) const + { + return mRoot->probeLeafAndCache(ijk, *this); + } +#endif // NANOVDB_NEW_ACCESSOR_METHODS + template + __hostdev__ uint32_t getDim(const CoordType& ijk, const RayT& ray) const + { + return mRoot->getDimAndCache(ijk, ray, *this); + } + template + __hostdev__ auto get(const CoordType& ijk, ArgsT&&... args) const + { + return mRoot->template get(ijk, args...); + } + + template + __hostdev__ auto set(const CoordType& ijk, ArgsT&&... args) const + { + return const_cast(mRoot)->template set(ijk, args...); + } + +private: + /// @brief Allow nodes to insert themselves into the cache. + template + friend class RootNode; + template + friend class InternalNode; + template class, uint32_t> + friend class LeafNode; + + /// @brief No-op + template + __hostdev__ void insert(const CoordType&, const NodeT*) const {} +}; // ReadAccessor class + +/// @brief Node caching at a single tree level +template +class ReadAccessor //e.g. 0, 1, 2 +{ + static_assert(LEVEL0 >= 0 && LEVEL0 <= 2, "LEVEL0 should be 0, 1, or 2"); + + using GridT = NanoGrid; // grid + using TreeT = NanoTree; + using RootT = NanoRoot; // root node + using LeafT = NanoLeaf; // Leaf node + using NodeT = typename NodeTrait::type; + using CoordT = typename RootT::CoordType; + using ValueT = typename RootT::ValueType; + + using FloatType = typename RootT::FloatType; + using CoordValueType = typename RootT::CoordT::ValueType; + + // All member data are mutable to allow for access methods to be const + mutable CoordT mKey; // 3*4 = 12 bytes + mutable const RootT* mRoot; // 8 bytes + mutable const NodeT* mNode; // 8 bytes + +public: + using BuildType = BuildT; + using ValueType = ValueT; + using CoordType = CoordT; + + static const int CacheLevels = 1; +#ifndef NANOVDB_NEW_ACCESSOR_METHODS + using NodeInfo = typename ReadAccessor::NodeInfo; +#endif + /// @brief Constructor from a root node + __hostdev__ ReadAccessor(const RootT& root) + : mKey(CoordType::max()) + , mRoot(&root) + , mNode(nullptr) + { + } + + /// @brief Constructor from a grid + __hostdev__ ReadAccessor(const GridT& grid) + : ReadAccessor(grid.tree().root()) + { + } + + /// @brief Constructor from a tree + __hostdev__ ReadAccessor(const TreeT& tree) + : ReadAccessor(tree.root()) + { + } + + /// @brief Reset this access to its initial state, i.e. 
with an empty cache + __hostdev__ void clear() + { + mKey = CoordType::max(); + mNode = nullptr; + } + + __hostdev__ const RootT& root() const { return *mRoot; } + + /// @brief Defaults constructors + ReadAccessor(const ReadAccessor&) = default; + ~ReadAccessor() = default; + ReadAccessor& operator=(const ReadAccessor&) = default; + + __hostdev__ bool isCached(const CoordType& ijk) const + { + return (ijk[0] & int32_t(~NodeT::MASK)) == mKey[0] && + (ijk[1] & int32_t(~NodeT::MASK)) == mKey[1] && + (ijk[2] & int32_t(~NodeT::MASK)) == mKey[2]; + } + +#ifdef NANOVDB_NEW_ACCESSOR_METHODS + __hostdev__ ValueType getValue(const CoordType& ijk) const + { + return this->template get>(ijk); + } + __hostdev__ ValueType getValue(int i, int j, int k) const { return this->template get>(CoordType(i, j, k)); } + __hostdev__ ValueType operator()(const CoordType& ijk) const { return this->template get>(ijk); } + __hostdev__ ValueType operator()(int i, int j, int k) const { return this->template get>(CoordType(i, j, k)); } + __hostdev__ auto getNodeInfo(const CoordType& ijk) const { return this->template get>(ijk); } + __hostdev__ bool isActive(const CoordType& ijk) const { return this->template get>(ijk); } + __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const { return this->template get>(ijk, v); } + __hostdev__ const LeafT* probeLeaf(const CoordType& ijk) const { return this->template get>(ijk); } +#else // NANOVDB_NEW_ACCESSOR_METHODS + __hostdev__ ValueType getValue(const CoordType& ijk) const + { + if (this->isCached(ijk)) + return mNode->getValueAndCache(ijk, *this); + return mRoot->getValueAndCache(ijk, *this); + } + __hostdev__ ValueType getValue(int i, int j, int k) const + { + return this->getValue(CoordType(i, j, k)); + } + __hostdev__ ValueType operator()(const CoordType& ijk) const + { + return this->getValue(ijk); + } + __hostdev__ ValueType operator()(int i, int j, int k) const + { + return this->getValue(CoordType(i, j, k)); + } + + __hostdev__ NodeInfo getNodeInfo(const CoordType& ijk) const + { + if (this->isCached(ijk)) + return mNode->getNodeInfoAndCache(ijk, *this); + return mRoot->getNodeInfoAndCache(ijk, *this); + } + + __hostdev__ bool isActive(const CoordType& ijk) const + { + if (this->isCached(ijk)) + return mNode->isActiveAndCache(ijk, *this); + return mRoot->isActiveAndCache(ijk, *this); + } + + __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const + { + if (this->isCached(ijk)) + return mNode->probeValueAndCache(ijk, v, *this); + return mRoot->probeValueAndCache(ijk, v, *this); + } + + __hostdev__ const LeafT* probeLeaf(const CoordType& ijk) const + { + if (this->isCached(ijk)) + return mNode->probeLeafAndCache(ijk, *this); + return mRoot->probeLeafAndCache(ijk, *this); + } +#endif // NANOVDB_NEW_ACCESSOR_METHODS + template + __hostdev__ uint32_t getDim(const CoordType& ijk, const RayT& ray) const + { + if (this->isCached(ijk)) + return mNode->getDimAndCache(ijk, ray, *this); + return mRoot->getDimAndCache(ijk, ray, *this); + } + + template + __hostdev__ auto get(const CoordType& ijk, ArgsT&&... args) const + { + if (this->isCached(ijk)) + return mNode->template getAndCache(ijk, *this, args...); + return mRoot->template getAndCache(ijk, *this, args...); + } + + template + __hostdev__ auto set(const CoordType& ijk, ArgsT&&... 
args) const + { + if (this->isCached(ijk)) + return const_cast(mNode)->template setAndCache(ijk, *this, args...); + return const_cast(mRoot)->template setAndCache(ijk, *this, args...); + } + +private: + /// @brief Allow nodes to insert themselves into the cache. + template + friend class RootNode; + template + friend class InternalNode; + template class, uint32_t> + friend class LeafNode; + + /// @brief Inserts a leaf node and key pair into this ReadAccessor + __hostdev__ void insert(const CoordType& ijk, const NodeT* node) const + { + mKey = ijk & ~NodeT::MASK; + mNode = node; + } + + // no-op + template + __hostdev__ void insert(const CoordType&, const OtherNodeT*) const {} + +}; // ReadAccessor + +template +class ReadAccessor //e.g. (0,1), (1,2), (0,2) +{ + static_assert(LEVEL0 >= 0 && LEVEL0 <= 2, "LEVEL0 must be 0, 1, 2"); + static_assert(LEVEL1 >= 0 && LEVEL1 <= 2, "LEVEL1 must be 0, 1, 2"); + static_assert(LEVEL0 < LEVEL1, "Level 0 must be lower than level 1"); + using GridT = NanoGrid; // grid + using TreeT = NanoTree; + using RootT = NanoRoot; + using LeafT = NanoLeaf; + using Node1T = typename NodeTrait::type; + using Node2T = typename NodeTrait::type; + using CoordT = typename RootT::CoordType; + using ValueT = typename RootT::ValueType; + using FloatType = typename RootT::FloatType; + using CoordValueType = typename RootT::CoordT::ValueType; + + // All member data are mutable to allow for access methods to be const +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY // 44 bytes total + mutable CoordT mKey; // 3*4 = 12 bytes +#else // 68 bytes total + mutable CoordT mKeys[2]; // 2*3*4 = 24 bytes +#endif + mutable const RootT* mRoot; + mutable const Node1T* mNode1; + mutable const Node2T* mNode2; + +public: + using BuildType = BuildT; + using ValueType = ValueT; + using CoordType = CoordT; + + static const int CacheLevels = 2; +#ifndef NANOVDB_NEW_ACCESSOR_METHODS + using NodeInfo = typename ReadAccessor::NodeInfo; +#endif + /// @brief Constructor from a root node + __hostdev__ ReadAccessor(const RootT& root) +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY + : mKey(CoordType::max()) +#else + : mKeys{CoordType::max(), CoordType::max()} +#endif + , mRoot(&root) + , mNode1(nullptr) + , mNode2(nullptr) + { + } + + /// @brief Constructor from a grid + __hostdev__ ReadAccessor(const GridT& grid) + : ReadAccessor(grid.tree().root()) + { + } + + /// @brief Constructor from a tree + __hostdev__ ReadAccessor(const TreeT& tree) + : ReadAccessor(tree.root()) + { + } + + /// @brief Reset this access to its initial state, i.e. 
with an empty cache + __hostdev__ void clear() + { +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY + mKey = CoordType::max(); +#else + mKeys[0] = mKeys[1] = CoordType::max(); +#endif + mNode1 = nullptr; + mNode2 = nullptr; + } + + __hostdev__ const RootT& root() const { return *mRoot; } + + /// @brief Defaults constructors + ReadAccessor(const ReadAccessor&) = default; + ~ReadAccessor() = default; + ReadAccessor& operator=(const ReadAccessor&) = default; + +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY + __hostdev__ bool isCached1(CoordValueType dirty) const + { + if (!mNode1) + return false; + if (dirty & int32_t(~Node1T::MASK)) { + mNode1 = nullptr; + return false; + } + return true; + } + __hostdev__ bool isCached2(CoordValueType dirty) const + { + if (!mNode2) + return false; + if (dirty & int32_t(~Node2T::MASK)) { + mNode2 = nullptr; + return false; + } + return true; + } + __hostdev__ CoordValueType computeDirty(const CoordType& ijk) const + { + return (ijk[0] ^ mKey[0]) | (ijk[1] ^ mKey[1]) | (ijk[2] ^ mKey[2]); + } +#else + __hostdev__ bool isCached1(const CoordType& ijk) const + { + return (ijk[0] & int32_t(~Node1T::MASK)) == mKeys[0][0] && + (ijk[1] & int32_t(~Node1T::MASK)) == mKeys[0][1] && + (ijk[2] & int32_t(~Node1T::MASK)) == mKeys[0][2]; + } + __hostdev__ bool isCached2(const CoordType& ijk) const + { + return (ijk[0] & int32_t(~Node2T::MASK)) == mKeys[1][0] && + (ijk[1] & int32_t(~Node2T::MASK)) == mKeys[1][1] && + (ijk[2] & int32_t(~Node2T::MASK)) == mKeys[1][2]; + } +#endif + +#ifdef NANOVDB_NEW_ACCESSOR_METHODS + __hostdev__ ValueType getValue(const CoordType& ijk) const + { + return this->template get>(ijk); + } + __hostdev__ ValueType getValue(int i, int j, int k) const { return this->template get>(CoordType(i, j, k)); } + __hostdev__ ValueType operator()(const CoordType& ijk) const { return this->template get>(ijk); } + __hostdev__ ValueType operator()(int i, int j, int k) const { return this->template get>(CoordType(i, j, k)); } + __hostdev__ auto getNodeInfo(const CoordType& ijk) const { return this->template get>(ijk); } + __hostdev__ bool isActive(const CoordType& ijk) const { return this->template get>(ijk); } + __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const { return this->template get>(ijk, v); } + __hostdev__ const LeafT* probeLeaf(const CoordType& ijk) const { return this->template get>(ijk); } +#else // NANOVDB_NEW_ACCESSOR_METHODS + + __hostdev__ ValueType getValue(const CoordType& ijk) const + { +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY + const CoordValueType dirty = this->computeDirty(ijk); +#else + auto&& dirty = ijk; +#endif + if (this->isCached1(dirty)) { + return mNode1->getValueAndCache(ijk, *this); + } else if (this->isCached2(dirty)) { + return mNode2->getValueAndCache(ijk, *this); + } + return mRoot->getValueAndCache(ijk, *this); + } + __hostdev__ ValueType operator()(const CoordType& ijk) const + { + return this->getValue(ijk); + } + __hostdev__ ValueType operator()(int i, int j, int k) const + { + return this->getValue(CoordType(i, j, k)); + } + __hostdev__ ValueType getValue(int i, int j, int k) const + { + return this->getValue(CoordType(i, j, k)); + } + __hostdev__ NodeInfo getNodeInfo(const CoordType& ijk) const + { +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY + const CoordValueType dirty = this->computeDirty(ijk); +#else + auto&& dirty = ijk; +#endif + if (this->isCached1(dirty)) { + return mNode1->getNodeInfoAndCache(ijk, *this); + } else if (this->isCached2(dirty)) { + return mNode2->getNodeInfoAndCache(ijk, *this); + } + return 
mRoot->getNodeInfoAndCache(ijk, *this); + } + + __hostdev__ bool isActive(const CoordType& ijk) const + { +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY + const CoordValueType dirty = this->computeDirty(ijk); +#else + auto&& dirty = ijk; +#endif + if (this->isCached1(dirty)) { + return mNode1->isActiveAndCache(ijk, *this); + } else if (this->isCached2(dirty)) { + return mNode2->isActiveAndCache(ijk, *this); + } + return mRoot->isActiveAndCache(ijk, *this); + } + + __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const + { +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY + const CoordValueType dirty = this->computeDirty(ijk); +#else + auto&& dirty = ijk; +#endif + if (this->isCached1(dirty)) { + return mNode1->probeValueAndCache(ijk, v, *this); + } else if (this->isCached2(dirty)) { + return mNode2->probeValueAndCache(ijk, v, *this); + } + return mRoot->probeValueAndCache(ijk, v, *this); + } + + __hostdev__ const LeafT* probeLeaf(const CoordType& ijk) const + { +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY + const CoordValueType dirty = this->computeDirty(ijk); +#else + auto&& dirty = ijk; +#endif + if (this->isCached1(dirty)) { + return mNode1->probeLeafAndCache(ijk, *this); + } else if (this->isCached2(dirty)) { + return mNode2->probeLeafAndCache(ijk, *this); + } + return mRoot->probeLeafAndCache(ijk, *this); + } +#endif // NANOVDB_NEW_ACCESSOR_METHODS + + template + __hostdev__ uint32_t getDim(const CoordType& ijk, const RayT& ray) const + { +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY + const CoordValueType dirty = this->computeDirty(ijk); +#else + auto&& dirty = ijk; +#endif + if (this->isCached1(dirty)) { + return mNode1->getDimAndCache(ijk, ray, *this); + } else if (this->isCached2(dirty)) { + return mNode2->getDimAndCache(ijk, ray, *this); + } + return mRoot->getDimAndCache(ijk, ray, *this); + } + + template + __hostdev__ auto get(const CoordType& ijk, ArgsT&&... args) const + { +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY + const CoordValueType dirty = this->computeDirty(ijk); +#else + auto&& dirty = ijk; +#endif + if (this->isCached1(dirty)) { + return mNode1->template getAndCache(ijk, *this, args...); + } else if (this->isCached2(dirty)) { + return mNode2->template getAndCache(ijk, *this, args...); + } + return mRoot->template getAndCache(ijk, *this, args...); + } + + template + __hostdev__ auto set(const CoordType& ijk, ArgsT&&... args) const + { +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY + const CoordValueType dirty = this->computeDirty(ijk); +#else + auto&& dirty = ijk; +#endif + if (this->isCached1(dirty)) { + return const_cast(mNode1)->template setAndCache(ijk, *this, args...); + } else if (this->isCached2(dirty)) { + return const_cast(mNode2)->template setAndCache(ijk, *this, args...); + } + return const_cast(mRoot)->template setAndCache(ijk, *this, args...); + } + +private: + /// @brief Allow nodes to insert themselves into the cache. 
+ template + friend class RootNode; + template + friend class InternalNode; + template class, uint32_t> + friend class LeafNode; + + /// @brief Inserts a leaf node and key pair into this ReadAccessor + __hostdev__ void insert(const CoordType& ijk, const Node1T* node) const + { +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY + mKey = ijk; +#else + mKeys[0] = ijk & ~Node1T::MASK; +#endif + mNode1 = node; + } + __hostdev__ void insert(const CoordType& ijk, const Node2T* node) const + { +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY + mKey = ijk; +#else + mKeys[1] = ijk & ~Node2T::MASK; +#endif + mNode2 = node; + } + template + __hostdev__ void insert(const CoordType&, const OtherNodeT*) const {} +}; // ReadAccessor + +/// @brief Node caching at all (three) tree levels +template +class ReadAccessor +{ + using GridT = NanoGrid; // grid + using TreeT = NanoTree; + using RootT = NanoRoot; // root node + using NodeT2 = NanoUpper; // upper internal node + using NodeT1 = NanoLower; // lower internal node + using LeafT = NanoLeaf; // Leaf node + using CoordT = typename RootT::CoordType; + using ValueT = typename RootT::ValueType; + + using FloatType = typename RootT::FloatType; + using CoordValueType = typename RootT::CoordT::ValueType; + + // All member data are mutable to allow for access methods to be const +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY // 44 bytes total + mutable CoordT mKey; // 3*4 = 12 bytes +#else // 68 bytes total + mutable CoordT mKeys[3]; // 3*3*4 = 36 bytes +#endif + mutable const RootT* mRoot; + mutable const void* mNode[3]; // 4*8 = 32 bytes + +public: + using BuildType = BuildT; + using ValueType = ValueT; + using CoordType = CoordT; + + static const int CacheLevels = 3; +#ifndef NANOVDB_NEW_ACCESSOR_METHODS + using NodeInfo = typename ReadAccessor::NodeInfo; +#endif + /// @brief Constructor from a root node + __hostdev__ ReadAccessor(const RootT& root) +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY + : mKey(CoordType::max()) +#else + : mKeys{CoordType::max(), CoordType::max(), CoordType::max()} +#endif + , mRoot(&root) + , mNode{nullptr, nullptr, nullptr} + { + } + + /// @brief Constructor from a grid + __hostdev__ ReadAccessor(const GridT& grid) + : ReadAccessor(grid.tree().root()) + { + } + + /// @brief Constructor from a tree + __hostdev__ ReadAccessor(const TreeT& tree) + : ReadAccessor(tree.root()) + { + } + + __hostdev__ const RootT& root() const { return *mRoot; } + + /// @brief Defaults constructors + ReadAccessor(const ReadAccessor&) = default; + ~ReadAccessor() = default; + ReadAccessor& operator=(const ReadAccessor&) = default; + + /// @brief Return a const point to the cached node of the specified type + /// + /// @warning The return value could be NULL. + template + __hostdev__ const NodeT* getNode() const + { + using T = typename NodeTrait::type; + static_assert(util::is_same::value, "ReadAccessor::getNode: Invalid node type"); + return reinterpret_cast(mNode[NodeT::LEVEL]); + } + + template + __hostdev__ const typename NodeTrait::type* getNode() const + { + using T = typename NodeTrait::type; + static_assert(LEVEL >= 0 && LEVEL <= 2, "ReadAccessor::getNode: Invalid node type"); + return reinterpret_cast(mNode[LEVEL]); + } + + /// @brief Reset this access to its initial state, i.e. 
with an empty cache + __hostdev__ void clear() + { +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY + mKey = CoordType::max(); +#else + mKeys[0] = mKeys[1] = mKeys[2] = CoordType::max(); +#endif + mNode[0] = mNode[1] = mNode[2] = nullptr; + } + +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY + template + __hostdev__ bool isCached(CoordValueType dirty) const + { + if (!mNode[NodeT::LEVEL]) + return false; + if (dirty & int32_t(~NodeT::MASK)) { + mNode[NodeT::LEVEL] = nullptr; + return false; + } + return true; + } + + __hostdev__ CoordValueType computeDirty(const CoordType& ijk) const + { + return (ijk[0] ^ mKey[0]) | (ijk[1] ^ mKey[1]) | (ijk[2] ^ mKey[2]); + } +#else + template + __hostdev__ bool isCached(const CoordType& ijk) const + { + return (ijk[0] & int32_t(~NodeT::MASK)) == mKeys[NodeT::LEVEL][0] && + (ijk[1] & int32_t(~NodeT::MASK)) == mKeys[NodeT::LEVEL][1] && + (ijk[2] & int32_t(~NodeT::MASK)) == mKeys[NodeT::LEVEL][2]; + } +#endif + +#ifdef NANOVDB_NEW_ACCESSOR_METHODS + __hostdev__ ValueType getValue(const CoordType& ijk) const + { + return this->template get>(ijk); + } + __hostdev__ ValueType getValue(int i, int j, int k) const { return this->template get>(CoordType(i, j, k)); } + __hostdev__ ValueType operator()(const CoordType& ijk) const { return this->template get>(ijk); } + __hostdev__ ValueType operator()(int i, int j, int k) const { return this->template get>(CoordType(i, j, k)); } + __hostdev__ auto getNodeInfo(const CoordType& ijk) const { return this->template get>(ijk); } + __hostdev__ bool isActive(const CoordType& ijk) const { return this->template get>(ijk); } + __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const { return this->template get>(ijk, v); } + __hostdev__ const LeafT* probeLeaf(const CoordType& ijk) const { return this->template get>(ijk); } +#else // NANOVDB_NEW_ACCESSOR_METHODS + + __hostdev__ ValueType getValue(const CoordType& ijk) const + { +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY + const CoordValueType dirty = this->computeDirty(ijk); +#else + auto&& dirty = ijk; +#endif + if (this->isCached(dirty)) { + return ((LeafT*)mNode[0])->getValue(ijk); + } else if (this->isCached(dirty)) { + return ((NodeT1*)mNode[1])->getValueAndCache(ijk, *this); + } else if (this->isCached(dirty)) { + return ((NodeT2*)mNode[2])->getValueAndCache(ijk, *this); + } + return mRoot->getValueAndCache(ijk, *this); + } + __hostdev__ ValueType operator()(const CoordType& ijk) const + { + return this->getValue(ijk); + } + __hostdev__ ValueType operator()(int i, int j, int k) const + { + return this->getValue(CoordType(i, j, k)); + } + __hostdev__ ValueType getValue(int i, int j, int k) const + { + return this->getValue(CoordType(i, j, k)); + } + + __hostdev__ NodeInfo getNodeInfo(const CoordType& ijk) const + { +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY + const CoordValueType dirty = this->computeDirty(ijk); +#else + auto&& dirty = ijk; +#endif + if (this->isCached(dirty)) { + return ((LeafT*)mNode[0])->getNodeInfoAndCache(ijk, *this); + } else if (this->isCached(dirty)) { + return ((NodeT1*)mNode[1])->getNodeInfoAndCache(ijk, *this); + } else if (this->isCached(dirty)) { + return ((NodeT2*)mNode[2])->getNodeInfoAndCache(ijk, *this); + } + return mRoot->getNodeInfoAndCache(ijk, *this); + } + + __hostdev__ bool isActive(const CoordType& ijk) const + { +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY + const CoordValueType dirty = this->computeDirty(ijk); +#else + auto&& dirty = ijk; +#endif + if (this->isCached(dirty)) { + return ((LeafT*)mNode[0])->isActive(ijk); + } else if 
(this->isCached(dirty)) { + return ((NodeT1*)mNode[1])->isActiveAndCache(ijk, *this); + } else if (this->isCached(dirty)) { + return ((NodeT2*)mNode[2])->isActiveAndCache(ijk, *this); + } + return mRoot->isActiveAndCache(ijk, *this); + } + + __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const + { +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY + const CoordValueType dirty = this->computeDirty(ijk); +#else + auto&& dirty = ijk; +#endif + if (this->isCached(dirty)) { + return ((LeafT*)mNode[0])->probeValue(ijk, v); + } else if (this->isCached(dirty)) { + return ((NodeT1*)mNode[1])->probeValueAndCache(ijk, v, *this); + } else if (this->isCached(dirty)) { + return ((NodeT2*)mNode[2])->probeValueAndCache(ijk, v, *this); + } + return mRoot->probeValueAndCache(ijk, v, *this); + } + __hostdev__ const LeafT* probeLeaf(const CoordType& ijk) const + { +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY + const CoordValueType dirty = this->computeDirty(ijk); +#else + auto&& dirty = ijk; +#endif + if (this->isCached(dirty)) { + return ((LeafT*)mNode[0]); + } else if (this->isCached(dirty)) { + return ((NodeT1*)mNode[1])->probeLeafAndCache(ijk, *this); + } else if (this->isCached(dirty)) { + return ((NodeT2*)mNode[2])->probeLeafAndCache(ijk, *this); + } + return mRoot->probeLeafAndCache(ijk, *this); + } +#endif // NANOVDB_NEW_ACCESSOR_METHODS + + template + __hostdev__ auto get(const CoordType& ijk, ArgsT&&... args) const + { +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY + const CoordValueType dirty = this->computeDirty(ijk); +#else + auto&& dirty = ijk; +#endif + if (this->isCached(dirty)) { + return ((const LeafT*)mNode[0])->template getAndCache(ijk, *this, args...); + } else if (this->isCached(dirty)) { + return ((const NodeT1*)mNode[1])->template getAndCache(ijk, *this, args...); + } else if (this->isCached(dirty)) { + return ((const NodeT2*)mNode[2])->template getAndCache(ijk, *this, args...); + } + return mRoot->template getAndCache(ijk, *this, args...); + } + + template + __hostdev__ auto set(const CoordType& ijk, ArgsT&&... args) const + { +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY + const CoordValueType dirty = this->computeDirty(ijk); +#else + auto&& dirty = ijk; +#endif + if (this->isCached(dirty)) { + return ((LeafT*)mNode[0])->template setAndCache(ijk, *this, args...); + } else if (this->isCached(dirty)) { + return ((NodeT1*)mNode[1])->template setAndCache(ijk, *this, args...); + } else if (this->isCached(dirty)) { + return ((NodeT2*)mNode[2])->template setAndCache(ijk, *this, args...); + } + return ((RootT*)mRoot)->template setAndCache(ijk, *this, args...); + } + + template + __hostdev__ uint32_t getDim(const CoordType& ijk, const RayT& ray) const + { +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY + const CoordValueType dirty = this->computeDirty(ijk); +#else + auto&& dirty = ijk; +#endif + if (this->isCached(dirty)) { + return ((LeafT*)mNode[0])->getDimAndCache(ijk, ray, *this); + } else if (this->isCached(dirty)) { + return ((NodeT1*)mNode[1])->getDimAndCache(ijk, ray, *this); + } else if (this->isCached(dirty)) { + return ((NodeT2*)mNode[2])->getDimAndCache(ijk, ray, *this); + } + return mRoot->getDimAndCache(ijk, ray, *this); + } + +private: + /// @brief Allow nodes to insert themselves into the cache. 
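+    // For reference: the *AndCache calls above pass *this down the tree so that the
+    // friend node classes declared below can call insert(ijk, node) on this accessor;
+    // a later query that lands in the same node is then served directly from
+    // mNode[NodeT::LEVEL] instead of restarting at the root.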
+ template + friend class RootNode; + template + friend class InternalNode; + template class, uint32_t> + friend class LeafNode; + + /// @brief Inserts a leaf node and key pair into this ReadAccessor + template + __hostdev__ void insert(const CoordType& ijk, const NodeT* node) const + { +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY + mKey = ijk; +#else + mKeys[NodeT::LEVEL] = ijk & ~NodeT::MASK; +#endif + mNode[NodeT::LEVEL] = node; + } +}; // ReadAccessor + +////////////////////////////////////////////////// + +/// @brief Free-standing function for convenient creation of a ReadAccessor with +/// optional and customizable node caching. +/// +/// @details createAccessor<>(grid): No caching of nodes and hence it's thread-safe but slow +/// createAccessor<0>(grid): Caching of leaf nodes only +/// createAccessor<1>(grid): Caching of lower internal nodes only +/// createAccessor<2>(grid): Caching of upper internal nodes only +/// createAccessor<0,1>(grid): Caching of leaf and lower internal nodes +/// createAccessor<0,2>(grid): Caching of leaf and upper internal nodes +/// createAccessor<1,2>(grid): Caching of lower and upper internal nodes +/// createAccessor<0,1,2>(grid): Caching of all nodes at all tree levels + +template +ReadAccessor createAccessor(const NanoGrid& grid) +{ + return ReadAccessor(grid); +} + +template +ReadAccessor createAccessor(const NanoTree& tree) +{ + return ReadAccessor(tree); +} + +template +ReadAccessor createAccessor(const NanoRoot& root) +{ + return ReadAccessor(root); +} + +////////////////////////////////////////////////// + +/// @brief This is a convenient class that allows for access to grid meta-data +/// that are independent of the value type of a grid. That is, this class +/// can be used to get information about a grid without actually knowing +/// its ValueType. +class GridMetaData +{ // 768 bytes (32 byte aligned) + GridData mGridData; // 672B + TreeData mTreeData; // 64B + CoordBBox mIndexBBox; // 24B. AABB of active values in index space. + uint32_t mRootTableSize, mPadding{0}; // 8B + +public: + template + GridMetaData(const NanoGrid& grid) + { + mGridData = *grid.data(); + mTreeData = *grid.tree().data(); + mIndexBBox = grid.indexBBox(); + mRootTableSize = grid.tree().root().getTableSize(); + } + GridMetaData(const GridData* gridData) + { + if (GridMetaData::safeCast(gridData)) { + *this = *reinterpret_cast(gridData); + //util::memcpy(this, (const GridMetaData*)gridData); + } else {// otherwise copy each member individually + mGridData = *gridData; + mTreeData = *reinterpret_cast(gridData->treePtr()); + mIndexBBox = gridData->indexBBox(); + mRootTableSize = gridData->rootTableSize(); + } + } + GridMetaData& operator=(const GridMetaData&) = default; + /// @brief return true if the RootData follows right after the TreeData. + /// If so, this implies that it's safe to cast the grid from which + /// this instance was constructed to a GridMetaData + __hostdev__ bool safeCast() const { return mTreeData.isRootNext(); } + + /// @brief return true if it is safe to cast the grid to a pointer + /// of type GridMetaData, i.e. construction can be avoided. + __hostdev__ static bool safeCast(const GridData *gridData){ + NANOVDB_ASSERT(gridData && gridData->isValid()); + return gridData->isRootConnected(); + } + /// @brief return true if it is safe to cast the grid to a pointer + /// of type GridMetaData, i.e. construction can be avoided. 
+ template + __hostdev__ static bool safeCast(const NanoGrid& grid){return grid.tree().isRootNext();} + __hostdev__ bool isValid() const { return mGridData.isValid(); } + __hostdev__ const GridType& gridType() const { return mGridData.mGridType; } + __hostdev__ const GridClass& gridClass() const { return mGridData.mGridClass; } + __hostdev__ bool isLevelSet() const { return mGridData.mGridClass == GridClass::LevelSet; } + __hostdev__ bool isFogVolume() const { return mGridData.mGridClass == GridClass::FogVolume; } + __hostdev__ bool isStaggered() const { return mGridData.mGridClass == GridClass::Staggered; } + __hostdev__ bool isPointIndex() const { return mGridData.mGridClass == GridClass::PointIndex; } + __hostdev__ bool isGridIndex() const { return mGridData.mGridClass == GridClass::IndexGrid; } + __hostdev__ bool isPointData() const { return mGridData.mGridClass == GridClass::PointData; } + __hostdev__ bool isMask() const { return mGridData.mGridClass == GridClass::Topology; } + __hostdev__ bool isUnknown() const { return mGridData.mGridClass == GridClass::Unknown; } + __hostdev__ bool hasMinMax() const { return mGridData.mFlags.isMaskOn(GridFlags::HasMinMax); } + __hostdev__ bool hasBBox() const { return mGridData.mFlags.isMaskOn(GridFlags::HasBBox); } + __hostdev__ bool hasLongGridName() const { return mGridData.mFlags.isMaskOn(GridFlags::HasLongGridName); } + __hostdev__ bool hasAverage() const { return mGridData.mFlags.isMaskOn(GridFlags::HasAverage); } + __hostdev__ bool hasStdDeviation() const { return mGridData.mFlags.isMaskOn(GridFlags::HasStdDeviation); } + __hostdev__ bool isBreadthFirst() const { return mGridData.mFlags.isMaskOn(GridFlags::IsBreadthFirst); } + __hostdev__ uint64_t gridSize() const { return mGridData.mGridSize; } + __hostdev__ uint32_t gridIndex() const { return mGridData.mGridIndex; } + __hostdev__ uint32_t gridCount() const { return mGridData.mGridCount; } + __hostdev__ const char* shortGridName() const { return mGridData.mGridName; } + __hostdev__ const Map& map() const { return mGridData.mMap; } + __hostdev__ const Vec3dBBox& worldBBox() const { return mGridData.mWorldBBox; } + __hostdev__ const CoordBBox& indexBBox() const { return mIndexBBox; } + __hostdev__ Vec3d voxelSize() const { return mGridData.mVoxelSize; } + __hostdev__ int blindDataCount() const { return mGridData.mBlindMetadataCount; } + __hostdev__ uint64_t activeVoxelCount() const { return mTreeData.mVoxelCount; } + __hostdev__ const uint32_t& activeTileCount(uint32_t level) const { return mTreeData.mTileCount[level - 1]; } + __hostdev__ uint32_t nodeCount(uint32_t level) const { return mTreeData.mNodeCount[level]; } + __hostdev__ const Checksum& checksum() const { return mGridData.mChecksum; } + __hostdev__ uint32_t rootTableSize() const { return mRootTableSize; } + __hostdev__ bool isEmpty() const { return mRootTableSize == 0; } + __hostdev__ Version version() const { return mGridData.mVersion; } +}; // GridMetaData + +/// @brief Class to access points at a specific voxel location +/// +/// @note If GridClass::PointIndex AttT should be uint32_t and if GridClass::PointData Vec3f +template +class PointAccessor : public DefaultReadAccessor +{ + using AccT = DefaultReadAccessor; + const NanoGrid& mGrid; + const AttT* mData; + +public: + PointAccessor(const NanoGrid& grid) + : AccT(grid.tree().root()) + , mGrid(grid) + , mData(grid.template getBlindData(0)) + { + NANOVDB_ASSERT(grid.gridType() == toGridType()); + NANOVDB_ASSERT((grid.gridClass() == GridClass::PointIndex && util::is_same::value) 
|| + (grid.gridClass() == GridClass::PointData && util::is_same::value)); + } + + /// @brief return true if this access was initialized correctly + __hostdev__ operator bool() const { return mData != nullptr; } + + __hostdev__ const NanoGrid& grid() const { return mGrid; } + + /// @brief Return the total number of point in the grid and set the + /// iterators to the complete range of points. + __hostdev__ uint64_t gridPoints(const AttT*& begin, const AttT*& end) const + { + const uint64_t count = mGrid.blindMetaData(0u).mValueCount; + begin = mData; + end = begin + count; + return count; + } + /// @brief Return the number of points in the leaf node containing the coordinate @a ijk. + /// If this return value is larger than zero then the iterators @a begin and @a end + /// will point to all the attributes contained within that leaf node. + __hostdev__ uint64_t leafPoints(const Coord& ijk, const AttT*& begin, const AttT*& end) const + { + auto* leaf = this->probeLeaf(ijk); + if (leaf == nullptr) { + return 0; + } + begin = mData + leaf->minimum(); + end = begin + leaf->maximum(); + return leaf->maximum(); + } + + /// @brief get iterators over attributes to points at a specific voxel location + __hostdev__ uint64_t voxelPoints(const Coord& ijk, const AttT*& begin, const AttT*& end) const + { + begin = end = nullptr; + if (auto* leaf = this->probeLeaf(ijk)) { + const uint32_t offset = NanoLeaf::CoordToOffset(ijk); + if (leaf->isActive(offset)) { + begin = mData + leaf->minimum(); + end = begin + leaf->getValue(offset); + if (offset > 0u) + begin += leaf->getValue(offset - 1); + } + } + return end - begin; + } +}; // PointAccessor + +template +class PointAccessor : public DefaultReadAccessor +{ + using AccT = DefaultReadAccessor; + const NanoGrid& mGrid; + const AttT* mData; + +public: + PointAccessor(const NanoGrid& grid) + : AccT(grid.tree().root()) + , mGrid(grid) + , mData(grid.template getBlindData(0)) + { + NANOVDB_ASSERT(mData); + NANOVDB_ASSERT(grid.gridType() == GridType::PointIndex); + NANOVDB_ASSERT((grid.gridClass() == GridClass::PointIndex && util::is_same::value) || + (grid.gridClass() == GridClass::PointData && util::is_same::value) || + (grid.gridClass() == GridClass::PointData && util::is_same::value) || + (grid.gridClass() == GridClass::PointData && util::is_same::value) || + (grid.gridClass() == GridClass::PointData && util::is_same::value)); + } + + /// @brief return true if this access was initialized correctly + __hostdev__ operator bool() const { return mData != nullptr; } + + __hostdev__ const NanoGrid& grid() const { return mGrid; } + + /// @brief Return the total number of point in the grid and set the + /// iterators to the complete range of points. + __hostdev__ uint64_t gridPoints(const AttT*& begin, const AttT*& end) const + { + const uint64_t count = mGrid.blindMetaData(0u).mValueCount; + begin = mData; + end = begin + count; + return count; + } + /// @brief Return the number of points in the leaf node containing the coordinate @a ijk. + /// If this return value is larger than zero then the iterators @a begin and @a end + /// will point to all the attributes contained within that leaf node. 
+ __hostdev__ uint64_t leafPoints(const Coord& ijk, const AttT*& begin, const AttT*& end) const + { + auto* leaf = this->probeLeaf(ijk); + if (leaf == nullptr) + return 0; + begin = mData + leaf->offset(); + end = begin + leaf->pointCount(); + return leaf->pointCount(); + } + + /// @brief get iterators over attributes to points at a specific voxel location + __hostdev__ uint64_t voxelPoints(const Coord& ijk, const AttT*& begin, const AttT*& end) const + { + if (auto* leaf = this->probeLeaf(ijk)) { + const uint32_t n = NanoLeaf::CoordToOffset(ijk); + if (leaf->isActive(n)) { + begin = mData + leaf->first(n); + end = mData + leaf->last(n); + return end - begin; + } + } + begin = end = nullptr; + return 0u; // no leaf or inactive voxel + } +}; // PointAccessor + +/// @brief Class to access values in channels at a specific voxel location. +/// +/// @note The ChannelT template parameter can be either const and non-const. +template +class ChannelAccessor : public DefaultReadAccessor +{ + static_assert(BuildTraits::is_index, "Expected an index build type"); + using BaseT = DefaultReadAccessor; + + const NanoGrid& mGrid; + ChannelT* mChannel; + +public: + using ValueType = ChannelT; + using TreeType = NanoTree; + using AccessorType = ChannelAccessor; + + /// @brief Ctor from an IndexGrid and an integer ID of an internal channel + /// that is assumed to exist as blind data in the IndexGrid. + __hostdev__ ChannelAccessor(const NanoGrid& grid, uint32_t channelID = 0u) + : BaseT(grid.tree().root()) + , mGrid(grid) + , mChannel(nullptr) + { + NANOVDB_ASSERT(isIndex(grid.gridType())); + NANOVDB_ASSERT(grid.gridClass() == GridClass::IndexGrid); + this->setChannel(channelID); + } + + /// @brief Ctor from an IndexGrid and an external channel + __hostdev__ ChannelAccessor(const NanoGrid& grid, ChannelT* channelPtr) + : BaseT(grid.tree().root()) + , mGrid(grid) + , mChannel(channelPtr) + { + NANOVDB_ASSERT(isIndex(grid.gridType())); + NANOVDB_ASSERT(grid.gridClass() == GridClass::IndexGrid); + } + + /// @brief return true if this access was initialized correctly + __hostdev__ operator bool() const { return mChannel != nullptr; } + + /// @brief Return a const reference to the IndexGrid + __hostdev__ const NanoGrid& grid() const { return mGrid; } + + /// @brief Return a const reference to the tree of the IndexGrid + __hostdev__ const TreeType& tree() const { return mGrid.tree(); } + + /// @brief Return a vector of the axial voxel sizes + __hostdev__ const Vec3d& voxelSize() const { return mGrid.voxelSize(); } + + /// @brief Return total number of values indexed by the IndexGrid + __hostdev__ const uint64_t& valueCount() const { return mGrid.valueCount(); } + + /// @brief Change to an external channel + /// @return Pointer to channel data + __hostdev__ ChannelT* setChannel(ChannelT* channelPtr) {return mChannel = channelPtr;} + + /// @brief Change to an internal channel, assuming it exists as as blind data + /// in the IndexGrid. 
+ /// @return Pointer to channel data, which could be NULL if channelID is out of range or + /// if ChannelT does not match the value type of the blind data + __hostdev__ ChannelT* setChannel(uint32_t channelID) + { + return mChannel = const_cast(mGrid.template getBlindData(channelID)); + } + + /// @brief Return the linear offset into a channel that maps to the specified coordinate + __hostdev__ uint64_t getIndex(const math::Coord& ijk) const { return BaseT::getValue(ijk); } + __hostdev__ uint64_t idx(int i, int j, int k) const { return BaseT::getValue(math::Coord(i, j, k)); } + + /// @brief Return the value from a cached channel that maps to the specified coordinate + __hostdev__ ChannelT& getValue(const math::Coord& ijk) const { return mChannel[BaseT::getValue(ijk)]; } + __hostdev__ ChannelT& operator()(const math::Coord& ijk) const { return this->getValue(ijk); } + __hostdev__ ChannelT& operator()(int i, int j, int k) const { return this->getValue(math::Coord(i, j, k)); } + + /// @brief return the state and updates the value of the specified voxel + __hostdev__ bool probeValue(const math::Coord& ijk, typename util::remove_const::type& v) const + { + uint64_t idx; + const bool isActive = BaseT::probeValue(ijk, idx); + v = mChannel[idx]; + return isActive; + } + /// @brief Return the value from a specified channel that maps to the specified coordinate + /// + /// @note The template parameter can be either const or non-const + template + __hostdev__ T& getValue(const math::Coord& ijk, T* channelPtr) const { return channelPtr[BaseT::getValue(ijk)]; } + +}; // ChannelAccessor + +#if 0 +// This MiniGridHandle class is only included as a stand-alone example. Note that aligned_alloc is a C++17 feature! +// Normally we recommend using GridHandle defined in util/GridHandle.h but this minimal implementation could be an +// alternative when using the IO methods defined below. +struct MiniGridHandle { + struct BufferType { + uint8_t *data; + uint64_t size; + BufferType(uint64_t n=0) : data(std::aligned_alloc(NANOVDB_DATA_ALIGNMENT, n)), size(n) {assert(isValid(data));} + BufferType(BufferType &&other) : data(other.data), size(other.size) {other.data=nullptr; other.size=0;} + ~BufferType() {std::free(data);} + BufferType& operator=(const BufferType &other) = delete; + BufferType& operator=(BufferType &&other){data=other.data; size=other.size; other.data=nullptr; other.size=0; return *this;} + static BufferType create(size_t n, BufferType* dummy = nullptr) {return BufferType(n);} + } buffer; + MiniGridHandle(BufferType &&buf) : buffer(std::move(buf)) {} + const uint8_t* data() const {return buffer.data;} +};// MiniGridHandle +#endif + +namespace io { + +/// @brief Define compression codecs +/// +/// @note NONE is the default, ZIP is slow but compact and BLOSC offers a great balance. +/// +/// @throw NanoVDB optionally supports ZIP and BLOSC compression and will throw an exception +/// if its support is required but missing. 
+enum class Codec : uint16_t { NONE = 0, + ZIP = 1, + BLOSC = 2, + End = 3, + StrLen = 6 + End }; + +__hostdev__ inline const char* toStr(char *dst, Codec codec) +{ + switch (codec){ + case Codec::NONE: return util::strcpy(dst, "NONE"); + case Codec::ZIP: return util::strcpy(dst, "ZIP"); + case Codec::BLOSC : return util::strcpy(dst, "BLOSC"); + default: return util::strcpy(dst, "END"); + } +} + +__hostdev__ inline Codec toCodec(const char *str) +{ + if (util::streq(str, "none")) return Codec::NONE; + if (util::streq(str, "zip")) return Codec::ZIP; + if (util::streq(str, "blosc")) return Codec::BLOSC; + return Codec::End; +} + +/// @brief Data encoded at the head of each segment of a file or stream. +/// +/// @note A file or stream is composed of one or more segments that each contain +// one or more grids. +struct FileHeader {// 16 bytes + uint64_t magic;// 8 bytes + Version version;// 4 bytes version numbers + uint16_t gridCount;// 2 bytes + Codec codec;// 2 bytes + bool isValid() const {return magic == NANOVDB_MAGIC_NUMB || magic == NANOVDB_MAGIC_FILE;} +}; // FileHeader ( 16 bytes = 2 words ) + +// @brief Data encoded for each of the grids associated with a segment. +// Grid size in memory (uint64_t) | +// Grid size on disk (uint64_t) | +// Grid name hash key (uint64_t) | +// Numer of active voxels (uint64_t) | +// Grid type (uint32_t) | +// Grid class (uint32_t) | +// Characters in grid name (uint32_t) | +// AABB in world space (2*3*double) | one per grid in file +// AABB in index space (2*3*int) | +// Size of a voxel in world units (3*double) | +// Byte size of the grid name (uint32_t) | +// Number of nodes per level (4*uint32_t) | +// Numer of active tiles per level (3*uint32_t) | +// Codec for file compression (uint16_t) | +// Padding due to 8B alignment (uint16_t) | +// Version number (uint32_t) | +struct FileMetaData +{// 176 bytes + uint64_t gridSize, fileSize, nameKey, voxelCount; // 4 * 8 = 32B. + GridType gridType; // 4B. + GridClass gridClass; // 4B. + Vec3dBBox worldBBox; // 2 * 3 * 8 = 48B. + CoordBBox indexBBox; // 2 * 3 * 4 = 24B. + Vec3d voxelSize; // 24B. + uint32_t nameSize; // 4B. + uint32_t nodeCount[4]; //4 x 4 = 16B + uint32_t tileCount[3];// 3 x 4 = 12B + Codec codec; // 2B + uint16_t padding;// 2B, due to 8B alignment from uint64_t + Version version;// 4B +}; // FileMetaData + +// the following code block uses std and therefore needs to be ignored by CUDA and HIP +#if !defined(__CUDA_ARCH__) && !defined(__HIP__) + +// Note that starting with version 32.6.0 it is possible to write and read raw grid buffers to +// files, e.g. os.write((const char*)&buffer.data(), buffer.size()) or more conveniently as +// handle.write(fileName). In addition to this simple approach we offer the methods below to +// write traditional uncompressed nanovdb files that unlike raw files include metadata that +// is used for tools like nanovdb_print. + +/// +/// @brief This is a standalone alternative to io::writeGrid(...,Codec::NONE) defined in util/IO.h +/// Unlike the latter this function has no dependencies at all, not even NanoVDB.h, so it also +/// works if client code only includes PNanoVDB.h! +/// +/// @details Writes a raw NanoVDB buffer, possibly with multiple grids, to a stream WITHOUT compression. +/// It follows all the conventions in util/IO.h so the stream can be read by all existing client +/// code of NanoVDB. +/// +/// @note This method will always write uncompressed grids to the stream, i.e. Blosc or ZIP compression +/// is never applied! 
This is a fundamental limitation and feature of this standalone function. +/// +/// @throw std::invalid_argument if buffer does not point to a valid NanoVDB grid. +/// +/// @warning This is pretty ugly code that involves lots of pointer and bit manipulations - not for the faint of heart :) +template // StreamT class must support: "void write(const char*, size_t)" +void writeUncompressedGrid(StreamT& os, const GridData* gridData, bool raw = false) +{ + NANOVDB_ASSERT(gridData->mMagic == NANOVDB_MAGIC_NUMB || gridData->mMagic == NANOVDB_MAGIC_GRID); + NANOVDB_ASSERT(gridData->mVersion.isCompatible()); + if (!raw) {// segment with a single grid: FileHeader, FileMetaData, gridName, Grid +#ifdef NANOVDB_USE_NEW_MAGIC_NUMBERS + FileHeader head{NANOVDB_MAGIC_FILE, gridData->mVersion, 1u, Codec::NONE}; +#else + FileHeader head{NANOVDB_MAGIC_NUMB, gridData->mVersion, 1u, Codec::NONE}; +#endif + const char* gridName = gridData->gridName(); + const uint32_t nameSize = util::strlen(gridName) + 1;// include '\0' + const TreeData* treeData = (const TreeData*)(gridData->treePtr()); + FileMetaData meta{gridData->mGridSize, gridData->mGridSize, 0u, treeData->mVoxelCount, + gridData->mGridType, gridData->mGridClass, gridData->mWorldBBox, + treeData->bbox(), gridData->mVoxelSize, nameSize, + {treeData->mNodeCount[0], treeData->mNodeCount[1], treeData->mNodeCount[2], 1u}, + {treeData->mTileCount[0], treeData->mTileCount[1], treeData->mTileCount[2]}, + Codec::NONE, 0u, gridData->mVersion }; // FileMetaData + os.write((const char*)&head, sizeof(FileHeader)); // write header + os.write((const char*)&meta, sizeof(FileMetaData)); // write meta data + os.write(gridName, nameSize); // write grid name + } + os.write((const char*)gridData, gridData->mGridSize);// write the grid +}// writeUncompressedGrid + +/// @brief write multiple NanoVDB grids to a single file, without compression. 
+/// @note To write all grids in a single GridHandle simply use handle.write("fieNane") +template class VecT> +void writeUncompressedGrids(const char* fileName, const VecT& handles, bool raw = false) +{ +#ifdef NANOVDB_USE_IOSTREAMS // use this to switch between std::ofstream or FILE implementations + std::ofstream os(fileName, std::ios::out | std::ios::binary | std::ios::trunc); +#else + struct StreamT { + FILE* fptr; + StreamT(const char* name) { fptr = fopen(name, "wb"); } + ~StreamT() { fclose(fptr); } + void write(const char* data, size_t n) { fwrite(data, 1, n, fptr); } + bool is_open() const { return fptr != NULL; } + } os(fileName); +#endif + if (!os.is_open()) { + fprintf(stderr, "nanovdb::writeUncompressedGrids: Unable to open file \"%s\"for output\n", fileName); + exit(EXIT_FAILURE); + } + for (auto& h : handles) { + for (uint32_t n=0; n class VecT> +VecT readUncompressedGrids(StreamT& is, const typename GridHandleT::BufferType& pool = typename GridHandleT::BufferType()) +{ + VecT handles; + GridData data; + is.read((char*)&data, sizeof(GridData)); + if (data.isValid()) {// stream contains a raw grid buffer + uint64_t size = data.mGridSize, sum = 0u; + while(data.mGridIndex + 1u < data.mGridCount) { + is.skip(data.mGridSize - sizeof(GridData));// skip grid + is.read((char*)&data, sizeof(GridData));// read sizeof(GridData) bytes + sum += data.mGridSize; + } + is.skip(-int64_t(sum + sizeof(GridData)));// rewind to start + auto buffer = GridHandleT::BufferType::create(size + sum, &pool); + is.read((char*)(buffer.data()), buffer.size()); + handles.emplace_back(std::move(buffer)); + } else {// Header0, MetaData0, gridName0, Grid0...HeaderN, MetaDataN, gridNameN, GridN + is.skip(-sizeof(GridData));// rewind + FileHeader head; + while(is.read((char*)&head, sizeof(FileHeader))) { + if (!head.isValid()) { + fprintf(stderr, "nanovdb::readUncompressedGrids: invalid magic number = \"%s\"\n", (const char*)&(head.magic)); + exit(EXIT_FAILURE); + } else if (!head.version.isCompatible()) { + char str[20]; + fprintf(stderr, "nanovdb::readUncompressedGrids: invalid major version = \"%s\"\n", toStr(str, head.version)); + exit(EXIT_FAILURE); + } else if (head.codec != Codec::NONE) { + char str[8]; + fprintf(stderr, "nanovdb::readUncompressedGrids: invalid codec = \"%s\"\n", toStr(str, head.codec)); + exit(EXIT_FAILURE); + } + FileMetaData meta; + for (uint16_t i = 0; i < head.gridCount; ++i) { // read all grids in segment + is.read((char*)&meta, sizeof(FileMetaData));// read meta data + is.skip(meta.nameSize); // skip grid name + auto buffer = GridHandleT::BufferType::create(meta.gridSize, &pool); + is.read((char*)buffer.data(), meta.gridSize);// read grid + handles.emplace_back(std::move(buffer)); + }// loop over grids in segment + }// loop over segments + } + return handles; +} // readUncompressedGrids + +/// @brief Read a multiple un-compressed NanoVDB grids from a file and return them as a vector. 
+template class VecT> +VecT readUncompressedGrids(const char* fileName, const typename GridHandleT::BufferType& buffer = typename GridHandleT::BufferType()) +{ +#ifdef NANOVDB_USE_IOSTREAMS // use this to switch between std::ifstream or FILE implementations + struct StreamT : public std::ifstream { + StreamT(const char* name) : std::ifstream(name, std::ios::in | std::ios::binary){} + void skip(int64_t off) { this->seekg(off, std::ios_base::cur); } + }; +#else + struct StreamT { + FILE* fptr; + StreamT(const char* name) { fptr = fopen(name, "rb"); } + ~StreamT() { fclose(fptr); } + bool read(char* data, size_t n) { + size_t m = fread(data, 1, n, fptr); + return n == m; + } + void skip(int64_t off) { fseek(fptr, (long int)off, SEEK_CUR); } + bool is_open() const { return fptr != NULL; } + }; +#endif + StreamT is(fileName); + if (!is.is_open()) { + fprintf(stderr, "nanovdb::readUncompressedGrids: Unable to open file \"%s\"for input\n", fileName); + exit(EXIT_FAILURE); + } + return readUncompressedGrids(is, buffer); +} // readUncompressedGrids + +#endif // if !defined(__CUDA_ARCH__) && !defined(__HIP__) + +} // namespace io + +// ----------------------------> Implementations of random access methods <-------------------------------------- + +/// @brief Implements Tree::getValue(math::Coord), i.e. return the value associated with a specific coordinate @c ijk. +/// @tparam BuildT Build type of the grid being called +/// @details The value at a coordinate maps to the background, a tile value or a leaf value. +template +struct GetValue +{ + __hostdev__ static auto get(const NanoRoot& root) { return root.mBackground; } + __hostdev__ static auto get(const typename NanoRoot::Tile& tile) { return tile.value; } + __hostdev__ static auto get(const NanoUpper& node, uint32_t n) { return node.mTable[n].value; } + __hostdev__ static auto get(const NanoLower& node, uint32_t n) { return node.mTable[n].value; } + __hostdev__ static auto get(const NanoLeaf& leaf, uint32_t n) { return leaf.getValue(n); } // works with all build types +}; // GetValue + +template +struct SetValue +{ + static_assert(!BuildTraits::is_special, "SetValue does not support special value types"); + using ValueT = typename NanoLeaf::ValueType; + __hostdev__ static auto set(NanoRoot&, const ValueT&) {} // no-op + __hostdev__ static auto set(typename NanoRoot::Tile& tile, const ValueT& v) { tile.value = v; } + __hostdev__ static auto set(NanoUpper& node, uint32_t n, const ValueT& v) { node.mTable[n].value = v; } + __hostdev__ static auto set(NanoLower& node, uint32_t n, const ValueT& v) { node.mTable[n].value = v; } + __hostdev__ static auto set(NanoLeaf& leaf, uint32_t n, const ValueT& v) { leaf.mValues[n] = v; } +}; // SetValue + +template +struct SetVoxel +{ + static_assert(!BuildTraits::is_special, "SetVoxel does not support special value types"); + using ValueT = typename NanoLeaf::ValueType; + __hostdev__ static auto set(NanoRoot&, const ValueT&) {} // no-op + __hostdev__ static auto set(typename NanoRoot::Tile&, const ValueT&) {} // no-op + __hostdev__ static auto set(NanoUpper&, uint32_t, const ValueT&) {} // no-op + __hostdev__ static auto set(NanoLower&, uint32_t, const ValueT&) {} // no-op + __hostdev__ static auto set(NanoLeaf& leaf, uint32_t n, const ValueT& v) { leaf.mValues[n] = v; } +}; // SetVoxel + +/// @brief Implements Tree::isActive(math::Coord) +/// @tparam BuildT Build type of the grid being called +template +struct GetState +{ + __hostdev__ static auto get(const NanoRoot&) { return false; } + __hostdev__ static auto 
get(const typename NanoRoot::Tile& tile) { return tile.state > 0; } + __hostdev__ static auto get(const NanoUpper& node, uint32_t n) { return node.mValueMask.isOn(n); } + __hostdev__ static auto get(const NanoLower& node, uint32_t n) { return node.mValueMask.isOn(n); } + __hostdev__ static auto get(const NanoLeaf& leaf, uint32_t n) { return leaf.mValueMask.isOn(n); } +}; // GetState + +/// @brief Implements Tree::getDim(math::Coord) +/// @tparam BuildT Build type of the grid being called +template +struct GetDim +{ + __hostdev__ static uint32_t get(const NanoRoot&) { return 0u; } // background + __hostdev__ static uint32_t get(const typename NanoRoot::Tile&) { return 4096u; } + __hostdev__ static uint32_t get(const NanoUpper&, uint32_t) { return 128u; } + __hostdev__ static uint32_t get(const NanoLower&, uint32_t) { return 8u; } + __hostdev__ static uint32_t get(const NanoLeaf&, uint32_t) { return 1u; } +}; // GetDim + +/// @brief Return the pointer to the leaf node that contains math::Coord. Implements Tree::probeLeaf(math::Coord) +/// @tparam BuildT Build type of the grid being called +template +struct GetLeaf +{ + __hostdev__ static const NanoLeaf* get(const NanoRoot&) { return nullptr; } + __hostdev__ static const NanoLeaf* get(const typename NanoRoot::Tile&) { return nullptr; } + __hostdev__ static const NanoLeaf* get(const NanoUpper&, uint32_t) { return nullptr; } + __hostdev__ static const NanoLeaf* get(const NanoLower&, uint32_t) { return nullptr; } + __hostdev__ static const NanoLeaf* get(const NanoLeaf& leaf, uint32_t) { return &leaf; } +}; // GetLeaf + +/// @brief Return point to the lower internal node where math::Coord maps to one of its values, i.e. terminates +/// @tparam BuildT Build type of the grid being called +template +struct GetLower +{ + __hostdev__ static const NanoLower* get(const NanoRoot&) { return nullptr; } + __hostdev__ static const NanoLower* get(const typename NanoRoot::Tile&) { return nullptr; } + __hostdev__ static const NanoLower* get(const NanoUpper&, uint32_t) { return nullptr; } + __hostdev__ static const NanoLower* get(const NanoLower& node, uint32_t) { return &node; } + __hostdev__ static const NanoLower* get(const NanoLeaf&, uint32_t) { return nullptr; } +}; // GetLower + +/// @brief Return point to the upper internal node where math::Coord maps to one of its values, i.e. 
terminates +/// @tparam BuildT Build type of the grid being called +template +struct GetUpper +{ + __hostdev__ static const NanoUpper* get(const NanoRoot&) { return nullptr; } + __hostdev__ static const NanoUpper* get(const typename NanoRoot::Tile&) { return nullptr; } + __hostdev__ static const NanoUpper* get(const NanoUpper& node, uint32_t) { return &node; } + __hostdev__ static const NanoUpper* get(const NanoLower& node, uint32_t) { return nullptr; } + __hostdev__ static const NanoUpper* get(const NanoLeaf&, uint32_t) { return nullptr; } +}; // GetUpper + +/// @brief Implements Tree::probeLeaf(math::Coord) +/// @tparam BuildT Build type of the grid being called +template +struct ProbeValue +{ + using ValueT = typename BuildToValueMap::Type; + __hostdev__ static bool get(const NanoRoot& root, ValueT& v) + { + v = root.mBackground; + return false; + } + __hostdev__ static bool get(const typename NanoRoot::Tile& tile, ValueT& v) + { + v = tile.value; + return tile.state > 0u; + } + __hostdev__ static bool get(const NanoUpper& node, uint32_t n, ValueT& v) + { + v = node.mTable[n].value; + return node.mValueMask.isOn(n); + } + __hostdev__ static bool get(const NanoLower& node, uint32_t n, ValueT& v) + { + v = node.mTable[n].value; + return node.mValueMask.isOn(n); + } + __hostdev__ static bool get(const NanoLeaf& leaf, uint32_t n, ValueT& v) + { + v = leaf.getValue(n); + return leaf.mValueMask.isOn(n); + } +}; // ProbeValue + +/// @brief Implements Tree::getNodeInfo(math::Coord) +/// @tparam BuildT Build type of the grid being called +template +struct GetNodeInfo +{ + using ValueType = typename NanoLeaf::ValueType; + using FloatType = typename NanoLeaf::FloatType; + struct NodeInfo + { + uint32_t level, dim; + ValueType minimum, maximum; + FloatType average, stdDevi; + CoordBBox bbox; + }; + __hostdev__ static NodeInfo get(const NanoRoot& root) + { + return NodeInfo{3u, NanoUpper::DIM, root.minimum(), root.maximum(), root.average(), root.stdDeviation(), root.bbox()}; + } + __hostdev__ static NodeInfo get(const typename NanoRoot::Tile& tile) + { + return NodeInfo{3u, NanoUpper::DIM, tile.value, tile.value, static_cast(tile.value), 0, CoordBBox::createCube(tile.origin(), NanoUpper::DIM)}; + } + __hostdev__ static NodeInfo get(const NanoUpper& node, uint32_t n) + { + return NodeInfo{2u, node.dim(), node.minimum(), node.maximum(), node.average(), node.stdDeviation(), node.bbox()}; + } + __hostdev__ static NodeInfo get(const NanoLower& node, uint32_t n) + { + return NodeInfo{1u, node.dim(), node.minimum(), node.maximum(), node.average(), node.stdDeviation(), node.bbox()}; + } + __hostdev__ static NodeInfo get(const NanoLeaf& leaf, uint32_t n) + { + return NodeInfo{0u, leaf.dim(), leaf.minimum(), leaf.maximum(), leaf.average(), leaf.stdDeviation(), leaf.bbox()}; + } +}; // GetNodeInfo + +} // namespace nanovdb =================================================================== + +#endif // end of NANOVDB_NANOVDB_H_HAS_BEEN_INCLUDED diff --git a/external/nanovdb/NodeManager.h b/external/nanovdb/NodeManager.h new file mode 100644 index 00000000..0d7686eb --- /dev/null +++ b/external/nanovdb/NodeManager.h @@ -0,0 +1,327 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +/*! + \file nanovdb/NodeManager.h + + \author Ken Museth + + \date February 12, 2021 + + \brief This class allows for sequential access to nodes + in a NanoVDB tree on both the host and device. + + \details The ordering of the sequential access to nodes is always breadth-first! 
+*/ + +#include // for NanoGrid etc +#include // for HostBuffer + +#ifndef NANOVDB_NODEMANAGER_H_HAS_BEEN_INCLUDED +#define NANOVDB_NODEMANAGER_H_HAS_BEEN_INCLUDED + +namespace nanovdb { + +/// @brief NodeManager allows for sequential access to nodes +template +class NodeManager; + +/// @brief NodeManagerHandle manages the memory of a NodeManager +template +class NodeManagerHandle; + +/// @brief brief Construct a NodeManager and return its handle +/// +/// @param grid grid whose nodes will be accessed sequentially +/// @param buffer buffer from which to allocate the output handle +/// +/// @note This is the only way to create a NodeManager since it's using +/// managed memory pointed to by a NodeManagerHandle. +template +NodeManagerHandle createNodeManager(const NanoGrid &grid, + const BufferT& buffer = BufferT()); + +struct NodeManagerData +{// 48B = 6*8B + uint64_t mMagic;// 8B + union {int64_t mPadding; uint8_t mLinear;};// 8B of which 1B is used for a binary flag + void *mGrid;// 8B pointer to either host or device grid + union {int64_t *mPtr[3], mOff[3];};// 24B, use mOff if mLinear!=0 +}; + +/// @brief This class serves to manage a raw memory buffer of a NanoVDB NodeManager or LeafManager. +template +class NodeManagerHandle +{ + GridType mGridType{GridType::Unknown}; + BufferT mBuffer; + + template + const NodeManager* getMgr() const { + return mGridType == toGridType() ? (const NodeManager*)mBuffer.data() : nullptr; + } + + template + typename util::enable_if::hasDeviceDual, const NodeManager*>::type + getDeviceMgr() const { + return mGridType == toGridType() ? (const NodeManager*)mBuffer.deviceData() : nullptr; + } + + template + static T* no_const(const T* ptr) { return const_cast(ptr); } + +public: + /// @brief Move constructor from a buffer + NodeManagerHandle(GridType gridType, BufferT&& buffer) : mGridType(gridType) { mBuffer = std::move(buffer); } + /// @brief Empty ctor + NodeManagerHandle() = default; + /// @brief Disallow copy-construction + NodeManagerHandle(const NodeManagerHandle&) = delete; + /// @brief Disallow copy assignment operation + NodeManagerHandle& operator=(const NodeManagerHandle&) = delete; + /// @brief Move copy assignment operation + NodeManagerHandle& operator=(NodeManagerHandle&& other) noexcept { + mGridType = other.mGridType; + mBuffer = std::move(other.mBuffer); + other.mGridType = GridType::Unknown; + return *this; + } + /// @brief Move copy-constructor + NodeManagerHandle(NodeManagerHandle&& other) noexcept { + mGridType = other.mGridType; + mBuffer = std::move(other.mBuffer); + other.mGridType = GridType::Unknown; + } + /// @brief Default destructor + ~NodeManagerHandle() { this->reset(); } + /// @brief clear the buffer + void reset() { mBuffer.clear(); } + + /// @brief Return a reference to the buffer + BufferT& buffer() { return mBuffer; } + + /// @brief Return a const reference to the buffer + const BufferT& buffer() const { return mBuffer; } + + /// @brief Returns a non-const pointer to the data. + /// + /// @warning Note that the return pointer can be NULL if the NodeManagerHandle was not initialized + void* data() { return mBuffer.data(); } + + /// @brief Returns a const pointer to the data. + /// + /// @warning Note that the return pointer can be NULL if the NodeManagerHandle was not initialized + const void* data() const { return mBuffer.data(); } + + /// @brief Returns the size in bytes of the raw memory buffer managed by this NodeManagerHandle's allocator. 
+ uint64_t size() const { return mBuffer.size(); } + + /// @brief Returns a const pointer to the NodeManager encoded in this NodeManagerHandle. + /// + /// @warning Note that the return pointer can be NULL if the template parameter does not match the specified grid! + template + const NodeManager* mgr() const { return this->template getMgr(); } + + /// @brief Returns a pointer to the NodeManager encoded in this NodeManagerHandle. + /// + /// @warning Note that the return pointer can be NULL if the template parameter does not match the specified grid! + template + NodeManager* mgr() { return no_const(this->template getMgr()); } + + /// @brief Return a const pointer to the NodeManager encoded in this NodeManagerHandle on the device, e.g. GPU + /// + /// @warning Note that the return pointer can be NULL if the template parameter does not match the specified grid! + template + typename util::enable_if::hasDeviceDual, const NodeManager*>::type + deviceMgr() const { return this->template getDeviceMgr(); } + + /// @brief Return a const pointer to the NodeManager encoded in this NodeManagerHandle on the device, e.g. GPU + /// + /// @warning Note that the return pointer can be NULL if the template parameter does not match the specified grid! + template + typename util::enable_if::hasDeviceDual, NodeManager*>::type + deviceMgr() { return no_const(this->template getDeviceMgr()); } + + /// @brief Upload the NodeManager to the device, e.g. from CPU to GPU + /// + /// @note This method is only available if the buffer supports devices + template + typename util::enable_if::hasDeviceDual, void>::type + deviceUpload(void* deviceGrid, void* stream = nullptr, bool sync = true) + { + assert(deviceGrid); + auto *data = reinterpret_cast(mBuffer.data()); + void *tmp = data->mGrid; + data->mGrid = deviceGrid; + mBuffer.deviceUpload(stream, sync); + data->mGrid = tmp; + } + + /// @brief Download the NodeManager to from the device, e.g. from GPU to CPU + /// + /// @note This method is only available if the buffer supports devices + template + typename util::enable_if::hasDeviceDual, void>::type + deviceDownload(void* stream = nullptr, bool sync = true) + { + auto *data = reinterpret_cast(mBuffer.data()); + void *tmp = data->mGrid; + mBuffer.deviceDownload(stream, sync); + data->mGrid = tmp; + } +};// NodeManagerHandle + +/// @brief This class allows for sequential access to nodes in a NanoVDB tree +/// +/// @details Nodes are always arranged breadth first during sequential access of nodes +/// at a particular level. +template +class NodeManager : private NodeManagerData +{ + using DataT = NodeManagerData; + using GridT = NanoGrid; + using TreeT = typename GridTree::type; + template + using NodeT = typename NodeTrait::type; + using RootT = NodeT<3>;// root node + using Node2 = NodeT<2>;// upper internal node + using Node1 = NodeT<1>;// lower internal node + using Node0 = NodeT<0>;// leaf node + +public: + static constexpr bool FIXED_SIZE = Node0::FIXED_SIZE && Node1::FIXED_SIZE && Node2::FIXED_SIZE; + + NodeManager(const NodeManager&) = delete; + NodeManager(NodeManager&&) = delete; + NodeManager& operator=(const NodeManager&) = delete; + NodeManager& operator=(NodeManager&&) = delete; + ~NodeManager() = delete; + + /// @brief return true if the nodes have both fixed size and are arranged breadth-first in memory. + /// This allows for direct and memory-efficient linear access to nodes. 
+ __hostdev__ static bool isLinear(const GridT &grid) {return FIXED_SIZE && grid.isBreadthFirst();} + + /// @brief return true if the nodes have both fixed size and are arranged breadth-first in memory. + /// This allows for direct and memory-efficient linear access to nodes. + __hostdev__ bool isLinear() const {return DataT::mLinear!=0u;} + + /// @brief Return the memory footprint in bytes of the NodeManager derived from the specified grid + __hostdev__ static uint64_t memUsage(const GridT &grid) { + uint64_t size = sizeof(NodeManagerData); + if (!NodeManager::isLinear(grid)) { + const uint32_t *p = grid.tree().mNodeCount; + size += sizeof(int64_t)*(p[0]+p[1]+p[2]); + } + return size; + } + + /// @brief Return the memory footprint in bytes of this instance + __hostdev__ uint64_t memUsage() const {return NodeManager::memUsage(this->grid());} + + /// @brief Return a reference to the grid + __hostdev__ GridT& grid() { return *reinterpret_cast(DataT::mGrid); } + __hostdev__ const GridT& grid() const { return *reinterpret_cast(DataT::mGrid); } + + /// @brief Return a reference to the tree + __hostdev__ TreeT& tree() { return this->grid().tree(); } + __hostdev__ const TreeT& tree() const { return this->grid().tree(); } + + /// @brief Return a reference to the root + __hostdev__ RootT& root() { return this->tree().root(); } + __hostdev__ const RootT& root() const { return this->tree().root(); } + + /// @brief Return the number of tree nodes at the specified level + /// @details 0 is leaf, 1 is lower internal, and 2 is upper internal level + __hostdev__ uint64_t nodeCount(int level) const { return this->tree().nodeCount(level); } + + __hostdev__ uint64_t leafCount() const { return this->tree().nodeCount(0); } + __hostdev__ uint64_t lowerCount() const { return this->tree().nodeCount(1); } + __hostdev__ uint64_t upperCount() const { return this->tree().nodeCount(2); } + + /// @brief Return the i'th leaf node with respect to breadth-first ordering + template + __hostdev__ const NodeT& node(uint32_t i) const { + NANOVDB_ASSERT(i < this->nodeCount(LEVEL)); + const NodeT* ptr = nullptr; + if (DataT::mLinear) { + ptr = util::PtrAdd>(DataT::mGrid, DataT::mOff[LEVEL]) + i; + } else { + ptr = util::PtrAdd>(DataT::mGrid, DataT::mPtr[LEVEL][i]); + } + NANOVDB_ASSERT(ptr && isAligned(ptr)); + return *ptr; + } + + /// @brief Return the i'th node with respect to breadth-first ordering + template + __hostdev__ NodeT& node(uint32_t i) { + NANOVDB_ASSERT(i < this->nodeCount(LEVEL)); + NodeT* ptr = nullptr; + if (DataT::mLinear) { + ptr = util::PtrAdd>(DataT::mGrid, DataT::mOff[LEVEL]) + i; + } else { + ptr = util::PtrAdd>(DataT::mGrid, DataT::mPtr[LEVEL][i]); + } + NANOVDB_ASSERT(ptr && isAligned(ptr)); + return *ptr; + } + + /// @brief Return the i'th leaf node with respect to breadth-first ordering + __hostdev__ const Node0& leaf(uint32_t i) const { return this->node<0>(i); } + __hostdev__ Node0& leaf(uint32_t i) { return this->node<0>(i); } + + /// @brief Return the i'th lower internal node with respect to breadth-first ordering + __hostdev__ const Node1& lower(uint32_t i) const { return this->node<1>(i); } + __hostdev__ Node1& lower(uint32_t i) { return this->node<1>(i); } + + /// @brief Return the i'th upper internal node with respect to breadth-first ordering + __hostdev__ const Node2& upper(uint32_t i) const { return this->node<2>(i); } + __hostdev__ Node2& upper(uint32_t i) { return this->node<2>(i); } + +}; // NodeManager class + +template +NodeManagerHandle createNodeManager(const NanoGrid &grid, + const 
BufferT& buffer) +{ + NodeManagerHandle handle(toGridType(), BufferT::create(NodeManager::memUsage(grid), &buffer)); + auto *data = reinterpret_cast(handle.data()); + NANOVDB_ASSERT(data && isAligned(data)); + NANOVDB_ASSERT(toGridType() == grid.gridType()); +#ifdef NANOVDB_USE_NEW_MAGIC_NUMBERS + *data = NodeManagerData{NANOVDB_MAGIC_NODE, {0u}, (void*)&grid, {{0u,0u,0u}}}; +#else + *data = NodeManagerData{NANOVDB_MAGIC_NUMB, {0u}, (void*)&grid, {{0u,0u,0u}}}; +#endif + + if (NodeManager::isLinear(grid)) { + data->mLinear = uint8_t(1u); + data->mOff[0] = util::PtrDiff(grid.tree().template getFirstNode<0>(), &grid); + data->mOff[1] = util::PtrDiff(grid.tree().template getFirstNode<1>(), &grid); + data->mOff[2] = util::PtrDiff(grid.tree().template getFirstNode<2>(), &grid); + } else { + int64_t *ptr0 = data->mPtr[0] = reinterpret_cast(data + 1); + int64_t *ptr1 = data->mPtr[1] = data->mPtr[0] + grid.tree().nodeCount(0); + int64_t *ptr2 = data->mPtr[2] = data->mPtr[1] + grid.tree().nodeCount(1); + // Performs depth first traversal but breadth first insertion + for (auto it2 = grid.tree().root().cbeginChild(); it2; ++it2) { + *ptr2++ = util::PtrDiff(&*it2, &grid); + for (auto it1 = it2->cbeginChild(); it1; ++it1) { + *ptr1++ = util::PtrDiff(&*it1, &grid); + for (auto it0 = it1->cbeginChild(); it0; ++it0) { + *ptr0++ = util::PtrDiff(&*it0, &grid); + }// loop over child nodes of the lower internal node + }// loop over child nodes of the upper internal node + }// loop over child nodes of the root node + } + + return handle;// // is converted to r-value so return value is move constructed! +} + +} // namespace nanovdb + +#if defined(__CUDACC__) +#include +#endif// defined(__CUDACC__) + +#endif // NANOVDB_NODEMANAGER_H_HAS_BEEN_INCLUDED diff --git a/external/nanovdb/PNanoVDB.h b/external/nanovdb/PNanoVDB.h new file mode 100644 index 00000000..3e7b306b --- /dev/null +++ b/external/nanovdb/PNanoVDB.h @@ -0,0 +1,3390 @@ + +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +/*! + \file nanovdb/PNanoVDB.h + + \author Andrew Reidmeyer + + \brief This file is a portable (e.g. pointer-less) C99/GLSL/HLSL port + of NanoVDB.h, which is compatible with most graphics APIs. 
+*/ + +#ifndef NANOVDB_PNANOVDB_H_HAS_BEEN_INCLUDED +#define NANOVDB_PNANOVDB_H_HAS_BEEN_INCLUDED + +// ------------------------------------------------ Configuration ----------------------------------------------------------- + +// platforms +//#define PNANOVDB_C +//#define PNANOVDB_HLSL +//#define PNANOVDB_GLSL + +// addressing mode +// PNANOVDB_ADDRESS_32 +// PNANOVDB_ADDRESS_64 +#if defined(PNANOVDB_C) +#ifndef PNANOVDB_ADDRESS_32 +#define PNANOVDB_ADDRESS_64 +#endif +#elif defined(PNANOVDB_HLSL) +#ifndef PNANOVDB_ADDRESS_64 +#define PNANOVDB_ADDRESS_32 +#endif +#elif defined(PNANOVDB_GLSL) +#ifndef PNANOVDB_ADDRESS_64 +#define PNANOVDB_ADDRESS_32 +#endif +#endif + +// bounds checking +//#define PNANOVDB_BUF_BOUNDS_CHECK + +// enable HDDA by default on HLSL/GLSL, make explicit on C +#if defined(PNANOVDB_C) +//#define PNANOVDB_HDDA +#ifdef PNANOVDB_HDDA +#ifndef PNANOVDB_CMATH +#define PNANOVDB_CMATH +#endif +#endif +#elif defined(PNANOVDB_HLSL) +#define PNANOVDB_HDDA +#elif defined(PNANOVDB_GLSL) +#define PNANOVDB_HDDA +#endif + +#ifdef PNANOVDB_CMATH +#ifndef __CUDACC_RTC__ +#include +#endif +#endif + +// ------------------------------------------------ Buffer ----------------------------------------------------------- + +#if defined(PNANOVDB_BUF_CUSTOM) +// NOP +#elif defined(PNANOVDB_C) +#define PNANOVDB_BUF_C +#elif defined(PNANOVDB_HLSL) +#define PNANOVDB_BUF_HLSL +#elif defined(PNANOVDB_GLSL) +#define PNANOVDB_BUF_GLSL +#endif + +#if defined(PNANOVDB_BUF_C) +#ifndef __CUDACC_RTC__ +#include +#endif +#if defined(__CUDACC__) +#define PNANOVDB_BUF_FORCE_INLINE static __host__ __device__ __forceinline__ +#elif defined(_WIN32) +#define PNANOVDB_BUF_FORCE_INLINE static inline __forceinline +#else +#define PNANOVDB_BUF_FORCE_INLINE static inline __attribute__((always_inline)) +#endif +typedef struct pnanovdb_buf_t +{ + uint32_t* data; +#ifdef PNANOVDB_BUF_BOUNDS_CHECK + uint64_t size_in_words; +#endif +}pnanovdb_buf_t; +PNANOVDB_BUF_FORCE_INLINE pnanovdb_buf_t pnanovdb_make_buf(uint32_t* data, uint64_t size_in_words) +{ + pnanovdb_buf_t ret; + ret.data = data; +#ifdef PNANOVDB_BUF_BOUNDS_CHECK + ret.size_in_words = size_in_words; +#endif + return ret; +} +#if defined(PNANOVDB_ADDRESS_32) +PNANOVDB_BUF_FORCE_INLINE uint32_t pnanovdb_buf_read_uint32(pnanovdb_buf_t buf, uint32_t byte_offset) +{ + uint32_t wordaddress = (byte_offset >> 2u); +#ifdef PNANOVDB_BUF_BOUNDS_CHECK + return wordaddress < buf.size_in_words ? buf.data[wordaddress] : 0u; +#else + return buf.data[wordaddress]; +#endif +} +PNANOVDB_BUF_FORCE_INLINE uint64_t pnanovdb_buf_read_uint64(pnanovdb_buf_t buf, uint32_t byte_offset) +{ + uint64_t* data64 = (uint64_t*)buf.data; + uint32_t wordaddress64 = (byte_offset >> 3u); +#ifdef PNANOVDB_BUF_BOUNDS_CHECK + uint64_t size_in_words64 = buf.size_in_words >> 1u; + return wordaddress64 < size_in_words64 ? 
data64[wordaddress64] : 0llu; +#else + return data64[wordaddress64]; +#endif +} +PNANOVDB_BUF_FORCE_INLINE void pnanovdb_buf_write_uint32(pnanovdb_buf_t buf, uint32_t byte_offset, uint32_t value) +{ + uint32_t wordaddress = (byte_offset >> 2u); +#ifdef PNANOVDB_BUF_BOUNDS_CHECK + if (wordaddress < buf.size_in_words) + { + buf.data[wordaddress] = value; +} +#else + buf.data[wordaddress] = value; +#endif +} +PNANOVDB_BUF_FORCE_INLINE void pnanovdb_buf_write_uint64(pnanovdb_buf_t buf, uint32_t byte_offset, uint64_t value) +{ + uint64_t* data64 = (uint64_t*)buf.data; + uint32_t wordaddress64 = (byte_offset >> 3u); +#ifdef PNANOVDB_BUF_BOUNDS_CHECK + uint64_t size_in_words64 = buf.size_in_words >> 1u; + if (wordaddress64 < size_in_words64) + { + data64[wordaddress64] = value; + } +#else + data64[wordaddress64] = value; +#endif +} +#elif defined(PNANOVDB_ADDRESS_64) +PNANOVDB_BUF_FORCE_INLINE uint32_t pnanovdb_buf_read_uint32(pnanovdb_buf_t buf, uint64_t byte_offset) +{ + uint64_t wordaddress = (byte_offset >> 2u); +#ifdef PNANOVDB_BUF_BOUNDS_CHECK + return wordaddress < buf.size_in_words ? buf.data[wordaddress] : 0u; +#else + return buf.data[wordaddress]; +#endif +} +PNANOVDB_BUF_FORCE_INLINE uint64_t pnanovdb_buf_read_uint64(pnanovdb_buf_t buf, uint64_t byte_offset) +{ + uint64_t* data64 = (uint64_t*)buf.data; + uint64_t wordaddress64 = (byte_offset >> 3u); +#ifdef PNANOVDB_BUF_BOUNDS_CHECK + uint64_t size_in_words64 = buf.size_in_words >> 1u; + return wordaddress64 < size_in_words64 ? data64[wordaddress64] : 0llu; +#else + return data64[wordaddress64]; +#endif +} +PNANOVDB_BUF_FORCE_INLINE void pnanovdb_buf_write_uint32(pnanovdb_buf_t buf, uint64_t byte_offset, uint32_t value) +{ + uint64_t wordaddress = (byte_offset >> 2u); +#ifdef PNANOVDB_BUF_BOUNDS_CHECK + if (wordaddress < buf.size_in_words) + { + buf.data[wordaddress] = value; + } +#else + buf.data[wordaddress] = value; +#endif +} +PNANOVDB_BUF_FORCE_INLINE void pnanovdb_buf_write_uint64(pnanovdb_buf_t buf, uint64_t byte_offset, uint64_t value) +{ + uint64_t* data64 = (uint64_t*)buf.data; + uint64_t wordaddress64 = (byte_offset >> 3u); +#ifdef PNANOVDB_BUF_BOUNDS_CHECK + uint64_t size_in_words64 = buf.size_in_words >> 1u; + if (wordaddress64 < size_in_words64) + { + data64[wordaddress64] = value; + } +#else + data64[wordaddress64] = value; +#endif +} +#endif +typedef uint32_t pnanovdb_grid_type_t; +#define PNANOVDB_GRID_TYPE_GET(grid_typeIn, nameIn) pnanovdb_grid_type_constants[grid_typeIn].nameIn +#elif defined(PNANOVDB_BUF_HLSL) +#if defined(PNANOVDB_ADDRESS_32) +#define pnanovdb_buf_t StructuredBuffer +uint pnanovdb_buf_read_uint32(pnanovdb_buf_t buf, uint byte_offset) +{ + return buf[(byte_offset >> 2u)]; +} +uint2 pnanovdb_buf_read_uint64(pnanovdb_buf_t buf, uint byte_offset) +{ + uint2 ret; + ret.x = pnanovdb_buf_read_uint32(buf, byte_offset + 0u); + ret.y = pnanovdb_buf_read_uint32(buf, byte_offset + 4u); + return ret; +} +void pnanovdb_buf_write_uint32(pnanovdb_buf_t buf, uint byte_offset, uint value) +{ + // NOP, by default no write in HLSL +} +void pnanovdb_buf_write_uint64(pnanovdb_buf_t buf, uint byte_offset, uint2 value) +{ + // NOP, by default no write in HLSL +} +#elif defined(PNANOVDB_ADDRESS_64) +#define pnanovdb_buf_t StructuredBuffer +uint pnanovdb_buf_read_uint32(pnanovdb_buf_t buf, uint64_t byte_offset) +{ + return buf[uint(byte_offset >> 2u)]; +} +uint64_t pnanovdb_buf_read_uint64(pnanovdb_buf_t buf, uint64_t byte_offset) +{ + uint64_t ret; + ret = pnanovdb_buf_read_uint32(buf, byte_offset + 0u); + ret = ret + 
(uint64_t(pnanovdb_buf_read_uint32(buf, byte_offset + 4u)) << 32u); + return ret; +} +void pnanovdb_buf_write_uint32(pnanovdb_buf_t buf, uint64_t byte_offset, uint value) +{ + // NOP, by default no write in HLSL +} +void pnanovdb_buf_write_uint64(pnanovdb_buf_t buf, uint64_t byte_offset, uint64_t value) +{ + // NOP, by default no write in HLSL +} +#endif +#define pnanovdb_grid_type_t uint +#define PNANOVDB_GRID_TYPE_GET(grid_typeIn, nameIn) pnanovdb_grid_type_constants[grid_typeIn].nameIn +#elif defined(PNANOVDB_BUF_GLSL) +struct pnanovdb_buf_t +{ + uint unused; // to satisfy min struct size? +}; +uint pnanovdb_buf_read_uint32(pnanovdb_buf_t buf, uint byte_offset) +{ + return pnanovdb_buf_data[(byte_offset >> 2u)]; +} +uvec2 pnanovdb_buf_read_uint64(pnanovdb_buf_t buf, uint byte_offset) +{ + uvec2 ret; + ret.x = pnanovdb_buf_read_uint32(buf, byte_offset + 0u); + ret.y = pnanovdb_buf_read_uint32(buf, byte_offset + 4u); + return ret; +} +void pnanovdb_buf_write_uint32(pnanovdb_buf_t buf, uint byte_offset, uint value) +{ + // NOP, by default no write in HLSL +} +void pnanovdb_buf_write_uint64(pnanovdb_buf_t buf, uint byte_offset, uvec2 value) +{ + // NOP, by default no write in HLSL +} +#define pnanovdb_grid_type_t uint +#define PNANOVDB_GRID_TYPE_GET(grid_typeIn, nameIn) pnanovdb_grid_type_constants[grid_typeIn].nameIn +#endif + +// ------------------------------------------------ Basic Types ----------------------------------------------------------- + +// force inline +#if defined(PNANOVDB_C) +#if defined(__CUDACC__) +#define PNANOVDB_FORCE_INLINE static __host__ __device__ __forceinline__ +#elif defined(_WIN32) +#define PNANOVDB_FORCE_INLINE static inline __forceinline +#else +#define PNANOVDB_FORCE_INLINE static inline __attribute__((always_inline)) +#endif +#elif defined(PNANOVDB_HLSL) +#define PNANOVDB_FORCE_INLINE +#elif defined(PNANOVDB_GLSL) +#define PNANOVDB_FORCE_INLINE +#endif + +// struct typedef, static const, inout +#if defined(PNANOVDB_C) +#define PNANOVDB_STRUCT_TYPEDEF(X) typedef struct X X; +#if defined(__CUDA_ARCH__) +#define PNANOVDB_STATIC_CONST constexpr __constant__ +#else +#define PNANOVDB_STATIC_CONST static const +#endif +#define PNANOVDB_INOUT(X) X* +#define PNANOVDB_IN(X) const X* +#define PNANOVDB_DEREF(X) (*X) +#define PNANOVDB_REF(X) &X +#elif defined(PNANOVDB_HLSL) +#define PNANOVDB_STRUCT_TYPEDEF(X) +#define PNANOVDB_STATIC_CONST static const +#define PNANOVDB_INOUT(X) inout X +#define PNANOVDB_IN(X) X +#define PNANOVDB_DEREF(X) X +#define PNANOVDB_REF(X) X +#elif defined(PNANOVDB_GLSL) +#define PNANOVDB_STRUCT_TYPEDEF(X) +#define PNANOVDB_STATIC_CONST const +#define PNANOVDB_INOUT(X) inout X +#define PNANOVDB_IN(X) X +#define PNANOVDB_DEREF(X) X +#define PNANOVDB_REF(X) X +#endif + +// basic types, type conversion +#if defined(PNANOVDB_C) +#define PNANOVDB_NATIVE_64 +#ifndef __CUDACC_RTC__ +#include +#endif +#if !defined(PNANOVDB_MEMCPY_CUSTOM) +#ifndef __CUDACC_RTC__ +#include +#endif +#define pnanovdb_memcpy memcpy +#endif +typedef uint32_t pnanovdb_uint32_t; +typedef int32_t pnanovdb_int32_t; +typedef int32_t pnanovdb_bool_t; +#define PNANOVDB_FALSE 0 +#define PNANOVDB_TRUE 1 +typedef uint64_t pnanovdb_uint64_t; +typedef int64_t pnanovdb_int64_t; +typedef struct pnanovdb_coord_t +{ + pnanovdb_int32_t x, y, z; +}pnanovdb_coord_t; +typedef struct pnanovdb_vec3_t +{ + float x, y, z; +}pnanovdb_vec3_t; +PNANOVDB_FORCE_INLINE pnanovdb_int32_t pnanovdb_uint32_as_int32(pnanovdb_uint32_t v) { return (pnanovdb_int32_t)v; } +PNANOVDB_FORCE_INLINE pnanovdb_int64_t 
pnanovdb_uint64_as_int64(pnanovdb_uint64_t v) { return (pnanovdb_int64_t)v; } +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_int64_as_uint64(pnanovdb_int64_t v) { return (pnanovdb_uint64_t)v; } +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_int32_as_uint32(pnanovdb_int32_t v) { return (pnanovdb_uint32_t)v; } +PNANOVDB_FORCE_INLINE float pnanovdb_uint32_as_float(pnanovdb_uint32_t v) { float vf; pnanovdb_memcpy(&vf, &v, sizeof(vf)); return vf; } +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_float_as_uint32(float v) { return *((pnanovdb_uint32_t*)(&v)); } +PNANOVDB_FORCE_INLINE double pnanovdb_uint64_as_double(pnanovdb_uint64_t v) { double vf; pnanovdb_memcpy(&vf, &v, sizeof(vf)); return vf; } +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_double_as_uint64(double v) { return *((pnanovdb_uint64_t*)(&v)); } +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_uint64_low(pnanovdb_uint64_t v) { return (pnanovdb_uint32_t)v; } +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_uint64_high(pnanovdb_uint64_t v) { return (pnanovdb_uint32_t)(v >> 32u); } +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint32_as_uint64(pnanovdb_uint32_t x, pnanovdb_uint32_t y) { return ((pnanovdb_uint64_t)x) | (((pnanovdb_uint64_t)y) << 32u); } +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint32_as_uint64_low(pnanovdb_uint32_t x) { return ((pnanovdb_uint64_t)x); } +PNANOVDB_FORCE_INLINE pnanovdb_int32_t pnanovdb_uint64_is_equal(pnanovdb_uint64_t a, pnanovdb_uint64_t b) { return a == b; } +PNANOVDB_FORCE_INLINE pnanovdb_int32_t pnanovdb_int64_is_zero(pnanovdb_int64_t a) { return a == 0; } +#ifdef PNANOVDB_CMATH +PNANOVDB_FORCE_INLINE float pnanovdb_floor(float v) { return floorf(v); } +#endif +PNANOVDB_FORCE_INLINE pnanovdb_int32_t pnanovdb_float_to_int32(float v) { return (pnanovdb_int32_t)v; } +PNANOVDB_FORCE_INLINE float pnanovdb_int32_to_float(pnanovdb_int32_t v) { return (float)v; } +PNANOVDB_FORCE_INLINE float pnanovdb_uint32_to_float(pnanovdb_uint32_t v) { return (float)v; } +PNANOVDB_FORCE_INLINE float pnanovdb_min(float a, float b) { return a < b ? a : b; } +PNANOVDB_FORCE_INLINE float pnanovdb_max(float a, float b) { return a > b ? 
a : b; } +#elif defined(PNANOVDB_HLSL) +typedef uint pnanovdb_uint32_t; +typedef int pnanovdb_int32_t; +typedef bool pnanovdb_bool_t; +#define PNANOVDB_FALSE false +#define PNANOVDB_TRUE true +typedef int3 pnanovdb_coord_t; +typedef float3 pnanovdb_vec3_t; +pnanovdb_int32_t pnanovdb_uint32_as_int32(pnanovdb_uint32_t v) { return int(v); } +pnanovdb_uint32_t pnanovdb_int32_as_uint32(pnanovdb_int32_t v) { return uint(v); } +float pnanovdb_uint32_as_float(pnanovdb_uint32_t v) { return asfloat(v); } +pnanovdb_uint32_t pnanovdb_float_as_uint32(float v) { return asuint(v); } +float pnanovdb_floor(float v) { return floor(v); } +pnanovdb_int32_t pnanovdb_float_to_int32(float v) { return int(v); } +float pnanovdb_int32_to_float(pnanovdb_int32_t v) { return float(v); } +float pnanovdb_uint32_to_float(pnanovdb_uint32_t v) { return float(v); } +float pnanovdb_min(float a, float b) { return min(a, b); } +float pnanovdb_max(float a, float b) { return max(a, b); } +#if defined(PNANOVDB_ADDRESS_32) +typedef uint2 pnanovdb_uint64_t; +typedef int2 pnanovdb_int64_t; +pnanovdb_int64_t pnanovdb_uint64_as_int64(pnanovdb_uint64_t v) { return int2(v); } +pnanovdb_uint64_t pnanovdb_int64_as_uint64(pnanovdb_int64_t v) { return uint2(v); } +double pnanovdb_uint64_as_double(pnanovdb_uint64_t v) { return asdouble(v.x, v.y); } +pnanovdb_uint64_t pnanovdb_double_as_uint64(double v) { uint2 ret; asuint(v, ret.x, ret.y); return ret; } +pnanovdb_uint32_t pnanovdb_uint64_low(pnanovdb_uint64_t v) { return v.x; } +pnanovdb_uint32_t pnanovdb_uint64_high(pnanovdb_uint64_t v) { return v.y; } +pnanovdb_uint64_t pnanovdb_uint32_as_uint64(pnanovdb_uint32_t x, pnanovdb_uint32_t y) { return uint2(x, y); } +pnanovdb_uint64_t pnanovdb_uint32_as_uint64_low(pnanovdb_uint32_t x) { return uint2(x, 0); } +bool pnanovdb_uint64_is_equal(pnanovdb_uint64_t a, pnanovdb_uint64_t b) { return (a.x == b.x) && (a.y == b.y); } +bool pnanovdb_int64_is_zero(pnanovdb_int64_t a) { return a.x == 0 && a.y == 0; } +#else +typedef uint64_t pnanovdb_uint64_t; +typedef int64_t pnanovdb_int64_t; +pnanovdb_int64_t pnanovdb_uint64_as_int64(pnanovdb_uint64_t v) { return int64_t(v); } +pnanovdb_uint64_t pnanovdb_int64_as_uint64(pnanovdb_int64_t v) { return uint64_t(v); } +double pnanovdb_uint64_as_double(pnanovdb_uint64_t v) { return asdouble(uint(v), uint(v >> 32u)); } +pnanovdb_uint64_t pnanovdb_double_as_uint64(double v) { uint2 ret; asuint(v, ret.x, ret.y); return uint64_t(ret.x) + (uint64_t(ret.y) << 32u); } +pnanovdb_uint32_t pnanovdb_uint64_low(pnanovdb_uint64_t v) { return uint(v); } +pnanovdb_uint32_t pnanovdb_uint64_high(pnanovdb_uint64_t v) { return uint(v >> 32u); } +pnanovdb_uint64_t pnanovdb_uint32_as_uint64(pnanovdb_uint32_t x, pnanovdb_uint32_t y) { return uint64_t(x) + (uint64_t(y) << 32u); } +pnanovdb_uint64_t pnanovdb_uint32_as_uint64_low(pnanovdb_uint32_t x) { return uint64_t(x); } +bool pnanovdb_uint64_is_equal(pnanovdb_uint64_t a, pnanovdb_uint64_t b) { return a == b; } +bool pnanovdb_int64_is_zero(pnanovdb_int64_t a) { return a == 0; } +#endif +#elif defined(PNANOVDB_GLSL) +#define pnanovdb_uint32_t uint +#define pnanovdb_int32_t int +#define pnanovdb_bool_t bool +#define PNANOVDB_FALSE false +#define PNANOVDB_TRUE true +#define pnanovdb_uint64_t uvec2 +#define pnanovdb_int64_t ivec2 +#define pnanovdb_coord_t ivec3 +#define pnanovdb_vec3_t vec3 +pnanovdb_int32_t pnanovdb_uint32_as_int32(pnanovdb_uint32_t v) { return int(v); } +pnanovdb_int64_t pnanovdb_uint64_as_int64(pnanovdb_uint64_t v) { return ivec2(v); } +pnanovdb_uint64_t 
pnanovdb_int64_as_uint64(pnanovdb_int64_t v) { return uvec2(v); } +pnanovdb_uint32_t pnanovdb_int32_as_uint32(pnanovdb_int32_t v) { return uint(v); } +float pnanovdb_uint32_as_float(pnanovdb_uint32_t v) { return uintBitsToFloat(v); } +pnanovdb_uint32_t pnanovdb_float_as_uint32(float v) { return floatBitsToUint(v); } +double pnanovdb_uint64_as_double(pnanovdb_uint64_t v) { return packDouble2x32(uvec2(v.x, v.y)); } +pnanovdb_uint64_t pnanovdb_double_as_uint64(double v) { return unpackDouble2x32(v); } +pnanovdb_uint32_t pnanovdb_uint64_low(pnanovdb_uint64_t v) { return v.x; } +pnanovdb_uint32_t pnanovdb_uint64_high(pnanovdb_uint64_t v) { return v.y; } +pnanovdb_uint64_t pnanovdb_uint32_as_uint64(pnanovdb_uint32_t x, pnanovdb_uint32_t y) { return uvec2(x, y); } +pnanovdb_uint64_t pnanovdb_uint32_as_uint64_low(pnanovdb_uint32_t x) { return uvec2(x, 0); } +bool pnanovdb_uint64_is_equal(pnanovdb_uint64_t a, pnanovdb_uint64_t b) { return (a.x == b.x) && (a.y == b.y); } +bool pnanovdb_int64_is_zero(pnanovdb_int64_t a) { return a.x == 0 && a.y == 0; } +float pnanovdb_floor(float v) { return floor(v); } +pnanovdb_int32_t pnanovdb_float_to_int32(float v) { return int(v); } +float pnanovdb_int32_to_float(pnanovdb_int32_t v) { return float(v); } +float pnanovdb_uint32_to_float(pnanovdb_uint32_t v) { return float(v); } +float pnanovdb_min(float a, float b) { return min(a, b); } +float pnanovdb_max(float a, float b) { return max(a, b); } +#endif + +// ------------------------------------------------ Coord/Vec3 Utilties ----------------------------------------------------------- + +#if defined(PNANOVDB_C) +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_vec3_uniform(float a) +{ + pnanovdb_vec3_t v; + v.x = a; + v.y = a; + v.z = a; + return v; +} +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_vec3_add(const pnanovdb_vec3_t a, const pnanovdb_vec3_t b) +{ + pnanovdb_vec3_t v; + v.x = a.x + b.x; + v.y = a.y + b.y; + v.z = a.z + b.z; + return v; +} +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_vec3_sub(const pnanovdb_vec3_t a, const pnanovdb_vec3_t b) +{ + pnanovdb_vec3_t v; + v.x = a.x - b.x; + v.y = a.y - b.y; + v.z = a.z - b.z; + return v; +} +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_vec3_mul(const pnanovdb_vec3_t a, const pnanovdb_vec3_t b) +{ + pnanovdb_vec3_t v; + v.x = a.x * b.x; + v.y = a.y * b.y; + v.z = a.z * b.z; + return v; +} +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_vec3_div(const pnanovdb_vec3_t a, const pnanovdb_vec3_t b) +{ + pnanovdb_vec3_t v; + v.x = a.x / b.x; + v.y = a.y / b.y; + v.z = a.z / b.z; + return v; +} +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_vec3_min(const pnanovdb_vec3_t a, const pnanovdb_vec3_t b) +{ + pnanovdb_vec3_t v; + v.x = a.x < b.x ? a.x : b.x; + v.y = a.y < b.y ? a.y : b.y; + v.z = a.z < b.z ? a.z : b.z; + return v; +} +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_vec3_max(const pnanovdb_vec3_t a, const pnanovdb_vec3_t b) +{ + pnanovdb_vec3_t v; + v.x = a.x > b.x ? a.x : b.x; + v.y = a.y > b.y ? a.y : b.y; + v.z = a.z > b.z ? 
a.z : b.z; + return v; +} +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_coord_to_vec3(const pnanovdb_coord_t coord) +{ + pnanovdb_vec3_t v; + v.x = pnanovdb_int32_to_float(coord.x); + v.y = pnanovdb_int32_to_float(coord.y); + v.z = pnanovdb_int32_to_float(coord.z); + return v; +} +PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_coord_uniform(const pnanovdb_int32_t a) +{ + pnanovdb_coord_t v; + v.x = a; + v.y = a; + v.z = a; + return v; +} +PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_coord_add(pnanovdb_coord_t a, pnanovdb_coord_t b) +{ + pnanovdb_coord_t v; + v.x = a.x + b.x; + v.y = a.y + b.y; + v.z = a.z + b.z; + return v; +} +#elif defined(PNANOVDB_HLSL) +pnanovdb_vec3_t pnanovdb_vec3_uniform(float a) { return float3(a, a, a); } +pnanovdb_vec3_t pnanovdb_vec3_add(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return a + b; } +pnanovdb_vec3_t pnanovdb_vec3_sub(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return a - b; } +pnanovdb_vec3_t pnanovdb_vec3_mul(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return a * b; } +pnanovdb_vec3_t pnanovdb_vec3_div(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return a / b; } +pnanovdb_vec3_t pnanovdb_vec3_min(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return min(a, b); } +pnanovdb_vec3_t pnanovdb_vec3_max(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return max(a, b); } +pnanovdb_vec3_t pnanovdb_coord_to_vec3(pnanovdb_coord_t coord) { return float3(coord); } +pnanovdb_coord_t pnanovdb_coord_uniform(pnanovdb_int32_t a) { return int3(a, a, a); } +pnanovdb_coord_t pnanovdb_coord_add(pnanovdb_coord_t a, pnanovdb_coord_t b) { return a + b; } +#elif defined(PNANOVDB_GLSL) +pnanovdb_vec3_t pnanovdb_vec3_uniform(float a) { return vec3(a, a, a); } +pnanovdb_vec3_t pnanovdb_vec3_add(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return a + b; } +pnanovdb_vec3_t pnanovdb_vec3_sub(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return a - b; } +pnanovdb_vec3_t pnanovdb_vec3_mul(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return a * b; } +pnanovdb_vec3_t pnanovdb_vec3_div(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return a / b; } +pnanovdb_vec3_t pnanovdb_vec3_min(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return min(a, b); } +pnanovdb_vec3_t pnanovdb_vec3_max(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return max(a, b); } +pnanovdb_vec3_t pnanovdb_coord_to_vec3(const pnanovdb_coord_t coord) { return vec3(coord); } +pnanovdb_coord_t pnanovdb_coord_uniform(pnanovdb_int32_t a) { return ivec3(a, a, a); } +pnanovdb_coord_t pnanovdb_coord_add(pnanovdb_coord_t a, pnanovdb_coord_t b) { return a + b; } +#endif + +// ------------------------------------------------ Uint64 Utils ----------------------------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_uint32_countbits(pnanovdb_uint32_t value) +{ +#if defined(PNANOVDB_C) +#if defined(_MSC_VER) && (_MSC_VER >= 1928) && defined(PNANOVDB_USE_INTRINSICS) + return __popcnt(value); +#elif (defined(__GNUC__) || defined(__clang__)) && defined(PNANOVDB_USE_INTRINSICS) + return __builtin_popcount(value); +#else + value = value - ((value >> 1) & 0x55555555); + value = (value & 0x33333333) + ((value >> 2) & 0x33333333); + value = (value + (value >> 4)) & 0x0F0F0F0F; + return (value * 0x01010101) >> 24; +#endif +#elif defined(PNANOVDB_HLSL) + return countbits(value); +#elif defined(PNANOVDB_GLSL) + return bitCount(value); +#endif +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_uint64_countbits(pnanovdb_uint64_t value) +{ + return pnanovdb_uint32_countbits(pnanovdb_uint64_low(value)) + pnanovdb_uint32_countbits(pnanovdb_uint64_high(value)); +} + +#if 
defined(PNANOVDB_ADDRESS_32) +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_offset(pnanovdb_uint64_t a, pnanovdb_uint32_t b) +{ + pnanovdb_uint32_t low = pnanovdb_uint64_low(a); + pnanovdb_uint32_t high = pnanovdb_uint64_high(a); + low += b; + if (low < b) + { + high += 1u; + } + return pnanovdb_uint32_as_uint64(low, high); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_dec(pnanovdb_uint64_t a) +{ + pnanovdb_uint32_t low = pnanovdb_uint64_low(a); + pnanovdb_uint32_t high = pnanovdb_uint64_high(a); + if (low == 0u) + { + high -= 1u; + } + low -= 1u; + return pnanovdb_uint32_as_uint64(low, high); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_uint64_to_uint32_lsr(pnanovdb_uint64_t a, pnanovdb_uint32_t b) +{ + pnanovdb_uint32_t low = pnanovdb_uint64_low(a); + pnanovdb_uint32_t high = pnanovdb_uint64_high(a); + return (b >= 32u) ? + (high >> (b - 32)) : + ((low >> b) | ((b > 0) ? (high << (32u - b)) : 0u)); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_bit_mask(pnanovdb_uint32_t bit_idx) +{ + pnanovdb_uint32_t mask_low = bit_idx < 32u ? 1u << bit_idx : 0u; + pnanovdb_uint32_t mask_high = bit_idx >= 32u ? 1u << (bit_idx - 32u) : 0u; + return pnanovdb_uint32_as_uint64(mask_low, mask_high); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_and(pnanovdb_uint64_t a, pnanovdb_uint64_t b) +{ + return pnanovdb_uint32_as_uint64( + pnanovdb_uint64_low(a) & pnanovdb_uint64_low(b), + pnanovdb_uint64_high(a) & pnanovdb_uint64_high(b) + ); +} + +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_uint64_any_bit(pnanovdb_uint64_t a) +{ + return pnanovdb_uint64_low(a) != 0u || pnanovdb_uint64_high(a) != 0u; +} + +#else +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_offset(pnanovdb_uint64_t a, pnanovdb_uint32_t b) +{ + return a + b; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_dec(pnanovdb_uint64_t a) +{ + return a - 1u; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_uint64_to_uint32_lsr(pnanovdb_uint64_t a, pnanovdb_uint32_t b) +{ + return pnanovdb_uint64_low(a >> b); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_bit_mask(pnanovdb_uint32_t bit_idx) +{ + return 1llu << bit_idx; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_and(pnanovdb_uint64_t a, pnanovdb_uint64_t b) +{ + return a & b; +} + +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_uint64_any_bit(pnanovdb_uint64_t a) +{ + return a != 0llu; +} +#endif + +// ------------------------------------------------ Address Type ----------------------------------------------------------- + +#if defined(PNANOVDB_ADDRESS_32) +struct pnanovdb_address_t +{ + pnanovdb_uint32_t byte_offset; +}; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_address_t) + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset(pnanovdb_address_t address, pnanovdb_uint32_t byte_offset) +{ + pnanovdb_address_t ret = address; + ret.byte_offset += byte_offset; + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset_neg(pnanovdb_address_t address, pnanovdb_uint32_t byte_offset) +{ + pnanovdb_address_t ret = address; + ret.byte_offset -= byte_offset; + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset_product(pnanovdb_address_t address, pnanovdb_uint32_t byte_offset, pnanovdb_uint32_t multiplier) +{ + pnanovdb_address_t ret = address; + ret.byte_offset += byte_offset * multiplier; + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset64(pnanovdb_address_t address, pnanovdb_uint64_t 
byte_offset) +{ + pnanovdb_address_t ret = address; + // lose high bits on 32-bit + ret.byte_offset += pnanovdb_uint64_low(byte_offset); + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset64_product(pnanovdb_address_t address, pnanovdb_uint64_t byte_offset, pnanovdb_uint32_t multiplier) +{ + pnanovdb_address_t ret = address; + ret.byte_offset += pnanovdb_uint64_low(byte_offset) * multiplier; + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_address_mask(pnanovdb_address_t address, pnanovdb_uint32_t mask) +{ + return address.byte_offset & mask; +} +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_mask_inv(pnanovdb_address_t address, pnanovdb_uint32_t mask) +{ + pnanovdb_address_t ret = address; + ret.byte_offset &= (~mask); + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_null() +{ + pnanovdb_address_t ret = { 0 }; + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_address_is_null(pnanovdb_address_t address) +{ + return address.byte_offset == 0u; +} +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_address_in_interval(pnanovdb_address_t address, pnanovdb_address_t min_address, pnanovdb_address_t max_address) +{ + return address.byte_offset >= min_address.byte_offset && address.byte_offset < max_address.byte_offset; +} +#elif defined(PNANOVDB_ADDRESS_64) +struct pnanovdb_address_t +{ + pnanovdb_uint64_t byte_offset; +}; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_address_t) + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset(pnanovdb_address_t address, pnanovdb_uint32_t byte_offset) +{ + pnanovdb_address_t ret = address; + ret.byte_offset += byte_offset; + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset_neg(pnanovdb_address_t address, pnanovdb_uint32_t byte_offset) +{ + pnanovdb_address_t ret = address; + ret.byte_offset -= byte_offset; + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset_product(pnanovdb_address_t address, pnanovdb_uint32_t byte_offset, pnanovdb_uint32_t multiplier) +{ + pnanovdb_address_t ret = address; + ret.byte_offset += pnanovdb_uint32_as_uint64_low(byte_offset) * pnanovdb_uint32_as_uint64_low(multiplier); + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset64(pnanovdb_address_t address, pnanovdb_uint64_t byte_offset) +{ + pnanovdb_address_t ret = address; + ret.byte_offset += byte_offset; + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset64_product(pnanovdb_address_t address, pnanovdb_uint64_t byte_offset, pnanovdb_uint32_t multiplier) +{ + pnanovdb_address_t ret = address; + ret.byte_offset += byte_offset * pnanovdb_uint32_as_uint64_low(multiplier); + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_address_mask(pnanovdb_address_t address, pnanovdb_uint32_t mask) +{ + return pnanovdb_uint64_low(address.byte_offset) & mask; +} +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_mask_inv(pnanovdb_address_t address, pnanovdb_uint32_t mask) +{ + pnanovdb_address_t ret = address; + ret.byte_offset &= (~pnanovdb_uint32_as_uint64_low(mask)); + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_null() +{ + pnanovdb_address_t ret = { 0 }; + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_address_is_null(pnanovdb_address_t address) +{ + return address.byte_offset == 0llu; +} +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_address_in_interval(pnanovdb_address_t address, pnanovdb_address_t 
min_address, pnanovdb_address_t max_address) +{ + return address.byte_offset >= min_address.byte_offset && address.byte_offset < max_address.byte_offset; +} +#endif + +// ------------------------------------------------ High Level Buffer Read ----------------------------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_read_uint32(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + return pnanovdb_buf_read_uint32(buf, address.byte_offset); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_read_uint64(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + return pnanovdb_buf_read_uint64(buf, address.byte_offset); +} +PNANOVDB_FORCE_INLINE pnanovdb_int32_t pnanovdb_read_int32(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + return pnanovdb_uint32_as_int32(pnanovdb_read_uint32(buf, address)); +} +PNANOVDB_FORCE_INLINE float pnanovdb_read_float(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + return pnanovdb_uint32_as_float(pnanovdb_read_uint32(buf, address)); +} +PNANOVDB_FORCE_INLINE pnanovdb_int64_t pnanovdb_read_int64(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + return pnanovdb_uint64_as_int64(pnanovdb_read_uint64(buf, address)); +} +PNANOVDB_FORCE_INLINE double pnanovdb_read_double(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + return pnanovdb_uint64_as_double(pnanovdb_read_uint64(buf, address)); +} +PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_read_coord(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + pnanovdb_coord_t ret; + ret.x = pnanovdb_uint32_as_int32(pnanovdb_read_uint32(buf, pnanovdb_address_offset(address, 0u))); + ret.y = pnanovdb_uint32_as_int32(pnanovdb_read_uint32(buf, pnanovdb_address_offset(address, 4u))); + ret.z = pnanovdb_uint32_as_int32(pnanovdb_read_uint32(buf, pnanovdb_address_offset(address, 8u))); + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_read_vec3(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + pnanovdb_vec3_t ret; + ret.x = pnanovdb_read_float(buf, pnanovdb_address_offset(address, 0u)); + ret.y = pnanovdb_read_float(buf, pnanovdb_address_offset(address, 4u)); + ret.z = pnanovdb_read_float(buf, pnanovdb_address_offset(address, 8u)); + return ret; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_read_uint16(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + pnanovdb_uint32_t raw = pnanovdb_read_uint32(buf, pnanovdb_address_mask_inv(address, 3u)); + return (raw >> (pnanovdb_address_mask(address, 2) << 3)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_read_uint8(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + pnanovdb_uint32_t raw = pnanovdb_read_uint32(buf, pnanovdb_address_mask_inv(address, 3u)); + return (raw >> (pnanovdb_address_mask(address, 3) << 3)) & 255; +} +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_read_vec3u16(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + pnanovdb_vec3_t ret; + const float scale = 1.f / 65535.f; + ret.x = scale * pnanovdb_uint32_to_float(pnanovdb_read_uint16(buf, pnanovdb_address_offset(address, 0u))) - 0.5f; + ret.y = scale * pnanovdb_uint32_to_float(pnanovdb_read_uint16(buf, pnanovdb_address_offset(address, 2u))) - 0.5f; + ret.z = scale * pnanovdb_uint32_to_float(pnanovdb_read_uint16(buf, pnanovdb_address_offset(address, 4u))) - 0.5f; + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_read_vec3u8(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + pnanovdb_vec3_t ret; + const float scale = 1.f / 255.f; + ret.x = scale * pnanovdb_uint32_to_float(pnanovdb_read_uint8(buf, 
pnanovdb_address_offset(address, 0u))) - 0.5f; + ret.y = scale * pnanovdb_uint32_to_float(pnanovdb_read_uint8(buf, pnanovdb_address_offset(address, 1u))) - 0.5f; + ret.z = scale * pnanovdb_uint32_to_float(pnanovdb_read_uint8(buf, pnanovdb_address_offset(address, 2u))) - 0.5f; + return ret; +} + +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_read_bit(pnanovdb_buf_t buf, pnanovdb_address_t address, pnanovdb_uint32_t bit_offset) +{ + pnanovdb_address_t word_address = pnanovdb_address_mask_inv(address, 3u); + pnanovdb_uint32_t bit_index = (pnanovdb_address_mask(address, 3u) << 3u) + bit_offset; + pnanovdb_uint32_t value_word = pnanovdb_buf_read_uint32(buf, word_address.byte_offset); + return ((value_word >> bit_index) & 1) != 0u; +} + +#if defined(PNANOVDB_C) +PNANOVDB_FORCE_INLINE short pnanovdb_read_half(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + pnanovdb_uint32_t raw = pnanovdb_read_uint32(buf, address); + return (short)(raw >> (pnanovdb_address_mask(address, 2) << 3)); +} +#elif defined(PNANOVDB_HLSL) +PNANOVDB_FORCE_INLINE float pnanovdb_read_half(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + pnanovdb_uint32_t raw = pnanovdb_read_uint32(buf, address); + return f16tof32(raw >> (pnanovdb_address_mask(address, 2) << 3)); +} +#elif defined(PNANOVDB_GLSL) +PNANOVDB_FORCE_INLINE float pnanovdb_read_half(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + pnanovdb_uint32_t raw = pnanovdb_read_uint32(buf, address); + return unpackHalf2x16(raw >> (pnanovdb_address_mask(address, 2) << 3)).x; +} +#endif + +// ------------------------------------------------ High Level Buffer Write ----------------------------------------------------------- + +PNANOVDB_FORCE_INLINE void pnanovdb_write_uint32(pnanovdb_buf_t buf, pnanovdb_address_t address, pnanovdb_uint32_t value) +{ + pnanovdb_buf_write_uint32(buf, address.byte_offset, value); +} +PNANOVDB_FORCE_INLINE void pnanovdb_write_uint64(pnanovdb_buf_t buf, pnanovdb_address_t address, pnanovdb_uint64_t value) +{ + pnanovdb_buf_write_uint64(buf, address.byte_offset, value); +} +PNANOVDB_FORCE_INLINE void pnanovdb_write_int32(pnanovdb_buf_t buf, pnanovdb_address_t address, pnanovdb_int32_t value) +{ + pnanovdb_write_uint32(buf, address, pnanovdb_int32_as_uint32(value)); +} +PNANOVDB_FORCE_INLINE void pnanovdb_write_int64(pnanovdb_buf_t buf, pnanovdb_address_t address, pnanovdb_int64_t value) +{ + pnanovdb_buf_write_uint64(buf, address.byte_offset, pnanovdb_int64_as_uint64(value)); +} +PNANOVDB_FORCE_INLINE void pnanovdb_write_float(pnanovdb_buf_t buf, pnanovdb_address_t address, float value) +{ + pnanovdb_write_uint32(buf, address, pnanovdb_float_as_uint32(value)); +} +PNANOVDB_FORCE_INLINE void pnanovdb_write_double(pnanovdb_buf_t buf, pnanovdb_address_t address, double value) +{ + pnanovdb_write_uint64(buf, address, pnanovdb_double_as_uint64(value)); +} +PNANOVDB_FORCE_INLINE void pnanovdb_write_coord(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) value) +{ + pnanovdb_write_uint32(buf, pnanovdb_address_offset(address, 0u), pnanovdb_int32_as_uint32(PNANOVDB_DEREF(value).x)); + pnanovdb_write_uint32(buf, pnanovdb_address_offset(address, 4u), pnanovdb_int32_as_uint32(PNANOVDB_DEREF(value).y)); + pnanovdb_write_uint32(buf, pnanovdb_address_offset(address, 8u), pnanovdb_int32_as_uint32(PNANOVDB_DEREF(value).z)); +} +PNANOVDB_FORCE_INLINE void pnanovdb_write_vec3(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_vec3_t) value) +{ + pnanovdb_write_float(buf, pnanovdb_address_offset(address, 0u), 
PNANOVDB_DEREF(value).x); + pnanovdb_write_float(buf, pnanovdb_address_offset(address, 4u), PNANOVDB_DEREF(value).y); + pnanovdb_write_float(buf, pnanovdb_address_offset(address, 8u), PNANOVDB_DEREF(value).z); +} + +// ------------------------------------------------ Core Structures ----------------------------------------------------------- + +#define PNANOVDB_MAGIC_NUMBER 0x304244566f6e614eUL// "NanoVDB0" in hex - little endian (uint64_t) +#define PNANOVDB_MAGIC_GRID 0x314244566f6e614eUL// "NanoVDB1" in hex - little endian (uint64_t) +#define PNANOVDB_MAGIC_FILE 0x324244566f6e614eUL// "NanoVDB2" in hex - little endian (uint64_t) + +#define PNANOVDB_MAJOR_VERSION_NUMBER 32// reflects changes to the ABI +#define PNANOVDB_MINOR_VERSION_NUMBER 7// reflects changes to the API but not ABI +#define PNANOVDB_PATCH_VERSION_NUMBER 0// reflects bug-fixes with no ABI or API changes + +#define PNANOVDB_GRID_TYPE_UNKNOWN 0 +#define PNANOVDB_GRID_TYPE_FLOAT 1 +#define PNANOVDB_GRID_TYPE_DOUBLE 2 +#define PNANOVDB_GRID_TYPE_INT16 3 +#define PNANOVDB_GRID_TYPE_INT32 4 +#define PNANOVDB_GRID_TYPE_INT64 5 +#define PNANOVDB_GRID_TYPE_VEC3F 6 +#define PNANOVDB_GRID_TYPE_VEC3D 7 +#define PNANOVDB_GRID_TYPE_MASK 8 +#define PNANOVDB_GRID_TYPE_HALF 9 +#define PNANOVDB_GRID_TYPE_UINT32 10 +#define PNANOVDB_GRID_TYPE_BOOLEAN 11 +#define PNANOVDB_GRID_TYPE_RGBA8 12 +#define PNANOVDB_GRID_TYPE_FP4 13 +#define PNANOVDB_GRID_TYPE_FP8 14 +#define PNANOVDB_GRID_TYPE_FP16 15 +#define PNANOVDB_GRID_TYPE_FPN 16 +#define PNANOVDB_GRID_TYPE_VEC4F 17 +#define PNANOVDB_GRID_TYPE_VEC4D 18 +#define PNANOVDB_GRID_TYPE_INDEX 19 +#define PNANOVDB_GRID_TYPE_ONINDEX 20 +#define PNANOVDB_GRID_TYPE_INDEXMASK 21 +#define PNANOVDB_GRID_TYPE_ONINDEXMASK 22 +#define PNANOVDB_GRID_TYPE_POINTINDEX 23 +#define PNANOVDB_GRID_TYPE_VEC3U8 24 +#define PNANOVDB_GRID_TYPE_VEC3U16 25 +#define PNANOVDB_GRID_TYPE_UINT8 26 +#define PNANOVDB_GRID_TYPE_END 27 + +#define PNANOVDB_GRID_CLASS_UNKNOWN 0 +#define PNANOVDB_GRID_CLASS_LEVEL_SET 1 // narrow band level set, e.g. SDF +#define PNANOVDB_GRID_CLASS_FOG_VOLUME 2 // fog volume, e.g. density +#define PNANOVDB_GRID_CLASS_STAGGERED 3 // staggered MAC grid, e.g. velocity +#define PNANOVDB_GRID_CLASS_POINT_INDEX 4 // point index grid +#define PNANOVDB_GRID_CLASS_POINT_DATA 5 // point data grid +#define PNANOVDB_GRID_CLASS_TOPOLOGY 6 // grid with active states only (no values) +#define PNANOVDB_GRID_CLASS_VOXEL_VOLUME 7 // volume of geometric cubes, e.g. minecraft +#define PNANOVDB_GRID_CLASS_INDEX_GRID 8 // grid whose values are offsets, e.g. into an external array +#define PNANOVDB_GRID_CLASS_TENSOR_GRID 9 // grid which can have extra metadata and features +#define PNANOVDB_GRID_CLASS_END 10 + +#define PNANOVDB_GRID_FLAGS_HAS_LONG_GRID_NAME (1 << 0) +#define PNANOVDB_GRID_FLAGS_HAS_BBOX (1 << 1) +#define PNANOVDB_GRID_FLAGS_HAS_MIN_MAX (1 << 2) +#define PNANOVDB_GRID_FLAGS_HAS_AVERAGE (1 << 3) +#define PNANOVDB_GRID_FLAGS_HAS_STD_DEVIATION (1 << 4) +#define PNANOVDB_GRID_FLAGS_IS_BREADTH_FIRST (1 << 5) +#define PNANOVDB_GRID_FLAGS_END (1 << 6) + +#define PNANOVDB_LEAF_TYPE_DEFAULT 0 +#define PNANOVDB_LEAF_TYPE_LITE 1 +#define PNANOVDB_LEAF_TYPE_FP 2 +#define PNANOVDB_LEAF_TYPE_INDEX 3 +#define PNANOVDB_LEAF_TYPE_INDEXMASK 4 +#define PNANOVDB_LEAF_TYPE_POINTINDEX 5 + +// BuildType = Unknown, float, double, int16_t, int32_t, int64_t, Vec3f, Vec3d, Mask, ... +// bit count of values in leaf nodes, i.e. 
8*sizeof(*nanovdb::LeafNode::mValues) or zero if no values are stored +PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_value_strides_bits[PNANOVDB_GRID_TYPE_END] = { 0, 32, 64, 16, 32, 64, 96, 192, 0, 16, 32, 1, 32, 4, 8, 16, 0, 128, 256, 0, 0, 0, 0, 16, 24, 48, 8 }; +// bit count of the Tile union in InternalNodes, i.e. 8*sizeof(nanovdb::InternalData::Tile) +PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_table_strides_bits[PNANOVDB_GRID_TYPE_END] = { 64, 64, 64, 64, 64, 64, 128, 192, 64, 64, 64, 64, 64, 64, 64, 64, 64, 128, 256, 64, 64, 64, 64, 64, 64, 64, 64 }; +// bit count of min/max values, i.e. 8*sizeof(nanovdb::LeafData::mMinimum) or zero if no min/max exists +PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_minmax_strides_bits[PNANOVDB_GRID_TYPE_END] = { 0, 32, 64, 16, 32, 64, 96, 192, 8, 16, 32, 8, 32, 32, 32, 32, 32, 128, 256, 64, 64, 64, 64, 64, 24, 48, 8 }; +// bit alignment of the value type, controlled by the smallest native type, which is why it is always 0, 8, 16, 32, or 64, e.g. for Vec3f it is 32 +PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_minmax_aligns_bits[PNANOVDB_GRID_TYPE_END] = { 0, 32, 64, 16, 32, 64, 32, 64, 8, 16, 32, 8, 32, 32, 32, 32, 32, 32, 64, 64, 64, 64, 64, 64, 8, 16, 8 }; +// bit alignment of the stats (avg/std-dev) types, e.g. 8*sizeof(nanovdb::LeafData::mAverage) +PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_stat_strides_bits[PNANOVDB_GRID_TYPE_END] = { 0, 32, 64, 32, 32, 64, 32, 64, 8, 32, 32, 8, 32, 32, 32, 32, 32, 32, 64, 64, 64, 64, 64, 64, 32, 32, 32 }; +// one of the 4 leaf types defined above, e.g. PNANOVDB_LEAF_TYPE_INDEX = 3 +PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_leaf_type[PNANOVDB_GRID_TYPE_END] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 2, 2, 2, 2, 0, 0, 3, 3, 4, 4, 5, 0, 0, 0 }; + +struct pnanovdb_map_t +{ + float matf[9]; + float invmatf[9]; + float vecf[3]; + float taperf; + double matd[9]; + double invmatd[9]; + double vecd[3]; + double taperd; +}; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_map_t) +struct pnanovdb_map_handle_t { pnanovdb_address_t address; }; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_map_handle_t) + +#define PNANOVDB_MAP_SIZE 264 + +#define PNANOVDB_MAP_OFF_MATF 0 +#define PNANOVDB_MAP_OFF_INVMATF 36 +#define PNANOVDB_MAP_OFF_VECF 72 +#define PNANOVDB_MAP_OFF_TAPERF 84 +#define PNANOVDB_MAP_OFF_MATD 88 +#define PNANOVDB_MAP_OFF_INVMATD 160 +#define PNANOVDB_MAP_OFF_VECD 232 +#define PNANOVDB_MAP_OFF_TAPERD 256 + +PNANOVDB_FORCE_INLINE float pnanovdb_map_get_matf(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index) { + return pnanovdb_read_float(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_MATF + 4u * index)); +} +PNANOVDB_FORCE_INLINE float pnanovdb_map_get_invmatf(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index) { + return pnanovdb_read_float(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_INVMATF + 4u * index)); +} +PNANOVDB_FORCE_INLINE float pnanovdb_map_get_vecf(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index) { + return pnanovdb_read_float(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_VECF + 4u * index)); +} +PNANOVDB_FORCE_INLINE float pnanovdb_map_get_taperf(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index) { + return pnanovdb_read_float(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_TAPERF)); +} +PNANOVDB_FORCE_INLINE double pnanovdb_map_get_matd(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index) { + return 
pnanovdb_read_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_MATD + 8u * index)); +} +PNANOVDB_FORCE_INLINE double pnanovdb_map_get_invmatd(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index) { + return pnanovdb_read_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_INVMATD + 8u * index)); +} +PNANOVDB_FORCE_INLINE double pnanovdb_map_get_vecd(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index) { + return pnanovdb_read_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_VECD + 8u * index)); +} +PNANOVDB_FORCE_INLINE double pnanovdb_map_get_taperd(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index) { + return pnanovdb_read_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_TAPERD)); +} + +PNANOVDB_FORCE_INLINE void pnanovdb_map_set_matf(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, float matf) { + pnanovdb_write_float(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_MATF + 4u * index), matf); +} +PNANOVDB_FORCE_INLINE void pnanovdb_map_set_invmatf(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, float invmatf) { + pnanovdb_write_float(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_INVMATF + 4u * index), invmatf); +} +PNANOVDB_FORCE_INLINE void pnanovdb_map_set_vecf(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, float vecf) { + pnanovdb_write_float(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_VECF + 4u * index), vecf); +} +PNANOVDB_FORCE_INLINE void pnanovdb_map_set_taperf(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, float taperf) { + pnanovdb_write_float(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_TAPERF), taperf); +} +PNANOVDB_FORCE_INLINE void pnanovdb_map_set_matd(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, double matd) { + pnanovdb_write_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_MATD + 8u * index), matd); +} +PNANOVDB_FORCE_INLINE void pnanovdb_map_set_invmatd(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, double invmatd) { + pnanovdb_write_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_INVMATD + 8u * index), invmatd); +} +PNANOVDB_FORCE_INLINE void pnanovdb_map_set_vecd(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, double vecd) { + pnanovdb_write_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_VECD + 8u * index), vecd); +} +PNANOVDB_FORCE_INLINE void pnanovdb_map_set_taperd(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, double taperd) { + pnanovdb_write_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_TAPERD), taperd); +} + +struct pnanovdb_grid_t +{ + pnanovdb_uint64_t magic; // 8 bytes, 0 + pnanovdb_uint64_t checksum; // 8 bytes, 8 + pnanovdb_uint32_t version; // 4 bytes, 16 + pnanovdb_uint32_t flags; // 4 bytes, 20 + pnanovdb_uint32_t grid_index; // 4 bytes, 24 + pnanovdb_uint32_t grid_count; // 4 bytes, 28 + pnanovdb_uint64_t grid_size; // 8 bytes, 32 + pnanovdb_uint32_t grid_name[256 / 4]; // 256 bytes, 40 + pnanovdb_map_t map; // 264 bytes, 296 + double world_bbox[6]; // 48 bytes, 560 + double voxel_size[3]; // 24 bytes, 608 + pnanovdb_uint32_t grid_class; // 4 bytes, 632 + pnanovdb_uint32_t grid_type; // 4 bytes, 636 + pnanovdb_int64_t blind_metadata_offset; // 8 bytes, 640 + pnanovdb_uint32_t blind_metadata_count; // 4 bytes, 648 + pnanovdb_uint32_t pad[5]; // 20 bytes, 652 
+}; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_grid_t) +struct pnanovdb_grid_handle_t { pnanovdb_address_t address; }; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_grid_handle_t) + +#define PNANOVDB_GRID_SIZE 672 + +#define PNANOVDB_GRID_OFF_MAGIC 0 +#define PNANOVDB_GRID_OFF_CHECKSUM 8 +#define PNANOVDB_GRID_OFF_VERSION 16 +#define PNANOVDB_GRID_OFF_FLAGS 20 +#define PNANOVDB_GRID_OFF_GRID_INDEX 24 +#define PNANOVDB_GRID_OFF_GRID_COUNT 28 +#define PNANOVDB_GRID_OFF_GRID_SIZE 32 +#define PNANOVDB_GRID_OFF_GRID_NAME 40 +#define PNANOVDB_GRID_OFF_MAP 296 +#define PNANOVDB_GRID_OFF_WORLD_BBOX 560 +#define PNANOVDB_GRID_OFF_VOXEL_SIZE 608 +#define PNANOVDB_GRID_OFF_GRID_CLASS 632 +#define PNANOVDB_GRID_OFF_GRID_TYPE 636 +#define PNANOVDB_GRID_OFF_BLIND_METADATA_OFFSET 640 +#define PNANOVDB_GRID_OFF_BLIND_METADATA_COUNT 648 + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_grid_get_magic(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p) { + return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_MAGIC)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_grid_get_checksum(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p) { + return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_CHECKSUM)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_grid_get_version(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_VERSION)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_grid_get_flags(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_FLAGS)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_grid_get_grid_index(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_INDEX)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_grid_get_grid_count(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_COUNT)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_grid_get_grid_size(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p) { + return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_SIZE)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_grid_get_grid_name(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t index) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_NAME + 4u * index)); +} +PNANOVDB_FORCE_INLINE pnanovdb_map_handle_t pnanovdb_grid_get_map(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p) { + pnanovdb_map_handle_t ret; + ret.address = pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_MAP); + return ret; +} +PNANOVDB_FORCE_INLINE double pnanovdb_grid_get_world_bbox(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t index) { + return pnanovdb_read_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_WORLD_BBOX + 8u * index)); +} +PNANOVDB_FORCE_INLINE double pnanovdb_grid_get_voxel_size(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t index) { + return pnanovdb_read_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_VOXEL_SIZE + 8u * index)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_grid_get_grid_class(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, 
PNANOVDB_GRID_OFF_GRID_CLASS)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_grid_get_grid_type(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_TYPE)); +} +PNANOVDB_FORCE_INLINE pnanovdb_int64_t pnanovdb_grid_get_blind_metadata_offset(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p) { + return pnanovdb_read_int64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_BLIND_METADATA_OFFSET)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_grid_get_blind_metadata_count(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_BLIND_METADATA_COUNT)); +} + +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_magic(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint64_t magic) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_MAGIC), magic); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_checksum(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint64_t checksum) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_CHECKSUM), checksum); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_version(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t version) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_VERSION), version); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_flags(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t flags) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_FLAGS), flags); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_grid_index(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t grid_index) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_INDEX), grid_index); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_grid_count(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t grid_count) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_COUNT), grid_count); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_grid_size(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint64_t grid_size) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_SIZE), grid_size); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_grid_name(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t index, pnanovdb_uint32_t grid_name) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_NAME + 4u * index), grid_name); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_world_bbox(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t index, double world_bbox) { + pnanovdb_write_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_WORLD_BBOX + 8u * index), world_bbox); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_voxel_size(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t index, double voxel_size) { + pnanovdb_write_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_VOXEL_SIZE + 8u * index), voxel_size); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_grid_class(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t grid_class) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_CLASS), grid_class); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_grid_type(pnanovdb_buf_t buf, 
pnanovdb_grid_handle_t p, pnanovdb_uint32_t grid_type) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_TYPE), grid_type); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_blind_metadata_offset(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint64_t blind_metadata_offset) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_BLIND_METADATA_OFFSET), blind_metadata_offset); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_blind_metadata_count(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t metadata_count) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_BLIND_METADATA_COUNT), metadata_count); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_make_version(pnanovdb_uint32_t major, pnanovdb_uint32_t minor, pnanovdb_uint32_t patch_num) +{ + return (major << 21u) | (minor << 10u) | patch_num; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_version_get_major(pnanovdb_uint32_t version) +{ + return (version >> 21u) & ((1u << 11u) - 1u); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_version_get_minor(pnanovdb_uint32_t version) +{ + return (version >> 10u) & ((1u << 11u) - 1u); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_version_get_patch(pnanovdb_uint32_t version) +{ + return version & ((1u << 10u) - 1u); +} + +struct pnanovdb_gridblindmetadata_t +{ + pnanovdb_int64_t data_offset; // 8 bytes, 0 + pnanovdb_uint64_t value_count; // 8 bytes, 8 + pnanovdb_uint32_t value_size; // 4 bytes, 16 + pnanovdb_uint32_t semantic; // 4 bytes, 20 + pnanovdb_uint32_t data_class; // 4 bytes, 24 + pnanovdb_uint32_t data_type; // 4 bytes, 28 + pnanovdb_uint32_t name[256 / 4]; // 256 bytes, 32 +}; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_gridblindmetadata_t) +struct pnanovdb_gridblindmetadata_handle_t { pnanovdb_address_t address; }; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_gridblindmetadata_handle_t) + +#define PNANOVDB_GRIDBLINDMETADATA_SIZE 288 + +#define PNANOVDB_GRIDBLINDMETADATA_OFF_DATA_OFFSET 0 +#define PNANOVDB_GRIDBLINDMETADATA_OFF_VALUE_COUNT 8 +#define PNANOVDB_GRIDBLINDMETADATA_OFF_VALUE_SIZE 16 +#define PNANOVDB_GRIDBLINDMETADATA_OFF_SEMANTIC 20 +#define PNANOVDB_GRIDBLINDMETADATA_OFF_DATA_CLASS 24 +#define PNANOVDB_GRIDBLINDMETADATA_OFF_DATA_TYPE 28 +#define PNANOVDB_GRIDBLINDMETADATA_OFF_NAME 32 + +PNANOVDB_FORCE_INLINE pnanovdb_int64_t pnanovdb_gridblindmetadata_get_data_offset(pnanovdb_buf_t buf, pnanovdb_gridblindmetadata_handle_t p) { + return pnanovdb_read_int64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRIDBLINDMETADATA_OFF_DATA_OFFSET)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_gridblindmetadata_get_value_count(pnanovdb_buf_t buf, pnanovdb_gridblindmetadata_handle_t p) { + return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRIDBLINDMETADATA_OFF_VALUE_COUNT)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_gridblindmetadata_get_value_size(pnanovdb_buf_t buf, pnanovdb_gridblindmetadata_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRIDBLINDMETADATA_OFF_VALUE_SIZE)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_gridblindmetadata_get_semantic(pnanovdb_buf_t buf, pnanovdb_gridblindmetadata_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRIDBLINDMETADATA_OFF_SEMANTIC)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_gridblindmetadata_get_data_class(pnanovdb_buf_t buf, pnanovdb_gridblindmetadata_handle_t p) { 
+ return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRIDBLINDMETADATA_OFF_DATA_CLASS)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_gridblindmetadata_get_data_type(pnanovdb_buf_t buf, pnanovdb_gridblindmetadata_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRIDBLINDMETADATA_OFF_DATA_TYPE)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_gridblindmetadata_get_name(pnanovdb_buf_t buf, pnanovdb_gridblindmetadata_handle_t p, pnanovdb_uint32_t index) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRIDBLINDMETADATA_OFF_NAME + 4u * index)); +} + +struct pnanovdb_tree_t +{ + pnanovdb_uint64_t node_offset_leaf; + pnanovdb_uint64_t node_offset_lower; + pnanovdb_uint64_t node_offset_upper; + pnanovdb_uint64_t node_offset_root; + pnanovdb_uint32_t node_count_leaf; + pnanovdb_uint32_t node_count_lower; + pnanovdb_uint32_t node_count_upper; + pnanovdb_uint32_t tile_count_leaf; + pnanovdb_uint32_t tile_count_lower; + pnanovdb_uint32_t tile_count_upper; + pnanovdb_uint64_t voxel_count; +}; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_tree_t) +struct pnanovdb_tree_handle_t { pnanovdb_address_t address; }; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_tree_handle_t) + +#define PNANOVDB_TREE_SIZE 64 + +#define PNANOVDB_TREE_OFF_NODE_OFFSET_LEAF 0 +#define PNANOVDB_TREE_OFF_NODE_OFFSET_LOWER 8 +#define PNANOVDB_TREE_OFF_NODE_OFFSET_UPPER 16 +#define PNANOVDB_TREE_OFF_NODE_OFFSET_ROOT 24 +#define PNANOVDB_TREE_OFF_NODE_COUNT_LEAF 32 +#define PNANOVDB_TREE_OFF_NODE_COUNT_LOWER 36 +#define PNANOVDB_TREE_OFF_NODE_COUNT_UPPER 40 +#define PNANOVDB_TREE_OFF_TILE_COUNT_LEAF 44 +#define PNANOVDB_TREE_OFF_TILE_COUNT_LOWER 48 +#define PNANOVDB_TREE_OFF_TILE_COUNT_UPPER 52 +#define PNANOVDB_TREE_OFF_VOXEL_COUNT 56 + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_tree_get_node_offset_leaf(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p) { + return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_OFFSET_LEAF)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_tree_get_node_offset_lower(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p) { + return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_OFFSET_LOWER)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_tree_get_node_offset_upper(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p) { + return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_OFFSET_UPPER)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_tree_get_node_offset_root(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p) { + return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_OFFSET_ROOT)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_tree_get_node_count_leaf(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_COUNT_LEAF)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_tree_get_node_count_lower(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_COUNT_LOWER)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_tree_get_node_count_upper(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_COUNT_UPPER)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_tree_get_tile_count_leaf(pnanovdb_buf_t 
buf, pnanovdb_tree_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_TILE_COUNT_LEAF)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_tree_get_tile_count_lower(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_TILE_COUNT_LOWER)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_tree_get_tile_count_upper(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_TILE_COUNT_UPPER)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_tree_get_voxel_count(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p) { + return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_VOXEL_COUNT)); +} + +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_offset_leaf(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint64_t node_offset_leaf) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_OFFSET_LEAF), node_offset_leaf); +} +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_offset_lower(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint64_t node_offset_lower) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_OFFSET_LOWER), node_offset_lower); +} +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_offset_upper(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint64_t node_offset_upper) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_OFFSET_UPPER), node_offset_upper); +} +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_offset_root(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint64_t node_offset_root) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_OFFSET_ROOT), node_offset_root); +} +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_count_leaf(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint32_t node_count_leaf) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_COUNT_LEAF), node_count_leaf); +} +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_count_lower(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint32_t node_count_lower) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_COUNT_LOWER), node_count_lower); +} +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_count_upper(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint32_t node_count_upper) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_COUNT_UPPER), node_count_upper); +} +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_tile_count_leaf(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint32_t tile_count_leaf) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_TILE_COUNT_LEAF), tile_count_leaf); +} +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_tile_count_lower(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint32_t tile_count_lower) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_TILE_COUNT_LOWER), tile_count_lower); +} +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_tile_count_upper(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint32_t tile_count_upper) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_TILE_COUNT_UPPER), tile_count_upper); +} 
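+// --- Illustrative usage sketch (editorial; not part of the upstream header) ---
+// The handle/accessor pattern above is plain byte-offset arithmetic over a flat
+// buffer: the grid struct sits at offset 0 and its fixed-size tree struct
+// follows immediately after it. The disabled sketch below assumes the plain C
+// buffer backend from the top of this header (pnanovdb_make_buf() wrapping an
+// in-memory array of 32-bit words that holds a serialized NanoVDB grid); the
+// function name pnanovdb_example_leaf_count is illustrative only.
+#if 0
+PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_example_leaf_count(pnanovdb_uint32_t* words, pnanovdb_uint64_t word_count)
+{
+    pnanovdb_buf_t buf = pnanovdb_make_buf(words, word_count);
+
+    // The grid header occupies the first PNANOVDB_GRID_SIZE bytes of the buffer.
+    pnanovdb_grid_handle_t grid;
+    grid.address = pnanovdb_address_null();
+
+    // Reject buffers that do not start with one of the NanoVDB magic values.
+    pnanovdb_uint64_t magic = pnanovdb_grid_get_magic(buf, grid);
+    if (!pnanovdb_uint64_is_equal(magic, PNANOVDB_MAGIC_NUMBER) &&
+        !pnanovdb_uint64_is_equal(magic, PNANOVDB_MAGIC_GRID))
+        return 0u;
+
+    // The tree struct is laid out directly after the grid struct.
+    pnanovdb_tree_handle_t tree;
+    tree.address = pnanovdb_address_offset(grid.address, PNANOVDB_GRID_SIZE);
+    return pnanovdb_tree_get_node_count_leaf(buf, tree);
+}
+#endif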
+PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_voxel_count(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint64_t voxel_count) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_VOXEL_COUNT), voxel_count); +} + +struct pnanovdb_root_t +{ + pnanovdb_coord_t bbox_min; + pnanovdb_coord_t bbox_max; + pnanovdb_uint32_t table_size; + pnanovdb_uint32_t pad1; // background can start here + // background, min, max +}; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_root_t) +struct pnanovdb_root_handle_t { pnanovdb_address_t address; }; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_root_handle_t) + +#define PNANOVDB_ROOT_BASE_SIZE 28 + +#define PNANOVDB_ROOT_OFF_BBOX_MIN 0 +#define PNANOVDB_ROOT_OFF_BBOX_MAX 12 +#define PNANOVDB_ROOT_OFF_TABLE_SIZE 24 + +PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_root_get_bbox_min(pnanovdb_buf_t buf, pnanovdb_root_handle_t p) { + return pnanovdb_read_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_OFF_BBOX_MIN)); +} +PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_root_get_bbox_max(pnanovdb_buf_t buf, pnanovdb_root_handle_t p) { + return pnanovdb_read_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_OFF_BBOX_MAX)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_root_get_tile_count(pnanovdb_buf_t buf, pnanovdb_root_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_OFF_TABLE_SIZE)); +} + +PNANOVDB_FORCE_INLINE void pnanovdb_root_set_bbox_min(pnanovdb_buf_t buf, pnanovdb_root_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_min) { + pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_OFF_BBOX_MIN), bbox_min); +} +PNANOVDB_FORCE_INLINE void pnanovdb_root_set_bbox_max(pnanovdb_buf_t buf, pnanovdb_root_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_max) { + pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_OFF_BBOX_MAX), bbox_max); +} +PNANOVDB_FORCE_INLINE void pnanovdb_root_set_tile_count(pnanovdb_buf_t buf, pnanovdb_root_handle_t p, pnanovdb_uint32_t tile_count) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_OFF_TABLE_SIZE), tile_count); +} + +struct pnanovdb_root_tile_t +{ + pnanovdb_uint64_t key; + pnanovdb_int64_t child; // signed byte offset from root to the child node, 0 means it is a constant tile, so use value + pnanovdb_uint32_t state; + pnanovdb_uint32_t pad1; // value can start here + // value +}; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_root_tile_t) +struct pnanovdb_root_tile_handle_t { pnanovdb_address_t address; }; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_root_tile_handle_t) + +#define PNANOVDB_ROOT_TILE_BASE_SIZE 20 + +#define PNANOVDB_ROOT_TILE_OFF_KEY 0 +#define PNANOVDB_ROOT_TILE_OFF_CHILD 8 +#define PNANOVDB_ROOT_TILE_OFF_STATE 16 + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_root_tile_get_key(pnanovdb_buf_t buf, pnanovdb_root_tile_handle_t p) { + return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_TILE_OFF_KEY)); +} +PNANOVDB_FORCE_INLINE pnanovdb_int64_t pnanovdb_root_tile_get_child(pnanovdb_buf_t buf, pnanovdb_root_tile_handle_t p) { + return pnanovdb_read_int64(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_TILE_OFF_CHILD)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_root_tile_get_state(pnanovdb_buf_t buf, pnanovdb_root_tile_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_TILE_OFF_STATE)); +} + +PNANOVDB_FORCE_INLINE void pnanovdb_root_tile_set_key(pnanovdb_buf_t buf, 
pnanovdb_root_tile_handle_t p, pnanovdb_uint64_t key) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_TILE_OFF_KEY), key); +} +PNANOVDB_FORCE_INLINE void pnanovdb_root_tile_set_child(pnanovdb_buf_t buf, pnanovdb_root_tile_handle_t p, pnanovdb_int64_t child) { + pnanovdb_write_int64(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_TILE_OFF_CHILD), child); +} +PNANOVDB_FORCE_INLINE void pnanovdb_root_tile_set_state(pnanovdb_buf_t buf, pnanovdb_root_tile_handle_t p, pnanovdb_uint32_t state) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_TILE_OFF_STATE), state); +} + +struct pnanovdb_upper_t +{ + pnanovdb_coord_t bbox_min; + pnanovdb_coord_t bbox_max; + pnanovdb_uint64_t flags; + pnanovdb_uint32_t value_mask[1024]; + pnanovdb_uint32_t child_mask[1024]; + // min, max + // alignas(32) pnanovdb_uint32_t table[]; +}; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_upper_t) +struct pnanovdb_upper_handle_t { pnanovdb_address_t address; }; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_upper_handle_t) + +#define PNANOVDB_UPPER_TABLE_COUNT 32768 +#define PNANOVDB_UPPER_BASE_SIZE 8224 + +#define PNANOVDB_UPPER_OFF_BBOX_MIN 0 +#define PNANOVDB_UPPER_OFF_BBOX_MAX 12 +#define PNANOVDB_UPPER_OFF_FLAGS 24 +#define PNANOVDB_UPPER_OFF_VALUE_MASK 32 +#define PNANOVDB_UPPER_OFF_CHILD_MASK 4128 + +PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_upper_get_bbox_min(pnanovdb_buf_t buf, pnanovdb_upper_handle_t p) { + return pnanovdb_read_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_UPPER_OFF_BBOX_MIN)); +} +PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_upper_get_bbox_max(pnanovdb_buf_t buf, pnanovdb_upper_handle_t p) { + return pnanovdb_read_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_UPPER_OFF_BBOX_MAX)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_upper_get_flags(pnanovdb_buf_t buf, pnanovdb_upper_handle_t p) { + return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_UPPER_OFF_FLAGS)); +} +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_upper_get_value_mask(pnanovdb_buf_t buf, pnanovdb_upper_handle_t p, pnanovdb_uint32_t bit_index) { + pnanovdb_uint32_t value = pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_UPPER_OFF_VALUE_MASK + 4u * (bit_index >> 5u))); + return ((value >> (bit_index & 31u)) & 1) != 0u; +} +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_upper_get_child_mask(pnanovdb_buf_t buf, pnanovdb_upper_handle_t p, pnanovdb_uint32_t bit_index) { + pnanovdb_uint32_t value = pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_UPPER_OFF_CHILD_MASK + 4u * (bit_index >> 5u))); + return ((value >> (bit_index & 31u)) & 1) != 0u; +} + +PNANOVDB_FORCE_INLINE void pnanovdb_upper_set_bbox_min(pnanovdb_buf_t buf, pnanovdb_upper_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_min) { + pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_UPPER_OFF_BBOX_MIN), bbox_min); +} +PNANOVDB_FORCE_INLINE void pnanovdb_upper_set_bbox_max(pnanovdb_buf_t buf, pnanovdb_upper_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_max) { + pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_UPPER_OFF_BBOX_MAX), bbox_max); +} +PNANOVDB_FORCE_INLINE void pnanovdb_upper_set_child_mask(pnanovdb_buf_t buf, pnanovdb_upper_handle_t p, pnanovdb_uint32_t bit_index, pnanovdb_bool_t value) { + pnanovdb_address_t addr = pnanovdb_address_offset(p.address, PNANOVDB_UPPER_OFF_CHILD_MASK + 4u * (bit_index >> 5u)); + pnanovdb_uint32_t valueMask = pnanovdb_read_uint32(buf, addr); + if (!value) 
{ valueMask &= ~(1u << (bit_index & 31u)); } + if (value) valueMask |= (1u << (bit_index & 31u)); + pnanovdb_write_uint32(buf, addr, valueMask); +} + +struct pnanovdb_lower_t +{ + pnanovdb_coord_t bbox_min; + pnanovdb_coord_t bbox_max; + pnanovdb_uint64_t flags; + pnanovdb_uint32_t value_mask[128]; + pnanovdb_uint32_t child_mask[128]; + // min, max + // alignas(32) pnanovdb_uint32_t table[]; +}; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_lower_t) +struct pnanovdb_lower_handle_t { pnanovdb_address_t address; }; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_lower_handle_t) + +#define PNANOVDB_LOWER_TABLE_COUNT 4096 +#define PNANOVDB_LOWER_BASE_SIZE 1056 + +#define PNANOVDB_LOWER_OFF_BBOX_MIN 0 +#define PNANOVDB_LOWER_OFF_BBOX_MAX 12 +#define PNANOVDB_LOWER_OFF_FLAGS 24 +#define PNANOVDB_LOWER_OFF_VALUE_MASK 32 +#define PNANOVDB_LOWER_OFF_CHILD_MASK 544 + +PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_lower_get_bbox_min(pnanovdb_buf_t buf, pnanovdb_lower_handle_t p) { + return pnanovdb_read_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_LOWER_OFF_BBOX_MIN)); +} +PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_lower_get_bbox_max(pnanovdb_buf_t buf, pnanovdb_lower_handle_t p) { + return pnanovdb_read_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_LOWER_OFF_BBOX_MAX)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_lower_get_flags(pnanovdb_buf_t buf, pnanovdb_lower_handle_t p) { + return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_LOWER_OFF_FLAGS)); +} +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_lower_get_value_mask(pnanovdb_buf_t buf, pnanovdb_lower_handle_t p, pnanovdb_uint32_t bit_index) { + pnanovdb_uint32_t value = pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_LOWER_OFF_VALUE_MASK + 4u * (bit_index >> 5u))); + return ((value >> (bit_index & 31u)) & 1) != 0u; +} +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_lower_get_child_mask(pnanovdb_buf_t buf, pnanovdb_lower_handle_t p, pnanovdb_uint32_t bit_index) { + pnanovdb_uint32_t value = pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_LOWER_OFF_CHILD_MASK + 4u * (bit_index >> 5u))); + return ((value >> (bit_index & 31u)) & 1) != 0u; +} + +PNANOVDB_FORCE_INLINE void pnanovdb_lower_set_bbox_min(pnanovdb_buf_t buf, pnanovdb_lower_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_min) { + pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_LOWER_OFF_BBOX_MIN), bbox_min); +} +PNANOVDB_FORCE_INLINE void pnanovdb_lower_set_bbox_max(pnanovdb_buf_t buf, pnanovdb_lower_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_max) { + pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_LOWER_OFF_BBOX_MAX), bbox_max); +} +PNANOVDB_FORCE_INLINE void pnanovdb_lower_set_child_mask(pnanovdb_buf_t buf, pnanovdb_lower_handle_t p, pnanovdb_uint32_t bit_index, pnanovdb_bool_t value) { + pnanovdb_address_t addr = pnanovdb_address_offset(p.address, PNANOVDB_LOWER_OFF_CHILD_MASK + 4u * (bit_index >> 5u)); + pnanovdb_uint32_t valueMask = pnanovdb_read_uint32(buf, addr); + if (!value) { valueMask &= ~(1u << (bit_index & 31u)); } + if (value) valueMask |= (1u << (bit_index & 31u)); + pnanovdb_write_uint32(buf, addr, valueMask); +} + +struct pnanovdb_leaf_t +{ + pnanovdb_coord_t bbox_min; + pnanovdb_uint32_t bbox_dif_and_flags; + pnanovdb_uint32_t value_mask[16]; + // min, max + // alignas(32) pnanovdb_uint32_t values[]; +}; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_leaf_t) +struct pnanovdb_leaf_handle_t { pnanovdb_address_t address; }; 
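+// --- Illustrative sketch (editorial; not part of the upstream header) -------
+// How a voxel coordinate maps to the bit index consumed by the mask accessors:
+// the tree uses the fixed NanoVDB 5-4-3 configuration, so upper nodes hold
+// 32^3 children, lower nodes 16^3 children, and leaves 8^3 voxels. The helper
+// below (disabled, name pnanovdb_example_leaf_bit_index is illustrative) packs
+// the low 3 bits of each axis into the 0..511 index that, e.g.,
+// pnanovdb_leaf_get_value_mask() below expects; the full header provides its
+// own coord-to-offset helpers for all node levels further down.
+#if 0
+PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_example_leaf_bit_index(PNANOVDB_IN(pnanovdb_coord_t) ijk)
+{
+    // Voxel position inside an 8x8x8 leaf, packed as (x << 6) | (y << 3) | z.
+    return (((pnanovdb_uint32_t)(PNANOVDB_DEREF(ijk).x & 7)) << 6) |
+           (((pnanovdb_uint32_t)(PNANOVDB_DEREF(ijk).y & 7)) << 3) |
+            ((pnanovdb_uint32_t)(PNANOVDB_DEREF(ijk).z & 7));
+}
+#endif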
+PNANOVDB_STRUCT_TYPEDEF(pnanovdb_leaf_handle_t) + +#define PNANOVDB_LEAF_TABLE_COUNT 512 +#define PNANOVDB_LEAF_BASE_SIZE 80 + +#define PNANOVDB_LEAF_OFF_BBOX_MIN 0 +#define PNANOVDB_LEAF_OFF_BBOX_DIF_AND_FLAGS 12 +#define PNANOVDB_LEAF_OFF_VALUE_MASK 16 + +#define PNANOVDB_LEAF_TABLE_NEG_OFF_BBOX_DIF_AND_FLAGS 84 +#define PNANOVDB_LEAF_TABLE_NEG_OFF_MINIMUM 16 +#define PNANOVDB_LEAF_TABLE_NEG_OFF_QUANTUM 12 + +PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_leaf_get_bbox_min(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t p) { + return pnanovdb_read_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_LEAF_OFF_BBOX_MIN)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_leaf_get_bbox_dif_and_flags(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_LEAF_OFF_BBOX_DIF_AND_FLAGS)); +} +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_leaf_get_value_mask(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t p, pnanovdb_uint32_t bit_index) { + pnanovdb_uint32_t value = pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_LEAF_OFF_VALUE_MASK + 4u * (bit_index >> 5u))); + return ((value >> (bit_index & 31u)) & 1) != 0u; +} + +PNANOVDB_FORCE_INLINE void pnanovdb_leaf_set_bbox_min(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_min) { + pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_LEAF_OFF_BBOX_MIN), bbox_min); +} +PNANOVDB_FORCE_INLINE void pnanovdb_leaf_set_bbox_dif_and_flags(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t p, pnanovdb_uint32_t bbox_dif_and_flags) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_LEAF_OFF_BBOX_DIF_AND_FLAGS), bbox_dif_and_flags); +} + +struct pnanovdb_grid_type_constants_t +{ + pnanovdb_uint32_t root_off_background; + pnanovdb_uint32_t root_off_min; + pnanovdb_uint32_t root_off_max; + pnanovdb_uint32_t root_off_ave; + pnanovdb_uint32_t root_off_stddev; + pnanovdb_uint32_t root_size; + pnanovdb_uint32_t value_stride_bits; + pnanovdb_uint32_t table_stride; + pnanovdb_uint32_t root_tile_off_value; + pnanovdb_uint32_t root_tile_size; + pnanovdb_uint32_t upper_off_min; + pnanovdb_uint32_t upper_off_max; + pnanovdb_uint32_t upper_off_ave; + pnanovdb_uint32_t upper_off_stddev; + pnanovdb_uint32_t upper_off_table; + pnanovdb_uint32_t upper_size; + pnanovdb_uint32_t lower_off_min; + pnanovdb_uint32_t lower_off_max; + pnanovdb_uint32_t lower_off_ave; + pnanovdb_uint32_t lower_off_stddev; + pnanovdb_uint32_t lower_off_table; + pnanovdb_uint32_t lower_size; + pnanovdb_uint32_t leaf_off_min; + pnanovdb_uint32_t leaf_off_max; + pnanovdb_uint32_t leaf_off_ave; + pnanovdb_uint32_t leaf_off_stddev; + pnanovdb_uint32_t leaf_off_table; + pnanovdb_uint32_t leaf_size; +}; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_grid_type_constants_t) + +// The following table with offsets will nedd to be updates as new GridTypes are added in NanoVDB.h +PNANOVDB_STATIC_CONST pnanovdb_grid_type_constants_t pnanovdb_grid_type_constants[PNANOVDB_GRID_TYPE_END] = +{ +{28, 28, 28, 28, 28, 32, 0, 8, 20, 32, 8224, 8224, 8224, 8224, 8224, 270368, 1056, 1056, 1056, 1056, 1056, 33824, 80, 80, 80, 80, 96, 96}, +{28, 32, 36, 40, 44, 64, 32, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 80, 84, 88, 92, 96, 2144}, +{32, 40, 48, 56, 64, 96, 64, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 88, 96, 104, 128, 4224}, +{28, 30, 32, 36, 40, 64, 16, 8, 20, 32, 8224, 8226, 8228, 8232, 8256, 
270400, 1056, 1058, 1060, 1064, 1088, 33856, 80, 82, 84, 88, 96, 1120}, +{28, 32, 36, 40, 44, 64, 32, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 80, 84, 88, 92, 96, 2144}, +{32, 40, 48, 56, 64, 96, 64, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 88, 96, 104, 128, 4224}, +{28, 40, 52, 64, 68, 96, 96, 16, 20, 32, 8224, 8236, 8248, 8252, 8256, 532544, 1056, 1068, 1080, 1084, 1088, 66624, 80, 92, 104, 108, 128, 6272}, +{32, 56, 80, 104, 112, 128, 192, 24, 24, 64, 8224, 8248, 8272, 8280, 8288, 794720, 1056, 1080, 1104, 1112, 1120, 99424, 80, 104, 128, 136, 160, 12448}, +{28, 29, 30, 31, 32, 64, 0, 8, 20, 32, 8224, 8225, 8226, 8227, 8256, 270400, 1056, 1057, 1058, 1059, 1088, 33856, 80, 80, 80, 80, 96, 96}, +{28, 30, 32, 36, 40, 64, 16, 8, 20, 32, 8224, 8226, 8228, 8232, 8256, 270400, 1056, 1058, 1060, 1064, 1088, 33856, 80, 82, 84, 88, 96, 1120}, +{28, 32, 36, 40, 44, 64, 32, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 80, 84, 88, 92, 96, 2144}, +{28, 29, 30, 31, 32, 64, 1, 8, 20, 32, 8224, 8225, 8226, 8227, 8256, 270400, 1056, 1057, 1058, 1059, 1088, 33856, 80, 80, 80, 80, 96, 160}, +{28, 32, 36, 40, 44, 64, 32, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 80, 84, 88, 92, 96, 2144}, +{28, 32, 36, 40, 44, 64, 0, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 88, 90, 92, 94, 96, 352}, +{28, 32, 36, 40, 44, 64, 0, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 88, 90, 92, 94, 96, 608}, +{28, 32, 36, 40, 44, 64, 0, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 88, 90, 92, 94, 96, 1120}, +{28, 32, 36, 40, 44, 64, 0, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 88, 90, 92, 94, 96, 96}, +{28, 44, 60, 76, 80, 96, 128, 16, 20, 64, 8224, 8240, 8256, 8260, 8288, 532576, 1056, 1072, 1088, 1092, 1120, 66656, 80, 96, 112, 116, 128, 8320}, +{32, 64, 96, 128, 136, 160, 256, 32, 24, 64, 8224, 8256, 8288, 8296, 8320, 1056896, 1056, 1088, 1120, 1128, 1152, 132224, 80, 112, 144, 152, 160, 16544}, +{32, 40, 48, 56, 64, 96, 0, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 80, 80, 80, 80, 96}, +{32, 40, 48, 56, 64, 96, 0, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 80, 80, 80, 80, 96}, +{32, 40, 48, 56, 64, 96, 0, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 80, 80, 80, 80, 160}, +{32, 40, 48, 56, 64, 96, 0, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 80, 80, 80, 80, 160}, +{32, 40, 48, 56, 64, 96, 16, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 88, 96, 96, 96, 1120}, +{28, 31, 34, 40, 44, 64, 24, 8, 20, 32, 8224, 8227, 8232, 8236, 8256, 270400, 1056, 1059, 1064, 1068, 1088, 33856, 80, 83, 88, 92, 96, 1632}, +{28, 34, 40, 48, 52, 64, 48, 8, 20, 32, 8224, 8230, 8236, 8240, 8256, 270400, 1056, 1062, 1068, 1072, 1088, 33856, 80, 86, 92, 96, 128, 3200}, +{28, 29, 30, 32, 36, 64, 8, 8, 20, 32, 8224, 8225, 8228, 8232, 8256, 270400, 1056, 1057, 1060, 1064, 1088, 33856, 80, 81, 84, 88, 96, 608}, +}; + +// ------------------------------------------------ Basic Lookup ----------------------------------------------------------- + +PNANOVDB_FORCE_INLINE 
pnanovdb_gridblindmetadata_handle_t pnanovdb_grid_get_gridblindmetadata(pnanovdb_buf_t buf, pnanovdb_grid_handle_t grid, pnanovdb_uint32_t index) +{ + pnanovdb_gridblindmetadata_handle_t meta = { grid.address }; + pnanovdb_uint64_t byte_offset = pnanovdb_grid_get_blind_metadata_offset(buf, grid); + meta.address = pnanovdb_address_offset64(meta.address, byte_offset); + meta.address = pnanovdb_address_offset_product(meta.address, PNANOVDB_GRIDBLINDMETADATA_SIZE, index); + return meta; +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_grid_get_gridblindmetadata_value_address(pnanovdb_buf_t buf, pnanovdb_grid_handle_t grid, pnanovdb_uint32_t index) +{ + pnanovdb_gridblindmetadata_handle_t meta = pnanovdb_grid_get_gridblindmetadata(buf, grid, index); + pnanovdb_int64_t byte_offset = pnanovdb_gridblindmetadata_get_data_offset(buf, meta); + pnanovdb_address_t address = pnanovdb_address_offset64(meta.address, pnanovdb_int64_as_uint64(byte_offset)); + return address; +} + +PNANOVDB_FORCE_INLINE pnanovdb_tree_handle_t pnanovdb_grid_get_tree(pnanovdb_buf_t buf, pnanovdb_grid_handle_t grid) +{ + pnanovdb_tree_handle_t tree = { grid.address }; + tree.address = pnanovdb_address_offset(tree.address, PNANOVDB_GRID_SIZE); + return tree; +} + +PNANOVDB_FORCE_INLINE pnanovdb_root_handle_t pnanovdb_tree_get_root(pnanovdb_buf_t buf, pnanovdb_tree_handle_t tree) +{ + pnanovdb_root_handle_t root = { tree.address }; + pnanovdb_uint64_t byte_offset = pnanovdb_tree_get_node_offset_root(buf, tree); + root.address = pnanovdb_address_offset64(root.address, byte_offset); + return root; +} + +PNANOVDB_FORCE_INLINE pnanovdb_root_tile_handle_t pnanovdb_root_get_tile(pnanovdb_grid_type_t grid_type, pnanovdb_root_handle_t root, pnanovdb_uint32_t n) +{ + pnanovdb_root_tile_handle_t tile = { root.address }; + tile.address = pnanovdb_address_offset(tile.address, PNANOVDB_GRID_TYPE_GET(grid_type, root_size)); + tile.address = pnanovdb_address_offset_product(tile.address, PNANOVDB_GRID_TYPE_GET(grid_type, root_tile_size), n); + return tile; +} + +PNANOVDB_FORCE_INLINE pnanovdb_root_tile_handle_t pnanovdb_root_get_tile_zero(pnanovdb_grid_type_t grid_type, pnanovdb_root_handle_t root) +{ + pnanovdb_root_tile_handle_t tile = { root.address }; + tile.address = pnanovdb_address_offset(tile.address, PNANOVDB_GRID_TYPE_GET(grid_type, root_size)); + return tile; +} + +PNANOVDB_FORCE_INLINE pnanovdb_upper_handle_t pnanovdb_root_get_child(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root, pnanovdb_root_tile_handle_t tile) +{ + pnanovdb_upper_handle_t upper = { root.address }; + upper.address = pnanovdb_address_offset64(upper.address, pnanovdb_int64_as_uint64(pnanovdb_root_tile_get_child(buf, tile))); + return upper; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_coord_to_key(PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ +#if defined(PNANOVDB_NATIVE_64) + pnanovdb_uint64_t iu = pnanovdb_int32_as_uint32(PNANOVDB_DEREF(ijk).x) >> 12u; + pnanovdb_uint64_t ju = pnanovdb_int32_as_uint32(PNANOVDB_DEREF(ijk).y) >> 12u; + pnanovdb_uint64_t ku = pnanovdb_int32_as_uint32(PNANOVDB_DEREF(ijk).z) >> 12u; + return (ku) | (ju << 21u) | (iu << 42u); +#else + pnanovdb_uint32_t iu = pnanovdb_int32_as_uint32(PNANOVDB_DEREF(ijk).x) >> 12u; + pnanovdb_uint32_t ju = pnanovdb_int32_as_uint32(PNANOVDB_DEREF(ijk).y) >> 12u; + pnanovdb_uint32_t ku = pnanovdb_int32_as_uint32(PNANOVDB_DEREF(ijk).z) >> 12u; + pnanovdb_uint32_t key_x = ku | (ju << 21); + pnanovdb_uint32_t key_y = (iu << 10) | (ju >> 11); + return 
pnanovdb_uint32_as_uint64(key_x, key_y); +#endif +} + +PNANOVDB_FORCE_INLINE pnanovdb_root_tile_handle_t pnanovdb_root_find_tile(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root, PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ + pnanovdb_uint32_t tile_count = pnanovdb_uint32_as_int32(pnanovdb_root_get_tile_count(buf, root)); + pnanovdb_root_tile_handle_t tile = pnanovdb_root_get_tile_zero(grid_type, root); + pnanovdb_uint64_t key = pnanovdb_coord_to_key(ijk); + for (pnanovdb_uint32_t i = 0u; i < tile_count; i++) + { + if (pnanovdb_uint64_is_equal(key, pnanovdb_root_tile_get_key(buf, tile))) + { + return tile; + } + tile.address = pnanovdb_address_offset(tile.address, PNANOVDB_GRID_TYPE_GET(grid_type, root_tile_size)); + } + pnanovdb_root_tile_handle_t null_handle = { pnanovdb_address_null() }; + return null_handle; +} + +// ----------------------------- Leaf Node --------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_leaf_coord_to_offset(PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ + return (((PNANOVDB_DEREF(ijk).x & 7) >> 0) << (2 * 3)) + + (((PNANOVDB_DEREF(ijk).y & 7) >> 0) << (3)) + + ((PNANOVDB_DEREF(ijk).z & 7) >> 0); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_leaf_get_min_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_leaf_handle_t node) +{ + pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, leaf_off_min); + return pnanovdb_address_offset(node.address, byte_offset); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_leaf_get_max_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_leaf_handle_t node) +{ + pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, leaf_off_max); + return pnanovdb_address_offset(node.address, byte_offset); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_leaf_get_ave_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_leaf_handle_t node) +{ + pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, leaf_off_ave); + return pnanovdb_address_offset(node.address, byte_offset); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_leaf_get_stddev_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_leaf_handle_t node) +{ + pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, leaf_off_stddev); + return pnanovdb_address_offset(node.address, byte_offset); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_leaf_get_table_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_leaf_handle_t node, pnanovdb_uint32_t n) +{ + pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, leaf_off_table) + ((PNANOVDB_GRID_TYPE_GET(grid_type, value_stride_bits) * n) >> 3u); + return pnanovdb_address_offset(node.address, byte_offset); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_leaf_get_value_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ + pnanovdb_uint32_t n = pnanovdb_leaf_coord_to_offset(ijk); + return pnanovdb_leaf_get_table_address(grid_type, buf, leaf, n); +} + +// ----------------------------- Leaf FP Types Specialization --------------------------------------- + +PNANOVDB_FORCE_INLINE float pnanovdb_leaf_fp_read_float(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk, pnanovdb_uint32_t value_log_bits) +{ + // value_log_bits // 2 3 4 + pnanovdb_uint32_t value_bits = 1u << value_log_bits; // 4 8 16 + pnanovdb_uint32_t value_mask 
= (1u << value_bits) - 1u; // 0xF 0xFF 0xFFFF + pnanovdb_uint32_t values_per_word_bits = 5u - value_log_bits; // 3 2 1 + pnanovdb_uint32_t values_per_word_mask = (1u << values_per_word_bits) - 1u; // 7 3 1 + + pnanovdb_uint32_t n = pnanovdb_leaf_coord_to_offset(ijk); + float minimum = pnanovdb_read_float(buf, pnanovdb_address_offset_neg(address, PNANOVDB_LEAF_TABLE_NEG_OFF_MINIMUM)); + float quantum = pnanovdb_read_float(buf, pnanovdb_address_offset_neg(address, PNANOVDB_LEAF_TABLE_NEG_OFF_QUANTUM)); + pnanovdb_uint32_t raw = pnanovdb_read_uint32(buf, pnanovdb_address_offset(address, ((n >> values_per_word_bits) << 2u))); + pnanovdb_uint32_t value_compressed = (raw >> ((n & values_per_word_mask) << value_log_bits)) & value_mask; + return pnanovdb_uint32_to_float(value_compressed) * quantum + minimum; +} + +PNANOVDB_FORCE_INLINE float pnanovdb_leaf_fp4_read_float(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ + return pnanovdb_leaf_fp_read_float(buf, address, ijk, 2u); +} + +PNANOVDB_FORCE_INLINE float pnanovdb_leaf_fp8_read_float(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ + return pnanovdb_leaf_fp_read_float(buf, address, ijk, 3u); +} + +PNANOVDB_FORCE_INLINE float pnanovdb_leaf_fp16_read_float(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ + return pnanovdb_leaf_fp_read_float(buf, address, ijk, 4u); +} + +PNANOVDB_FORCE_INLINE float pnanovdb_leaf_fpn_read_float(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ + pnanovdb_uint32_t bbox_dif_and_flags = pnanovdb_read_uint32(buf, pnanovdb_address_offset_neg(address, PNANOVDB_LEAF_TABLE_NEG_OFF_BBOX_DIF_AND_FLAGS)); + pnanovdb_uint32_t flags = bbox_dif_and_flags >> 24u; + pnanovdb_uint32_t value_log_bits = flags >> 5; // b = 0, 1, 2, 3, 4 corresponding to 1, 2, 4, 8, 16 bits + return pnanovdb_leaf_fp_read_float(buf, address, ijk, value_log_bits); +} + +// ----------------------------- Leaf Index Specialization --------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_leaf_index_has_stats(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) +{ + return (pnanovdb_leaf_get_bbox_dif_and_flags(buf, leaf) & (1u << 28u)) != 0u; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_index_get_min_index(pnanovdb_buf_t buf, pnanovdb_address_t min_address) +{ + return pnanovdb_uint64_offset(pnanovdb_read_uint64(buf, min_address), 512u); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_index_get_max_index(pnanovdb_buf_t buf, pnanovdb_address_t max_address) +{ + return pnanovdb_uint64_offset(pnanovdb_read_uint64(buf, max_address), 513u); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_index_get_ave_index(pnanovdb_buf_t buf, pnanovdb_address_t ave_address) +{ + return pnanovdb_uint64_offset(pnanovdb_read_uint64(buf, ave_address), 514u); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_index_get_dev_index(pnanovdb_buf_t buf, pnanovdb_address_t dev_address) +{ + return pnanovdb_uint64_offset(pnanovdb_read_uint64(buf, dev_address), 515u); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_index_get_value_index(pnanovdb_buf_t buf, pnanovdb_address_t value_address, PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ + pnanovdb_uint32_t n = pnanovdb_leaf_coord_to_offset(ijk); + pnanovdb_uint64_t offset = pnanovdb_read_uint64(buf, value_address); + return pnanovdb_uint64_offset(offset, n); +} + +// ----------------------------- Leaf IndexMask 
Specialization --------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_leaf_indexmask_has_stats(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) +{ + return pnanovdb_leaf_index_has_stats(buf, leaf); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_indexmask_get_min_index(pnanovdb_buf_t buf, pnanovdb_address_t min_address) +{ + return pnanovdb_leaf_index_get_min_index(buf, min_address); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_indexmask_get_max_index(pnanovdb_buf_t buf, pnanovdb_address_t max_address) +{ + return pnanovdb_leaf_index_get_max_index(buf, max_address); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_indexmask_get_ave_index(pnanovdb_buf_t buf, pnanovdb_address_t ave_address) +{ + return pnanovdb_leaf_index_get_ave_index(buf, ave_address); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_indexmask_get_dev_index(pnanovdb_buf_t buf, pnanovdb_address_t dev_address) +{ + return pnanovdb_leaf_index_get_dev_index(buf, dev_address); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_indexmask_get_value_index(pnanovdb_buf_t buf, pnanovdb_address_t value_address, PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ + return pnanovdb_leaf_index_get_value_index(buf, value_address, ijk); +} +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_leaf_indexmask_get_mask_bit(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t n) +{ + pnanovdb_uint32_t word_idx = n >> 5; + pnanovdb_uint32_t bit_idx = n & 31; + pnanovdb_uint32_t val_mask = + pnanovdb_read_uint32(buf, pnanovdb_address_offset(leaf.address, 96u + 4u * word_idx)); + return (val_mask & (1u << bit_idx)) != 0u; +} +PNANOVDB_FORCE_INLINE void pnanovdb_leaf_indexmask_set_mask_bit(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t n, pnanovdb_bool_t v) +{ + pnanovdb_uint32_t word_idx = n >> 5; + pnanovdb_uint32_t bit_idx = n & 31; + pnanovdb_uint32_t val_mask = + pnanovdb_read_uint32(buf, pnanovdb_address_offset(leaf.address, 96u + 4u * word_idx)); + if (v) + { + val_mask = val_mask | (1u << bit_idx); + } + else + { + val_mask = val_mask & ~(1u << bit_idx); + } + pnanovdb_write_uint32(buf, pnanovdb_address_offset(leaf.address, 96u + 4u * word_idx), val_mask); +} + +// ----------------------------- Leaf OnIndex Specialization --------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_leaf_onindex_get_value_count(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) +{ + pnanovdb_uint64_t val_mask = pnanovdb_read_uint64(buf, pnanovdb_address_offset(leaf.address, PNANOVDB_LEAF_OFF_VALUE_MASK + 8u * 7u)); + pnanovdb_uint64_t prefix_sum = pnanovdb_read_uint64( + buf, pnanovdb_address_offset(leaf.address, PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_ONINDEX, leaf_off_table) + 8u)); + return pnanovdb_uint64_countbits(val_mask) + (pnanovdb_uint64_to_uint32_lsr(prefix_sum, 54u) & 511u); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindex_get_last_offset(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) +{ + return pnanovdb_uint64_offset( + pnanovdb_read_uint64(buf, pnanovdb_address_offset(leaf.address, PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_ONINDEX, leaf_off_table))), + pnanovdb_leaf_onindex_get_value_count(buf, leaf) - 1u); +} + +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_leaf_onindex_has_stats(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) +{ + return (pnanovdb_leaf_get_bbox_dif_and_flags(buf, leaf) & (1u << 28u)) != 0u; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t 
pnanovdb_leaf_onindex_get_min_index(pnanovdb_buf_t buf, pnanovdb_address_t min_address) +{ + pnanovdb_leaf_handle_t leaf = { pnanovdb_address_offset_neg(min_address, PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_ONINDEX, leaf_off_table)) }; + pnanovdb_uint64_t idx = pnanovdb_uint32_as_uint64_low(0u); + if (pnanovdb_leaf_onindex_has_stats(buf, leaf)) + { + idx = pnanovdb_uint64_offset(pnanovdb_leaf_onindex_get_last_offset(buf, leaf), 1u); + } + return idx; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindex_get_max_index(pnanovdb_buf_t buf, pnanovdb_address_t max_address) +{ + pnanovdb_leaf_handle_t leaf = { pnanovdb_address_offset_neg(max_address, PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_ONINDEX, leaf_off_table)) }; + pnanovdb_uint64_t idx = pnanovdb_uint32_as_uint64_low(0u); + if (pnanovdb_leaf_onindex_has_stats(buf, leaf)) + { + idx = pnanovdb_uint64_offset(pnanovdb_leaf_onindex_get_last_offset(buf, leaf), 2u); + } + return idx; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindex_get_ave_index(pnanovdb_buf_t buf, pnanovdb_address_t ave_address) +{ + pnanovdb_leaf_handle_t leaf = { pnanovdb_address_offset_neg(ave_address, PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_ONINDEX, leaf_off_table)) }; + pnanovdb_uint64_t idx = pnanovdb_uint32_as_uint64_low(0u); + if (pnanovdb_leaf_onindex_has_stats(buf, leaf)) + { + idx = pnanovdb_uint64_offset(pnanovdb_leaf_onindex_get_last_offset(buf, leaf), 3u); + } + return idx; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindex_get_dev_index(pnanovdb_buf_t buf, pnanovdb_address_t dev_address) +{ + pnanovdb_leaf_handle_t leaf = { pnanovdb_address_offset_neg(dev_address, PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_ONINDEX, leaf_off_table)) }; + pnanovdb_uint64_t idx = pnanovdb_uint32_as_uint64_low(0u); + if (pnanovdb_leaf_onindex_has_stats(buf, leaf)) + { + idx = pnanovdb_uint64_offset(pnanovdb_leaf_onindex_get_last_offset(buf, leaf), 4u); + } + return idx; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindex_get_value_index(pnanovdb_buf_t buf, pnanovdb_address_t value_address, PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ + pnanovdb_uint32_t n = pnanovdb_leaf_coord_to_offset(ijk); + pnanovdb_leaf_handle_t leaf = { pnanovdb_address_offset_neg(value_address, PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_ONINDEX, leaf_off_table)) }; + + pnanovdb_uint32_t word_idx = n >> 6u; + pnanovdb_uint32_t bit_idx = n & 63u; + pnanovdb_uint64_t val_mask = pnanovdb_read_uint64(buf, pnanovdb_address_offset(leaf.address, PNANOVDB_LEAF_OFF_VALUE_MASK + 8u * word_idx)); + pnanovdb_uint64_t mask = pnanovdb_uint64_bit_mask(bit_idx); + pnanovdb_uint64_t value_index = pnanovdb_uint32_as_uint64_low(0u); + if (pnanovdb_uint64_any_bit(pnanovdb_uint64_and(val_mask, mask))) + { + pnanovdb_uint32_t sum = 0u; + sum += pnanovdb_uint64_countbits(pnanovdb_uint64_and(val_mask, pnanovdb_uint64_dec(mask))); + if (word_idx > 0u) + { + pnanovdb_uint64_t prefix_sum = pnanovdb_read_uint64(buf, pnanovdb_address_offset(value_address, 8u)); + sum += pnanovdb_uint64_to_uint32_lsr(prefix_sum, 9u * (word_idx - 1u)) & 511u; + } + pnanovdb_uint64_t offset = pnanovdb_read_uint64(buf, value_address); + value_index = pnanovdb_uint64_offset(offset, sum); + } + return value_index; +} + +// ----------------------------- Leaf OnIndexMask Specialization --------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_leaf_onindexmask_get_value_count(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) +{ + return 
pnanovdb_leaf_onindex_get_value_count(buf, leaf); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindexmask_get_last_offset(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) +{ + return pnanovdb_leaf_onindex_get_last_offset(buf, leaf); +} +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_leaf_onindexmask_has_stats(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) +{ + return pnanovdb_leaf_onindex_has_stats(buf, leaf); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindexmask_get_min_index(pnanovdb_buf_t buf, pnanovdb_address_t min_address) +{ + return pnanovdb_leaf_onindex_get_min_index(buf, min_address); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindexmask_get_max_index(pnanovdb_buf_t buf, pnanovdb_address_t max_address) +{ + return pnanovdb_leaf_onindex_get_max_index(buf, max_address); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindexmask_get_ave_index(pnanovdb_buf_t buf, pnanovdb_address_t ave_address) +{ + return pnanovdb_leaf_onindex_get_ave_index(buf, ave_address); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindexmask_get_dev_index(pnanovdb_buf_t buf, pnanovdb_address_t dev_address) +{ + return pnanovdb_leaf_onindex_get_dev_index(buf, dev_address); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindexmask_get_value_index(pnanovdb_buf_t buf, pnanovdb_address_t value_address, PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ + return pnanovdb_leaf_onindex_get_value_index(buf, value_address, ijk); +} +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_leaf_onindexmask_get_mask_bit(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t n) +{ + pnanovdb_uint32_t word_idx = n >> 5; + pnanovdb_uint32_t bit_idx = n & 31; + pnanovdb_uint32_t val_mask = + pnanovdb_read_uint32(buf, pnanovdb_address_offset(leaf.address, 96u + 4u * word_idx)); + return (val_mask & (1u << bit_idx)) != 0u; +} +PNANOVDB_FORCE_INLINE void pnanovdb_leaf_onindexmask_set_mask_bit(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t n, pnanovdb_bool_t v) +{ + pnanovdb_uint32_t word_idx = n >> 5; + pnanovdb_uint32_t bit_idx = n & 31; + pnanovdb_uint32_t val_mask = + pnanovdb_read_uint32(buf, pnanovdb_address_offset(leaf.address, 96u + 4u * word_idx)); + if (v) + { + val_mask = val_mask | (1u << bit_idx); + } + else + { + val_mask = val_mask & ~(1u << bit_idx); + } + pnanovdb_write_uint32(buf, pnanovdb_address_offset(leaf.address, 96u + 4u * word_idx), val_mask); +} + +// ----------------------------- Leaf PointIndex Specialization --------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_pointindex_get_offset(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) +{ + return pnanovdb_read_uint64(buf, pnanovdb_leaf_get_min_address(PNANOVDB_GRID_TYPE_POINTINDEX, buf, leaf)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_pointindex_get_point_count(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) +{ + return pnanovdb_read_uint64(buf, pnanovdb_leaf_get_max_address(PNANOVDB_GRID_TYPE_POINTINDEX, buf, leaf)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_pointindex_get_first(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t i) +{ + return pnanovdb_uint64_offset(pnanovdb_leaf_pointindex_get_offset(buf, leaf), + (i == 0u ? 
0u : pnanovdb_read_uint16(buf, pnanovdb_leaf_get_table_address(PNANOVDB_GRID_TYPE_POINTINDEX, buf, leaf, i - 1u)))); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_pointindex_get_last(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t i) +{ + return pnanovdb_uint64_offset(pnanovdb_leaf_pointindex_get_offset(buf, leaf), + pnanovdb_read_uint16(buf, pnanovdb_leaf_get_table_address(PNANOVDB_GRID_TYPE_POINTINDEX, buf, leaf, i))); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_pointindex_get_value(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t i) +{ + return pnanovdb_uint32_as_uint64_low(pnanovdb_read_uint16(buf, pnanovdb_leaf_get_table_address(PNANOVDB_GRID_TYPE_POINTINDEX, buf, leaf, i))); +} +PNANOVDB_FORCE_INLINE void pnanovdb_leaf_pointindex_set_value_only(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t i, pnanovdb_uint32_t value) +{ + pnanovdb_address_t addr = pnanovdb_leaf_get_table_address(PNANOVDB_GRID_TYPE_POINTINDEX, buf, leaf, i); + pnanovdb_uint32_t raw32 = pnanovdb_read_uint32(buf, pnanovdb_address_mask_inv(addr, 3u)); + if ((i & 1) == 0u) + { + raw32 = (raw32 & 0xFFFF0000) | (value & 0x0000FFFF); + } + else + { + raw32 = (raw32 & 0x0000FFFF) | (value << 16u); + } + pnanovdb_write_uint32(buf, addr, raw32); +} +PNANOVDB_FORCE_INLINE void pnanovdb_leaf_pointindex_set_on(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t i) +{ + pnanovdb_uint32_t word_idx = i >> 5; + pnanovdb_uint32_t bit_idx = i & 31; + pnanovdb_address_t addr = pnanovdb_address_offset(leaf.address, PNANOVDB_LEAF_OFF_VALUE_MASK + 4u * word_idx); + pnanovdb_uint32_t val_mask = pnanovdb_read_uint32(buf, addr); + val_mask = val_mask | (1u << bit_idx); + pnanovdb_write_uint32(buf, addr, val_mask); +} +PNANOVDB_FORCE_INLINE void pnanovdb_leaf_pointindex_set_value(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t i, pnanovdb_uint32_t value) +{ + pnanovdb_leaf_pointindex_set_on(buf, leaf, i); + pnanovdb_leaf_pointindex_set_value_only(buf, leaf, i, value); +} + +// ------------------------------------------------ Lower Node ----------------------------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_lower_coord_to_offset(PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ + return (((PNANOVDB_DEREF(ijk).x & 127) >> 3) << (2 * 4)) + + (((PNANOVDB_DEREF(ijk).y & 127) >> 3) << (4)) + + ((PNANOVDB_DEREF(ijk).z & 127) >> 3); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_lower_get_min_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t node) +{ + pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, lower_off_min); + return pnanovdb_address_offset(node.address, byte_offset); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_lower_get_max_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t node) +{ + pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, lower_off_max); + return pnanovdb_address_offset(node.address, byte_offset); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_lower_get_ave_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t node) +{ + pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, lower_off_ave); + return pnanovdb_address_offset(node.address, byte_offset); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_lower_get_stddev_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t node) +{ + 
pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, lower_off_stddev); + return pnanovdb_address_offset(node.address, byte_offset); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_lower_get_table_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t node, pnanovdb_uint32_t n) +{ + pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, lower_off_table) + PNANOVDB_GRID_TYPE_GET(grid_type, table_stride) * n; + return pnanovdb_address_offset(node.address, byte_offset); +} + +PNANOVDB_FORCE_INLINE pnanovdb_int64_t pnanovdb_lower_get_table_child(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t node, pnanovdb_uint32_t n) +{ + pnanovdb_address_t table_address = pnanovdb_lower_get_table_address(grid_type, buf, node, n); + return pnanovdb_read_int64(buf, table_address); +} + +PNANOVDB_FORCE_INLINE pnanovdb_leaf_handle_t pnanovdb_lower_get_child(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t lower, pnanovdb_uint32_t n) +{ + pnanovdb_leaf_handle_t leaf = { lower.address }; + leaf.address = pnanovdb_address_offset64(leaf.address, pnanovdb_int64_as_uint64(pnanovdb_lower_get_table_child(grid_type, buf, lower, n))); + return leaf; +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_lower_get_value_address_and_level(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t lower, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_uint32_t) level) +{ + pnanovdb_uint32_t n = pnanovdb_lower_coord_to_offset(ijk); + pnanovdb_address_t value_address; + if (pnanovdb_lower_get_child_mask(buf, lower, n)) + { + pnanovdb_leaf_handle_t child = pnanovdb_lower_get_child(grid_type, buf, lower, n); + value_address = pnanovdb_leaf_get_value_address(grid_type, buf, child, ijk); + PNANOVDB_DEREF(level) = 0u; + } + else + { + value_address = pnanovdb_lower_get_table_address(grid_type, buf, lower, n); + PNANOVDB_DEREF(level) = 1u; + } + return value_address; +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_lower_get_value_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t lower, PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ + pnanovdb_uint32_t level; + return pnanovdb_lower_get_value_address_and_level(grid_type, buf, lower, ijk, PNANOVDB_REF(level)); +} + +// ------------------------------------------------ Upper Node ----------------------------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_upper_coord_to_offset(PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ + return (((PNANOVDB_DEREF(ijk).x & 4095) >> 7) << (2 * 5)) + + (((PNANOVDB_DEREF(ijk).y & 4095) >> 7) << (5)) + + ((PNANOVDB_DEREF(ijk).z & 4095) >> 7); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_upper_get_min_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t node) +{ + pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, upper_off_min); + return pnanovdb_address_offset(node.address, byte_offset); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_upper_get_max_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t node) +{ + pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, upper_off_max); + return pnanovdb_address_offset(node.address, byte_offset); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_upper_get_ave_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t node) +{ + pnanovdb_uint32_t byte_offset = 
PNANOVDB_GRID_TYPE_GET(grid_type, upper_off_ave); + return pnanovdb_address_offset(node.address, byte_offset); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_upper_get_stddev_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t node) +{ + pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, upper_off_stddev); + return pnanovdb_address_offset(node.address, byte_offset); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_upper_get_table_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t node, pnanovdb_uint32_t n) +{ + pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, upper_off_table) + PNANOVDB_GRID_TYPE_GET(grid_type, table_stride) * n; + return pnanovdb_address_offset(node.address, byte_offset); +} + +PNANOVDB_FORCE_INLINE pnanovdb_int64_t pnanovdb_upper_get_table_child(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t node, pnanovdb_uint32_t n) +{ + pnanovdb_address_t bufAddress = pnanovdb_upper_get_table_address(grid_type, buf, node, n); + return pnanovdb_read_int64(buf, bufAddress); +} + +PNANOVDB_FORCE_INLINE pnanovdb_lower_handle_t pnanovdb_upper_get_child(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t upper, pnanovdb_uint32_t n) +{ + pnanovdb_lower_handle_t lower = { upper.address }; + lower.address = pnanovdb_address_offset64(lower.address, pnanovdb_int64_as_uint64(pnanovdb_upper_get_table_child(grid_type, buf, upper, n))); + return lower; +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_upper_get_value_address_and_level(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t upper, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_uint32_t) level) +{ + pnanovdb_uint32_t n = pnanovdb_upper_coord_to_offset(ijk); + pnanovdb_address_t value_address; + if (pnanovdb_upper_get_child_mask(buf, upper, n)) + { + pnanovdb_lower_handle_t child = pnanovdb_upper_get_child(grid_type, buf, upper, n); + value_address = pnanovdb_lower_get_value_address_and_level(grid_type, buf, child, ijk, level); + } + else + { + value_address = pnanovdb_upper_get_table_address(grid_type, buf, upper, n); + PNANOVDB_DEREF(level) = 2u; + } + return value_address; +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_upper_get_value_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t upper, PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ + pnanovdb_uint32_t level; + return pnanovdb_upper_get_value_address_and_level(grid_type, buf, upper, ijk, PNANOVDB_REF(level)); +} + +PNANOVDB_FORCE_INLINE void pnanovdb_upper_set_table_child(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t node, pnanovdb_uint32_t n, pnanovdb_int64_t child) +{ + pnanovdb_address_t bufAddress = pnanovdb_upper_get_table_address(grid_type, buf, node, n); + pnanovdb_write_int64(buf, bufAddress, child); +} + +// ------------------------------------------------ Root ----------------------------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_root_get_min_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root) +{ + pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, root_off_min); + return pnanovdb_address_offset(root.address, byte_offset); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_root_get_max_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root) +{ + pnanovdb_uint32_t byte_offset = 
PNANOVDB_GRID_TYPE_GET(grid_type, root_off_max); + return pnanovdb_address_offset(root.address, byte_offset); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_root_get_ave_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root) +{ + pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, root_off_ave); + return pnanovdb_address_offset(root.address, byte_offset); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_root_get_stddev_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root) +{ + pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, root_off_stddev); + return pnanovdb_address_offset(root.address, byte_offset); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_root_tile_get_value_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_tile_handle_t root_tile) +{ + pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, root_tile_off_value); + return pnanovdb_address_offset(root_tile.address, byte_offset); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_root_get_value_address_and_level(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_uint32_t) level) +{ + pnanovdb_root_tile_handle_t tile = pnanovdb_root_find_tile(grid_type, buf, root, ijk); + pnanovdb_address_t ret; + if (pnanovdb_address_is_null(tile.address)) + { + ret = pnanovdb_address_offset(root.address, PNANOVDB_GRID_TYPE_GET(grid_type, root_off_background)); + PNANOVDB_DEREF(level) = 4u; + } + else if (pnanovdb_int64_is_zero(pnanovdb_root_tile_get_child(buf, tile))) + { + ret = pnanovdb_address_offset(tile.address, PNANOVDB_GRID_TYPE_GET(grid_type, root_tile_off_value)); + PNANOVDB_DEREF(level) = 3u; + } + else + { + pnanovdb_upper_handle_t child = pnanovdb_root_get_child(grid_type, buf, root, tile); + ret = pnanovdb_upper_get_value_address_and_level(grid_type, buf, child, ijk, level); + } + return ret; +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_root_get_value_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root, PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ + pnanovdb_uint32_t level; + return pnanovdb_root_get_value_address_and_level(grid_type, buf, root, ijk, PNANOVDB_REF(level)); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_root_get_value_address_bit(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_uint32_t) bit_index) +{ + pnanovdb_uint32_t level; + pnanovdb_address_t address = pnanovdb_root_get_value_address_and_level(grid_type, buf, root, ijk, PNANOVDB_REF(level)); + PNANOVDB_DEREF(bit_index) = level == 0u ? 
pnanovdb_int32_as_uint32(PNANOVDB_DEREF(ijk).x & 7) : 0u; + return address; +} + +PNANOVDB_FORCE_INLINE float pnanovdb_root_fp4_read_float(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk, pnanovdb_uint32_t level) +{ + float ret; + if (level == 0) + { + ret = pnanovdb_leaf_fp4_read_float(buf, address, ijk); + } + else + { + ret = pnanovdb_read_float(buf, address); + } + return ret; +} + +PNANOVDB_FORCE_INLINE float pnanovdb_root_fp8_read_float(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk, pnanovdb_uint32_t level) +{ + float ret; + if (level == 0) + { + ret = pnanovdb_leaf_fp8_read_float(buf, address, ijk); + } + else + { + ret = pnanovdb_read_float(buf, address); + } + return ret; +} + +PNANOVDB_FORCE_INLINE float pnanovdb_root_fp16_read_float(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk, pnanovdb_uint32_t level) +{ + float ret; + if (level == 0) + { + ret = pnanovdb_leaf_fp16_read_float(buf, address, ijk); + } + else + { + ret = pnanovdb_read_float(buf, address); + } + return ret; +} + +PNANOVDB_FORCE_INLINE float pnanovdb_root_fpn_read_float(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk, pnanovdb_uint32_t level) +{ + float ret; + if (level == 0) + { + ret = pnanovdb_leaf_fpn_read_float(buf, address, ijk); + } + else + { + ret = pnanovdb_read_float(buf, address); + } + return ret; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_root_index_get_value_index(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk, pnanovdb_uint32_t level) +{ + pnanovdb_uint64_t ret; + if (level == 0) + { + ret = pnanovdb_leaf_index_get_value_index(buf, address, ijk); + } + else + { + ret = pnanovdb_read_uint64(buf, address); + } + return ret; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_root_onindex_get_value_index(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk, pnanovdb_uint32_t level) +{ + pnanovdb_uint64_t ret; + if (level == 0) + { + ret = pnanovdb_leaf_onindex_get_value_index(buf, address, ijk); + } + else + { + ret = pnanovdb_read_uint64(buf, address); + } + return ret; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_root_pointindex_get_point_range( + pnanovdb_buf_t buf, + pnanovdb_address_t value_address, + PNANOVDB_IN(pnanovdb_coord_t) ijk, + pnanovdb_uint32_t level, + PNANOVDB_INOUT(pnanovdb_uint64_t)range_begin, + PNANOVDB_INOUT(pnanovdb_uint64_t)range_end +) +{ + pnanovdb_uint32_t local_range_begin = 0u; + pnanovdb_uint32_t local_range_end = 0u; + pnanovdb_uint64_t offset = pnanovdb_uint32_as_uint64_low(0u); + if (level == 0) + { + pnanovdb_uint32_t n = pnanovdb_leaf_coord_to_offset(ijk); + // recover leaf address + pnanovdb_leaf_handle_t leaf = { pnanovdb_address_offset_neg(value_address, PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_POINTINDEX, leaf_off_table) + 2u * n) }; + if (n > 0u) + { + local_range_begin = pnanovdb_read_uint16(buf, pnanovdb_address_offset_neg(value_address, 2u)); + } + local_range_end = pnanovdb_read_uint16(buf, value_address); + offset = pnanovdb_leaf_pointindex_get_offset(buf, leaf); + } + PNANOVDB_DEREF(range_begin) = pnanovdb_uint64_offset(offset, local_range_begin); + PNANOVDB_DEREF(range_end) = pnanovdb_uint64_offset(offset, local_range_end); + return pnanovdb_uint32_as_uint64_low(local_range_end - local_range_begin); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_root_pointindex_get_point_address_range( + pnanovdb_buf_t buf, + 
pnanovdb_grid_type_t value_type, + pnanovdb_address_t value_address, + pnanovdb_address_t blindmetadata_value_address, + PNANOVDB_IN(pnanovdb_coord_t) ijk, + pnanovdb_uint32_t level, + PNANOVDB_INOUT(pnanovdb_address_t)address_begin, + PNANOVDB_INOUT(pnanovdb_address_t)address_end +) +{ + pnanovdb_uint64_t range_begin; + pnanovdb_uint64_t range_end; + pnanovdb_uint64_t range_size = pnanovdb_root_pointindex_get_point_range(buf, value_address, ijk, level, PNANOVDB_REF(range_begin), PNANOVDB_REF(range_end)); + + pnanovdb_uint32_t stride = 12u; // vec3f + if (value_type == PNANOVDB_GRID_TYPE_VEC3U8) + { + stride = 3u; + } + else if (value_type == PNANOVDB_GRID_TYPE_VEC3U16) + { + stride = 6u; + } + PNANOVDB_DEREF(address_begin) = pnanovdb_address_offset64_product(blindmetadata_value_address, range_begin, stride); + PNANOVDB_DEREF(address_end) = pnanovdb_address_offset64_product(blindmetadata_value_address, range_end, stride); + return range_size; +} + +// ------------------------------------------------ ReadAccessor ----------------------------------------------------------- + +struct pnanovdb_readaccessor_t +{ + pnanovdb_coord_t key; + pnanovdb_leaf_handle_t leaf; + pnanovdb_lower_handle_t lower; + pnanovdb_upper_handle_t upper; + pnanovdb_root_handle_t root; +}; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_readaccessor_t) + +PNANOVDB_FORCE_INLINE void pnanovdb_readaccessor_init(PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, pnanovdb_root_handle_t root) +{ + PNANOVDB_DEREF(acc).key.x = 0x7FFFFFFF; + PNANOVDB_DEREF(acc).key.y = 0x7FFFFFFF; + PNANOVDB_DEREF(acc).key.z = 0x7FFFFFFF; + PNANOVDB_DEREF(acc).leaf.address = pnanovdb_address_null(); + PNANOVDB_DEREF(acc).lower.address = pnanovdb_address_null(); + PNANOVDB_DEREF(acc).upper.address = pnanovdb_address_null(); + PNANOVDB_DEREF(acc).root = root; +} + +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_readaccessor_iscached0(PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, int dirty) +{ + if (pnanovdb_address_is_null(PNANOVDB_DEREF(acc).leaf.address)) { return PNANOVDB_FALSE; } + if ((dirty & ~((1u << 3) - 1u)) != 0) + { + PNANOVDB_DEREF(acc).leaf.address = pnanovdb_address_null(); + return PNANOVDB_FALSE; + } + return PNANOVDB_TRUE; +} +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_readaccessor_iscached1(PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, int dirty) +{ + if (pnanovdb_address_is_null(PNANOVDB_DEREF(acc).lower.address)) { return PNANOVDB_FALSE; } + if ((dirty & ~((1u << 7) - 1u)) != 0) + { + PNANOVDB_DEREF(acc).lower.address = pnanovdb_address_null(); + return PNANOVDB_FALSE; + } + return PNANOVDB_TRUE; +} +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_readaccessor_iscached2(PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, int dirty) +{ + if (pnanovdb_address_is_null(PNANOVDB_DEREF(acc).upper.address)) { return PNANOVDB_FALSE; } + if ((dirty & ~((1u << 12) - 1u)) != 0) + { + PNANOVDB_DEREF(acc).upper.address = pnanovdb_address_null(); + return PNANOVDB_FALSE; + } + return PNANOVDB_TRUE; +} +PNANOVDB_FORCE_INLINE int pnanovdb_readaccessor_computedirty(PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ + return (PNANOVDB_DEREF(ijk).x ^ PNANOVDB_DEREF(acc).key.x) | (PNANOVDB_DEREF(ijk).y ^ PNANOVDB_DEREF(acc).key.y) | (PNANOVDB_DEREF(ijk).z ^ PNANOVDB_DEREF(acc).key.z); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_leaf_get_value_address_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc) +{ + 
pnanovdb_uint32_t n = pnanovdb_leaf_coord_to_offset(ijk); + return pnanovdb_leaf_get_table_address(grid_type, buf, leaf, n); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_lower_get_value_address_and_level_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t lower, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, PNANOVDB_INOUT(pnanovdb_uint32_t) level) +{ + pnanovdb_uint32_t n = pnanovdb_lower_coord_to_offset(ijk); + pnanovdb_address_t value_address; + if (pnanovdb_lower_get_child_mask(buf, lower, n)) + { + pnanovdb_leaf_handle_t child = pnanovdb_lower_get_child(grid_type, buf, lower, n); + PNANOVDB_DEREF(acc).leaf = child; + PNANOVDB_DEREF(acc).key = PNANOVDB_DEREF(ijk); + value_address = pnanovdb_leaf_get_value_address_and_cache(grid_type, buf, child, ijk, acc); + PNANOVDB_DEREF(level) = 0u; + } + else + { + value_address = pnanovdb_lower_get_table_address(grid_type, buf, lower, n); + PNANOVDB_DEREF(level) = 1u; + } + return value_address; +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_lower_get_value_address_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t lower, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc) +{ + pnanovdb_uint32_t level; + return pnanovdb_lower_get_value_address_and_level_and_cache(grid_type, buf, lower, ijk, acc, PNANOVDB_REF(level)); +} + +PNANOVDB_FORCE_INLINE void pnanovdb_lower_set_table_child(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t node, pnanovdb_uint32_t n, pnanovdb_int64_t child) +{ + pnanovdb_address_t table_address = pnanovdb_lower_get_table_address(grid_type, buf, node, n); + pnanovdb_write_int64(buf, table_address, child); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_upper_get_value_address_and_level_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t upper, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, PNANOVDB_INOUT(pnanovdb_uint32_t) level) +{ + pnanovdb_uint32_t n = pnanovdb_upper_coord_to_offset(ijk); + pnanovdb_address_t value_address; + if (pnanovdb_upper_get_child_mask(buf, upper, n)) + { + pnanovdb_lower_handle_t child = pnanovdb_upper_get_child(grid_type, buf, upper, n); + PNANOVDB_DEREF(acc).lower = child; + PNANOVDB_DEREF(acc).key = PNANOVDB_DEREF(ijk); + value_address = pnanovdb_lower_get_value_address_and_level_and_cache(grid_type, buf, child, ijk, acc, level); + } + else + { + value_address = pnanovdb_upper_get_table_address(grid_type, buf, upper, n); + PNANOVDB_DEREF(level) = 2u; + } + return value_address; +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_upper_get_value_address_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t upper, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc) +{ + pnanovdb_uint32_t level; + return pnanovdb_upper_get_value_address_and_level_and_cache(grid_type, buf, upper, ijk, acc, PNANOVDB_REF(level)); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_root_get_value_address_and_level_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, PNANOVDB_INOUT(pnanovdb_uint32_t) level) +{ + pnanovdb_root_tile_handle_t tile = pnanovdb_root_find_tile(grid_type, buf, root, ijk); + pnanovdb_address_t ret; + if (pnanovdb_address_is_null(tile.address)) + { + ret = 
pnanovdb_address_offset(root.address, PNANOVDB_GRID_TYPE_GET(grid_type, root_off_background)); + PNANOVDB_DEREF(level) = 4u; + } + else if (pnanovdb_int64_is_zero(pnanovdb_root_tile_get_child(buf, tile))) + { + ret = pnanovdb_address_offset(tile.address, PNANOVDB_GRID_TYPE_GET(grid_type, root_tile_off_value)); + PNANOVDB_DEREF(level) = 3u; + } + else + { + pnanovdb_upper_handle_t child = pnanovdb_root_get_child(grid_type, buf, root, tile); + PNANOVDB_DEREF(acc).upper = child; + PNANOVDB_DEREF(acc).key = PNANOVDB_DEREF(ijk); + ret = pnanovdb_upper_get_value_address_and_level_and_cache(grid_type, buf, child, ijk, acc, level); + } + return ret; +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_root_get_value_address_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc) +{ + pnanovdb_uint32_t level; + return pnanovdb_root_get_value_address_and_level_and_cache(grid_type, buf, root, ijk, acc, PNANOVDB_REF(level)); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_readaccessor_get_value_address_and_level(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_uint32_t) level) +{ + int dirty = pnanovdb_readaccessor_computedirty(acc, ijk); + + pnanovdb_address_t value_address; + if (pnanovdb_readaccessor_iscached0(acc, dirty)) + { + value_address = pnanovdb_leaf_get_value_address_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).leaf, ijk, acc); + PNANOVDB_DEREF(level) = 0u; + } + else if (pnanovdb_readaccessor_iscached1(acc, dirty)) + { + value_address = pnanovdb_lower_get_value_address_and_level_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).lower, ijk, acc, level); + } + else if (pnanovdb_readaccessor_iscached2(acc, dirty)) + { + value_address = pnanovdb_upper_get_value_address_and_level_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).upper, ijk, acc, level); + } + else + { + value_address = pnanovdb_root_get_value_address_and_level_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).root, ijk, acc, level); + } + return value_address; +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_readaccessor_get_value_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ + pnanovdb_uint32_t level; + return pnanovdb_readaccessor_get_value_address_and_level(grid_type, buf, acc, ijk, PNANOVDB_REF(level)); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_readaccessor_get_value_address_bit(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_uint32_t) bit_index) +{ + pnanovdb_uint32_t level; + pnanovdb_address_t address = pnanovdb_readaccessor_get_value_address_and_level(grid_type, buf, acc, ijk, PNANOVDB_REF(level)); + PNANOVDB_DEREF(bit_index) = level == 0u ? 
pnanovdb_int32_as_uint32(PNANOVDB_DEREF(ijk).x & 7) : 0u; + return address; +} + +// ------------------------------------------------ ReadAccessor GetDim ----------------------------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_leaf_get_dim_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc) +{ + return 1u; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_lower_get_dim_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t lower, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc) +{ + pnanovdb_uint32_t n = pnanovdb_lower_coord_to_offset(ijk); + pnanovdb_uint32_t ret; + if (pnanovdb_lower_get_child_mask(buf, lower, n)) + { + pnanovdb_leaf_handle_t child = pnanovdb_lower_get_child(grid_type, buf, lower, n); + PNANOVDB_DEREF(acc).leaf = child; + PNANOVDB_DEREF(acc).key = PNANOVDB_DEREF(ijk); + ret = pnanovdb_leaf_get_dim_and_cache(grid_type, buf, child, ijk, acc); + } + else + { + ret = (1u << (3u)); // node 0 dim + } + return ret; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_upper_get_dim_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t upper, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc) +{ + pnanovdb_uint32_t n = pnanovdb_upper_coord_to_offset(ijk); + pnanovdb_uint32_t ret; + if (pnanovdb_upper_get_child_mask(buf, upper, n)) + { + pnanovdb_lower_handle_t child = pnanovdb_upper_get_child(grid_type, buf, upper, n); + PNANOVDB_DEREF(acc).lower = child; + PNANOVDB_DEREF(acc).key = PNANOVDB_DEREF(ijk); + ret = pnanovdb_lower_get_dim_and_cache(grid_type, buf, child, ijk, acc); + } + else + { + ret = (1u << (4u + 3u)); // node 1 dim + } + return ret; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_root_get_dim_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc) +{ + pnanovdb_root_tile_handle_t tile = pnanovdb_root_find_tile(grid_type, buf, root, ijk); + pnanovdb_uint32_t ret; + if (pnanovdb_address_is_null(tile.address)) + { + ret = 1u << (5u + 4u + 3u); // background, node 2 dim + } + else if (pnanovdb_int64_is_zero(pnanovdb_root_tile_get_child(buf, tile))) + { + ret = 1u << (5u + 4u + 3u); // tile value, node 2 dim + } + else + { + pnanovdb_upper_handle_t child = pnanovdb_root_get_child(grid_type, buf, root, tile); + PNANOVDB_DEREF(acc).upper = child; + PNANOVDB_DEREF(acc).key = PNANOVDB_DEREF(ijk); + ret = pnanovdb_upper_get_dim_and_cache(grid_type, buf, child, ijk, acc); + } + return ret; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_readaccessor_get_dim(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ + int dirty = pnanovdb_readaccessor_computedirty(acc, ijk); + + pnanovdb_uint32_t dim; + if (pnanovdb_readaccessor_iscached0(acc, dirty)) + { + dim = pnanovdb_leaf_get_dim_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).leaf, ijk, acc); + } + else if (pnanovdb_readaccessor_iscached1(acc, dirty)) + { + dim = pnanovdb_lower_get_dim_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).lower, ijk, acc); + } + else if (pnanovdb_readaccessor_iscached2(acc, dirty)) + { + dim = pnanovdb_upper_get_dim_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).upper, ijk, acc); + } + else + { + dim = 
pnanovdb_root_get_dim_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).root, ijk, acc); + } + return dim; +} + +// ------------------------------------------------ ReadAccessor IsActive ----------------------------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_leaf_is_active_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc) +{ + pnanovdb_uint32_t n = pnanovdb_leaf_coord_to_offset(ijk); + return pnanovdb_leaf_get_value_mask(buf, leaf, n); +} + +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_lower_is_active_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t lower, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc) +{ + pnanovdb_uint32_t n = pnanovdb_lower_coord_to_offset(ijk); + pnanovdb_bool_t is_active; + if (pnanovdb_lower_get_child_mask(buf, lower, n)) + { + pnanovdb_leaf_handle_t child = pnanovdb_lower_get_child(grid_type, buf, lower, n); + PNANOVDB_DEREF(acc).leaf = child; + PNANOVDB_DEREF(acc).key = PNANOVDB_DEREF(ijk); + is_active = pnanovdb_leaf_is_active_and_cache(grid_type, buf, child, ijk, acc); + } + else + { + is_active = pnanovdb_lower_get_value_mask(buf, lower, n); + } + return is_active; +} + +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_upper_is_active_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t upper, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc) +{ + pnanovdb_uint32_t n = pnanovdb_upper_coord_to_offset(ijk); + pnanovdb_bool_t is_active; + if (pnanovdb_upper_get_child_mask(buf, upper, n)) + { + pnanovdb_lower_handle_t child = pnanovdb_upper_get_child(grid_type, buf, upper, n); + PNANOVDB_DEREF(acc).lower = child; + PNANOVDB_DEREF(acc).key = PNANOVDB_DEREF(ijk); + is_active = pnanovdb_lower_is_active_and_cache(grid_type, buf, child, ijk, acc); + } + else + { + is_active = pnanovdb_upper_get_value_mask(buf, upper, n); + } + return is_active; +} + +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_root_is_active_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc) +{ + pnanovdb_root_tile_handle_t tile = pnanovdb_root_find_tile(grid_type, buf, root, ijk); + pnanovdb_bool_t is_active; + if (pnanovdb_address_is_null(tile.address)) + { + is_active = PNANOVDB_FALSE; // background + } + else if (pnanovdb_int64_is_zero(pnanovdb_root_tile_get_child(buf, tile))) + { + pnanovdb_uint32_t state = pnanovdb_root_tile_get_state(buf, tile); + is_active = state != 0u; // tile value + } + else + { + pnanovdb_upper_handle_t child = pnanovdb_root_get_child(grid_type, buf, root, tile); + PNANOVDB_DEREF(acc).upper = child; + PNANOVDB_DEREF(acc).key = PNANOVDB_DEREF(ijk); + is_active = pnanovdb_upper_is_active_and_cache(grid_type, buf, child, ijk, acc); + } + return is_active; +} + +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_readaccessor_is_active(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ + int dirty = pnanovdb_readaccessor_computedirty(acc, ijk); + + pnanovdb_bool_t is_active; + if (pnanovdb_readaccessor_iscached0(acc, dirty)) + { + is_active = pnanovdb_leaf_is_active_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).leaf, ijk, acc); + } + else if (pnanovdb_readaccessor_iscached1(acc, dirty)) + { + 
is_active = pnanovdb_lower_is_active_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).lower, ijk, acc); + } + else if (pnanovdb_readaccessor_iscached2(acc, dirty)) + { + is_active = pnanovdb_upper_is_active_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).upper, ijk, acc); + } + else + { + is_active = pnanovdb_root_is_active_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).root, ijk, acc); + } + return is_active; +} + +// ------------------------------------------------ Map Transforms ----------------------------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_map_apply(pnanovdb_buf_t buf, pnanovdb_map_handle_t map, PNANOVDB_IN(pnanovdb_vec3_t) src) +{ + pnanovdb_vec3_t dst; + float sx = PNANOVDB_DEREF(src).x; + float sy = PNANOVDB_DEREF(src).y; + float sz = PNANOVDB_DEREF(src).z; + dst.x = sx * pnanovdb_map_get_matf(buf, map, 0) + sy * pnanovdb_map_get_matf(buf, map, 1) + sz * pnanovdb_map_get_matf(buf, map, 2) + pnanovdb_map_get_vecf(buf, map, 0); + dst.y = sx * pnanovdb_map_get_matf(buf, map, 3) + sy * pnanovdb_map_get_matf(buf, map, 4) + sz * pnanovdb_map_get_matf(buf, map, 5) + pnanovdb_map_get_vecf(buf, map, 1); + dst.z = sx * pnanovdb_map_get_matf(buf, map, 6) + sy * pnanovdb_map_get_matf(buf, map, 7) + sz * pnanovdb_map_get_matf(buf, map, 8) + pnanovdb_map_get_vecf(buf, map, 2); + return dst; +} + +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_map_apply_inverse(pnanovdb_buf_t buf, pnanovdb_map_handle_t map, PNANOVDB_IN(pnanovdb_vec3_t) src) +{ + pnanovdb_vec3_t dst; + float sx = PNANOVDB_DEREF(src).x - pnanovdb_map_get_vecf(buf, map, 0); + float sy = PNANOVDB_DEREF(src).y - pnanovdb_map_get_vecf(buf, map, 1); + float sz = PNANOVDB_DEREF(src).z - pnanovdb_map_get_vecf(buf, map, 2); + dst.x = sx * pnanovdb_map_get_invmatf(buf, map, 0) + sy * pnanovdb_map_get_invmatf(buf, map, 1) + sz * pnanovdb_map_get_invmatf(buf, map, 2); + dst.y = sx * pnanovdb_map_get_invmatf(buf, map, 3) + sy * pnanovdb_map_get_invmatf(buf, map, 4) + sz * pnanovdb_map_get_invmatf(buf, map, 5); + dst.z = sx * pnanovdb_map_get_invmatf(buf, map, 6) + sy * pnanovdb_map_get_invmatf(buf, map, 7) + sz * pnanovdb_map_get_invmatf(buf, map, 8); + return dst; +} + +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_map_apply_jacobi(pnanovdb_buf_t buf, pnanovdb_map_handle_t map, PNANOVDB_IN(pnanovdb_vec3_t) src) +{ + pnanovdb_vec3_t dst; + float sx = PNANOVDB_DEREF(src).x; + float sy = PNANOVDB_DEREF(src).y; + float sz = PNANOVDB_DEREF(src).z; + dst.x = sx * pnanovdb_map_get_matf(buf, map, 0) + sy * pnanovdb_map_get_matf(buf, map, 1) + sz * pnanovdb_map_get_matf(buf, map, 2); + dst.y = sx * pnanovdb_map_get_matf(buf, map, 3) + sy * pnanovdb_map_get_matf(buf, map, 4) + sz * pnanovdb_map_get_matf(buf, map, 5); + dst.z = sx * pnanovdb_map_get_matf(buf, map, 6) + sy * pnanovdb_map_get_matf(buf, map, 7) + sz * pnanovdb_map_get_matf(buf, map, 8); + return dst; +} + +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_map_apply_inverse_jacobi(pnanovdb_buf_t buf, pnanovdb_map_handle_t map, PNANOVDB_IN(pnanovdb_vec3_t) src) +{ + pnanovdb_vec3_t dst; + float sx = PNANOVDB_DEREF(src).x; + float sy = PNANOVDB_DEREF(src).y; + float sz = PNANOVDB_DEREF(src).z; + dst.x = sx * pnanovdb_map_get_invmatf(buf, map, 0) + sy * pnanovdb_map_get_invmatf(buf, map, 1) + sz * pnanovdb_map_get_invmatf(buf, map, 2); + dst.y = sx * pnanovdb_map_get_invmatf(buf, map, 3) + sy * pnanovdb_map_get_invmatf(buf, map, 4) + sz * pnanovdb_map_get_invmatf(buf, map, 5); + dst.z = sx * pnanovdb_map_get_invmatf(buf, map, 6) + sy * 
pnanovdb_map_get_invmatf(buf, map, 7) + sz * pnanovdb_map_get_invmatf(buf, map, 8); + return dst; +} + +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_grid_world_to_indexf(pnanovdb_buf_t buf, pnanovdb_grid_handle_t grid, PNANOVDB_IN(pnanovdb_vec3_t) src) +{ + pnanovdb_map_handle_t map = pnanovdb_grid_get_map(buf, grid); + return pnanovdb_map_apply_inverse(buf, map, src); +} + +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_grid_index_to_worldf(pnanovdb_buf_t buf, pnanovdb_grid_handle_t grid, PNANOVDB_IN(pnanovdb_vec3_t) src) +{ + pnanovdb_map_handle_t map = pnanovdb_grid_get_map(buf, grid); + return pnanovdb_map_apply(buf, map, src); +} + +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_grid_world_to_index_dirf(pnanovdb_buf_t buf, pnanovdb_grid_handle_t grid, PNANOVDB_IN(pnanovdb_vec3_t) src) +{ + pnanovdb_map_handle_t map = pnanovdb_grid_get_map(buf, grid); + return pnanovdb_map_apply_inverse_jacobi(buf, map, src); +} + +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_grid_index_to_world_dirf(pnanovdb_buf_t buf, pnanovdb_grid_handle_t grid, PNANOVDB_IN(pnanovdb_vec3_t) src) +{ + pnanovdb_map_handle_t map = pnanovdb_grid_get_map(buf, grid); + return pnanovdb_map_apply_jacobi(buf, map, src); +} + +// ------------------------------------------------ DitherLUT ----------------------------------------------------------- + +// This table was generated with +/************** + +static constexpr inline uint32 +SYSwang_inthash(uint32 key) +{ + // From http://www.concentric.net/~Ttwang/tech/inthash.htm + key += ~(key << 16); + key ^= (key >> 5); + key += (key << 3); + key ^= (key >> 13); + key += ~(key << 9); + key ^= (key >> 17); + return key; +} + +static void +ut_initDitherR(float *pattern, float offset, + int x, int y, int z, int res, int goalres) +{ + // These offsets are designed to maximize the difference between + // dither values in nearby voxels within a given 2x2x2 cell, without + // producing axis-aligned artifacts. The are organized in row-major + // order. + static const float theDitherOffset[] = {0,4,6,2,5,1,3,7}; + static const float theScale = 0.125F; + int key = (((z << res) + y) << res) + x; + + if (res == goalres) + { + pattern[key] = offset; + return; + } + + // Randomly flip (on each axis) the dithering patterns used by the + // subcells. This key is xor'd with the subcell index below before + // looking up in the dither offset list. + key = SYSwang_inthash(key) & 7; + + x <<= 1; + y <<= 1; + z <<= 1; + + offset *= theScale; + for (int i = 0; i < 8; i++) + ut_initDitherR(pattern, offset+theDitherOffset[i ^ key]*theScale, + x+(i&1), y+((i&2)>>1), z+((i&4)>>2), res+1, goalres); +} + +// This is a compact algorithm that accomplishes essentially the same thing +// as ut_initDither() above. We should eventually switch to use this and +// clean the dead code. +static fpreal32 * +ut_initDitherRecursive(int goalres) +{ + const int nfloat = 1 << (goalres*3); + float *pattern = new float[nfloat]; + ut_initDitherR(pattern, 1.0F, 0, 0, 0, 0, goalres); + + // This has built an even spacing from 1/nfloat to 1.0. + // however, our dither pattern should be 1/(nfloat+1) to nfloat/(nfloat+1) + // So we do a correction here. Note that the earlier calculations are + // done with powers of 2 so are exact, so it does make sense to delay + // the renormalization to this pass. 
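+    // For example, the call ut_initDitherRecursive(3) below produces nfloat = 512
+    // entries spanning 1/512, 2/512, ..., 512/512; multiplying by the correction
+    // term nfloat/(nfloat+1) rescales them to 1/513, ..., 512/513, so the lookup
+    // table below never contains exactly 0 or 1.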
+ float correctionterm = nfloat / (nfloat+1.0F); + for (int i = 0; i < nfloat; i++) + pattern[i] *= correctionterm; + return pattern; +} + + theDitherMatrix = ut_initDitherRecursive(3); + + for (int i = 0; i < 512/8; i ++) + { + for (int j = 0; j < 8; j ++) + std::cout << theDitherMatrix[i*8+j] << "f, "; + std::cout << std::endl; + } + + **************/ + +PNANOVDB_STATIC_CONST float pnanovdb_dither_lut[512] = +{ + 0.14425f, 0.643275f, 0.830409f, 0.331384f, 0.105263f, 0.604289f, 0.167641f, 0.666667f, + 0.892788f, 0.393762f, 0.0818713f, 0.580897f, 0.853801f, 0.354776f, 0.916179f, 0.417154f, + 0.612086f, 0.11306f, 0.79922f, 0.300195f, 0.510721f, 0.0116959f, 0.947368f, 0.448343f, + 0.362573f, 0.861598f, 0.0506823f, 0.549708f, 0.261209f, 0.760234f, 0.19883f, 0.697856f, + 0.140351f, 0.639376f, 0.576998f, 0.0779727f, 0.522417f, 0.0233918f, 0.460039f, 0.959064f, + 0.888889f, 0.389864f, 0.327485f, 0.826511f, 0.272904f, 0.77193f, 0.709552f, 0.210526f, + 0.483431f, 0.982456f, 0.296296f, 0.795322f, 0.116959f, 0.615984f, 0.0545809f, 0.553606f, + 0.732943f, 0.233918f, 0.545809f, 0.0467836f, 0.865497f, 0.366472f, 0.803119f, 0.304094f, + 0.518519f, 0.0194932f, 0.45614f, 0.955166f, 0.729045f, 0.230019f, 0.54191f, 0.042885f, + 0.269006f, 0.768031f, 0.705653f, 0.206628f, 0.479532f, 0.978558f, 0.292398f, 0.791423f, + 0.237817f, 0.736842f, 0.424951f, 0.923977f, 0.136452f, 0.635478f, 0.323587f, 0.822612f, + 0.986355f, 0.487329f, 0.674464f, 0.175439f, 0.88499f, 0.385965f, 0.573099f, 0.0740741f, + 0.51462f, 0.0155945f, 0.202729f, 0.701754f, 0.148148f, 0.647174f, 0.834308f, 0.335283f, + 0.265107f, 0.764133f, 0.951267f, 0.452242f, 0.896686f, 0.397661f, 0.08577f, 0.584795f, + 0.8577f, 0.358674f, 0.920078f, 0.421053f, 0.740741f, 0.241715f, 0.678363f, 0.179337f, + 0.109162f, 0.608187f, 0.17154f, 0.670565f, 0.491228f, 0.990253f, 0.42885f, 0.927875f, + 0.0662768f, 0.565302f, 0.62768f, 0.128655f, 0.183236f, 0.682261f, 0.744639f, 0.245614f, + 0.814815f, 0.315789f, 0.378168f, 0.877193f, 0.931774f, 0.432749f, 0.495127f, 0.994152f, + 0.0350877f, 0.534113f, 0.97076f, 0.471735f, 0.214425f, 0.71345f, 0.526316f, 0.0272904f, + 0.783626f, 0.2846f, 0.222222f, 0.721248f, 0.962963f, 0.463938f, 0.276803f, 0.775828f, + 0.966862f, 0.467836f, 0.405458f, 0.904483f, 0.0701754f, 0.569201f, 0.881092f, 0.382066f, + 0.218324f, 0.717349f, 0.654971f, 0.155945f, 0.818713f, 0.319688f, 0.132554f, 0.631579f, + 0.0623782f, 0.561404f, 0.748538f, 0.249513f, 0.912281f, 0.413255f, 0.974659f, 0.475634f, + 0.810916f, 0.311891f, 0.499025f, 0.998051f, 0.163743f, 0.662768f, 0.226121f, 0.725146f, + 0.690058f, 0.191033f, 0.00389864f, 0.502924f, 0.557505f, 0.0584795f, 0.120858f, 0.619883f, + 0.440546f, 0.939571f, 0.752437f, 0.253411f, 0.307992f, 0.807018f, 0.869396f, 0.37037f, + 0.658869f, 0.159844f, 0.346979f, 0.846004f, 0.588694f, 0.0896686f, 0.152047f, 0.651072f, + 0.409357f, 0.908382f, 0.596491f, 0.0974659f, 0.339181f, 0.838207f, 0.900585f, 0.401559f, + 0.34308f, 0.842105f, 0.779727f, 0.280702f, 0.693957f, 0.194932f, 0.25731f, 0.756335f, + 0.592593f, 0.0935673f, 0.0311891f, 0.530214f, 0.444444f, 0.94347f, 0.506823f, 0.00779727f, + 0.68616f, 0.187135f, 0.124756f, 0.623782f, 0.288499f, 0.787524f, 0.350877f, 0.849903f, + 0.436647f, 0.935673f, 0.873294f, 0.374269f, 0.538012f, 0.0389864f, 0.60039f, 0.101365f, + 0.57115f, 0.0721248f, 0.758285f, 0.259259f, 0.719298f, 0.220273f, 0.532164f, 0.0331384f, + 0.321637f, 0.820663f, 0.00974659f, 0.508772f, 0.469786f, 0.968811f, 0.282651f, 0.781676f, + 0.539961f, 0.0409357f, 0.727096f, 0.22807f, 0.500975f, 0.00194932f, 
0.563353f, 0.0643275f, + 0.290448f, 0.789474f, 0.477583f, 0.976608f, 0.251462f, 0.750487f, 0.31384f, 0.812865f, + 0.94152f, 0.442495f, 0.879142f, 0.380117f, 0.37232f, 0.871345f, 0.309942f, 0.808967f, + 0.192982f, 0.692008f, 0.130604f, 0.62963f, 0.621832f, 0.122807f, 0.559454f, 0.0604289f, + 0.660819f, 0.161793f, 0.723197f, 0.224172f, 0.403509f, 0.902534f, 0.840156f, 0.341131f, + 0.411306f, 0.910331f, 0.473684f, 0.97271f, 0.653021f, 0.153996f, 0.0916179f, 0.590643f, + 0.196881f, 0.695906f, 0.384016f, 0.883041f, 0.0955166f, 0.594542f, 0.157895f, 0.65692f, + 0.945419f, 0.446394f, 0.633528f, 0.134503f, 0.844055f, 0.345029f, 0.906433f, 0.407407f, + 0.165692f, 0.664717f, 0.103314f, 0.602339f, 0.126706f, 0.625731f, 0.189084f, 0.688109f, + 0.91423f, 0.415205f, 0.851852f, 0.352827f, 0.875244f, 0.376218f, 0.937622f, 0.438596f, + 0.317739f, 0.816764f, 0.255361f, 0.754386f, 0.996101f, 0.497076f, 0.933723f, 0.434698f, + 0.567251f, 0.0682261f, 0.504873f, 0.00584795f, 0.247563f, 0.746589f, 0.185185f, 0.684211f, + 0.037037f, 0.536062f, 0.0994152f, 0.598441f, 0.777778f, 0.278752f, 0.465887f, 0.964912f, + 0.785575f, 0.28655f, 0.847953f, 0.348928f, 0.0292398f, 0.528265f, 0.7154f, 0.216374f, + 0.39961f, 0.898636f, 0.961014f, 0.461988f, 0.0487329f, 0.547758f, 0.111111f, 0.610136f, + 0.649123f, 0.150097f, 0.212476f, 0.711501f, 0.797271f, 0.298246f, 0.859649f, 0.360624f, + 0.118908f, 0.617934f, 0.0565302f, 0.555556f, 0.329435f, 0.82846f, 0.516569f, 0.0175439f, + 0.867446f, 0.368421f, 0.805068f, 0.306043f, 0.578947f, 0.079922f, 0.267057f, 0.766082f, + 0.270955f, 0.76998f, 0.707602f, 0.208577f, 0.668616f, 0.169591f, 0.606238f, 0.107212f, + 0.520468f, 0.0214425f, 0.45809f, 0.957115f, 0.419103f, 0.918129f, 0.356725f, 0.855751f, + 0.988304f, 0.489279f, 0.426901f, 0.925926f, 0.450292f, 0.949318f, 0.512671f, 0.0136452f, + 0.239766f, 0.738791f, 0.676413f, 0.177388f, 0.699805f, 0.20078f, 0.263158f, 0.762183f, + 0.773879f, 0.274854f, 0.337232f, 0.836257f, 0.672515f, 0.173489f, 0.734893f, 0.235867f, + 0.0253411f, 0.524366f, 0.586745f, 0.0877193f, 0.423002f, 0.922027f, 0.48538f, 0.984405f, + 0.74269f, 0.243665f, 0.680312f, 0.181287f, 0.953216f, 0.454191f, 0.1423f, 0.641326f, + 0.493177f, 0.992203f, 0.430799f, 0.929825f, 0.204678f, 0.703704f, 0.890838f, 0.391813f, + 0.894737f, 0.395712f, 0.0838207f, 0.582846f, 0.0448343f, 0.54386f, 0.231969f, 0.730994f, + 0.146199f, 0.645224f, 0.832359f, 0.333333f, 0.793372f, 0.294347f, 0.980507f, 0.481481f, + 0.364522f, 0.863548f, 0.80117f, 0.302144f, 0.824561f, 0.325536f, 0.138402f, 0.637427f, + 0.614035f, 0.11501f, 0.0526316f, 0.551657f, 0.0760234f, 0.575049f, 0.88694f, 0.387914f, +}; + +PNANOVDB_FORCE_INLINE float pnanovdb_dither_lookup(pnanovdb_bool_t enabled, int offset) +{ + return enabled ? 
pnanovdb_dither_lut[offset & 511] : 0.5f; +} + +// ------------------------------------------------ HDDA ----------------------------------------------------------- + +#ifdef PNANOVDB_HDDA + +// Comment out to disable this explicit round-off check +#define PNANOVDB_ENFORCE_FORWARD_STEPPING + +#define PNANOVDB_HDDA_FLOAT_MAX 1e38f + +struct pnanovdb_hdda_t +{ + pnanovdb_int32_t dim; + float tmin; + float tmax; + pnanovdb_coord_t voxel; + pnanovdb_coord_t step; + pnanovdb_vec3_t delta; + pnanovdb_vec3_t next; +}; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_hdda_t) + +PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_hdda_pos_to_ijk(PNANOVDB_IN(pnanovdb_vec3_t) pos) +{ + pnanovdb_coord_t voxel; + voxel.x = pnanovdb_float_to_int32(pnanovdb_floor(PNANOVDB_DEREF(pos).x)); + voxel.y = pnanovdb_float_to_int32(pnanovdb_floor(PNANOVDB_DEREF(pos).y)); + voxel.z = pnanovdb_float_to_int32(pnanovdb_floor(PNANOVDB_DEREF(pos).z)); + return voxel; +} + +PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_hdda_pos_to_voxel(PNANOVDB_IN(pnanovdb_vec3_t) pos, int dim) +{ + pnanovdb_coord_t voxel; + voxel.x = pnanovdb_float_to_int32(pnanovdb_floor(PNANOVDB_DEREF(pos).x)) & (~(dim - 1)); + voxel.y = pnanovdb_float_to_int32(pnanovdb_floor(PNANOVDB_DEREF(pos).y)) & (~(dim - 1)); + voxel.z = pnanovdb_float_to_int32(pnanovdb_floor(PNANOVDB_DEREF(pos).z)) & (~(dim - 1)); + return voxel; +} + +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_hdda_ray_start(PNANOVDB_IN(pnanovdb_vec3_t) origin, float tmin, PNANOVDB_IN(pnanovdb_vec3_t) direction) +{ + pnanovdb_vec3_t pos = pnanovdb_vec3_add( + pnanovdb_vec3_mul(PNANOVDB_DEREF(direction), pnanovdb_vec3_uniform(tmin)), + PNANOVDB_DEREF(origin) + ); + return pos; +} + +PNANOVDB_FORCE_INLINE void pnanovdb_hdda_init(PNANOVDB_INOUT(pnanovdb_hdda_t) hdda, PNANOVDB_IN(pnanovdb_vec3_t) origin, float tmin, PNANOVDB_IN(pnanovdb_vec3_t) direction, float tmax, int dim) +{ + PNANOVDB_DEREF(hdda).dim = dim; + PNANOVDB_DEREF(hdda).tmin = tmin; + PNANOVDB_DEREF(hdda).tmax = tmax; + + pnanovdb_vec3_t pos = pnanovdb_hdda_ray_start(origin, tmin, direction); + pnanovdb_vec3_t dir_inv = pnanovdb_vec3_div(pnanovdb_vec3_uniform(1.f), PNANOVDB_DEREF(direction)); + + PNANOVDB_DEREF(hdda).voxel = pnanovdb_hdda_pos_to_voxel(PNANOVDB_REF(pos), dim); + + // x + if (PNANOVDB_DEREF(direction).x == 0.f) + { + PNANOVDB_DEREF(hdda).next.x = PNANOVDB_HDDA_FLOAT_MAX; + PNANOVDB_DEREF(hdda).step.x = 0; + PNANOVDB_DEREF(hdda).delta.x = 0.f; + } + else if (dir_inv.x > 0.f) + { + PNANOVDB_DEREF(hdda).step.x = 1; + PNANOVDB_DEREF(hdda).next.x = PNANOVDB_DEREF(hdda).tmin + (PNANOVDB_DEREF(hdda).voxel.x + dim - pos.x) * dir_inv.x; + PNANOVDB_DEREF(hdda).delta.x = dir_inv.x; + } + else + { + PNANOVDB_DEREF(hdda).step.x = -1; + PNANOVDB_DEREF(hdda).next.x = PNANOVDB_DEREF(hdda).tmin + (PNANOVDB_DEREF(hdda).voxel.x - pos.x) * dir_inv.x; + PNANOVDB_DEREF(hdda).delta.x = -dir_inv.x; + } + + // y + if (PNANOVDB_DEREF(direction).y == 0.f) + { + PNANOVDB_DEREF(hdda).next.y = PNANOVDB_HDDA_FLOAT_MAX; + PNANOVDB_DEREF(hdda).step.y = 0; + PNANOVDB_DEREF(hdda).delta.y = 0.f; + } + else if (dir_inv.y > 0.f) + { + PNANOVDB_DEREF(hdda).step.y = 1; + PNANOVDB_DEREF(hdda).next.y = PNANOVDB_DEREF(hdda).tmin + (PNANOVDB_DEREF(hdda).voxel.y + dim - pos.y) * dir_inv.y; + PNANOVDB_DEREF(hdda).delta.y = dir_inv.y; + } + else + { + PNANOVDB_DEREF(hdda).step.y = -1; + PNANOVDB_DEREF(hdda).next.y = PNANOVDB_DEREF(hdda).tmin + (PNANOVDB_DEREF(hdda).voxel.y - pos.y) * dir_inv.y; + PNANOVDB_DEREF(hdda).delta.y = -dir_inv.y; + } + + // z + if 
(PNANOVDB_DEREF(direction).z == 0.f) + { + PNANOVDB_DEREF(hdda).next.z = PNANOVDB_HDDA_FLOAT_MAX; + PNANOVDB_DEREF(hdda).step.z = 0; + PNANOVDB_DEREF(hdda).delta.z = 0.f; + } + else if (dir_inv.z > 0.f) + { + PNANOVDB_DEREF(hdda).step.z = 1; + PNANOVDB_DEREF(hdda).next.z = PNANOVDB_DEREF(hdda).tmin + (PNANOVDB_DEREF(hdda).voxel.z + dim - pos.z) * dir_inv.z; + PNANOVDB_DEREF(hdda).delta.z = dir_inv.z; + } + else + { + PNANOVDB_DEREF(hdda).step.z = -1; + PNANOVDB_DEREF(hdda).next.z = PNANOVDB_DEREF(hdda).tmin + (PNANOVDB_DEREF(hdda).voxel.z - pos.z) * dir_inv.z; + PNANOVDB_DEREF(hdda).delta.z = -dir_inv.z; + } +} + +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_hdda_update(PNANOVDB_INOUT(pnanovdb_hdda_t) hdda, PNANOVDB_IN(pnanovdb_vec3_t) origin, PNANOVDB_IN(pnanovdb_vec3_t) direction, int dim) +{ + if (PNANOVDB_DEREF(hdda).dim == dim) + { + return PNANOVDB_FALSE; + } + PNANOVDB_DEREF(hdda).dim = dim; + + pnanovdb_vec3_t pos = pnanovdb_vec3_add( + pnanovdb_vec3_mul(PNANOVDB_DEREF(direction), pnanovdb_vec3_uniform(PNANOVDB_DEREF(hdda).tmin)), + PNANOVDB_DEREF(origin) + ); + pnanovdb_vec3_t dir_inv = pnanovdb_vec3_div(pnanovdb_vec3_uniform(1.f), PNANOVDB_DEREF(direction)); + + PNANOVDB_DEREF(hdda).voxel = pnanovdb_hdda_pos_to_voxel(PNANOVDB_REF(pos), dim); + + if (PNANOVDB_DEREF(hdda).step.x != 0) + { + PNANOVDB_DEREF(hdda).next.x = PNANOVDB_DEREF(hdda).tmin + (PNANOVDB_DEREF(hdda).voxel.x - pos.x) * dir_inv.x; + if (PNANOVDB_DEREF(hdda).step.x > 0) + { + PNANOVDB_DEREF(hdda).next.x += dim * dir_inv.x; + } + } + if (PNANOVDB_DEREF(hdda).step.y != 0) + { + PNANOVDB_DEREF(hdda).next.y = PNANOVDB_DEREF(hdda).tmin + (PNANOVDB_DEREF(hdda).voxel.y - pos.y) * dir_inv.y; + if (PNANOVDB_DEREF(hdda).step.y > 0) + { + PNANOVDB_DEREF(hdda).next.y += dim * dir_inv.y; + } + } + if (PNANOVDB_DEREF(hdda).step.z != 0) + { + PNANOVDB_DEREF(hdda).next.z = PNANOVDB_DEREF(hdda).tmin + (PNANOVDB_DEREF(hdda).voxel.z - pos.z) * dir_inv.z; + if (PNANOVDB_DEREF(hdda).step.z > 0) + { + PNANOVDB_DEREF(hdda).next.z += dim * dir_inv.z; + } + } + + return PNANOVDB_TRUE; +} + +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_hdda_step(PNANOVDB_INOUT(pnanovdb_hdda_t) hdda) +{ + pnanovdb_bool_t ret; + if (PNANOVDB_DEREF(hdda).next.x < PNANOVDB_DEREF(hdda).next.y && PNANOVDB_DEREF(hdda).next.x < PNANOVDB_DEREF(hdda).next.z) + { +#ifdef PNANOVDB_ENFORCE_FORWARD_STEPPING + if (PNANOVDB_DEREF(hdda).next.x <= PNANOVDB_DEREF(hdda).tmin) + { + PNANOVDB_DEREF(hdda).next.x += PNANOVDB_DEREF(hdda).tmin - 0.999999f * PNANOVDB_DEREF(hdda).next.x + 1.0e-6f; + } +#endif + PNANOVDB_DEREF(hdda).tmin = PNANOVDB_DEREF(hdda).next.x; + PNANOVDB_DEREF(hdda).next.x += PNANOVDB_DEREF(hdda).dim * PNANOVDB_DEREF(hdda).delta.x; + PNANOVDB_DEREF(hdda).voxel.x += PNANOVDB_DEREF(hdda).dim * PNANOVDB_DEREF(hdda).step.x; + ret = PNANOVDB_DEREF(hdda).tmin <= PNANOVDB_DEREF(hdda).tmax; + } + else if (PNANOVDB_DEREF(hdda).next.y < PNANOVDB_DEREF(hdda).next.z) + { +#ifdef PNANOVDB_ENFORCE_FORWARD_STEPPING + if (PNANOVDB_DEREF(hdda).next.y <= PNANOVDB_DEREF(hdda).tmin) + { + PNANOVDB_DEREF(hdda).next.y += PNANOVDB_DEREF(hdda).tmin - 0.999999f * PNANOVDB_DEREF(hdda).next.y + 1.0e-6f; + } +#endif + PNANOVDB_DEREF(hdda).tmin = PNANOVDB_DEREF(hdda).next.y; + PNANOVDB_DEREF(hdda).next.y += PNANOVDB_DEREF(hdda).dim * PNANOVDB_DEREF(hdda).delta.y; + PNANOVDB_DEREF(hdda).voxel.y += PNANOVDB_DEREF(hdda).dim * PNANOVDB_DEREF(hdda).step.y; + ret = PNANOVDB_DEREF(hdda).tmin <= PNANOVDB_DEREF(hdda).tmax; + } + else + { +#ifdef PNANOVDB_ENFORCE_FORWARD_STEPPING + if 
(PNANOVDB_DEREF(hdda).next.z <= PNANOVDB_DEREF(hdda).tmin) + { + PNANOVDB_DEREF(hdda).next.z += PNANOVDB_DEREF(hdda).tmin - 0.999999f * PNANOVDB_DEREF(hdda).next.z + 1.0e-6f; + } +#endif + PNANOVDB_DEREF(hdda).tmin = PNANOVDB_DEREF(hdda).next.z; + PNANOVDB_DEREF(hdda).next.z += PNANOVDB_DEREF(hdda).dim * PNANOVDB_DEREF(hdda).delta.z; + PNANOVDB_DEREF(hdda).voxel.z += PNANOVDB_DEREF(hdda).dim * PNANOVDB_DEREF(hdda).step.z; + ret = PNANOVDB_DEREF(hdda).tmin <= PNANOVDB_DEREF(hdda).tmax; + } + return ret; +} + +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_hdda_ray_clip( + PNANOVDB_IN(pnanovdb_vec3_t) bbox_min, + PNANOVDB_IN(pnanovdb_vec3_t) bbox_max, + PNANOVDB_IN(pnanovdb_vec3_t) origin, PNANOVDB_INOUT(float) tmin, + PNANOVDB_IN(pnanovdb_vec3_t) direction, PNANOVDB_INOUT(float) tmax +) +{ + pnanovdb_vec3_t dir_inv = pnanovdb_vec3_div(pnanovdb_vec3_uniform(1.f), PNANOVDB_DEREF(direction)); + pnanovdb_vec3_t t0 = pnanovdb_vec3_mul(pnanovdb_vec3_sub(PNANOVDB_DEREF(bbox_min), PNANOVDB_DEREF(origin)), dir_inv); + pnanovdb_vec3_t t1 = pnanovdb_vec3_mul(pnanovdb_vec3_sub(PNANOVDB_DEREF(bbox_max), PNANOVDB_DEREF(origin)), dir_inv); + pnanovdb_vec3_t tmin3 = pnanovdb_vec3_min(t0, t1); + pnanovdb_vec3_t tmax3 = pnanovdb_vec3_max(t0, t1); + float tnear = pnanovdb_max(tmin3.x, pnanovdb_max(tmin3.y, tmin3.z)); + float tfar = pnanovdb_min(tmax3.x, pnanovdb_min(tmax3.y, tmax3.z)); + pnanovdb_bool_t hit = tnear <= tfar; + PNANOVDB_DEREF(tmin) = pnanovdb_max(PNANOVDB_DEREF(tmin), tnear); + PNANOVDB_DEREF(tmax) = pnanovdb_min(PNANOVDB_DEREF(tmax), tfar); + return hit; +} + +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_hdda_zero_crossing( + pnanovdb_grid_type_t grid_type, + pnanovdb_buf_t buf, + PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, + PNANOVDB_IN(pnanovdb_vec3_t) origin, float tmin, + PNANOVDB_IN(pnanovdb_vec3_t) direction, float tmax, + PNANOVDB_INOUT(float) thit, + PNANOVDB_INOUT(float) v +) +{ + pnanovdb_coord_t bbox_min = pnanovdb_root_get_bbox_min(buf, PNANOVDB_DEREF(acc).root); + pnanovdb_coord_t bbox_max = pnanovdb_root_get_bbox_max(buf, PNANOVDB_DEREF(acc).root); + pnanovdb_vec3_t bbox_minf = pnanovdb_coord_to_vec3(bbox_min); + pnanovdb_vec3_t bbox_maxf = pnanovdb_coord_to_vec3(pnanovdb_coord_add(bbox_max, pnanovdb_coord_uniform(1))); + + pnanovdb_bool_t hit = pnanovdb_hdda_ray_clip(PNANOVDB_REF(bbox_minf), PNANOVDB_REF(bbox_maxf), origin, PNANOVDB_REF(tmin), direction, PNANOVDB_REF(tmax)); + if (!hit || tmax > 1.0e20f) + { + return PNANOVDB_FALSE; + } + + pnanovdb_vec3_t pos = pnanovdb_hdda_ray_start(origin, tmin, direction); + pnanovdb_coord_t ijk = pnanovdb_hdda_pos_to_ijk(PNANOVDB_REF(pos)); + + pnanovdb_address_t address = pnanovdb_readaccessor_get_value_address(PNANOVDB_GRID_TYPE_FLOAT, buf, acc, PNANOVDB_REF(ijk)); + float v0 = pnanovdb_read_float(buf, address); + + pnanovdb_int32_t dim = pnanovdb_uint32_as_int32(pnanovdb_readaccessor_get_dim(PNANOVDB_GRID_TYPE_FLOAT, buf, acc, PNANOVDB_REF(ijk))); + pnanovdb_hdda_t hdda; + pnanovdb_hdda_init(PNANOVDB_REF(hdda), origin, tmin, direction, tmax, dim); + while (pnanovdb_hdda_step(PNANOVDB_REF(hdda))) + { + pnanovdb_vec3_t pos_start = pnanovdb_hdda_ray_start(origin, hdda.tmin + 1.0001f, direction); + ijk = pnanovdb_hdda_pos_to_ijk(PNANOVDB_REF(pos_start)); + dim = pnanovdb_uint32_as_int32(pnanovdb_readaccessor_get_dim(PNANOVDB_GRID_TYPE_FLOAT, buf, acc, PNANOVDB_REF(ijk))); + pnanovdb_hdda_update(PNANOVDB_REF(hdda), origin, direction, dim); + if (hdda.dim > 1 || !pnanovdb_readaccessor_is_active(grid_type, buf, acc, PNANOVDB_REF(ijk))) + { 
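+            // Still traversing at node granularity (dim > 1) or inside an inactive
+            // voxel: keep marching with the outer HDDA and skip the sign-change test.
+            // Only once an active leaf voxel is reached does the inner loop below step
+            // voxel-by-voxel and compare each value against v0.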
+ continue; + } + while (pnanovdb_hdda_step(PNANOVDB_REF(hdda)) && pnanovdb_readaccessor_is_active(grid_type, buf, acc, PNANOVDB_REF(hdda.voxel))) + { + ijk = hdda.voxel; + pnanovdb_address_t address = pnanovdb_readaccessor_get_value_address(PNANOVDB_GRID_TYPE_FLOAT, buf, acc, PNANOVDB_REF(ijk)); + PNANOVDB_DEREF(v) = pnanovdb_read_float(buf, address); + if (PNANOVDB_DEREF(v) * v0 < 0.f) + { + PNANOVDB_DEREF(thit) = hdda.tmin; + return PNANOVDB_TRUE; + } + } + } + return PNANOVDB_FALSE; +} + +#endif + +#endif // end of NANOVDB_PNANOVDB_H_HAS_BEEN_INCLUDED diff --git a/external/nanovdb/cuda/DeviceBuffer.h b/external/nanovdb/cuda/DeviceBuffer.h new file mode 100644 index 00000000..465bd9dc --- /dev/null +++ b/external/nanovdb/cuda/DeviceBuffer.h @@ -0,0 +1,231 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +/*! + \file DeviceBuffer.h + + \author Ken Museth + + \date January 8, 2020 + + \brief Implements a simple dual (host/device) CUDA buffer. + + \note This file has no device-only kernel functions, + which explains why it's a .h and not .cuh file. +*/ + +#ifndef NANOVDB_CUDA_DEVICEBUFFER_H_HAS_BEEN_INCLUDED +#define NANOVDB_CUDA_DEVICEBUFFER_H_HAS_BEEN_INCLUDED + +#include // for BufferTraits +#include // for cudaMalloc/cudaMallocManaged/cudaFree + +namespace nanovdb {// ================================================================ + +namespace cuda {// =================================================================== + +// ----------------------------> DeviceBuffer <-------------------------------------- + +/// @brief Simple memory buffer using un-managed pinned host memory when compiled with NVCC. +/// Obviously this class is making explicit used of CUDA so replace it with your own memory +/// allocator if you are not using CUDA. +/// @note While CUDA's pinned host memory allows for asynchronous memory copy between host and device +/// it is significantly slower then cached (un-pinned) memory on the host. 
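+//
+// A minimal usage sketch (illustration only, based on the member functions
+// declared below; numBytes is a placeholder for the caller's buffer size):
+//
+//     auto buf = nanovdb::cuda::DeviceBuffer::create(numBytes); // pinned host memory only
+//     /* ... fill buf.data() on the CPU ... */
+//     buf.deviceUpload();   // allocates the device copy on first use, then copies CPU -> GPU
+//     /* ... pass buf.deviceData() to CUDA kernels ... */
+//     buf.deviceDownload(); // copies GPU -> CPU
+//     buf.clear();          // frees both copies (also done by the destructor)
+//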
+class DeviceBuffer +{ + uint64_t mSize; // total number of bytes managed by this buffer (assumed to be identical for host and device) + void *mCpuData, *mGpuData; // raw pointers to the host and device buffers + bool mManaged; + +public: + /// @brief Static factory method that return an instance of this buffer + /// @param size byte size of buffer to be initialized + /// @param dummy this argument is currently ignored but required to match the API of the HostBuffer + /// @param host If true buffer is initialized only on the host/CPU, else on the device/GPU + /// @param stream optional stream argument (defaults to stream NULL) + /// @return An instance of this class using move semantics + static DeviceBuffer create(uint64_t size, const DeviceBuffer* dummy = nullptr, bool host = true, void* stream = nullptr); + + /// @brief Static factory method that return an instance of this buffer that wraps externally managed memory + /// @param size byte size of buffer specified by external memory + /// @param cpuData pointer to externally managed host memory + /// @param gpuData pointer to externally managed device memory + /// @return An instance of this class using move semantics + static DeviceBuffer create(uint64_t size, void* cpuData, void* gpuData); + + /// @brief Constructor + /// @param size byte size of buffer to be initialized + /// @param host If true buffer is initialized only on the host/CPU, else on the device/GPU + /// @param stream optional stream argument (defaults to stream NULL) + DeviceBuffer(uint64_t size = 0, bool host = true, void* stream = nullptr) + : mSize(0) + , mCpuData(nullptr) + , mGpuData(nullptr) + , mManaged(false) + { + if (size > 0) this->init(size, host, stream); + } + + DeviceBuffer(uint64_t size, void* cpuData, void* gpuData) + : mSize(size) + , mCpuData(cpuData) + , mGpuData(gpuData) + , mManaged(false) + { + } + + /// @brief Disallow copy-construction + DeviceBuffer(const DeviceBuffer&) = delete; + + /// @brief Move copy-constructor + DeviceBuffer(DeviceBuffer&& other) noexcept + : mSize(other.mSize) + , mCpuData(other.mCpuData) + , mGpuData(other.mGpuData) + , mManaged(other.mManaged) + { + other.mSize = 0; + other.mCpuData = nullptr; + other.mGpuData = nullptr; + other.mManaged = false; + } + + /// @brief Disallow copy assignment operation + DeviceBuffer& operator=(const DeviceBuffer&) = delete; + + /// @brief Move copy assignment operation + DeviceBuffer& operator=(DeviceBuffer&& other) noexcept + { + this->clear(); + mSize = other.mSize; + mCpuData = other.mCpuData; + mGpuData = other.mGpuData; + mManaged = other.mManaged; + other.mSize = 0; + other.mCpuData = nullptr; + other.mGpuData = nullptr; + other.mManaged = false; + return *this; + } + + /// @brief Destructor frees memory on both the host and device + ~DeviceBuffer() { this->clear(); }; + + /// @brief Initialize buffer + /// @param size byte size of buffer to be initialized + /// @param host If true buffer is initialized only on the host/CPU, else on the device/GPU + /// @note All existing buffers are first cleared + /// @warning size is expected to be non-zero. Use clear() clear buffer! + void init(uint64_t size, bool host = true, void* stream = nullptr); + + /// @brief Retuns a raw pointer to the host/CPU buffer managed by this allocator. + /// @warning Note that the pointer can be NULL! + void* data() const { return mCpuData; } + + /// @brief Retuns a raw pointer to the device/GPU buffer managed by this allocator. + /// @warning Note that the pointer can be NULL! 
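+    /// @note The device pointer is only non-NULL after the buffer was created with
+    /// host=false, after a call to deviceUpload(), or when the buffer wraps
+    /// externally managed device memory.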
+ void* deviceData() const { return mGpuData; } + + /// @brief Upload this buffer from the host to the device, i.e. CPU -> GPU. + /// @param stream optional CUDA stream (defaults to CUDA stream 0) + /// @param sync if false the memory copy is asynchronous + /// @note If the device/GPU buffer does not exist it is first allocated + /// @warning Assumes that the host/CPU buffer already exists + void deviceUpload(void* stream = nullptr, bool sync = true) const; + + /// @brief Upload this buffer from the device to the host, i.e. GPU -> CPU. + /// @param stream optional CUDA stream (defaults to CUDA stream 0) + /// @param sync if false the memory copy is asynchronous + /// @note If the host/CPU buffer does not exist it is first allocated + /// @warning Assumes that the device/GPU buffer already exists + void deviceDownload(void* stream = nullptr, bool sync = true) const; + + /// @brief Returns the size in bytes of the raw memory buffer managed by this allocator. + uint64_t size() const { return mSize; } + + //@{ + /// @brief Returns true if this allocator is empty, i.e. has no allocated memory + bool empty() const { return mSize == 0; } + bool isEmpty() const { return mSize == 0; } + //@} + + /// @brief De-allocate all memory managed by this allocator and set all pointers to NULL + void clear(void* stream = nullptr); + +}; // DeviceBuffer class + +// --------------------------> Implementations below <------------------------------------ + +inline DeviceBuffer DeviceBuffer::create(uint64_t size, const DeviceBuffer*, bool host, void* stream) +{ + return DeviceBuffer(size, host, stream); +} + +inline DeviceBuffer DeviceBuffer::create(uint64_t size, void* cpuData, void* gpuData) +{ + return DeviceBuffer(size, cpuData, gpuData); +} + +inline void DeviceBuffer::init(uint64_t size, bool host, void* stream) +{ + if (mSize>0) this->clear(stream); + NANOVDB_ASSERT(size > 0); + if (host) { + cudaCheck(cudaMallocHost((void**)&mCpuData, size)); // un-managed pinned memory on the host (can be slow to access!). Always 32B aligned + checkPtr(mCpuData, "cuda::DeviceBuffer::init: failed to allocate host buffer"); + } else { + cudaCheck(util::cuda::mallocAsync((void**)&mGpuData, size, reinterpret_cast(stream))); // un-managed memory on the device, always 32B aligned! + checkPtr(mGpuData, "cuda::DeviceBuffer::init: failed to allocate device buffer"); + } + mSize = size; + mManaged = true; +} // DeviceBuffer::init + +inline void DeviceBuffer::deviceUpload(void* stream, bool sync) const +{ + if (!mManaged) throw std::runtime_error("DeviceBuffer::deviceUpload called on externally managed memory. Replace deviceUpload call with the appropriate external copy operation."); + + checkPtr(mCpuData, "uninitialized cpu data"); + if (mGpuData == nullptr) { + cudaCheck(util::cuda::mallocAsync((void**)&mGpuData, mSize, reinterpret_cast(stream))); // un-managed memory on the device, always 32B aligned! + } + checkPtr(mGpuData, "uninitialized gpu data"); + cudaCheck(cudaMemcpyAsync(mGpuData, mCpuData, mSize, cudaMemcpyHostToDevice, reinterpret_cast(stream))); + if (sync) cudaCheck(cudaStreamSynchronize(reinterpret_cast(stream))); +} // DeviceBuffer::gpuUpload + +inline void DeviceBuffer::deviceDownload(void* stream, bool sync) const +{ + if (!mManaged) throw std::runtime_error("DeviceBuffer::deviceDownload called on externally managed memory. 
Replace deviceDownload call with the appropriate external copy operation."); + + checkPtr(mGpuData, "uninitialized gpu data"); + if (mCpuData == nullptr) { + cudaCheck(cudaMallocHost((void**)&mCpuData, mSize)); // un-managed pinned memory on the host (can be slow to access!). Always 32B aligned + } + checkPtr(mCpuData, "uninitialized cpu data"); + cudaCheck(cudaMemcpyAsync(mCpuData, mGpuData, mSize, cudaMemcpyDeviceToHost, reinterpret_cast(stream))); + if (sync) cudaCheck(cudaStreamSynchronize(reinterpret_cast(stream))); +} // DeviceBuffer::gpuDownload + +inline void DeviceBuffer::clear(void *stream) +{ + if (mManaged && mGpuData) cudaCheck(util::cuda::freeAsync(mGpuData, reinterpret_cast(stream))); + if (mManaged && mCpuData) cudaCheck(cudaFreeHost(mCpuData)); + mCpuData = mGpuData = nullptr; + mSize = 0; + mManaged = false; +} // DeviceBuffer::clear + +}// namespace cuda + +using CudaDeviceBuffer [[deprecated("Use nanovdb::cuda::DeviceBuffer instead")]] = cuda::DeviceBuffer; + +template<> +struct BufferTraits +{ + static constexpr bool hasDeviceDual = true; +}; + +}// namespace nanovdb + +#endif // end of NANOVDB_CUDA_DEVICEBUFFER_H_HAS_BEEN_INCLUDED diff --git a/external/nanovdb/cuda/GridHandle.cuh b/external/nanovdb/cuda/GridHandle.cuh new file mode 100644 index 00000000..a0fc96cb --- /dev/null +++ b/external/nanovdb/cuda/GridHandle.cuh @@ -0,0 +1,145 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +/*! + \file nanovdb/cuda/GridHandle.cuh + + \author Ken Museth, Doyub Kim + + \date August 3, 2023 + + \brief Contains cuda kernels for GridHandle + + \warning The header file contains cuda device code so be sure + to only include it in .cu files (or other .cuh files) +*/ + +#ifndef NANOVDB_CUDA_GRIDHANDLE_CUH_HAS_BEEN_INCLUDED +#define NANOVDB_CUDA_GRIDHANDLE_CUH_HAS_BEEN_INCLUDED + +#include // required for instantiation of move c-tor of GridHandle +#include // for cuda::updateChecksum +#include + +namespace nanovdb { + +namespace cuda { + +namespace {// anonymous namespace +__global__ void cpyGridHandleMeta(const GridData *d_data, GridHandleMetaData *d_meta) +{ + nanovdb::cpyGridHandleMeta(d_data, d_meta); +} + +__global__ void updateGridCount(GridData *d_data, uint32_t gridIndex, uint32_t gridCount, bool *d_dirty) +{ + NANOVDB_ASSERT(gridIndex < gridCount); + if (*d_dirty = d_data->mGridIndex != gridIndex || d_data->mGridCount != gridCount) { + d_data->mGridIndex = gridIndex; + d_data->mGridCount = gridCount; + if (d_data->mChecksum.isEmpty()) *d_dirty = false;// no need to update checksum if it didn't already exist + } +} +}// anonymous namespace + +template class VectorT = std::vector> +inline typename util::enable_if::hasDeviceDual, VectorT>>::type +splitGridHandles(const GridHandle &handle, const BufferT* other = nullptr, cudaStream_t stream = 0) +{ + const void *ptr = handle.deviceData(); + if (ptr == nullptr) return VectorT>(); + VectorT> handles(handle.gridCount()); + bool dirty, *d_dirty;// use this to check if the checksum needs to be recomputed + cudaCheck(util::cuda::mallocAsync((void**)&d_dirty, sizeof(bool), stream)); + for (uint32_t n=0; n(buffer.deviceData()); + const GridData *src = reinterpret_cast(ptr); + cudaCheck(cudaMemcpyAsync(dst, src, handle.gridSize(n), cudaMemcpyDeviceToDevice, stream)); + updateGridCount<<<1, 1, 0, stream>>>(dst, 0u, 1u, d_dirty); + cudaCheckError(); + cudaCheck(cudaMemcpyAsync(&dirty, d_dirty, sizeof(bool), cudaMemcpyDeviceToHost, stream)); + if (dirty) tools::cuda::updateChecksum(dst, 
CheckMode::Partial, stream); + handles[n] = nanovdb::GridHandle(std::move(buffer)); + ptr = util::PtrAdd(ptr, handle.gridSize(n)); + } + cudaCheck(util::cuda::freeAsync(d_dirty, stream)); + return std::move(handles); +}// cuda::splitGridHandles + +template class VectorT> +inline typename util::enable_if::hasDeviceDual, GridHandle>::type +mergeGridHandles(const VectorT> &handles, const BufferT* other = nullptr, cudaStream_t stream = 0) +{ + uint64_t size = 0u; + uint32_t counter = 0u, gridCount = 0u; + for (auto &h : handles) { + gridCount += h.gridCount(); + for (uint32_t n=0; n(dst); + updateGridCount<<<1, 1, 0, stream>>>(data, counter++, gridCount, d_dirty); + cudaCheckError(); + cudaCheck(cudaMemcpyAsync(&dirty, d_dirty, sizeof(bool), cudaMemcpyDeviceToHost, stream)); + if (dirty) tools::cuda::updateChecksum(data, CheckMode::Partial, stream); + dst = util::PtrAdd(dst, h.gridSize(n)); + src = util::PtrAdd(src, h.gridSize(n)); + } + } + cudaCheck(util::cuda::freeAsync(d_dirty, stream)); + return GridHandle(std::move(buffer)); +}// cuda::mergeGridHandles + +}// namespace cuda + +template class VectorT = std::vector> +[[deprecated("Use nanovdb::cuda::splitGridHandles instead")]] +inline typename util::enable_if::hasDeviceDual, VectorT>>::type +splitDeviceGrids(const GridHandle &handle, const BufferT* other = nullptr, cudaStream_t stream = 0) +{ return cuda::splitGridHandles(handle, other, stream); } + +template class VectorT> +[[deprecated("Use nanovdb::cuda::mergeGridHandles instead")]] +inline typename util::enable_if::hasDeviceDual, GridHandle>::type +mergeDeviceGrids(const VectorT> &handles, const BufferT* other = nullptr, cudaStream_t stream = 0) +{ return cuda::mergeGridHandles(handles, other, stream); } + +template +template::hasDeviceDual, int>::type> +GridHandle::GridHandle(T&& buffer) +{ + static_assert(util::is_same::value, "Expected U==BufferT"); + mBuffer = std::move(buffer); + if (auto *data = reinterpret_cast(mBuffer.data())) { + if (!data->isValid()) throw std::runtime_error("GridHandle was constructed with an invalid host buffer"); + mMetaData.resize(data->mGridCount); + cpyGridHandleMeta(data, mMetaData.data()); + } else { + if (auto *d_data = reinterpret_cast(mBuffer.deviceData())) { + GridData tmp; + cudaCheck(cudaMemcpy(&tmp, d_data, sizeof(GridData), cudaMemcpyDeviceToHost)); + if (!tmp.isValid()) throw std::runtime_error("GridHandle was constructed with an invalid device buffer"); + GridHandleMetaData *d_metaData; + cudaMalloc((void**)&d_metaData, tmp.mGridCount*sizeof(GridHandleMetaData)); + cuda::cpyGridHandleMeta<<<1,1>>>(d_data, d_metaData); + mMetaData.resize(tmp.mGridCount); + cudaCheck(cudaMemcpy(mMetaData.data(), d_metaData,tmp.mGridCount*sizeof(GridHandleMetaData), cudaMemcpyDeviceToHost)); + cudaCheck(cudaFree(d_metaData)); + } + } +}// GridHandle(T&& buffer) + +// Dummy function that ensures instantiation of the move-constructor above when BufferT=cuda::DeviceBuffer +namespace {auto __dummy(){return GridHandle(std::move(cuda::DeviceBuffer()));}} + +} // namespace nanovdb + +#endif // NANOVDB_CUDA_GRIDHANDLE_CUH_HAS_BEEN_INCLUDED diff --git a/external/nanovdb/cuda/NodeManager.cuh b/external/nanovdb/cuda/NodeManager.cuh new file mode 100644 index 00000000..639155ce --- /dev/null +++ b/external/nanovdb/cuda/NodeManager.cuh @@ -0,0 +1,104 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +/*! 
+ \file nanovdb/cuda/NodeManager.cuh + + \author Ken Museth + + \date October 3, 2023 + + \brief Contains cuda kernels for NodeManager + + \warning The header file contains cuda device code so be sure + to only include it in .cu files (or other .cuh files) +*/ + +#ifndef NANOVDB_CUDA_NODE_MANAGER_CUH_HAS_BEEN_INCLUDED +#define NANOVDB_CUDA_NODE_MANAGER_CUH_HAS_BEEN_INCLUDED + +#include // for cuda::lambdaKernel +#include +#include + +namespace nanovdb { + +namespace cuda { + +/// @brief Construct a NodeManager from a device grid pointer +/// +/// @param d_grid device grid pointer whose nodes will be accessed sequentially +/// @param buffer buffer from which to allocate the output handle +/// @param stream cuda stream +/// @return Handle that contains a device NodeManager +template +inline typename util::enable_if::hasDeviceDual, NodeManagerHandle>::type +createNodeManager(const NanoGrid *d_grid, + const BufferT& pool = BufferT(), + cudaStream_t stream = 0) +{ + auto buffer = BufferT::create(sizeof(NodeManagerData), &pool, false, stream); + auto *d_data = (NodeManagerData*)buffer.deviceData(); + size_t size = 0u, *d_size; + cudaCheck(util::cuda::mallocAsync((void**)&d_size, sizeof(size_t), stream)); + util::cuda::lambdaKernel<<<1, 1, 0, stream>>>(1, [=] __device__(size_t) { +#ifdef NANOVDB_USE_NEW_MAGIC_NUMBERS + *d_data = NodeManagerData{NANOVDB_MAGIC_NODE, 0u, (void*)d_grid, {0u,0u,0u}}; +#else + *d_data = NodeManagerData{NANOVDB_MAGIC_NUMB, 0u, (void*)d_grid, {0u,0u,0u}}; +#endif + *d_size = sizeof(NodeManagerData); + auto &tree = d_grid->tree(); + if (NodeManager::FIXED_SIZE && d_grid->isBreadthFirst()) { + d_data->mLinear = uint8_t(1u); + d_data->mOff[0] = util::PtrDiff(tree.template getFirstNode<0>(), d_grid); + d_data->mOff[1] = util::PtrDiff(tree.template getFirstNode<1>(), d_grid); + d_data->mOff[2] = util::PtrDiff(tree.template getFirstNode<2>(), d_grid); + } else { + *d_size += sizeof(uint64_t)*tree.totalNodeCount(); + } + }); + cudaCheckError(); + cudaCheck(cudaMemcpyAsync(&size, d_size, sizeof(size_t), cudaMemcpyDeviceToHost, stream)); + cudaCheck(util::cuda::freeAsync(d_size, stream)); + if (size > sizeof(NodeManagerData)) { + auto tmp = BufferT::create(size, &pool, false, stream);// only allocate buffer on the device + cudaCheck(cudaMemcpyAsync(tmp.deviceData(), buffer.deviceData(), sizeof(NodeManagerData), cudaMemcpyDeviceToDevice, stream)); + buffer = std::move(tmp); + d_data = reinterpret_cast(buffer.deviceData()); + util::cuda::lambdaKernel<<<1, 1, 0, stream>>>(1, [=] __device__ (size_t) { + auto &tree = d_grid->tree(); + int64_t *ptr0 = d_data->mPtr[0] = reinterpret_cast(d_data + 1); + int64_t *ptr1 = d_data->mPtr[1] = d_data->mPtr[0] + tree.nodeCount(0); + int64_t *ptr2 = d_data->mPtr[2] = d_data->mPtr[1] + tree.nodeCount(1); + // Performs depth first traversal but breadth first insertion + for (auto it2 = tree.root().cbeginChild(); it2; ++it2) { + *ptr2++ = util::PtrDiff(&*it2, d_grid); + for (auto it1 = it2->cbeginChild(); it1; ++it1) { + *ptr1++ = util::PtrDiff(&*it1, d_grid); + for (auto it0 = it1->cbeginChild(); it0; ++it0) { + *ptr0++ = util::PtrDiff(&*it0, d_grid); + }// loop over child nodes of the lower internal node + }// loop over child nodes of the upper internal node + }// loop over child nodes of the root node + }); + } + + return NodeManagerHandle(toGridType(), std::move(buffer)); +}// cuda::createNodeManager + +}// namespace cuda + +template +[[deprecated("Use cuda::createNodeManager instead")]] +inline typename util::enable_if::hasDeviceDual, 
NodeManagerHandle>::type +cudaCreateNodeManager(const NanoGrid *d_grid, + const BufferT& pool = BufferT(), + cudaStream_t stream = 0) +{ + return cuda::createNodeManager(d_grid, pool, stream); +} + +} // namespace nanovdb + +#endif // NANOVDB_CUDA_NODE_MANAGER_CUH_HAS_BEEN_INCLUDED diff --git a/external/nanovdb/io/IO.h b/external/nanovdb/io/IO.h new file mode 100644 index 00000000..a7110846 --- /dev/null +++ b/external/nanovdb/io/IO.h @@ -0,0 +1,767 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +/*! + \file IO.h + + \author Ken Museth + + \date May 1, 2020 + + \brief Implements I/O for NanoVDB grids. Features optional BLOSC and ZIP + file compression, support for multiple grids per file as well as + multiple grid types. + + \note This file does NOT depend on OpenVDB, but optionally on ZIP and BLOSC + + \details NanoVDB files take on of two formats: + 1) multiple segments each with multiple grids (segments have easy to access metadata about its grids) + 2) starting with verion 32.6.0 nanovdb files also support a raw buffer with one or more grids (just a + dump of a raw grid buffer, so no new metadata in headers as when using segments mentioned above). + + // 1: Segment: FileHeader, MetaData0, gridName0...MetaDataN, gridNameN, compressed Grid0, ... compressed GridN + // 2: Raw: Grid0, ... GridN +*/ + +#ifndef NANOVDB_IO_H_HAS_BEEN_INCLUDED +#define NANOVDB_IO_H_HAS_BEEN_INCLUDED + +#include +#include +#include // for updateGridCount + +#include // for std::ifstream +#include // for std::cerr/cout +#include // for std::string +#include // for std::stringstream +#include // for std::strcmp +#include // for std::unique_ptr +#include // for std::vector +#ifdef NANOVDB_USE_ZIP +#include // for ZIP compression +#endif +#ifdef NANOVDB_USE_BLOSC +#include // for BLOSC compression +#endif + +// Due to a bug in older versions of gcc, including fstream might +// define "major" and "minor" which are used as member data below. +// See https://bugzilla.redhat.com/show_bug.cgi?id=130601 +#if defined(major) || defined(minor) +#undef major +#undef minor +#endif + +namespace nanovdb {// ========================================================== + +namespace io {// =============================================================== + +// --------------------------> writeGrid(s) <------------------------------------ + +/// @brief Write a single grid to file (over-writing existing content of the file) +/// +/// @note The single grid is written into a single segment, i.e. header with metadata about its type and size. +template +void writeGrid(const std::string& fileName, const GridHandle& handle, io::Codec codec = io::Codec::NONE, int verbose = 0); + +/// @brief Write multiple grids to file (over-writing existing content of the file) +/// +/// @note The multiple grids are written into the same segment, i.e. header with metadata about all grids +template class VecT = std::vector> +void writeGrids(const std::string& fileName, const VecT>& handles, Codec codec = Codec::NONE, int verbose = 0); + +// --------------------------> readGrid(s) <------------------------------------ + +/// @brief Read and return one or all grids from a file into a single GridHandle +/// @tparam BufferT Type of buffer used memory allocation +/// @param fileName string name of file to be read from +/// @param n zero-based signed index of the grid to be read. +/// The default value of 0 means read only first grid. +/// A negative value of n means read all grids in the file. 
+/// @param verbose specify verbosity level. Default value of zero means quiet. +/// @param buffer optional buffer used for memory allocation +/// @return return a single GridHandle with one or all grids found in the file +/// @throw will throw a std::runtime_error if the file does not contain a grid with index n +template +GridHandle readGrid(const std::string& fileName, int n = 0, int verbose = 0, const BufferT& buffer = BufferT()); + +/// @brief Read and return the first grid with a specific name from a file +/// @tparam BufferT Type of buffer used memory allocation +/// @param fileName string name of file to be read from +/// @param gridName string name of the grid to be read +/// @param verbose specify verbosity level. Default value of zero means quiet. +/// @param buffer optional buffer used for memory allocation +/// @return return a single GridHandle containing the grid with the specific name +/// @throw will throw a std::runtime_error if the file does not contain a grid with the specific name +template +GridHandle readGrid(const std::string& fileName, const std::string& gridName, int verbose = 0, const BufferT& buffer = BufferT()); + +/// @brief Read all the grids in the file and return them as a vector of multiple GridHandles, each containing +/// all grids encoded in the same segment of the file (i.e. they where written together). This method also +/// works if the file contains a raw grid buffer in which case a single GridHandle is returned. +/// @tparam BufferT Type of buffer used memory allocation +/// @param fileName string name of file to be read from +/// @param verbose specify verbosity level. Default value of zero means quiet. +/// @param buffer optional buffer used for memory allocation +/// @return Return a vector of GridHandles each containing all grids encoded +/// in the same segment of the file (i.e. they where written together). +template class VecT = std::vector> +VecT> readGrids(const std::string& fileName, int verbose = 0, const BufferT& buffer = BufferT()); + +// ----------------------------------------------------------------------- + +/// We fix a specific size for counting bytes in files so that they +/// are saved the same regardless of machine precision. (Note there are +/// still little/bigendian issues, however) +using fileSize_t = uint64_t; + +/// @brief Internal functions for compressed read/write of a NanoVDB GridHandle into a stream +/// +/// @warning These functions should never be called directly by client code +namespace Internal { +static constexpr fileSize_t MAX_SIZE = 1UL << 30; // size is 1 GB + +template +static fileSize_t write(std::ostream& os, const GridHandle& handle, Codec codec, uint32_t n); + +template +static void read(std::istream& is, BufferT& buffer, Codec codec); + +static void read(std::istream& is, char* data, fileSize_t size, Codec codec); +} // namespace Internal + +/// @brief Standard hash function to use on strings; std::hash may vary by +/// platform/implementation and is know to produce frequent collisions. 
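+/// @note The resulting key is stored on disk as FileMetaData::nameKey (see the
+/// FileGridMetaData constructor below), which is why a stable,
+/// implementation-independent hash is needed here.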
+uint64_t stringHash(const char* cstr); + +/// @brief Return a uint64_t hash key of a std::string +inline uint64_t stringHash(const std::string& str){return stringHash(str.c_str());} + +/// @brief Return a uint64_t with its bytes reversed so we can check for endianness +inline uint64_t reverseEndianness(uint64_t val) +{ + return (((val) >> 56) & 0x00000000000000FF) | (((val) >> 40) & 0x000000000000FF00) | + (((val) >> 24) & 0x0000000000FF0000) | (((val) >> 8) & 0x00000000FF000000) | + (((val) << 8) & 0x000000FF00000000) | (((val) << 24) & 0x0000FF0000000000) | + (((val) << 40) & 0x00FF000000000000) | (((val) << 56) & 0xFF00000000000000); +} + +/// @brief This class defines the meta data stored for each grid in a segment +/// +/// @details A segment consists of a FileHeader followed by a list of FileGridMetaData +/// each followed by grid names and then finally the grids themselves. +/// +/// @note This class should not be confused with nanovdb::GridMetaData defined in NanoVDB.h +/// Also, io::FileMetaData is defined in NanoVDB.h. +struct FileGridMetaData : public FileMetaData +{ + static_assert(sizeof(FileMetaData) == 176, "Unexpected sizeof(FileMetaData)"); + std::string gridName; + void read(std::istream& is); + void write(std::ostream& os) const; + FileGridMetaData() {} + FileGridMetaData(uint64_t size, Codec c, const GridData &gridData); + uint64_t memUsage() const { return sizeof(FileMetaData) + nameSize; } +}; // FileGridMetaData + +/// @brief This class defines all the data stored in segment of a file +/// +/// @details A segment consists of a FileHeader followed by a list of FileGridMetaData +/// each followed by grid names and then finally the grids themselves. +struct Segment +{ + // Check assumptions made during read and write of FileHeader and FileMetaData + static_assert(sizeof(FileHeader) == 16u, "Unexpected sizeof(FileHeader)"); + FileHeader header;// defined in NanoVDB.h + std::vector meta;// defined in NanoVDB.h + Segment(Codec c = Codec::NONE) +#ifdef NANOVDB_USE_NEW_MAGIC_NUMBERS + : header{NANOVDB_MAGIC_FILE, Version(), 0u, c} +#else + : header{NANOVDB_MAGIC_NUMB, Version(), 0u, c} +#endif + , meta() + { + } + template + void add(const GridHandle& h); + bool read(std::istream& is); + void write(std::ostream& os) const; + uint64_t memUsage() const; +}; // Segment + +/// @brief Return true if the file contains a grid with the specified name +bool hasGrid(const std::string& fileName, const std::string& gridName); + +/// @brief Return true if the stream contains a grid with the specified name +bool hasGrid(std::istream& is, const std::string& gridName); + +/// @brief Reads and returns a vector of meta data for all the grids found in the specified file +std::vector readGridMetaData(const std::string& fileName); + +/// @brief Reads and returns a vector of meta data for all the grids found in the specified stream +std::vector readGridMetaData(std::istream& is); + +// --------------------------> Implementations for Internal <------------------------------------ + +template +fileSize_t Internal::write(std::ostream& os, const GridHandle& handle, Codec codec, unsigned int n) +{ + const char* data = reinterpret_cast(handle.gridData(n)); + fileSize_t total = 0, residual = handle.gridSize(n); + + switch (codec) { + case Codec::ZIP: { +#ifdef NANOVDB_USE_ZIP + uLongf size = compressBound(static_cast(residual)); // Get an upper bound on the size of the compressed data. 
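+            // On-disk framing used by this codec: an 8-byte fileSize_t holding the
+            // compressed byte count, immediately followed by the compressed payload.
+            // Internal::read() below reverses this, reading the count first and then
+            // calling uncompress() on the payload.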
+ std::unique_ptr tmp(new Bytef[size]); + const int status = compress(tmp.get(), &size, reinterpret_cast(data), static_cast(residual)); + if (status != Z_OK) std::runtime_error("Internal write error in ZIP"); + if (size > residual) std::cerr << "\nWarning: Unexpected ZIP compression from " << residual << " to " << size << " bytes\n"; + const fileSize_t outBytes = size; + os.write(reinterpret_cast(&outBytes), sizeof(fileSize_t)); + os.write(reinterpret_cast(tmp.get()), outBytes); + total += sizeof(fileSize_t) + outBytes; +#else + throw std::runtime_error("ZIP compression codec was disabled during build"); +#endif + break; + } + case Codec::BLOSC: { +#ifdef NANOVDB_USE_BLOSC + do { + fileSize_t chunk = residual < MAX_SIZE ? residual : MAX_SIZE, size = chunk + BLOSC_MAX_OVERHEAD; + std::unique_ptr tmp(new char[size]); + const int count = blosc_compress_ctx(9, 1, sizeof(float), chunk, data, tmp.get(), size, BLOSC_LZ4_COMPNAME, 1 << 18, 1); + if (count <= 0) std::runtime_error("Internal write error in BLOSC"); + const fileSize_t outBytes = count; + os.write(reinterpret_cast(&outBytes), sizeof(fileSize_t)); + os.write(reinterpret_cast(tmp.get()), outBytes); + total += sizeof(fileSize_t) + outBytes; + data += chunk; + residual -= chunk; + } while (residual > 0); +#else + throw std::runtime_error("BLOSC compression codec was disabled during build"); +#endif + break; + } + default: + os.write(data, residual); + total += residual; + } + if (!os) throw std::runtime_error("Failed to write Tree to file"); + return total; +} // Internal::write + +template +void Internal::read(std::istream& is, BufferT& buffer, Codec codec) +{ + Internal::read(is, reinterpret_cast(buffer.data()), buffer.size(), codec); +} // Internal::read + +/// @brief read compressed grid from stream +/// @param is input stream to read from +/// @param data data buffer to write into. Must be of size @c residual or larger. +/// @param residual expected byte size of uncompressed data. +/// @param codec mode of compression +void Internal::read(std::istream& is, char* data, fileSize_t residual, Codec codec) +{ + // read tree using optional compression + switch (codec) { + case Codec::ZIP: { +#ifdef NANOVDB_USE_ZIP + fileSize_t size; + is.read(reinterpret_cast(&size), sizeof(fileSize_t)); + std::unique_ptr tmp(new Bytef[size]);// temp buffer for compressed data + is.read(reinterpret_cast(tmp.get()), size); + uLongf numBytes = static_cast(residual); + int status = uncompress(reinterpret_cast(data), &numBytes, tmp.get(), static_cast(size)); + if (status != Z_OK) std::runtime_error("Internal read error in ZIP"); + if (fileSize_t(numBytes) != residual) throw std::runtime_error("UNZIP failed on byte size"); +#else + throw std::runtime_error("ZIP compression codec was disabled during build"); +#endif + break; + } + case Codec::BLOSC: { +#ifdef NANOVDB_USE_BLOSC + do { + fileSize_t size; + is.read(reinterpret_cast(&size), sizeof(fileSize_t)); + std::unique_ptr tmp(new char[size]);// temp buffer for compressed data + is.read(reinterpret_cast(tmp.get()), size); + const fileSize_t chunk = residual < MAX_SIZE ? 
residual : MAX_SIZE; + const int count = blosc_decompress_ctx(tmp.get(), data, size_t(chunk), 1); //fails with more threads :( + if (count < 1) std::runtime_error("Internal read error in BLOSC"); + if (count != int(chunk)) throw std::runtime_error("BLOSC failed on byte size"); + data += size_t(chunk); + residual -= chunk; + } while (residual > 0); +#else + throw std::runtime_error("BLOSC compression codec was disabled during build"); +#endif + break; + } + default: + is.read(data, residual);// read uncompressed data + } + if (!is) throw std::runtime_error("Failed to read Tree from file"); +} // Internal::read + +// --------------------------> Implementations for FileGridMetaData <------------------------------------ + +inline FileGridMetaData::FileGridMetaData(uint64_t size, Codec c, const GridData &gridData) + : FileMetaData{size, // gridSize + size, // fileSize (will typically be redefined) + 0u, // nameKey + 0u, // voxelCount + gridData.mGridType, // gridType + gridData.mGridClass, // gridClass + gridData.mWorldBBox, // worldBBox + gridData.indexBBox(), // indexBBox + gridData.mVoxelSize, // voxelSize + 0, // nameSize + {0, 0, 0, 1}, // nodeCount[4] + {0, 0, 0}, // tileCount[3] + c, // codec + 0, // padding + Version()}// version + , gridName(gridData.gridName()) +{ + auto &treeData = *reinterpret_cast(gridData.treePtr()); + nameKey = stringHash(gridName); + voxelCount = treeData.mVoxelCount; + nameSize = static_cast(gridName.size() + 1); // include '\0' + for (int i = 0; i < 3; ++i) { + FileMetaData::nodeCount[i] = treeData.mNodeCount[i]; + FileMetaData::tileCount[i] = treeData.mTileCount[i]; + } +}// FileGridMetaData::FileGridMetaData + +inline void FileGridMetaData::write(std::ostream& os) const +{ + os.write(reinterpret_cast(this), sizeof(FileMetaData)); + os.write(gridName.c_str(), nameSize); + if (!os) throw std::runtime_error("Failed writing FileGridMetaData"); +}// FileGridMetaData::write + +inline void FileGridMetaData::read(std::istream& is) +{ + is.read(reinterpret_cast(this), sizeof(FileMetaData)); + std::unique_ptr tmp(new char[nameSize]); + is.read(reinterpret_cast(tmp.get()), nameSize); + gridName.assign(tmp.get()); + if (!is) throw std::runtime_error("Failed reading FileGridMetaData"); +}// FileGridMetaData::read + +// --------------------------> Implementations for Segment <------------------------------------ + +inline uint64_t Segment::memUsage() const +{ + uint64_t sum = sizeof(FileHeader); + for (auto& m : meta) sum += m.memUsage();// includes FileMetaData + grid name + return sum; +}// Segment::memUsage + +template +inline void Segment::add(const GridHandle& h) +{ + for (uint32_t i = 0; i < h.gridCount(); ++i) { + const GridData *gridData = h.gridData(i); + if (!gridData) throw std::runtime_error("Segment::add: GridHandle does not contain grid #" + std::to_string(i)); + meta.emplace_back(h.gridSize(i), header.codec, *gridData); + } + header.gridCount += h.gridCount(); +}// Segment::add + +inline void Segment::write(std::ostream& os) const +{ + if (header.gridCount == 0) { + throw std::runtime_error("Segment contains no grids"); + } else if (!os.write(reinterpret_cast(&header), sizeof(FileHeader))) { + throw std::runtime_error("Failed to write FileHeader of Segment"); + } + for (auto& m : meta) m.write(os); +}// Segment::write + +inline bool Segment::read(std::istream& is) +{ + is.read(reinterpret_cast(&header), sizeof(FileHeader)); + if (is.eof()) {// The EOF flag is only set once a read tries to read past the end of the file + is.clear(std::ios_base::eofbit);// 
clear eof flag so we can rewind and read again + return false; + } + const MagicType magic = toMagic(header.magic); + if (magic != MagicType::NanoVDB && magic != MagicType::NanoFile) { + // first check for byte-swapped header magic. + if (header.magic == reverseEndianness(NANOVDB_MAGIC_NUMB) || + header.magic == reverseEndianness(NANOVDB_MAGIC_FILE)) { + throw std::runtime_error("This nvdb file has reversed endianness"); + } else { + if (magic == MagicType::OpenVDB) { + throw std::runtime_error("Expected a NanoVDB file, but read an OpenVDB file!"); + } else if (magic == MagicType::NanoGrid) { + throw std::runtime_error("Expected a NanoVDB file, but read a raw NanoVDB grid!"); + } else { + throw std::runtime_error("Expected a NanoVDB file, but read a file of unknown type!"); + } + } + } else if ( !header.version.isCompatible()) { + std::stringstream ss; + Version v; + is.read(reinterpret_cast(&v), sizeof(Version));// read GridData::mVersion located at byte 16=sizeof(FileHeader) is stream + if ( v.getMajor() == NANOVDB_MAJOR_VERSION_NUMBER) { + ss << "This file looks like it contains a raw grid buffer and not a standard file with meta data"; + } else if ( header.version.getMajor() < NANOVDB_MAJOR_VERSION_NUMBER) { + char str[30]; + ss << "The file contains an older version of NanoVDB: " << std::string(toStr(str, header.version)) << "!\n\t" + << "Recommendation: Re-generate this NanoVDB file with this version: " << NANOVDB_MAJOR_VERSION_NUMBER << ".X of NanoVDB"; + } else { + ss << "This tool was compiled against an older version of NanoVDB: " << NANOVDB_MAJOR_VERSION_NUMBER << ".X!\n\t" + << "Recommendation: Re-compile this tool against the newer version: " << header.version.getMajor() << ".X of NanoVDB"; + } + throw std::runtime_error("An unrecoverable error in nanovdb::Segment::read:\n\tIncompatible file format: " + ss.str()); + } + meta.resize(header.gridCount); + for (auto& m : meta) { + m.read(is); + m.version = header.version; + } + return true; +}// Segment::read + +// --------------------------> writeGrid <------------------------------------ + +template +void writeGrid(std::ostream& os, const GridHandle& handle, Codec codec) +{ + Segment seg(codec); + seg.add(handle); + const auto start = os.tellp(); + seg.write(os); // write header without the correct fileSize (so it's allocated) + for (uint32_t i = 0; i < handle.gridCount(); ++i) { + seg.meta[i].fileSize = Internal::write(os, handle, codec, i); + } + os.seekp(start); + seg.write(os);// re-write header with the correct fileSize + os.seekp(0, std::ios_base::end);// skip to end +}// writeGrid + +template +void writeGrid(const std::string& fileName, const GridHandle& handle, Codec codec, int verbose) +{ + std::ofstream os(fileName, std::ios::out | std::ios::binary | std::ios::trunc); + if (!os.is_open()) { + throw std::ios_base::failure("Unable to open file named \"" + fileName + "\" for output"); + } + writeGrid(os, handle, codec); + if (verbose) { + std::cout << "Wrote nanovdb::Grid to file named \"" << fileName << "\"" << std::endl; + } +}// writeGrid + +// --------------------------> writeGrids <------------------------------------ + +template class VecT = std::vector> +void writeGrids(std::ostream& os, const VecT>& handles, Codec codec = Codec::NONE) +{ + for (auto& h : handles) writeGrid(os, h, codec); +}// writeGrids + +template class VecT> +void writeGrids(const std::string& fileName, const VecT>& handles, Codec codec, int verbose) +{ + std::ofstream os(fileName, std::ios::out | std::ios::binary | std::ios::trunc); + if 
(!os.is_open()) throw std::ios_base::failure("Unable to open file named \"" + fileName + "\" for output"); + writeGrids(os, handles, codec); + if (verbose) std::cout << "Wrote " << handles.size() << " nanovdb::Grid(s) to file named \"" << fileName << "\"" << std::endl; +}// writeGrids + +// --------------------------> readGrid <------------------------------------ + +template +GridHandle readGrid(std::istream& is, int n, const BufferT& pool) +{ + GridHandle handle; + if (n<0) {// read all grids into the same buffer + try {//first try to read a raw grid buffer + handle.read(is, pool); + } catch(const std::logic_error&) { + Segment seg; + uint64_t bufferSize = 0u; + uint32_t gridCount = 0u, gridIndex = 0u; + const auto start = is.tellg(); + while (seg.read(is)) { + std::streamoff skipSize = 0; + for (auto& m : seg.meta) { + ++gridCount; + bufferSize += m.gridSize; + skipSize += m.fileSize; + }// loop over grids in segment + is.seekg(skipSize, std::ios_base::cur); // skip forward from the current position + }// loop over segments + auto buffer = BufferT::create(bufferSize, &pool); + char *ptr = (char*)buffer.data(); + is.seekg(start);// rewind + while (seg.read(is)) { + for (auto& m : seg.meta) { + Internal::read(is, ptr, m.gridSize, seg.header.codec); + tools::updateGridCount((GridData*)ptr, gridIndex++, gridCount); + ptr += m.gridSize; + }// loop over grids in segment + }// loop over segments + return GridHandle(std::move(buffer)); + } + } else {// read a specific grid + try {//first try to read a raw grid buffer + handle.read(is, uint32_t(n), pool); + tools::updateGridCount((GridData*)handle.data(), 0u, 1u); + } catch(const std::logic_error&) { + Segment seg; + int counter = -1; + while (seg.read(is)) { + std::streamoff seek = 0; + for (auto& m : seg.meta) { + if (++counter == n) { + auto buffer = BufferT::create(m.gridSize, &pool); + Internal::read(is, buffer, seg.header.codec); + tools::updateGridCount((GridData*)buffer.data(), 0u, 1u); + return GridHandle(std::move(buffer)); + } else { + seek += m.fileSize; + } + }// loop over grids in segment + is.seekg(seek, std::ios_base::cur); // skip forward from the current position + }// loop over segments + if (n != counter) throw std::runtime_error("stream does not contain a #" + std::to_string(n) + " grid"); + } + } + return handle; +}// readGrid + +/// @brief Read the n'th grid +template +GridHandle readGrid(const std::string& fileName, int n, int verbose, const BufferT& buffer) +{ + std::ifstream is(fileName, std::ios::in | std::ios::binary); + if (!is.is_open()) throw std::ios_base::failure("Unable to open file named \"" + fileName + "\" for input"); + auto handle = readGrid(is, n, buffer); + if (verbose) { + if (n<0) { + std::cout << "Read all NanoGrids from the file named \"" << fileName << "\"" << std::endl; + } else { + std::cout << "Read NanoGrid # " << n << " from the file named \"" << fileName << "\"" << std::endl; + } + } + return handle; // is converted to r-value and return value is move constructed. 
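+    // Minimal usage sketch for the readGrid overloads above (illustrative only; the file
+    // name and buffer type are assumptions, not part of this header):
+    //   nanovdb::HostBuffer pool;
+    //   auto handle = nanovdb::io::readGrid<nanovdb::HostBuffer>("grids.nvdb", 0, 1, pool);
+    //   const auto* grid = handle.grid<float>();// nullptr if grid #0 is not a float grid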
+}// readGrid + +/// @brief Read a specific grid from an input stream given the name of the grid +/// @tparam BufferT Buffer type used for allocation +/// @param is input stream from which to read the grid +/// @param gridName string name of the (first) grid to be returned +/// @param pool optional memory pool from which to allocate the grid buffer +/// @return Return the first grid in the input stream with a specific name +/// @throw std::runtime_error with no grid exists with the specified name +template +GridHandle readGrid(std::istream& is, const std::string& gridName, const BufferT& pool) +{ + try { + GridHandle handle; + handle.read(is, gridName, pool); + return handle; + } catch(const std::logic_error&) { + const auto key = stringHash(gridName); + Segment seg; + while (seg.read(is)) {// loop over all segments in stream + std::streamoff seek = 0; + for (auto& m : seg.meta) {// loop over all grids in segment + if ((m.nameKey == 0u || m.nameKey == key) && m.gridName == gridName) { // check for hash key collision + auto buffer = BufferT::create(m.gridSize, &pool); + is.seekg(seek, std::ios_base::cur); // rewind + Internal::read(is, buffer, seg.header.codec); + tools::updateGridCount((GridData*)buffer.data(), 0u, 1u); + return GridHandle(std::move(buffer)); + } else { + seek += m.fileSize; + } + } + is.seekg(seek, std::ios_base::cur); // skip forward from the current position + } + } + throw std::runtime_error("Grid name '" + gridName + "' not found in file"); +}// readGrid + +/// @brief Read the first grid with a specific name +template +GridHandle readGrid(const std::string& fileName, const std::string& gridName, int verbose, const BufferT& buffer) +{ + std::ifstream is(fileName, std::ios::in | std::ios::binary); + if (!is.is_open()) throw std::ios_base::failure("Unable to open file named \"" + fileName + "\" for input"); + auto handle = readGrid(is, gridName, buffer); + if (verbose) { + if (handle) { + std::cout << "Read NanoGrid named \"" << gridName << "\" from the file named \"" << fileName << "\"" << std::endl; + } else { + std::cout << "File named \"" << fileName << "\" does not contain a grid named \"" + gridName + "\"" << std::endl; + } + } + return handle; // is converted to r-value and return value is move constructed. +}// readGrid + +// --------------------------> readGrids <------------------------------------ + +template class VecT = std::vector> +VecT> readGrids(std::istream& is, const BufferT& pool = BufferT()) +{ + VecT> handles; + try {//first try to read a raw grid buffer + GridHandle handle; + handle.read(is, pool);// will throw if stream does not contain a raw grid buffer + handles.push_back(std::move(handle)); // force move copy assignment + } catch(const std::logic_error&) { + Segment seg; + while (seg.read(is)) { + uint64_t bufferSize = 0; + for (auto& m : seg.meta) bufferSize += m.gridSize; + auto buffer = BufferT::create(bufferSize, &pool); + uint64_t bufferOffset = 0; + for (uint16_t i = 0; i < seg.header.gridCount; ++i) { + auto *data = util::PtrAdd(buffer.data(), bufferOffset); + Internal::read(is, (char*)data, seg.meta[i].gridSize, seg.header.codec); + tools::updateGridCount(data, uint32_t(i), uint32_t(seg.header.gridCount)); + bufferOffset += seg.meta[i].gridSize; + }// loop over grids in segment + handles.emplace_back(std::move(buffer)); // force move copy assignment + }// loop over segments + } + return handles; // is converted to r-value and return value is move constructed. 
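+    // Note: each segment above was unpacked into a single buffer holding all of its grids
+    // back to back, and updateGridCount() rewrote every GridData header so the per-buffer
+    // grid index and total grid count remain consistent.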
+}// readGrids + +/// @brief Read all the grids +template class VecT> +VecT> readGrids(const std::string& fileName, int verbose, const BufferT& buffer) +{ + std::ifstream is(fileName, std::ios::in | std::ios::binary); + if (!is.is_open()) throw std::ios_base::failure("Unable to open file named \"" + fileName + "\" for input"); + auto handles = readGrids(is, buffer); + if (verbose) std::cout << "Read " << handles.size() << " NanoGrid(s) from the file named \"" << fileName << "\"" << std::endl; + return handles; // is converted to r-value and return value is move constructed. +}// readGrids + +// --------------------------> readGridMetaData <------------------------------------ + +inline std::vector readGridMetaData(const std::string& fileName) +{ + std::ifstream is(fileName, std::ios::in | std::ios::binary); + if (!is.is_open()) throw std::ios_base::failure("Unable to open file named \"" + fileName + "\" for input"); + return readGridMetaData(is); // is converted to r-value and return value is move constructed. +}// readGridMetaData + +inline std::vector readGridMetaData(std::istream& is) +{ + Segment seg; + std::vector meta; + try { + GridHandle<> handle;// if stream contains a raw grid buffer we unfortunately have to load everything + handle.read(is); + seg.add(handle); + meta = std::move(seg.meta); + } catch(const std::logic_error&) { + while (seg.read(is)) { + std::streamoff skip = 0; + for (auto& m : seg.meta) { + meta.push_back(m); + skip += m.fileSize; + }// loop over grid meta data in segment + is.seekg(skip, std::ios_base::cur); + }// loop over segments + } + return meta; // is converted to r-value and return value is move constructed. +}// readGridMetaData + +// --------------------------> hasGrid <------------------------------------ + +inline bool hasGrid(const std::string& fileName, const std::string& gridName) +{ + std::ifstream is(fileName, std::ios::in | std::ios::binary); + if (!is.is_open()) throw std::ios_base::failure("Unable to open file named \"" + fileName + "\" for input"); + return hasGrid(is, gridName); +}// hasGrid + +inline bool hasGrid(std::istream& is, const std::string& gridName) +{ + const auto key = stringHash(gridName); + Segment seg; + while (seg.read(is)) { + std::streamoff seek = 0; + for (auto& m : seg.meta) { + if (m.nameKey == key && m.gridName == gridName) return true; // check for hash key collision + seek += m.fileSize; + }// loop over grid meta data in segment + is.seekg(seek, std::ios_base::cur); + }// loop over segments + return false; +}// hasGrid + +// --------------------------> stringHash <------------------------------------ + +inline uint64_t stringHash(const char* c_str) +{ + uint64_t hash = 0;// zero is returned when cstr = nullptr or "\0" + if (c_str) { + for (auto* str = reinterpret_cast(c_str); *str; ++str) { + uint64_t overflow = hash >> (64 - 8); + hash *= 67; // Next-ish prime after 26 + 26 + 10 + hash += *str + overflow; + } + } + return hash; +}// stringHash + +} // namespace io ====================================================================== + +template +inline std::ostream& +operator<<(std::ostream& os, const math::BBox>& b) +{ + os << "(" << b[0][0] << "," << b[0][1] << "," << b[0][2] << ") -> " + << "(" << b[1][0] << "," << b[1][1] << "," << b[1][2] << ")"; + return os; +} + +inline std::ostream& +operator<<(std::ostream& os, const CoordBBox& b) +{ + os << "(" << b[0][0] << "," << b[0][1] << "," << b[0][2] << ") -> " + << "(" << b[1][0] << "," << b[1][1] << "," << b[1][2] << ")"; + return os; +} + +inline std::ostream& 
+operator<<(std::ostream& os, const Coord& ijk) +{ + os << "(" << ijk[0] << "," << ijk[1] << "," << ijk[2] << ")"; + return os; +} + +template +inline std::ostream& +operator<<(std::ostream& os, const math::Vec3& v) +{ + os << "(" << v[0] << "," << v[1] << "," << v[2] << ")"; + return os; +} + +template +inline std::ostream& +operator<<(std::ostream& os, const math::Vec4& v) +{ + os << "(" << v[0] << "," << v[1] << "," << v[2] << "," << v[3] << ")"; + return os; +} + +} // namespace nanovdb =================================================================== + +#endif // NANOVDB_IO_H_HAS_BEEN_INCLUDED diff --git a/external/nanovdb/math/CSampleFromVoxels.h b/external/nanovdb/math/CSampleFromVoxels.h new file mode 100644 index 00000000..c7820a70 --- /dev/null +++ b/external/nanovdb/math/CSampleFromVoxels.h @@ -0,0 +1,327 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +// +// Simple C-wrapper for voxel interpolation functions +// + +#ifndef __CSAMPLEFROMVOXELS__ +#define __CSAMPLEFROMVOXELS__ + +#include "../CNanoVDB.h" + +#ifdef __OPENCL_VERSION__ +#else +#include +#endif + +void +cnanovdb_coord_round(cnanovdb_coord *RESTRICT coord, const cnanovdb_Vec3F *RESTRICT xyz) +{ +#ifdef __OPENCL_VERSION__ + coord->mVec[0] = floor(xyz->mVec[0]+0.5); + coord->mVec[1] = floor(xyz->mVec[1]+0.5); + coord->mVec[2] = floor(xyz->mVec[2]+0.5); +#else + coord->mVec[0] = floorf(xyz->mVec[0]+0.5); + coord->mVec[1] = floorf(xyz->mVec[1]+0.5); + coord->mVec[2] = floorf(xyz->mVec[2]+0.5); +#endif +} + +void +cnanovdb_coord_fract(cnanovdb_coord *RESTRICT coord, cnanovdb_Vec3F *RESTRICT fraction, const cnanovdb_Vec3F *RESTRICT xyz) +{ +#ifdef __OPENCL_VERSION__ + float i0, i1, i2; + fraction->mVec[0] = fract(xyz->mVec[0], &i0); + coord->mVec[0] = i0; + fraction->mVec[1] = fract(xyz->mVec[1], &i1); + coord->mVec[1] = i1; + fraction->mVec[2] = fract(xyz->mVec[2], &i2); + coord->mVec[2] = i2; +#else + float i0, i1, i2; + i0 = floorf(xyz->mVec[0]); + fraction->mVec[0] = xyz->mVec[0] - i0; + coord->mVec[0] = i0; + i1 = floorf(xyz->mVec[1]); + fraction->mVec[1] = xyz->mVec[1] - i1; + coord->mVec[1] = i1; + i2 = floorf(xyz->mVec[2]); + fraction->mVec[2] = xyz->mVec[2] - i2; + coord->mVec[2] = i2; +#endif +} + +#define CREATE_STENCIL(VALUETYPE, SUFFIX) \ +typedef struct \ +{ \ + VALUETYPE mStencil[2][2][2]; \ + cnanovdb_coord mCoord; \ +} cnanovdb_stencil1##SUFFIX; \ + \ +void \ +cnanovdb_stencil1##SUFFIX##_clear(cnanovdb_stencil1##SUFFIX *RESTRICT stencil) \ +{ \ + /* Invalid coords. 
*/ \ + stencil->mCoord.mVec[0] = 0x80000000; \ + stencil->mCoord.mVec[1] = 0x80000000; \ + stencil->mCoord.mVec[2] = 0x80000000; \ +} \ + \ +void \ +cnanovdb_stencil1##SUFFIX##_fill(cnanovdb_stencil1##SUFFIX *RESTRICT stencil, cnanovdb_readaccessor *RESTRICT acc, cnanovdb_coord *RESTRICT coord) \ +{ \ + stencil->mStencil[0][0][0] = cnanovdb_readaccessor_getValue##SUFFIX(acc, coord); \ + coord->mVec[2] += 1; \ + stencil->mStencil[0][0][1] = cnanovdb_readaccessor_getValue##SUFFIX(acc, coord); \ + coord->mVec[1] += 1; \ + stencil->mStencil[0][1][1] = cnanovdb_readaccessor_getValue##SUFFIX(acc, coord); \ + coord->mVec[2] -= 1; \ + stencil->mStencil[0][1][0] = cnanovdb_readaccessor_getValue##SUFFIX(acc, coord); \ + \ + coord->mVec[0] += 1; \ + stencil->mStencil[1][1][0] = cnanovdb_readaccessor_getValue##SUFFIX(acc, coord); \ + coord->mVec[2] += 1; \ + stencil->mStencil[1][1][1] = cnanovdb_readaccessor_getValue##SUFFIX(acc, coord); \ + coord->mVec[1] -= 1; \ + stencil->mStencil[1][0][1] = cnanovdb_readaccessor_getValue##SUFFIX(acc, coord); \ + coord->mVec[2] -= 1; \ + stencil->mStencil[1][0][0] = cnanovdb_readaccessor_getValue##SUFFIX(acc, coord); \ + coord->mVec[0] -= 1; \ + \ + stencil->mCoord.mVec[0] = coord->mVec[0]; \ + stencil->mCoord.mVec[1] = coord->mVec[1]; \ + stencil->mCoord.mVec[2] = coord->mVec[2]; \ +} \ + \ +void \ +cnanovdb_stencil1##SUFFIX##_update(cnanovdb_stencil1##SUFFIX *RESTRICT stencil, cnanovdb_readaccessor *RESTRICT acc, cnanovdb_coord *RESTRICT coord) \ +{ \ + uint32_t change = (coord->mVec[0] ^ stencil->mCoord.mVec[0]) | \ + (coord->mVec[1] ^ stencil->mCoord.mVec[1]) | \ + (coord->mVec[2] ^ stencil->mCoord.mVec[2]); \ + if (!change) \ + return; \ + \ + cnanovdb_stencil1##SUFFIX##_fill(stencil, acc, coord); \ +} \ +/**/ +CREATE_STENCIL(float, F) +CREATE_STENCIL(cnanovdb_Vec3F, F3) + + +#define CREATE_LERPSIMPLE(VALUETYPE, SUFFIX) \ +VALUETYPE \ +cnanovdb_lerp##SUFFIX(VALUETYPE a, VALUETYPE b, float w) \ +{ \ + return a + w * (b - a); \ +} \ +/**/ + +CREATE_LERPSIMPLE(float, F) +CREATE_LERPSIMPLE(double, D) + +cnanovdb_Vec3F +cnanovdb_lerpF3(cnanovdb_Vec3F a, cnanovdb_Vec3F b, float w) +{ + a.mVec[0] = cnanovdb_lerpF(a.mVec[0], b.mVec[0], w); + a.mVec[1] = cnanovdb_lerpF(a.mVec[1], b.mVec[1], w); + a.mVec[2] = cnanovdb_lerpF(a.mVec[2], b.mVec[2], w); + return a; +} + +#define CREATE_SAMPLE(VALUETYPE, SUFFIX) \ +VALUETYPE \ +cnanovdb_sample##SUFFIX##_nearest(cnanovdb_readaccessor *RESTRICT acc, const cnanovdb_Vec3F *RESTRICT xyz) \ +{ \ + cnanovdb_coord coord; \ + cnanovdb_coord_round(&coord, xyz); \ + return cnanovdb_readaccessor_getValue##SUFFIX(acc, &coord); \ +} \ + \ +VALUETYPE \ +cnanovdb_sample##SUFFIX##_trilinear(cnanovdb_readaccessor *RESTRICT acc, const cnanovdb_Vec3F *RESTRICT xyz) \ +{ \ + cnanovdb_coord coord; \ + cnanovdb_Vec3F fraction; \ + cnanovdb_coord_fract(&coord, &fraction, xyz); \ + \ + VALUETYPE vx, vx1, vy, vy1, vz, vz1; \ + \ + vz = cnanovdb_readaccessor_getValue##SUFFIX(acc, &coord); \ + coord.mVec[2] += 1; \ + vz1 = cnanovdb_readaccessor_getValue##SUFFIX(acc, &coord); \ + vy = cnanovdb_lerp##SUFFIX(vz, vz1, fraction.mVec[2]); \ + \ + coord.mVec[1] += 1; \ + \ + vz1 = cnanovdb_readaccessor_getValue##SUFFIX(acc, &coord); \ + coord.mVec[2] -= 1; \ + vz = cnanovdb_readaccessor_getValue##SUFFIX(acc, &coord); \ + vy1 = cnanovdb_lerp##SUFFIX(vz, vz1, fraction.mVec[2]); \ + \ + vx = cnanovdb_lerp##SUFFIX(vy, vy1, fraction.mVec[1]); \ + \ + coord.mVec[0] += 1; \ + \ + vz = cnanovdb_readaccessor_getValue##SUFFIX(acc, &coord); \ + coord.mVec[2] += 1; \ + 
vz1 = cnanovdb_readaccessor_getValue##SUFFIX(acc, &coord); \ + vy1 = cnanovdb_lerp##SUFFIX(vz, vz1, fraction.mVec[2]); \ + \ + coord.mVec[1] -= 1; \ + \ + vz1 = cnanovdb_readaccessor_getValue##SUFFIX(acc, &coord); \ + coord.mVec[2] -= 1; \ + vz = cnanovdb_readaccessor_getValue##SUFFIX(acc, &coord); \ + vy = cnanovdb_lerp##SUFFIX(vz, vz1, fraction.mVec[2]); \ + \ + vx1 = cnanovdb_lerp##SUFFIX(vy, vy1, fraction.mVec[1]); \ + \ + return cnanovdb_lerp##SUFFIX(vx, vx1, fraction.mVec[0]); \ +} \ + \ +VALUETYPE \ +cnanovdb_sample##SUFFIX##_trilinear_stencil(cnanovdb_stencil1##SUFFIX *RESTRICT stencil, cnanovdb_readaccessor *RESTRICT acc, const cnanovdb_Vec3F *RESTRICT xyz) \ +{ \ + cnanovdb_coord coord; \ + cnanovdb_Vec3F fraction; \ + cnanovdb_coord_fract(&coord, &fraction, xyz); \ + \ + cnanovdb_stencil1##SUFFIX##_update(stencil, acc, &coord); \ + \ + VALUETYPE vx, vx1, vy, vy1, vz, vz1; \ + \ + vz = stencil->mStencil[0][0][0]; \ + vz1 = stencil->mStencil[0][0][1]; \ + vy = cnanovdb_lerp##SUFFIX(vz, vz1, fraction.mVec[2]); \ + \ + vz = stencil->mStencil[0][1][0]; \ + vz1 = stencil->mStencil[0][1][1]; \ + vy1 = cnanovdb_lerp##SUFFIX(vz, vz1, fraction.mVec[2]); \ + \ + vx = cnanovdb_lerp##SUFFIX(vy, vy1, fraction.mVec[1]); \ + \ + vz = stencil->mStencil[1][1][0]; \ + vz1 = stencil->mStencil[1][1][1]; \ + vy1 = cnanovdb_lerp##SUFFIX(vz, vz1, fraction.mVec[2]); \ + \ + vz = stencil->mStencil[1][0][0]; \ + vz1 = stencil->mStencil[1][0][1]; \ + vy = cnanovdb_lerp##SUFFIX(vz, vz1, fraction.mVec[2]); \ + \ + vx1 = cnanovdb_lerp##SUFFIX(vy, vy1, fraction.mVec[1]); \ + \ + return cnanovdb_lerp##SUFFIX(vx, vx1, fraction.mVec[0]); \ +} \ +/**/ +CREATE_SAMPLE(float, F) +CREATE_SAMPLE(cnanovdb_Vec3F, F3) + +void +cnanovdb_sampleF_gradient(cnanovdb_Vec3F *RESTRICT ret, cnanovdb_readaccessor *RESTRICT acc, const cnanovdb_Vec3F *RESTRICT xyz) +{ + cnanovdb_Vec3F qxyz; + qxyz.mVec[0] = xyz->mVec[0]; + qxyz.mVec[1] = xyz->mVec[1]; + qxyz.mVec[2] = xyz->mVec[2]; + for (int i = 0; i < 3; i++) + { + float sp, sm; + + qxyz.mVec[i] -= 0.5; + sm = cnanovdb_sampleF_trilinear(acc, &qxyz); + qxyz.mVec[i] += 1.0; + sp = cnanovdb_sampleF_trilinear(acc, &qxyz); + qxyz.mVec[i] -= 0.5; + ret->mVec[i] = sp - sm; + } +} + +void +cnanovdb_sampleF_gradient0(cnanovdb_Vec3F *RESTRICT ret, cnanovdb_readaccessor *RESTRICT acc, const cnanovdb_Vec3F *RESTRICT xyz) +{ + cnanovdb_coord coord; + cnanovdb_Vec3F fraction; + cnanovdb_coord_fract(&coord, &fraction, xyz); + + float stencil[2][2][2]; + + stencil[0][0][0] = cnanovdb_readaccessor_getValueF(acc, &coord); + coord.mVec[2] += 1; + stencil[0][0][1] = cnanovdb_readaccessor_getValueF(acc, &coord); + coord.mVec[1] += 1; + stencil[0][1][1] = cnanovdb_readaccessor_getValueF(acc, &coord); + coord.mVec[2] -= 1; + stencil[0][1][0] = cnanovdb_readaccessor_getValueF(acc, &coord); + + coord.mVec[0] += 1; + stencil[1][1][0] = cnanovdb_readaccessor_getValueF(acc, &coord); + coord.mVec[2] += 1; + stencil[1][1][1] = cnanovdb_readaccessor_getValueF(acc, &coord); + coord.mVec[1] -= 1; + stencil[1][0][1] = cnanovdb_readaccessor_getValueF(acc, &coord); + coord.mVec[2] -= 1; + stencil[1][0][0] = cnanovdb_readaccessor_getValueF(acc, &coord); + + float D[4]; + + D[0] = stencil[0][0][1] - stencil[0][0][0]; + D[1] = stencil[0][1][1] - stencil[0][1][0]; + D[2] = stencil[1][0][1] - stencil[1][0][0]; + D[3] = stencil[1][1][1] - stencil[1][1][0]; + + ret->mVec[2] = cnanovdb_lerpF( + cnanovdb_lerpF(D[0], D[1], fraction.mVec[1]), + cnanovdb_lerpF(D[2], D[3], fraction.mVec[1]), + fraction.mVec[0] ); + + float w = 
fraction.mVec[2]; + D[0] = stencil[0][0][0] + D[0] * w; + D[1] = stencil[0][1][0] + D[1] * w; + D[2] = stencil[1][0][0] + D[2] * w; + D[3] = stencil[1][1][0] + D[3] * w; + + ret->mVec[0] = cnanovdb_lerpF(D[2], D[3], fraction.mVec[1]) + - cnanovdb_lerpF(D[0], D[1], fraction.mVec[1]); + + ret->mVec[1] = cnanovdb_lerpF(D[1] - D[0], D[3] - D[2], fraction.mVec[0]); +} + +void +cnanovdb_sampleF_gradient0_stencil(cnanovdb_Vec3F *RESTRICT ret, cnanovdb_stencil1F *RESTRICT stencil, cnanovdb_readaccessor *RESTRICT acc, const cnanovdb_Vec3F *RESTRICT xyz) +{ + cnanovdb_coord coord; + cnanovdb_Vec3F fraction; + cnanovdb_coord_fract(&coord, &fraction, xyz); + + cnanovdb_stencil1F_update(stencil, acc, &coord); + + float D[4]; + + D[0] = stencil->mStencil[0][0][1] - stencil->mStencil[0][0][0]; + D[1] = stencil->mStencil[0][1][1] - stencil->mStencil[0][1][0]; + D[2] = stencil->mStencil[1][0][1] - stencil->mStencil[1][0][0]; + D[3] = stencil->mStencil[1][1][1] - stencil->mStencil[1][1][0]; + + ret->mVec[2] = cnanovdb_lerpF( + cnanovdb_lerpF(D[0], D[1], fraction.mVec[1]), + cnanovdb_lerpF(D[2], D[3], fraction.mVec[1]), + fraction.mVec[0] ); + + float w = fraction.mVec[2]; + D[0] = stencil->mStencil[0][0][0] + D[0] * w; + D[1] = stencil->mStencil[0][1][0] + D[1] * w; + D[2] = stencil->mStencil[1][0][0] + D[2] * w; + D[3] = stencil->mStencil[1][1][0] + D[3] * w; + + ret->mVec[0] = cnanovdb_lerpF(D[2], D[3], fraction.mVec[1]) + - cnanovdb_lerpF(D[0], D[1], fraction.mVec[1]); + + ret->mVec[1] = cnanovdb_lerpF(D[1] - D[0], D[3] - D[2], fraction.mVec[0]); +} + + +#endif diff --git a/external/nanovdb/math/DitherLUT.h b/external/nanovdb/math/DitherLUT.h new file mode 100644 index 00000000..7add4a6f --- /dev/null +++ b/external/nanovdb/math/DitherLUT.h @@ -0,0 +1,189 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 +// +/// @author Jeff Lait +/// +/// @date May 13, 2021 +/// +/// @file DitherLUT.h +/// +/// @brief Defines look up table to do dithering of 8^3 leaf nodes. + +#ifndef NANOVDB_DITHERLUT_HAS_BEEN_INCLUDED +#define NANOVDB_DITHERLUT_HAS_BEEN_INCLUDED + +#include // for __hostdev__, Vec3, Min, Max, Pow2, Pow3, Pow4 + +namespace nanovdb { + +namespace math { + +class DitherLUT +{ + const bool mEnable; +public: + /// @brief Constructor with an optional scaling factor for the dithering + __hostdev__ DitherLUT(bool enable = true) : mEnable(enable) {} + + /// @brief Retrieves dither threshold for an offset within an 8^3 leaf nodes. + /// + /// @param offset into the lookup table of size 512 + __hostdev__ float operator()(const int offset) + { + +// This table was generated with +/************** + +static constexpr inline uint32 +SYSwang_inthash(uint32 key) +{ + // From http://www.concentric.net/~Ttwang/tech/inthash.htm + key += ~(key << 16); + key ^= (key >> 5); + key += (key << 3); + key ^= (key >> 13); + key += ~(key << 9); + key ^= (key >> 17); + return key; +} + +static void +ut_initDitherR(float *pattern, float offset, + int x, int y, int z, int res, int goalres) +{ + // These offsets are designed to maximize the difference between + // dither values in nearby voxels within a given 2x2x2 cell, without + // producing axis-aligned artifacts. The are organized in row-major + // order. 
+ static const float theDitherOffset[] = {0,4,6,2,5,1,3,7}; + static const float theScale = 0.125F; + int key = (((z << res) + y) << res) + x; + + if (res == goalres) + { + pattern[key] = offset; + return; + } + + // Randomly flip (on each axis) the dithering patterns used by the + // subcells. This key is xor'd with the subcell index below before + // looking up in the dither offset list. + key = SYSwang_inthash(key) & 7; + + x <<= 1; + y <<= 1; + z <<= 1; + + offset *= theScale; + for (int i = 0; i < 8; i++) + ut_initDitherR(pattern, offset+theDitherOffset[i ^ key]*theScale, + x+(i&1), y+((i&2)>>1), z+((i&4)>>2), res+1, goalres); +} + +// This is a compact algorithm that accomplishes essentially the same thing +// as ut_initDither() above. We should eventually switch to use this and +// clean the dead code. +static fpreal32 * +ut_initDitherRecursive(int goalres) +{ + const int nfloat = 1 << (goalres*3); + float *pattern = new float[nfloat]; + ut_initDitherR(pattern, 1.0F, 0, 0, 0, 0, goalres); + + // This has built an even spacing from 1/nfloat to 1.0. + // however, our dither pattern should be 1/(nfloat+1) to nfloat/(nfloat+1) + // So we do a correction here. Note that the earlier calculations are + // done with powers of 2 so are exact, so it does make sense to delay + // the renormalization to this pass. + float correctionterm = nfloat / (nfloat+1.0F); + for (int i = 0; i < nfloat; i++) + pattern[i] *= correctionterm; + return pattern; +} + + theDitherMatrix = ut_initDitherRecursive(3); + + for (int i = 0; i < 512/8; i ++) + { + for (int j = 0; j < 8; j ++) + std::cout << theDitherMatrix[i*8+j] << "f, "; + std::cout << std::endl; + } + + **************/ + static const float LUT[512] = + { + 0.14425f, 0.643275f, 0.830409f, 0.331384f, 0.105263f, 0.604289f, 0.167641f, 0.666667f, + 0.892788f, 0.393762f, 0.0818713f, 0.580897f, 0.853801f, 0.354776f, 0.916179f, 0.417154f, + 0.612086f, 0.11306f, 0.79922f, 0.300195f, 0.510721f, 0.0116959f, 0.947368f, 0.448343f, + 0.362573f, 0.861598f, 0.0506823f, 0.549708f, 0.261209f, 0.760234f, 0.19883f, 0.697856f, + 0.140351f, 0.639376f, 0.576998f, 0.0779727f, 0.522417f, 0.0233918f, 0.460039f, 0.959064f, + 0.888889f, 0.389864f, 0.327485f, 0.826511f, 0.272904f, 0.77193f, 0.709552f, 0.210526f, + 0.483431f, 0.982456f, 0.296296f, 0.795322f, 0.116959f, 0.615984f, 0.0545809f, 0.553606f, + 0.732943f, 0.233918f, 0.545809f, 0.0467836f, 0.865497f, 0.366472f, 0.803119f, 0.304094f, + 0.518519f, 0.0194932f, 0.45614f, 0.955166f, 0.729045f, 0.230019f, 0.54191f, 0.042885f, + 0.269006f, 0.768031f, 0.705653f, 0.206628f, 0.479532f, 0.978558f, 0.292398f, 0.791423f, + 0.237817f, 0.736842f, 0.424951f, 0.923977f, 0.136452f, 0.635478f, 0.323587f, 0.822612f, + 0.986355f, 0.487329f, 0.674464f, 0.175439f, 0.88499f, 0.385965f, 0.573099f, 0.0740741f, + 0.51462f, 0.0155945f, 0.202729f, 0.701754f, 0.148148f, 0.647174f, 0.834308f, 0.335283f, + 0.265107f, 0.764133f, 0.951267f, 0.452242f, 0.896686f, 0.397661f, 0.08577f, 0.584795f, + 0.8577f, 0.358674f, 0.920078f, 0.421053f, 0.740741f, 0.241715f, 0.678363f, 0.179337f, + 0.109162f, 0.608187f, 0.17154f, 0.670565f, 0.491228f, 0.990253f, 0.42885f, 0.927875f, + 0.0662768f, 0.565302f, 0.62768f, 0.128655f, 0.183236f, 0.682261f, 0.744639f, 0.245614f, + 0.814815f, 0.315789f, 0.378168f, 0.877193f, 0.931774f, 0.432749f, 0.495127f, 0.994152f, + 0.0350877f, 0.534113f, 0.97076f, 0.471735f, 0.214425f, 0.71345f, 0.526316f, 0.0272904f, + 0.783626f, 0.2846f, 0.222222f, 0.721248f, 0.962963f, 0.463938f, 0.276803f, 0.775828f, + 0.966862f, 0.467836f, 0.405458f, 
0.904483f, 0.0701754f, 0.569201f, 0.881092f, 0.382066f, + 0.218324f, 0.717349f, 0.654971f, 0.155945f, 0.818713f, 0.319688f, 0.132554f, 0.631579f, + 0.0623782f, 0.561404f, 0.748538f, 0.249513f, 0.912281f, 0.413255f, 0.974659f, 0.475634f, + 0.810916f, 0.311891f, 0.499025f, 0.998051f, 0.163743f, 0.662768f, 0.226121f, 0.725146f, + 0.690058f, 0.191033f, 0.00389864f, 0.502924f, 0.557505f, 0.0584795f, 0.120858f, 0.619883f, + 0.440546f, 0.939571f, 0.752437f, 0.253411f, 0.307992f, 0.807018f, 0.869396f, 0.37037f, + 0.658869f, 0.159844f, 0.346979f, 0.846004f, 0.588694f, 0.0896686f, 0.152047f, 0.651072f, + 0.409357f, 0.908382f, 0.596491f, 0.0974659f, 0.339181f, 0.838207f, 0.900585f, 0.401559f, + 0.34308f, 0.842105f, 0.779727f, 0.280702f, 0.693957f, 0.194932f, 0.25731f, 0.756335f, + 0.592593f, 0.0935673f, 0.0311891f, 0.530214f, 0.444444f, 0.94347f, 0.506823f, 0.00779727f, + 0.68616f, 0.187135f, 0.124756f, 0.623782f, 0.288499f, 0.787524f, 0.350877f, 0.849903f, + 0.436647f, 0.935673f, 0.873294f, 0.374269f, 0.538012f, 0.0389864f, 0.60039f, 0.101365f, + 0.57115f, 0.0721248f, 0.758285f, 0.259259f, 0.719298f, 0.220273f, 0.532164f, 0.0331384f, + 0.321637f, 0.820663f, 0.00974659f, 0.508772f, 0.469786f, 0.968811f, 0.282651f, 0.781676f, + 0.539961f, 0.0409357f, 0.727096f, 0.22807f, 0.500975f, 0.00194932f, 0.563353f, 0.0643275f, + 0.290448f, 0.789474f, 0.477583f, 0.976608f, 0.251462f, 0.750487f, 0.31384f, 0.812865f, + 0.94152f, 0.442495f, 0.879142f, 0.380117f, 0.37232f, 0.871345f, 0.309942f, 0.808967f, + 0.192982f, 0.692008f, 0.130604f, 0.62963f, 0.621832f, 0.122807f, 0.559454f, 0.0604289f, + 0.660819f, 0.161793f, 0.723197f, 0.224172f, 0.403509f, 0.902534f, 0.840156f, 0.341131f, + 0.411306f, 0.910331f, 0.473684f, 0.97271f, 0.653021f, 0.153996f, 0.0916179f, 0.590643f, + 0.196881f, 0.695906f, 0.384016f, 0.883041f, 0.0955166f, 0.594542f, 0.157895f, 0.65692f, + 0.945419f, 0.446394f, 0.633528f, 0.134503f, 0.844055f, 0.345029f, 0.906433f, 0.407407f, + 0.165692f, 0.664717f, 0.103314f, 0.602339f, 0.126706f, 0.625731f, 0.189084f, 0.688109f, + 0.91423f, 0.415205f, 0.851852f, 0.352827f, 0.875244f, 0.376218f, 0.937622f, 0.438596f, + 0.317739f, 0.816764f, 0.255361f, 0.754386f, 0.996101f, 0.497076f, 0.933723f, 0.434698f, + 0.567251f, 0.0682261f, 0.504873f, 0.00584795f, 0.247563f, 0.746589f, 0.185185f, 0.684211f, + 0.037037f, 0.536062f, 0.0994152f, 0.598441f, 0.777778f, 0.278752f, 0.465887f, 0.964912f, + 0.785575f, 0.28655f, 0.847953f, 0.348928f, 0.0292398f, 0.528265f, 0.7154f, 0.216374f, + 0.39961f, 0.898636f, 0.961014f, 0.461988f, 0.0487329f, 0.547758f, 0.111111f, 0.610136f, + 0.649123f, 0.150097f, 0.212476f, 0.711501f, 0.797271f, 0.298246f, 0.859649f, 0.360624f, + 0.118908f, 0.617934f, 0.0565302f, 0.555556f, 0.329435f, 0.82846f, 0.516569f, 0.0175439f, + 0.867446f, 0.368421f, 0.805068f, 0.306043f, 0.578947f, 0.079922f, 0.267057f, 0.766082f, + 0.270955f, 0.76998f, 0.707602f, 0.208577f, 0.668616f, 0.169591f, 0.606238f, 0.107212f, + 0.520468f, 0.0214425f, 0.45809f, 0.957115f, 0.419103f, 0.918129f, 0.356725f, 0.855751f, + 0.988304f, 0.489279f, 0.426901f, 0.925926f, 0.450292f, 0.949318f, 0.512671f, 0.0136452f, + 0.239766f, 0.738791f, 0.676413f, 0.177388f, 0.699805f, 0.20078f, 0.263158f, 0.762183f, + 0.773879f, 0.274854f, 0.337232f, 0.836257f, 0.672515f, 0.173489f, 0.734893f, 0.235867f, + 0.0253411f, 0.524366f, 0.586745f, 0.0877193f, 0.423002f, 0.922027f, 0.48538f, 0.984405f, + 0.74269f, 0.243665f, 0.680312f, 0.181287f, 0.953216f, 0.454191f, 0.1423f, 0.641326f, + 0.493177f, 0.992203f, 0.430799f, 0.929825f, 0.204678f, 0.703704f, 
0.890838f, 0.391813f, + 0.894737f, 0.395712f, 0.0838207f, 0.582846f, 0.0448343f, 0.54386f, 0.231969f, 0.730994f, + 0.146199f, 0.645224f, 0.832359f, 0.333333f, 0.793372f, 0.294347f, 0.980507f, 0.481481f, + 0.364522f, 0.863548f, 0.80117f, 0.302144f, 0.824561f, 0.325536f, 0.138402f, 0.637427f, + 0.614035f, 0.11501f, 0.0526316f, 0.551657f, 0.0760234f, 0.575049f, 0.88694f, 0.387914f, + }; + return mEnable ? LUT[offset & 511] : 0.5f;// branch prediction should optimize this! + } +}; // DitherLUT class + +}// namspace math + +}// namespace nanovdb + +#endif // NANOVDB_DITHERLUT_HAS_BEEN_INCLUDED diff --git a/external/nanovdb/math/HDDA.h b/external/nanovdb/math/HDDA.h new file mode 100644 index 00000000..c72a58a7 --- /dev/null +++ b/external/nanovdb/math/HDDA.h @@ -0,0 +1,510 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +/// @file HDDA.h +/// +/// @author Ken Museth +/// +/// @brief Hierarchical Digital Differential Analyzers specialized for VDB. + +#ifndef NANOVDB_HDDA_H_HAS_BEEN_INCLUDED +#define NANOVDB_HDDA_H_HAS_BEEN_INCLUDED + +// Comment out to disable this explicit round-off check +#define ENFORCE_FORWARD_STEPPING + +#include // only dependency + +namespace nanovdb::math { + +/// @brief A Digital Differential Analyzer specialized for OpenVDB grids +/// @note Conceptually similar to Bresenham's line algorithm applied +/// to a 3D Ray intersecting OpenVDB nodes or voxels. Log2Dim = 0 +/// corresponds to a voxel and Log2Dim a tree node of size 2^Log2Dim. +/// +/// @note The Ray template class is expected to have the following +/// methods: test(time), t0(), t1(), invDir(), and operator()(time). +/// See the example Ray class above for their definition. +template +class HDDA +{ +public: + using RealType = typename RayT::RealType; + using RealT = RealType; + using Vec3Type = typename RayT::Vec3Type; + using Vec3T = Vec3Type; + using CoordType = CoordT; + + /// @brief Default ctor + HDDA() = default; + + /// @brief ctor from ray and dimension at which the DDA marches + __hostdev__ HDDA(const RayT& ray, int dim) { this->init(ray, dim); } + + /// @brief Re-initializes the HDDA + __hostdev__ void init(const RayT& ray, RealT startTime, RealT maxTime, int dim) + { + assert(startTime <= maxTime); + mDim = dim; + mT0 = startTime; + mT1 = maxTime; + const Vec3T &pos = ray(mT0), &dir = ray.dir(), &inv = ray.invDir(); + mVoxel = RoundDown(pos) & (~(dim - 1)); + for (int axis = 0; axis < 3; ++axis) { + if (dir[axis] == RealT(0)) { //handles dir = +/- 0 + mNext[axis] = Maximum::value(); //i.e. disabled! 
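+                // A zero direction component never crosses a grid plane on this axis, so its
+                // crossing time stays "infinite" and the step below is set to zero.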
+ mStep[axis] = 0; + } else if (inv[axis] > 0) { + mStep[axis] = 1; + mNext[axis] = mT0 + (mVoxel[axis] + dim - pos[axis]) * inv[axis]; + mDelta[axis] = inv[axis]; + } else { + mStep[axis] = -1; + mNext[axis] = mT0 + (mVoxel[axis] - pos[axis]) * inv[axis]; + mDelta[axis] = -inv[axis]; + } + } + } + + /// @brief Simular to init above except it uses the bounds of the input ray + __hostdev__ void init(const RayT& ray, int dim) { this->init(ray, ray.t0(), ray.t1(), dim); } + + /// @brief Updates the HDDA to march with the specified dimension + __hostdev__ bool update(const RayT& ray, int dim) + { + if (mDim == dim) + return false; + mDim = dim; + const Vec3T &pos = ray(mT0), &inv = ray.invDir(); + mVoxel = RoundDown(pos) & (~(dim - 1)); + for (int axis = 0; axis < 3; ++axis) { + if (mStep[axis] == 0) + continue; + mNext[axis] = mT0 + (mVoxel[axis] - pos[axis]) * inv[axis]; + if (mStep[axis] > 0) + mNext[axis] += dim * inv[axis]; + } + + return true; + } + + __hostdev__ int dim() const { return mDim; } + + /// @brief Increment the voxel index to next intersected voxel or node + /// and returns true if the step in time does not exceed maxTime. + __hostdev__ bool step() + { + const int axis = MinIndex(mNext); +#if 1 + switch (axis) { + case 0: + return step<0>(); + case 1: + return step<1>(); + default: + return step<2>(); + } +#else + mT0 = mNext[axis]; + mNext[axis] += mDim * mDelta[axis]; + mVoxel[axis] += mDim * mStep[axis]; + return mT0 <= mT1; +#endif + } + + /// @brief Return the index coordinates of the next node or voxel + /// intersected by the ray. If Log2Dim = 0 the return value is the + /// actual signed coordinate of the voxel, else it is the origin + /// of the corresponding VDB tree node or tile. + /// @note Incurs no computational overhead. + __hostdev__ const CoordT& voxel() const { return mVoxel; } + + /// @brief Return the time (parameterized along the Ray) of the + /// first hit of a tree node of size 2^Log2Dim. + /// @details This value is initialized to startTime or ray.t0() + /// depending on the constructor used. + /// @note Incurs no computational overhead. + __hostdev__ RealType time() const { return mT0; } + + /// @brief Return the maximum time (parameterized along the Ray). + __hostdev__ RealType maxTime() const { return mT1; } + + /// @brief Return the time (parameterized along the Ray) of the + /// second (i.e. next) hit of a tree node of size 2^Log2Dim. + /// @note Incurs a (small) computational overhead. 
+ __hostdev__ RealType next() const + { +#if 1 //def __CUDA_ARCH__ + return fminf(mT1, fminf(mNext[0], fminf(mNext[1], mNext[2]))); +#else + return std::min(mT1, std::min(mNext[0], std::min(mNext[1], mNext[2]))); +#endif + } + +private: + // helper to implement the general form + template + __hostdev__ bool step() + { +#ifdef ENFORCE_FORWARD_STEPPING + //if (mNext[axis] <= mT0) mNext[axis] += mT0 - mNext[axis] + fmaxf(mNext[axis]*1.0e-6f, 1.0e-6f); + //if (mNext[axis] <= mT0) mNext[axis] += mT0 - mNext[axis] + (mNext[axis] + 1.0f)*1.0e-6f; + if (mNext[axis] <= mT0) { + mNext[axis] += mT0 - 0.999999f * mNext[axis] + 1.0e-6f; + } +#endif + mT0 = mNext[axis]; + mNext[ axis] += mDim * mDelta[axis]; + mVoxel[axis] += mDim * mStep[ axis]; + return mT0 <= mT1; + } + + int32_t mDim; + RealT mT0, mT1; // min and max allowed times + CoordT mVoxel, mStep; // current voxel location and step to next voxel location + Vec3T mDelta, mNext; // delta time and next time +}; // class HDDA + +/////////////////////////////////////////// ZeroCrossing //////////////////////////////////////////// + +/// @brief returns true if the ray intersects a zero-crossing at the voxel level of the grid in the accessor +/// The empty-space ray-marching is performed at all levels of the tree using an +/// HDDA. If an intersection is detected, then ijk is updated with the index coordinate of the closest +/// voxel after the intersection point, v contains the grid values at ijk, and t is set to the time of +/// the intersection along the ray. +template +inline __hostdev__ bool ZeroCrossing(RayT& ray, AccT& acc, Coord& ijk, typename AccT::ValueType& v, float& t) +{ + if (!ray.clip(acc.root().bbox()) || ray.t1() > 1e20) + return false; // clip ray to bbox + static const float Delta = 1.0001f; + ijk = RoundDown(ray.start()); // first hit of bbox + HDDA hdda(ray, acc.getDim(ijk, ray)); + const auto v0 = acc.getValue(ijk); + while (hdda.step()) { + ijk = RoundDown(ray(hdda.time() + Delta)); + hdda.update(ray, acc.getDim(ijk, ray)); + if (hdda.dim() > 1 || !acc.isActive(ijk)) + continue; // either a tile value or an inactive voxel + while (hdda.step() && acc.isActive(hdda.voxel())) { // in the narrow band + v = acc.getValue(hdda.voxel()); + if (v * v0 < 0) { // zero crossing + ijk = hdda.voxel(); + t = hdda.time(); + return true; + } + } + } + return false; +} + +/////////////////////////////////////////// DDA //////////////////////////////////////////// + +/// @brief A Digital Differential Analyzer. Unlike HDDA (defined above) this DDA +/// uses a fixed step-size defined by the template parameter Dim! +/// +/// @note The Ray template class is expected to have the following +/// methods: test(time), t0(), t1(), invDir(), and operator()(time). +/// See the example Ray class above for their definition. 
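+// A minimal level-set marching sketch using ZeroCrossing above (illustrative only; the
+// grid, accessor and ray variable names are assumptions, not part of this header):
+//
+//   auto acc  = grid->getAccessor();        // grid: const nanovdb::NanoGrid<float>*
+//   auto iRay = wRay.worldToIndexF(*grid);  // wRay: nanovdb::math::Ray<float> in world space
+//   nanovdb::Coord ijk; float v = 0.0f, t = 0.0f;
+//   if (nanovdb::math::ZeroCrossing(iRay, acc, ijk, v, t)) {
+//       // first sign change found at index-space time t, in voxel ijk, with value v
+//   }
+//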
+template +class DDA +{ + static_assert(Dim >= 1, "Dim must be >= 1"); + +public: + using RealType = typename RayT::RealType; + using RealT = RealType; + using Vec3Type = typename RayT::Vec3Type; + using Vec3T = Vec3Type; + using CoordType = CoordT; + + /// @brief Default ctor + DDA() = default; + + /// @brief ctor from ray and dimension at which the DDA marches + __hostdev__ DDA(const RayT& ray) { this->init(ray); } + + /// @brief Re-initializes the DDA + __hostdev__ void init(const RayT& ray, RealT startTime, RealT maxTime) + { + assert(startTime <= maxTime); + mT0 = startTime; + mT1 = maxTime; + const Vec3T &pos = ray(mT0), &dir = ray.dir(), &inv = ray.invDir(); + mVoxel = RoundDown(pos) & (~(Dim - 1)); + for (int axis = 0; axis < 3; ++axis) { + if (dir[axis] == RealT(0)) { //handles dir = +/- 0 + mNext[axis] = Maximum::value(); //i.e. disabled! + mStep[axis] = 0; + } else if (inv[axis] > 0) { + mStep[axis] = Dim; + mNext[axis] = (mT0 + (mVoxel[axis] + Dim - pos[axis]) * inv[axis]); + mDelta[axis] = inv[axis]; + } else { + mStep[axis] = -Dim; + mNext[axis] = mT0 + (mVoxel[axis] - pos[axis]) * inv[axis]; + mDelta[axis] = -inv[axis]; + } + } + } + + /// @brief Simular to init above except it uses the bounds of the input ray + __hostdev__ void init(const RayT& ray) { this->init(ray, ray.t0(), ray.t1()); } + + /// @brief Increment the voxel index to next intersected voxel or node + /// and returns true if the step in time does not exceed maxTime. + __hostdev__ bool step() + { + const int axis = MinIndex(mNext); +#if 1 + switch (axis) { + case 0: + return step<0>(); + case 1: + return step<1>(); + default: + return step<2>(); + } +#else +#ifdef ENFORCE_FORWARD_STEPPING + if (mNext[axis] <= mT0) { + mNext[axis] += mT0 - 0.999999f * mNext[axis] + 1.0e-6f; + } +#endif + mT0 = mNext[axis]; + mNext[axis] += mDelta[axis]; + mVoxel[axis] += mStep[axis]; + return mT0 <= mT1; +#endif + } + + /// @brief Return the index coordinates of the next node or voxel + /// intersected by the ray. If Log2Dim = 0 the return value is the + /// actual signed coordinate of the voxel, else it is the origin + /// of the corresponding VDB tree node or tile. + /// @note Incurs no computational overhead. + __hostdev__ const CoordT& voxel() const { return mVoxel; } + + /// @brief Return the time (parameterized along the Ray) of the + /// first hit of a tree node of size 2^Log2Dim. + /// @details This value is initialized to startTime or ray.t0() + /// depending on the constructor used. + /// @note Incurs no computational overhead. + __hostdev__ RealType time() const { return mT0; } + + /// @brief Return the maximum time (parameterized along the Ray). + __hostdev__ RealType maxTime() const { return mT1; } + + /// @brief Return the time (parameterized along the Ray) of the + /// second (i.e. next) hit of a tree node of size 2^Log2Dim. + /// @note Incurs a (small) computational overhead. 
+ __hostdev__ RealType next() const + { + return Min(mT1, Min(mNext[0], Min(mNext[1], mNext[2]))); + } + + __hostdev__ int nextAxis() const + { + return nanovdb::math::MinIndex(mNext); + } + +private: + // helper to implement the general form + template + __hostdev__ bool step() + { +#ifdef ENFORCE_FORWARD_STEPPING + if (mNext[axis] <= mT0) { + mNext[axis] += mT0 - 0.999999f * mNext[axis] + 1.0e-6f; + } +#endif + mT0 = mNext[axis]; + mNext[axis] += mDelta[axis]; + mVoxel[axis] += mStep[axis]; + return mT0 <= mT1; + } + + RealT mT0, mT1; // min and max allowed times + CoordT mVoxel, mStep; // current voxel location and step to next voxel location + Vec3T mDelta, mNext; // delta time and next time +}; // class DDA + +/////////////////////////////////////////// ZeroCrossingNode //////////////////////////////////////////// + +template +inline __hostdev__ bool ZeroCrossingNode(RayT& ray, const NodeT& node, float v0, nanovdb::math::Coord& ijk, float& v, float& t) +{ + math::BBox bbox(node.origin(), node.origin() + Coord(node.dim() - 1)); + + if (!ray.clip(node.bbox())) { + return false; + } + + const float t0 = ray.t0(); + + static const float Delta = 1.0001f; + ijk = Coord::Floor(ray(ray.t0() + Delta)); + + t = t0; + v = 0; + + DDA dda(ray); + while (dda.step()) { + ijk = dda.voxel(); + + if (bbox.isInside(ijk) == false) + return false; + + v = node.getValue(ijk); + if (v * v0 < 0) { + t = dda.time(); + return true; + } + } + return false; +} + +/////////////////////////////////////////// TreeMarcher //////////////////////////////////////////// + +/// @brief returns true if the ray intersects an active value at any level of the grid in the accessor. +/// The empty-space ray-marching is performed at all levels of the tree using an +/// HDDA. If an intersection is detected, then ijk is updated with the index coordinate of the first +/// active voxel or tile, and t is set to the time of its intersection along the ray. +template +inline __hostdev__ bool firstActive(RayT& ray, AccT& acc, Coord &ijk, float& t) +{ + if (!ray.clip(acc.root().bbox()) || ray.t1() > 1e20) {// clip ray to bbox + return false;// missed or undefined bbox + } + static const float Delta = 1.0001f;// forward step-size along the ray to avoid getting stuck + t = ray.t0();// initiate time + ijk = RoundDown(ray.start()); // first voxel inside bbox + for (HDDA hdda(ray, acc.getDim(ijk, ray)); !acc.isActive(ijk); hdda.update(ray, acc.getDim(ijk, ray))) { + if (!hdda.step()) return false;// leap-frog HDDA and exit if ray bound is exceeded + t = hdda.time() + Delta;// update time + ijk = RoundDown( ray(t) );// update ijk + } + return true; +} + +/////////////////////////////////////////// TreeMarcher //////////////////////////////////////////// + +/// @brief A Tree Marcher for Generic Grids + +template +class TreeMarcher +{ +public: + using ChildT = typename NodeT::ChildNodeType; + using RealType = typename RayT::RealType; + using RealT = RealType; + using CoordType = CoordT; + + inline __hostdev__ TreeMarcher(AccT& acc) + : mAcc(acc) + { + } + + /// @brief Initialize the TreeMarcher with an index-space ray. + inline __hostdev__ bool init(const RayT& indexRay) + { + mRay = indexRay; + if (!mRay.clip(mAcc.root().bbox())) + return false; // clip ray to bbox + + // tweak the intersection span into the bbox. + // CAVEAT: this will potentially clip some tiny corner intersections. 
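+        // The Eps shrink below keeps t0/t1 strictly inside the clipped ray span, so the
+        // first lookup does not sample exactly on a bounding-box face.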
+ static const float Eps = 0.000001f; + const float t0 = mRay.t0() + Eps; + const float t1 = mRay.t1() - Eps; + if (t0 > t1) + return false; + + const CoordT ijk = RoundDown(mRay(t0)); + const uint32_t dim = mAcc.getDim(ijk, mRay); + mHdda.init(mRay, t0, t1, nanovdb::math::Max(dim, NodeT::dim())); + + mT0 = (dim <= ChildT::dim()) ? mHdda.time() : -1; // potentially begin a span. + mTmax = t1; + return true; + } + + /// @brief step the ray through the tree. If the ray hits a node then + /// populate t0 & t1, and the node. + /// @return true when a node of type NodeT is intersected, false otherwise. + inline __hostdev__ bool step(const NodeT** node, float& t0, float& t1) + { + // CAVEAT: if Delta is too large then it will clip corners of nodes in a visible way. + // but it has to be quite large when very far from the grid (due to fp32 rounding) + static const float Delta = 0.01f; + bool hddaIsValid; + + do { + t0 = mT0; + + auto currentNode = mAcc.template getNode(); + + // get next node intersection... + hddaIsValid = mHdda.step(); + const CoordT nextIjk = RoundDown(mRay(mHdda.time() + Delta)); + const auto nextDim = mAcc.getDim(nextIjk, mRay); + mHdda.update(mRay, (int)Max(nextDim, NodeT::dim())); + mT0 = (nextDim <= ChildT::dim()) ? mHdda.time() : -1; // potentially begin a span. + + if (t0 >= 0) { // we are in a span. + t1 = Min(mTmax, mHdda.time()); + + // TODO: clean this up! + if (t0 >= t1 || currentNode == nullptr) + continue; + + *node = currentNode; + return true; + } + + } while (hddaIsValid); + + return false; + } + + inline __hostdev__ const RayT& ray() const { return mRay; } + + inline __hostdev__ RayT& ray() { return mRay; } + +private: + AccT& mAcc; + RayT mRay; + HDDA mHdda; + float mT0; + float mTmax; +};// TreeMarcher + +/////////////////////////////////////////// PointTreeMarcher //////////////////////////////////////////// + +/// @brief A Tree Marcher for Point Grids +/// +/// @note This class will handle correctly offseting the ray by 0.5 to ensure that +/// the underlying HDDA will intersect with the grid-cells. See details below. + +template +class PointTreeMarcher : public TreeMarcher, RayT, AccT, CoordT> +{ + using BaseT = TreeMarcher, RayT, AccT, CoordT>; +public: + __hostdev__ PointTreeMarcher(AccT& acc) : BaseT(acc) {} + + /// @brief Initiates this instance with a ray in index space. + /// + /// @details An offset by 0.5 is applied to the ray to account for the fact that points in vdb + /// grids are bucketed into so-called grid cell, which are centered round grid voxels, + /// whereas the DDA is based on so-called grid nodes, which are coincident with grid + /// voxels. So, rather than offsettting the points by 0.5 to bring them into a grid + /// node representation this method offsets the eye of the ray by 0.5, which effectively + /// ensures that the DDA operates on grid cells as oppose to grid nodes. This subtle + /// but important offset by 0.5 is explined in more details in our online documentation. + __hostdev__ bool init(RayT ray) { return BaseT::init(ray.offsetEye(0.5)); } +};// PointTreeMarcher + +} // namespace nanovdb::math + +#endif // NANOVDB_HDDA_HAS_BEEN_INCLUDED diff --git a/external/nanovdb/math/Math.h b/external/nanovdb/math/Math.h new file mode 100644 index 00000000..da3a6162 --- /dev/null +++ b/external/nanovdb/math/Math.h @@ -0,0 +1,1448 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +/*! 
+ \file Math.h + + \author Ken Museth + + \date January 8, 2020 + + \brief Math functions and classes + +*/ + +#ifndef NANOVDB_MATH_MATH_H_HAS_BEEN_INCLUDED +#define NANOVDB_MATH_MATH_H_HAS_BEEN_INCLUDED + +#include // for __hostdev__ and lots of other utility functions + +namespace nanovdb {// ================================================================= + +namespace math {// ============================================================= + +// ----------------------------> Various math functions <------------------------------------- + +//@{ +/// @brief Pi constant taken from Boost to match old behaviour +template +inline __hostdev__ constexpr T pi() +{ + return 3.141592653589793238462643383279502884e+00; +} +template<> +inline __hostdev__ constexpr float pi() +{ + return 3.141592653589793238462643383279502884e+00F; +} +template<> +inline __hostdev__ constexpr double pi() +{ + return 3.141592653589793238462643383279502884e+00; +} +template<> +inline __hostdev__ constexpr long double pi() +{ + return 3.141592653589793238462643383279502884e+00L; +} +//@} + +//@{ +/// Tolerance for floating-point comparison +template +struct Tolerance; +template<> +struct Tolerance +{ + __hostdev__ static float value() { return 1e-8f; } +}; +template<> +struct Tolerance +{ + __hostdev__ static double value() { return 1e-15; } +}; +//@} + +//@{ +/// Delta for small floating-point offsets +template +struct Delta; +template<> +struct Delta +{ + __hostdev__ static float value() { return 1e-5f; } +}; +template<> +struct Delta +{ + __hostdev__ static double value() { return 1e-9; } +}; +//@} + +//@{ +/// Maximum floating-point values +template +struct Maximum; +#if defined(__CUDA_ARCH__) || defined(__HIP__) +template<> +struct Maximum +{ + __hostdev__ static int value() { return 2147483647; } +}; +template<> +struct Maximum +{ + __hostdev__ static uint32_t value() { return 4294967295u; } +}; +template<> +struct Maximum +{ + __hostdev__ static float value() { return 1e+38f; } +}; +template<> +struct Maximum +{ + __hostdev__ static double value() { return 1e+308; } +}; +#else +template +struct Maximum +{ + static T value() { return std::numeric_limits::max(); } +}; +#endif +//@} + +template +__hostdev__ inline bool isApproxZero(const Type& x) +{ + return !(x > Tolerance::value()) && !(x < -Tolerance::value()); +} + +template +__hostdev__ inline Type Min(Type a, Type b) +{ + return (a < b) ? a : b; +} +__hostdev__ inline int32_t Min(int32_t a, int32_t b) +{ + return int32_t(fminf(float(a), float(b))); +} +__hostdev__ inline uint32_t Min(uint32_t a, uint32_t b) +{ + return uint32_t(fminf(float(a), float(b))); +} +__hostdev__ inline float Min(float a, float b) +{ + return fminf(a, b); +} +__hostdev__ inline double Min(double a, double b) +{ + return fmin(a, b); +} +template +__hostdev__ inline Type Max(Type a, Type b) +{ + return (a > b) ? 
a : b; +} + +__hostdev__ inline int32_t Max(int32_t a, int32_t b) +{ + return int32_t(fmaxf(float(a), float(b))); +} +__hostdev__ inline uint32_t Max(uint32_t a, uint32_t b) +{ + return uint32_t(fmaxf(float(a), float(b))); +} +__hostdev__ inline float Max(float a, float b) +{ + return fmaxf(a, b); +} +__hostdev__ inline double Max(double a, double b) +{ + return fmax(a, b); +} +__hostdev__ inline float Clamp(float x, float a, float b) +{ + return Max(Min(x, b), a); +} +__hostdev__ inline double Clamp(double x, double a, double b) +{ + return Max(Min(x, b), a); +} + +__hostdev__ inline float Fract(float x) +{ + return x - floorf(x); +} +__hostdev__ inline double Fract(double x) +{ + return x - floor(x); +} + +__hostdev__ inline int32_t Floor(float x) +{ + return int32_t(floorf(x)); +} +__hostdev__ inline int32_t Floor(double x) +{ + return int32_t(floor(x)); +} + +__hostdev__ inline int32_t Ceil(float x) +{ + return int32_t(ceilf(x)); +} +__hostdev__ inline int32_t Ceil(double x) +{ + return int32_t(ceil(x)); +} + +template +__hostdev__ inline T Pow2(T x) +{ + return x * x; +} + +template +__hostdev__ inline T Pow3(T x) +{ + return x * x * x; +} + +template +__hostdev__ inline T Pow4(T x) +{ + return Pow2(x * x); +} +template +__hostdev__ inline T Abs(T x) +{ + return x < 0 ? -x : x; +} + +template<> +__hostdev__ inline float Abs(float x) +{ + return fabsf(x); +} + +template<> +__hostdev__ inline double Abs(double x) +{ + return fabs(x); +} + +template<> +__hostdev__ inline int Abs(int x) +{ + return abs(x); +} + +template class Vec3T> +__hostdev__ inline CoordT Round(const Vec3T& xyz); + +template class Vec3T> +__hostdev__ inline CoordT Round(const Vec3T& xyz) +{ + return CoordT(int32_t(rintf(xyz[0])), int32_t(rintf(xyz[1])), int32_t(rintf(xyz[2]))); + //return CoordT(int32_t(roundf(xyz[0])), int32_t(roundf(xyz[1])), int32_t(roundf(xyz[2])) ); + //return CoordT(int32_t(floorf(xyz[0] + 0.5f)), int32_t(floorf(xyz[1] + 0.5f)), int32_t(floorf(xyz[2] + 0.5f))); +} + +template class Vec3T> +__hostdev__ inline CoordT Round(const Vec3T& xyz) +{ + return CoordT(int32_t(floor(xyz[0] + 0.5)), int32_t(floor(xyz[1] + 0.5)), int32_t(floor(xyz[2] + 0.5))); +} + +template class Vec3T> +__hostdev__ inline CoordT RoundDown(const Vec3T& xyz) +{ + return CoordT(Floor(xyz[0]), Floor(xyz[1]), Floor(xyz[2])); +} + +//@{ +/// Return the square root of a floating-point value. +__hostdev__ inline float Sqrt(float x) +{ + return sqrtf(x); +} +__hostdev__ inline double Sqrt(double x) +{ + return sqrt(x); +} +//@} + +/// Return the sign of the given value as an integer (either -1, 0 or 1). +template +__hostdev__ inline T Sign(const T& x) +{ + return ((T(0) < x) ? T(1) : T(0)) - ((x < T(0)) ? 
T(1) : T(0)); +} + +template +__hostdev__ inline int MinIndex(const Vec3T& v) +{ +#if 0 + static const int hashTable[8] = {2, 1, 9, 1, 2, 9, 0, 0}; //9 are dummy values + const int hashKey = ((v[0] < v[1]) << 2) + ((v[0] < v[2]) << 1) + (v[1] < v[2]); // ?*4+?*2+?*1 + return hashTable[hashKey]; +#else + if (v[0] < v[1] && v[0] < v[2]) + return 0; + if (v[1] < v[2]) + return 1; + else + return 2; +#endif +} + +template +__hostdev__ inline int MaxIndex(const Vec3T& v) +{ +#if 0 + static const int hashTable[8] = {2, 1, 9, 1, 2, 9, 0, 0}; //9 are dummy values + const int hashKey = ((v[0] > v[1]) << 2) + ((v[0] > v[2]) << 1) + (v[1] > v[2]); // ?*4+?*2+?*1 + return hashTable[hashKey]; +#else + if (v[0] > v[1] && v[0] > v[2]) + return 0; + if (v[1] > v[2]) + return 1; + else + return 2; +#endif +} + +/// @brief round up byteSize to the nearest wordSize, e.g. to align to machine word: AlignUp +__hostdev__ inline uint64_t AlignUp(uint64_t byteCount) +{ + const uint64_t r = byteCount % wordSize; + return r ? byteCount - r + wordSize : byteCount; +} + +// ------------------------------> Coord <-------------------------------------- + +// forward declaration so we can define Coord::asVec3s and Coord::asVec3d +template +class Vec3; + +/// @brief Signed (i, j, k) 32-bit integer coordinate class, similar to openvdb::math::Coord +class Coord +{ + int32_t mVec[3]; // private member data - three signed index coordinates +public: + using ValueType = int32_t; + using IndexType = uint32_t; + + /// @brief Initialize all coordinates to zero. + __hostdev__ Coord() + : mVec{0, 0, 0} + { + } + + /// @brief Initializes all coordinates to the given signed integer. + __hostdev__ explicit Coord(ValueType n) + : mVec{n, n, n} + { + } + + /// @brief Initializes coordinate to the given signed integers. + __hostdev__ Coord(ValueType i, ValueType j, ValueType k) + : mVec{i, j, k} + { + } + + __hostdev__ Coord(ValueType* ptr) + : mVec{ptr[0], ptr[1], ptr[2]} + { + } + + __hostdev__ int32_t x() const { return mVec[0]; } + __hostdev__ int32_t y() const { return mVec[1]; } + __hostdev__ int32_t z() const { return mVec[2]; } + + __hostdev__ int32_t& x() { return mVec[0]; } + __hostdev__ int32_t& y() { return mVec[1]; } + __hostdev__ int32_t& z() { return mVec[2]; } + + __hostdev__ static Coord max() { return Coord(int32_t((1u << 31) - 1)); } + + __hostdev__ static Coord min() { return Coord(-int32_t((1u << 31) - 1) - 1); } + + __hostdev__ static size_t memUsage() { return sizeof(Coord); } + + /// @brief Return a const reference to the given Coord component. + /// @warning The argument is assumed to be 0, 1, or 2. + __hostdev__ const ValueType& operator[](IndexType i) const { return mVec[i]; } + + /// @brief Return a non-const reference to the given Coord component. + /// @warning The argument is assumed to be 0, 1, or 2. + __hostdev__ ValueType& operator[](IndexType i) { return mVec[i]; } + + /// @brief Assignment operator that works with openvdb::Coord + template + __hostdev__ Coord& operator=(const CoordT& other) + { + static_assert(sizeof(Coord) == sizeof(CoordT), "Mis-matched sizeof"); + mVec[0] = other[0]; + mVec[1] = other[1]; + mVec[2] = other[2]; + return *this; + } + + /// @brief Return a new instance with coordinates masked by the given unsigned integer. + __hostdev__ Coord operator&(IndexType n) const { return Coord(mVec[0] & n, mVec[1] & n, mVec[2] & n); } + + // @brief Return a new instance with coordinates left-shifted by the given unsigned integer. 
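+    // @details Illustrative sketch added here (not part of the upstream header): shifting a
+    // voxel coordinate left by a node's log2 dimension scales all three components, e.g.
+    // @code
+    // nanovdb::math::Coord ijk(1, 2, 3);
+    // nanovdb::math::Coord upper = ijk << 3u; // == Coord(8, 16, 24)
+    // @endcode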
+ __hostdev__ Coord operator<<(IndexType n) const { return Coord(mVec[0] << n, mVec[1] << n, mVec[2] << n); } + + // @brief Return a new instance with coordinates right-shifted by the given unsigned integer. + __hostdev__ Coord operator>>(IndexType n) const { return Coord(mVec[0] >> n, mVec[1] >> n, mVec[2] >> n); } + + /// @brief Return true if this Coord is lexicographically less than the given Coord. + __hostdev__ bool operator<(const Coord& rhs) const + { + return mVec[0] < rhs[0] ? true + : mVec[0] > rhs[0] ? false + : mVec[1] < rhs[1] ? true + : mVec[1] > rhs[1] ? false + : mVec[2] < rhs[2] ? true : false; + } + + /// @brief Return true if this Coord is lexicographically less or equal to the given Coord. + __hostdev__ bool operator<=(const Coord& rhs) const + { + return mVec[0] < rhs[0] ? true + : mVec[0] > rhs[0] ? false + : mVec[1] < rhs[1] ? true + : mVec[1] > rhs[1] ? false + : mVec[2] <=rhs[2] ? true : false; + } + + // @brief Return true if this Coord is lexicographically greater than the given Coord. + __hostdev__ bool operator>(const Coord& rhs) const + { + return mVec[0] > rhs[0] ? true + : mVec[0] < rhs[0] ? false + : mVec[1] > rhs[1] ? true + : mVec[1] < rhs[1] ? false + : mVec[2] > rhs[2] ? true : false; + } + + // @brief Return true if this Coord is lexicographically greater or equal to the given Coord. + __hostdev__ bool operator>=(const Coord& rhs) const + { + return mVec[0] > rhs[0] ? true + : mVec[0] < rhs[0] ? false + : mVec[1] > rhs[1] ? true + : mVec[1] < rhs[1] ? false + : mVec[2] >=rhs[2] ? true : false; + } + + // @brief Return true if the Coord components are identical. + __hostdev__ bool operator==(const Coord& rhs) const { return mVec[0] == rhs[0] && mVec[1] == rhs[1] && mVec[2] == rhs[2]; } + __hostdev__ bool operator!=(const Coord& rhs) const { return mVec[0] != rhs[0] || mVec[1] != rhs[1] || mVec[2] != rhs[2]; } + __hostdev__ Coord& operator&=(int n) + { + mVec[0] &= n; + mVec[1] &= n; + mVec[2] &= n; + return *this; + } + __hostdev__ Coord& operator<<=(uint32_t n) + { + mVec[0] <<= n; + mVec[1] <<= n; + mVec[2] <<= n; + return *this; + } + __hostdev__ Coord& operator>>=(uint32_t n) + { + mVec[0] >>= n; + mVec[1] >>= n; + mVec[2] >>= n; + return *this; + } + __hostdev__ Coord& operator+=(int n) + { + mVec[0] += n; + mVec[1] += n; + mVec[2] += n; + return *this; + } + __hostdev__ Coord operator+(const Coord& rhs) const { return Coord(mVec[0] + rhs[0], mVec[1] + rhs[1], mVec[2] + rhs[2]); } + __hostdev__ Coord operator-(const Coord& rhs) const { return Coord(mVec[0] - rhs[0], mVec[1] - rhs[1], mVec[2] - rhs[2]); } + __hostdev__ Coord operator-() const { return Coord(-mVec[0], -mVec[1], -mVec[2]); } + __hostdev__ Coord& operator+=(const Coord& rhs) + { + mVec[0] += rhs[0]; + mVec[1] += rhs[1]; + mVec[2] += rhs[2]; + return *this; + } + __hostdev__ Coord& operator-=(const Coord& rhs) + { + mVec[0] -= rhs[0]; + mVec[1] -= rhs[1]; + mVec[2] -= rhs[2]; + return *this; + } + + /// @brief Perform a component-wise minimum with the other Coord. + __hostdev__ Coord& minComponent(const Coord& other) + { + if (other[0] < mVec[0]) + mVec[0] = other[0]; + if (other[1] < mVec[1]) + mVec[1] = other[1]; + if (other[2] < mVec[2]) + mVec[2] = other[2]; + return *this; + } + + /// @brief Perform a component-wise maximum with the other Coord. 
+ __hostdev__ Coord& maxComponent(const Coord& other) + { + if (other[0] > mVec[0]) + mVec[0] = other[0]; + if (other[1] > mVec[1]) + mVec[1] = other[1]; + if (other[2] > mVec[2]) + mVec[2] = other[2]; + return *this; + } +#if defined(__CUDACC__) // the following functions only run on the GPU! + __device__ inline Coord& minComponentAtomic(const Coord& other) + { + atomicMin(&mVec[0], other[0]); + atomicMin(&mVec[1], other[1]); + atomicMin(&mVec[2], other[2]); + return *this; + } + __device__ inline Coord& maxComponentAtomic(const Coord& other) + { + atomicMax(&mVec[0], other[0]); + atomicMax(&mVec[1], other[1]); + atomicMax(&mVec[2], other[2]); + return *this; + } +#endif + + __hostdev__ Coord offsetBy(ValueType dx, ValueType dy, ValueType dz) const + { + return Coord(mVec[0] + dx, mVec[1] + dy, mVec[2] + dz); + } + + __hostdev__ Coord offsetBy(ValueType n) const { return this->offsetBy(n, n, n); } + + /// Return true if any of the components of @a a are smaller than the + /// corresponding components of @a b. + __hostdev__ static inline bool lessThan(const Coord& a, const Coord& b) + { + return (a[0] < b[0] || a[1] < b[1] || a[2] < b[2]); + } + + /// @brief Return the largest integer coordinates that are not greater + /// than @a xyz (node centered conversion). + template + __hostdev__ static Coord Floor(const Vec3T& xyz) { return Coord(math::Floor(xyz[0]), math::Floor(xyz[1]), math::Floor(xyz[2])); } + + /// @brief Return a hash key derived from the existing coordinates. + /// @details The hash function is originally taken from the SIGGRAPH paper: + /// "VDB: High-resolution sparse volumes with dynamic topology" + /// and the prime numbers are modified based on the ACM Transactions on Graphics paper: + /// "Real-time 3D reconstruction at scale using voxel hashing" (the second number had a typo!) 
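+    /// @details Illustrative sketch (added, not from the upstream header): the template
+    /// parameter selects the number of hash-table bits, so the key always lies in [0, 2^Log2N).
+    /// @code
+    /// nanovdb::math::Coord ijk(10, 20, 30);
+    /// uint32_t key = ijk.hash<12>(); // key < 4096
+    /// @endcode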
+ template + __hostdev__ uint32_t hash() const { return ((1 << Log2N) - 1) & (mVec[0] * 73856093 ^ mVec[1] * 19349669 ^ mVec[2] * 83492791); } + + /// @brief Return the octant of this Coord + //__hostdev__ size_t octant() const { return (uint32_t(mVec[0])>>31) | ((uint32_t(mVec[1])>>31)<<1) | ((uint32_t(mVec[2])>>31)<<2); } + __hostdev__ uint8_t octant() const { return (uint8_t(bool(mVec[0] & (1u << 31)))) | + (uint8_t(bool(mVec[1] & (1u << 31))) << 1) | + (uint8_t(bool(mVec[2] & (1u << 31))) << 2); } + + /// @brief Return a single precision floating-point vector of this coordinate + __hostdev__ inline Vec3 asVec3s() const; + + /// @brief Return a double precision floating-point vector of this coordinate + __hostdev__ inline Vec3 asVec3d() const; + + // returns a copy of itself, so it mimics the behaviour of Vec3::round() + __hostdev__ inline Coord round() const { return *this; } +}; // Coord class + +// ----------------------------> Vec3 <-------------------------------------- + +/// @brief A simple vector class with three components, similar to openvdb::math::Vec3 +template +class Vec3 +{ + T mVec[3]; + +public: + static const int SIZE = 3; + static const int size = 3; // in openvdb::math::Tuple + using ValueType = T; + Vec3() = default; + __hostdev__ explicit Vec3(T x) + : mVec{x, x, x} + { + } + __hostdev__ Vec3(T x, T y, T z) + : mVec{x, y, z} + { + } + template class Vec3T, class T2> + __hostdev__ Vec3(const Vec3T& v) + : mVec{T(v[0]), T(v[1]), T(v[2])} + { + static_assert(Vec3T::size == size, "expected Vec3T::size==3!"); + } + template + __hostdev__ explicit Vec3(const Vec3& v) + : mVec{T(v[0]), T(v[1]), T(v[2])} + { + } + __hostdev__ explicit Vec3(const Coord& ijk) + : mVec{T(ijk[0]), T(ijk[1]), T(ijk[2])} + { + } + __hostdev__ bool operator==(const Vec3& rhs) const { return mVec[0] == rhs[0] && mVec[1] == rhs[1] && mVec[2] == rhs[2]; } + __hostdev__ bool operator!=(const Vec3& rhs) const { return mVec[0] != rhs[0] || mVec[1] != rhs[1] || mVec[2] != rhs[2]; } + template class Vec3T, class T2> + __hostdev__ Vec3& operator=(const Vec3T& rhs) + { + static_assert(Vec3T::size == size, "expected Vec3T::size==3!"); + mVec[0] = rhs[0]; + mVec[1] = rhs[1]; + mVec[2] = rhs[2]; + return *this; + } + __hostdev__ const T& operator[](int i) const { return mVec[i]; } + __hostdev__ T& operator[](int i) { return mVec[i]; } + template + __hostdev__ T dot(const Vec3T& v) const { return mVec[0] * v[0] + mVec[1] * v[1] + mVec[2] * v[2]; } + template + __hostdev__ Vec3 cross(const Vec3T& v) const + { + return Vec3(mVec[1] * v[2] - mVec[2] * v[1], + mVec[2] * v[0] - mVec[0] * v[2], + mVec[0] * v[1] - mVec[1] * v[0]); + } + __hostdev__ T lengthSqr() const + { + return mVec[0] * mVec[0] + mVec[1] * mVec[1] + mVec[2] * mVec[2]; // 5 flops + } + __hostdev__ T length() const { return Sqrt(this->lengthSqr()); } + __hostdev__ Vec3 operator-() const { return Vec3(-mVec[0], -mVec[1], -mVec[2]); } + __hostdev__ Vec3 operator*(const Vec3& v) const { return Vec3(mVec[0] * v[0], mVec[1] * v[1], mVec[2] * v[2]); } + __hostdev__ Vec3 operator/(const Vec3& v) const { return Vec3(mVec[0] / v[0], mVec[1] / v[1], mVec[2] / v[2]); } + __hostdev__ Vec3 operator+(const Vec3& v) const { return Vec3(mVec[0] + v[0], mVec[1] + v[1], mVec[2] + v[2]); } + __hostdev__ Vec3 operator-(const Vec3& v) const { return Vec3(mVec[0] - v[0], mVec[1] - v[1], mVec[2] - v[2]); } + __hostdev__ Vec3 operator+(const Coord& ijk) const { return Vec3(mVec[0] + ijk[0], mVec[1] + ijk[1], mVec[2] + ijk[2]); } + __hostdev__ Vec3 operator-(const Coord& 
ijk) const { return Vec3(mVec[0] - ijk[0], mVec[1] - ijk[1], mVec[2] - ijk[2]); } + __hostdev__ Vec3 operator*(const T& s) const { return Vec3(s * mVec[0], s * mVec[1], s * mVec[2]); } + __hostdev__ Vec3 operator/(const T& s) const { return (T(1) / s) * (*this); } + __hostdev__ Vec3& operator+=(const Vec3& v) + { + mVec[0] += v[0]; + mVec[1] += v[1]; + mVec[2] += v[2]; + return *this; + } + __hostdev__ Vec3& operator+=(const Coord& ijk) + { + mVec[0] += T(ijk[0]); + mVec[1] += T(ijk[1]); + mVec[2] += T(ijk[2]); + return *this; + } + __hostdev__ Vec3& operator-=(const Vec3& v) + { + mVec[0] -= v[0]; + mVec[1] -= v[1]; + mVec[2] -= v[2]; + return *this; + } + __hostdev__ Vec3& operator-=(const Coord& ijk) + { + mVec[0] -= T(ijk[0]); + mVec[1] -= T(ijk[1]); + mVec[2] -= T(ijk[2]); + return *this; + } + __hostdev__ Vec3& operator*=(const T& s) + { + mVec[0] *= s; + mVec[1] *= s; + mVec[2] *= s; + return *this; + } + __hostdev__ Vec3& operator/=(const T& s) { return (*this) *= T(1) / s; } + __hostdev__ Vec3& normalize() { return (*this) /= this->length(); } + /// @brief Perform a component-wise minimum with the other Coord. + __hostdev__ Vec3& minComponent(const Vec3& other) + { + if (other[0] < mVec[0]) + mVec[0] = other[0]; + if (other[1] < mVec[1]) + mVec[1] = other[1]; + if (other[2] < mVec[2]) + mVec[2] = other[2]; + return *this; + } + + /// @brief Perform a component-wise maximum with the other Coord. + __hostdev__ Vec3& maxComponent(const Vec3& other) + { + if (other[0] > mVec[0]) + mVec[0] = other[0]; + if (other[1] > mVec[1]) + mVec[1] = other[1]; + if (other[2] > mVec[2]) + mVec[2] = other[2]; + return *this; + } + /// @brief Return the smallest vector component + __hostdev__ ValueType min() const + { + return mVec[0] < mVec[1] ? (mVec[0] < mVec[2] ? mVec[0] : mVec[2]) : (mVec[1] < mVec[2] ? mVec[1] : mVec[2]); + } + /// @brief Return the largest vector component + __hostdev__ ValueType max() const + { + return mVec[0] > mVec[1] ? (mVec[0] > mVec[2] ? mVec[0] : mVec[2]) : (mVec[1] > mVec[2] ? 
mVec[1] : mVec[2]); + } + /// @brief Round each component if this Vec up to its integer value + /// @return Return an integer Coord + __hostdev__ Coord floor() const { return Coord(Floor(mVec[0]), Floor(mVec[1]), Floor(mVec[2])); } + /// @brief Round each component if this Vec down to its integer value + /// @return Return an integer Coord + __hostdev__ Coord ceil() const { return Coord(Ceil(mVec[0]), Ceil(mVec[1]), Ceil(mVec[2])); } + /// @brief Round each component if this Vec to its closest integer value + /// @return Return an integer Coord + __hostdev__ Coord round() const + { + if constexpr(util::is_same::value) { + return Coord(Floor(mVec[0] + 0.5f), Floor(mVec[1] + 0.5f), Floor(mVec[2] + 0.5f)); + } else if constexpr(util::is_same::value) { + return Coord(mVec[0], mVec[1], mVec[2]); + } else { + return Coord(Floor(mVec[0] + 0.5), Floor(mVec[1] + 0.5), Floor(mVec[2] + 0.5)); + } + } + + /// @brief return a non-const raw constant pointer to array of three vector components + __hostdev__ T* asPointer() { return mVec; } + /// @brief return a const raw constant pointer to array of three vector components + __hostdev__ const T* asPointer() const { return mVec; } +}; // Vec3 + +template +__hostdev__ inline Vec3 operator*(T1 scalar, const Vec3& vec) +{ + return Vec3(scalar * vec[0], scalar * vec[1], scalar * vec[2]); +} +template +__hostdev__ inline Vec3 operator/(T1 scalar, const Vec3& vec) +{ + return Vec3(scalar / vec[0], scalar / vec[1], scalar / vec[2]); +} + +/// @brief Return a single precision floating-point vector of this coordinate +__hostdev__ inline Vec3 Coord::asVec3s() const +{ + return Vec3(float(mVec[0]), float(mVec[1]), float(mVec[2])); +} + +/// @brief Return a double precision floating-point vector of this coordinate +__hostdev__ inline Vec3 Coord::asVec3d() const +{ + return Vec3(double(mVec[0]), double(mVec[1]), double(mVec[2])); +} + +// ----------------------------> Vec4 <-------------------------------------- + +/// @brief A simple vector class with four components, similar to openvdb::math::Vec4 +template +class Vec4 +{ + T mVec[4]; + +public: + static const int SIZE = 4; + static const int size = 4; + using ValueType = T; + Vec4() = default; + __hostdev__ explicit Vec4(T x) + : mVec{x, x, x, x} + { + } + __hostdev__ Vec4(T x, T y, T z, T w) + : mVec{x, y, z, w} + { + } + template + __hostdev__ explicit Vec4(const Vec4& v) + : mVec{T(v[0]), T(v[1]), T(v[2]), T(v[3])} + { + } + template class Vec4T, class T2> + __hostdev__ Vec4(const Vec4T& v) + : mVec{T(v[0]), T(v[1]), T(v[2]), T(v[3])} + { + static_assert(Vec4T::size == size, "expected Vec4T::size==4!"); + } + __hostdev__ bool operator==(const Vec4& rhs) const { return mVec[0] == rhs[0] && mVec[1] == rhs[1] && mVec[2] == rhs[2] && mVec[3] == rhs[3]; } + __hostdev__ bool operator!=(const Vec4& rhs) const { return mVec[0] != rhs[0] || mVec[1] != rhs[1] || mVec[2] != rhs[2] || mVec[3] != rhs[3]; } + template class Vec4T, class T2> + __hostdev__ Vec4& operator=(const Vec4T& rhs) + { + static_assert(Vec4T::size == size, "expected Vec4T::size==4!"); + mVec[0] = rhs[0]; + mVec[1] = rhs[1]; + mVec[2] = rhs[2]; + mVec[3] = rhs[3]; + return *this; + } + + __hostdev__ const T& operator[](int i) const { return mVec[i]; } + __hostdev__ T& operator[](int i) { return mVec[i]; } + template + __hostdev__ T dot(const Vec4T& v) const { return mVec[0] * v[0] + mVec[1] * v[1] + mVec[2] * v[2] + mVec[3] * v[3]; } + __hostdev__ T lengthSqr() const + { + return mVec[0] * mVec[0] + mVec[1] * mVec[1] + mVec[2] * mVec[2] + mVec[3] * 
mVec[3]; // 7 flops + } + __hostdev__ T length() const { return Sqrt(this->lengthSqr()); } + __hostdev__ Vec4 operator-() const { return Vec4(-mVec[0], -mVec[1], -mVec[2], -mVec[3]); } + __hostdev__ Vec4 operator*(const Vec4& v) const { return Vec4(mVec[0] * v[0], mVec[1] * v[1], mVec[2] * v[2], mVec[3] * v[3]); } + __hostdev__ Vec4 operator/(const Vec4& v) const { return Vec4(mVec[0] / v[0], mVec[1] / v[1], mVec[2] / v[2], mVec[3] / v[3]); } + __hostdev__ Vec4 operator+(const Vec4& v) const { return Vec4(mVec[0] + v[0], mVec[1] + v[1], mVec[2] + v[2], mVec[3] + v[3]); } + __hostdev__ Vec4 operator-(const Vec4& v) const { return Vec4(mVec[0] - v[0], mVec[1] - v[1], mVec[2] - v[2], mVec[3] - v[3]); } + __hostdev__ Vec4 operator*(const T& s) const { return Vec4(s * mVec[0], s * mVec[1], s * mVec[2], s * mVec[3]); } + __hostdev__ Vec4 operator/(const T& s) const { return (T(1) / s) * (*this); } + __hostdev__ Vec4& operator+=(const Vec4& v) + { + mVec[0] += v[0]; + mVec[1] += v[1]; + mVec[2] += v[2]; + mVec[3] += v[3]; + return *this; + } + __hostdev__ Vec4& operator-=(const Vec4& v) + { + mVec[0] -= v[0]; + mVec[1] -= v[1]; + mVec[2] -= v[2]; + mVec[3] -= v[3]; + return *this; + } + __hostdev__ Vec4& operator*=(const T& s) + { + mVec[0] *= s; + mVec[1] *= s; + mVec[2] *= s; + mVec[3] *= s; + return *this; + } + __hostdev__ Vec4& operator/=(const T& s) { return (*this) *= T(1) / s; } + __hostdev__ Vec4& normalize() { return (*this) /= this->length(); } + /// @brief Perform a component-wise minimum with the other Coord. + __hostdev__ Vec4& minComponent(const Vec4& other) + { + if (other[0] < mVec[0]) + mVec[0] = other[0]; + if (other[1] < mVec[1]) + mVec[1] = other[1]; + if (other[2] < mVec[2]) + mVec[2] = other[2]; + if (other[3] < mVec[3]) + mVec[3] = other[3]; + return *this; + } + + /// @brief Perform a component-wise maximum with the other Coord. + __hostdev__ Vec4& maxComponent(const Vec4& other) + { + if (other[0] > mVec[0]) + mVec[0] = other[0]; + if (other[1] > mVec[1]) + mVec[1] = other[1]; + if (other[2] > mVec[2]) + mVec[2] = other[2]; + if (other[3] > mVec[3]) + mVec[3] = other[3]; + return *this; + } +}; // Vec4 + +template +__hostdev__ inline Vec4 operator*(T1 scalar, const Vec4& vec) +{ + return Vec4(scalar * vec[0], scalar * vec[1], scalar * vec[2], scalar * vec[3]); +} +template +__hostdev__ inline Vec4 operator/(T1 scalar, const Vec4& vec) +{ + return Vec4(scalar / vec[0], scalar / vec[1], scalar / vec[2], scalar / vec[3]); +} + +// ----------------------------> matMult <-------------------------------------- + +/// @brief Multiply a 3x3 matrix and a 3d vector using 32bit floating point arithmetics +/// @note This corresponds to a linear mapping, e.g. scaling, rotation etc. +/// @tparam Vec3T Template type of the input and output 3d vectors +/// @param mat pointer to an array of floats with the 3x3 matrix +/// @param xyz input vector to be multiplied by the matrix +/// @return result of matrix-vector multiplication, i.e. 
mat x xyz +template +__hostdev__ inline Vec3T matMult(const float* mat, const Vec3T& xyz) +{ + return Vec3T(fmaf(static_cast(xyz[0]), mat[0], fmaf(static_cast(xyz[1]), mat[1], static_cast(xyz[2]) * mat[2])), + fmaf(static_cast(xyz[0]), mat[3], fmaf(static_cast(xyz[1]), mat[4], static_cast(xyz[2]) * mat[5])), + fmaf(static_cast(xyz[0]), mat[6], fmaf(static_cast(xyz[1]), mat[7], static_cast(xyz[2]) * mat[8]))); // 6 fmaf + 3 mult = 9 flops +} + +/// @brief Multiply a 3x3 matrix and a 3d vector using 64bit floating point arithmetics +/// @note This corresponds to a linear mapping, e.g. scaling, rotation etc. +/// @tparam Vec3T Template type of the input and output 3d vectors +/// @param mat pointer to an array of floats with the 3x3 matrix +/// @param xyz input vector to be multiplied by the matrix +/// @return result of matrix-vector multiplication, i.e. mat x xyz +template +__hostdev__ inline Vec3T matMult(const double* mat, const Vec3T& xyz) +{ + return Vec3T(fma(static_cast(xyz[0]), mat[0], fma(static_cast(xyz[1]), mat[1], static_cast(xyz[2]) * mat[2])), + fma(static_cast(xyz[0]), mat[3], fma(static_cast(xyz[1]), mat[4], static_cast(xyz[2]) * mat[5])), + fma(static_cast(xyz[0]), mat[6], fma(static_cast(xyz[1]), mat[7], static_cast(xyz[2]) * mat[8]))); // 6 fmaf + 3 mult = 9 flops +} + +/// @brief Multiply a 3x3 matrix to a 3d vector and add another 3d vector using 32bit floating point arithmetics +/// @note This corresponds to an affine transformation, i.e a linear mapping followed by a translation. e.g. scale/rotation and translation +/// @tparam Vec3T Template type of the input and output 3d vectors +/// @param mat pointer to an array of floats with the 3x3 matrix +/// @param vec 3d vector to be added AFTER the matrix multiplication +/// @param xyz input vector to be multiplied by the matrix and a translated by @c vec +/// @return result of affine transformation, i.e. (mat x xyz) + vec +template +__hostdev__ inline Vec3T matMult(const float* mat, const float* vec, const Vec3T& xyz) +{ + return Vec3T(fmaf(static_cast(xyz[0]), mat[0], fmaf(static_cast(xyz[1]), mat[1], fmaf(static_cast(xyz[2]), mat[2], vec[0]))), + fmaf(static_cast(xyz[0]), mat[3], fmaf(static_cast(xyz[1]), mat[4], fmaf(static_cast(xyz[2]), mat[5], vec[1]))), + fmaf(static_cast(xyz[0]), mat[6], fmaf(static_cast(xyz[1]), mat[7], fmaf(static_cast(xyz[2]), mat[8], vec[2])))); // 9 fmaf = 9 flops +} + +/// @brief Multiply a 3x3 matrix to a 3d vector and add another 3d vector using 64bit floating point arithmetics +/// @note This corresponds to an affine transformation, i.e a linear mapping followed by a translation. e.g. scale/rotation and translation +/// @tparam Vec3T Template type of the input and output 3d vectors +/// @param mat pointer to an array of floats with the 3x3 matrix +/// @param vec 3d vector to be added AFTER the matrix multiplication +/// @param xyz input vector to be multiplied by the matrix and a translated by @c vec +/// @return result of affine transformation, i.e. 
(mat x xyz) + vec +template +__hostdev__ inline Vec3T matMult(const double* mat, const double* vec, const Vec3T& xyz) +{ + return Vec3T(fma(static_cast(xyz[0]), mat[0], fma(static_cast(xyz[1]), mat[1], fma(static_cast(xyz[2]), mat[2], vec[0]))), + fma(static_cast(xyz[0]), mat[3], fma(static_cast(xyz[1]), mat[4], fma(static_cast(xyz[2]), mat[5], vec[1]))), + fma(static_cast(xyz[0]), mat[6], fma(static_cast(xyz[1]), mat[7], fma(static_cast(xyz[2]), mat[8], vec[2])))); // 9 fma = 9 flops +} + +/// @brief Multiply the transposed of a 3x3 matrix and a 3d vector using 32bit floating point arithmetics +/// @note This corresponds to an inverse linear mapping, e.g. inverse scaling, inverse rotation etc. +/// @tparam Vec3T Template type of the input and output 3d vectors +/// @param mat pointer to an array of floats with the 3x3 matrix +/// @param xyz input vector to be multiplied by the transposed matrix +/// @return result of matrix-vector multiplication, i.e. mat^T x xyz +template +__hostdev__ inline Vec3T matMultT(const float* mat, const Vec3T& xyz) +{ + return Vec3T(fmaf(static_cast(xyz[0]), mat[0], fmaf(static_cast(xyz[1]), mat[3], static_cast(xyz[2]) * mat[6])), + fmaf(static_cast(xyz[0]), mat[1], fmaf(static_cast(xyz[1]), mat[4], static_cast(xyz[2]) * mat[7])), + fmaf(static_cast(xyz[0]), mat[2], fmaf(static_cast(xyz[1]), mat[5], static_cast(xyz[2]) * mat[8]))); // 6 fmaf + 3 mult = 9 flops +} + +/// @brief Multiply the transposed of a 3x3 matrix and a 3d vector using 64bit floating point arithmetics +/// @note This corresponds to an inverse linear mapping, e.g. inverse scaling, inverse rotation etc. +/// @tparam Vec3T Template type of the input and output 3d vectors +/// @param mat pointer to an array of floats with the 3x3 matrix +/// @param xyz input vector to be multiplied by the transposed matrix +/// @return result of matrix-vector multiplication, i.e. 
mat^T x xyz +template +__hostdev__ inline Vec3T matMultT(const double* mat, const Vec3T& xyz) +{ + return Vec3T(fma(static_cast(xyz[0]), mat[0], fma(static_cast(xyz[1]), mat[3], static_cast(xyz[2]) * mat[6])), + fma(static_cast(xyz[0]), mat[1], fma(static_cast(xyz[1]), mat[4], static_cast(xyz[2]) * mat[7])), + fma(static_cast(xyz[0]), mat[2], fma(static_cast(xyz[1]), mat[5], static_cast(xyz[2]) * mat[8]))); // 6 fmaf + 3 mult = 9 flops +} + +template +__hostdev__ inline Vec3T matMultT(const float* mat, const float* vec, const Vec3T& xyz) +{ + return Vec3T(fmaf(static_cast(xyz[0]), mat[0], fmaf(static_cast(xyz[1]), mat[3], fmaf(static_cast(xyz[2]), mat[6], vec[0]))), + fmaf(static_cast(xyz[0]), mat[1], fmaf(static_cast(xyz[1]), mat[4], fmaf(static_cast(xyz[2]), mat[7], vec[1]))), + fmaf(static_cast(xyz[0]), mat[2], fmaf(static_cast(xyz[1]), mat[5], fmaf(static_cast(xyz[2]), mat[8], vec[2])))); // 9 fmaf = 9 flops +} + +template +__hostdev__ inline Vec3T matMultT(const double* mat, const double* vec, const Vec3T& xyz) +{ + return Vec3T(fma(static_cast(xyz[0]), mat[0], fma(static_cast(xyz[1]), mat[3], fma(static_cast(xyz[2]), mat[6], vec[0]))), + fma(static_cast(xyz[0]), mat[1], fma(static_cast(xyz[1]), mat[4], fma(static_cast(xyz[2]), mat[7], vec[1]))), + fma(static_cast(xyz[0]), mat[2], fma(static_cast(xyz[1]), mat[5], fma(static_cast(xyz[2]), mat[8], vec[2])))); // 9 fma = 9 flops +} + +// ----------------------------> BBox <------------------------------------- + +// Base-class for static polymorphism (cannot be constructed directly) +template +struct BaseBBox +{ + Vec3T mCoord[2]; + __hostdev__ bool operator==(const BaseBBox& rhs) const { return mCoord[0] == rhs.mCoord[0] && mCoord[1] == rhs.mCoord[1]; }; + __hostdev__ bool operator!=(const BaseBBox& rhs) const { return mCoord[0] != rhs.mCoord[0] || mCoord[1] != rhs.mCoord[1]; }; + __hostdev__ const Vec3T& operator[](int i) const { return mCoord[i]; } + __hostdev__ Vec3T& operator[](int i) { return mCoord[i]; } + __hostdev__ Vec3T& min() { return mCoord[0]; } + __hostdev__ Vec3T& max() { return mCoord[1]; } + __hostdev__ const Vec3T& min() const { return mCoord[0]; } + __hostdev__ const Vec3T& max() const { return mCoord[1]; } + __hostdev__ BaseBBox& translate(const Vec3T& xyz) + { + mCoord[0] += xyz; + mCoord[1] += xyz; + return *this; + } + /// @brief Expand this bounding box to enclose point @c xyz. + __hostdev__ BaseBBox& expand(const Vec3T& xyz) + { + mCoord[0].minComponent(xyz); + mCoord[1].maxComponent(xyz); + return *this; + } + + /// @brief Expand this bounding box to enclose the given bounding box. + __hostdev__ BaseBBox& expand(const BaseBBox& bbox) + { + mCoord[0].minComponent(bbox[0]); + mCoord[1].maxComponent(bbox[1]); + return *this; + } + + /// @brief Intersect this bounding box with the given bounding box. 
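+    /// @details Minimal usage sketch (added for illustration); the resulting box is the
+    /// component-wise overlap of the two inputs:
+    /// @code
+    /// nanovdb::CoordBBox a(nanovdb::math::Coord(0), nanovdb::math::Coord(10));
+    /// nanovdb::CoordBBox b(nanovdb::math::Coord(5), nanovdb::math::Coord(20));
+    /// a.intersect(b); // a now spans Coord(5,5,5) .. Coord(10,10,10)
+    /// @endcode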
+ __hostdev__ BaseBBox& intersect(const BaseBBox& bbox) + { + mCoord[0].maxComponent(bbox[0]); + mCoord[1].minComponent(bbox[1]); + return *this; + } + + //__hostdev__ BaseBBox expandBy(typename Vec3T::ValueType padding) const + //{ + // return BaseBBox(mCoord[0].offsetBy(-padding),mCoord[1].offsetBy(padding)); + //} + __hostdev__ bool isInside(const Vec3T& xyz) + { + if (xyz[0] < mCoord[0][0] || xyz[1] < mCoord[0][1] || xyz[2] < mCoord[0][2]) + return false; + if (xyz[0] > mCoord[1][0] || xyz[1] > mCoord[1][1] || xyz[2] > mCoord[1][2]) + return false; + return true; + } + +protected: + __hostdev__ BaseBBox() {} + __hostdev__ BaseBBox(const Vec3T& min, const Vec3T& max) + : mCoord{min, max} + { + } +}; // BaseBBox + +template::value> +struct BBox; + +/// @brief Partial template specialization for floating point coordinate types. +/// +/// @note Min is inclusive and max is exclusive. If min = max the dimension of +/// the bounding box is zero and therefore it is also empty. +template +struct BBox : public BaseBBox +{ + using Vec3Type = Vec3T; + using ValueType = typename Vec3T::ValueType; + static_assert(util::is_floating_point::value, "Expected a floating point coordinate type"); + using BaseT = BaseBBox; + using BaseT::mCoord; + /// @brief Default construction sets BBox to an empty bbox + __hostdev__ BBox() + : BaseT(Vec3T( Maximum::value()), + Vec3T(-Maximum::value())) + { + } + __hostdev__ BBox(const Vec3T& min, const Vec3T& max) + : BaseT(min, max) + { + } + __hostdev__ BBox(const Coord& min, const Coord& max) + : BaseT(Vec3T(ValueType(min[0]), ValueType(min[1]), ValueType(min[2])), + Vec3T(ValueType(max[0] + 1), ValueType(max[1] + 1), ValueType(max[2] + 1))) + { + } + __hostdev__ static BBox createCube(const Coord& min, typename Coord::ValueType dim) + { + return BBox(min, min.offsetBy(dim)); + } + + __hostdev__ BBox(const BaseBBox& bbox) + : BBox(bbox[0], bbox[1]) + { + } + __hostdev__ bool empty() const { return mCoord[0][0] >= mCoord[1][0] || + mCoord[0][1] >= mCoord[1][1] || + mCoord[0][2] >= mCoord[1][2]; } + __hostdev__ operator bool() const { return mCoord[0][0] < mCoord[1][0] && + mCoord[0][1] < mCoord[1][1] && + mCoord[0][2] < mCoord[1][2]; } + __hostdev__ Vec3T dim() const { return *this ? this->max() - this->min() : Vec3T(0); } + __hostdev__ bool isInside(const Vec3T& p) const + { + return p[0] > mCoord[0][0] && p[1] > mCoord[0][1] && p[2] > mCoord[0][2] && + p[0] < mCoord[1][0] && p[1] < mCoord[1][1] && p[2] < mCoord[1][2]; + } + +}; // BBox + +/// @brief Partial template specialization for integer coordinate types +/// +/// @note Both min and max are INCLUDED in the bbox so dim = max - min + 1. So, +/// if min = max the bounding box contains exactly one point and dim = 1! +template +struct BBox : public BaseBBox +{ + static_assert(util::is_same::value, "Expected \"int\" coordinate type"); + using BaseT = BaseBBox; + using BaseT::mCoord; + /// @brief Iterator over the domain covered by a BBox + /// @details z is the fastest-moving coordinate. 
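+    /// @details Hedged usage sketch (added for illustration): visiting every coordinate in a
+    /// box, with z varying fastest:
+    /// @code
+    /// nanovdb::CoordBBox bbox(nanovdb::math::Coord(0), nanovdb::math::Coord(7));
+    /// for (auto it = bbox.begin(); it; ++it) {
+    ///     const nanovdb::math::Coord& ijk = *it; // 8*8*8 = 512 coordinates in total
+    /// }
+    /// @endcode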
+ class Iterator + { + const BBox& mBBox; + CoordT mPos; + + public: + __hostdev__ Iterator(const BBox& b) + : mBBox(b) + , mPos(b.min()) + { + } + __hostdev__ Iterator(const BBox& b, const Coord& p) + : mBBox(b) + , mPos(p) + { + } + __hostdev__ Iterator& operator++() + { + if (mPos[2] < mBBox[1][2]) { // this is the most common case + ++mPos[2];// increment z + } else if (mPos[1] < mBBox[1][1]) { + mPos[2] = mBBox[0][2];// reset z + ++mPos[1];// increment y + } else if (mPos[0] <= mBBox[1][0]) { + mPos[2] = mBBox[0][2];// reset z + mPos[1] = mBBox[0][1];// reset y + ++mPos[0];// increment x + } + return *this; + } + __hostdev__ Iterator operator++(int) + { + auto tmp = *this; + ++(*this); + return tmp; + } + __hostdev__ bool operator==(const Iterator& rhs) const + { + NANOVDB_ASSERT(mBBox == rhs.mBBox); + return mPos == rhs.mPos; + } + __hostdev__ bool operator!=(const Iterator& rhs) const + { + NANOVDB_ASSERT(mBBox == rhs.mBBox); + return mPos != rhs.mPos; + } + __hostdev__ bool operator<(const Iterator& rhs) const + { + NANOVDB_ASSERT(mBBox == rhs.mBBox); + return mPos < rhs.mPos; + } + __hostdev__ bool operator<=(const Iterator& rhs) const + { + NANOVDB_ASSERT(mBBox == rhs.mBBox); + return mPos <= rhs.mPos; + } + /// @brief Return @c true if the iterator still points to a valid coordinate. + __hostdev__ operator bool() const { return mPos <= mBBox[1]; } + __hostdev__ const CoordT& operator*() const { return mPos; } + }; // Iterator + __hostdev__ Iterator begin() const { return Iterator{*this}; } + __hostdev__ Iterator end() const { return Iterator{*this, CoordT(mCoord[1][0]+1, mCoord[0][1], mCoord[0][2])}; } + __hostdev__ BBox() + : BaseT(CoordT::max(), CoordT::min()) + { + } + __hostdev__ BBox(const CoordT& min, const CoordT& max) + : BaseT(min, max) + { + } + + template + __hostdev__ BBox(BBox& other, const SplitT&) + : BaseT(other.mCoord[0], other.mCoord[1]) + { + NANOVDB_ASSERT(this->is_divisible()); + const int n = MaxIndex(this->dim()); + mCoord[1][n] = (mCoord[0][n] + mCoord[1][n]) >> 1; + other.mCoord[0][n] = mCoord[1][n] + 1; + } + + __hostdev__ static BBox createCube(const CoordT& min, typename CoordT::ValueType dim) + { + return BBox(min, min.offsetBy(dim - 1)); + } + + __hostdev__ static BBox createCube(typename CoordT::ValueType min, typename CoordT::ValueType max) + { + return BBox(CoordT(min), CoordT(max)); + } + + __hostdev__ bool is_divisible() const { return mCoord[0][0] < mCoord[1][0] && + mCoord[0][1] < mCoord[1][1] && + mCoord[0][2] < mCoord[1][2]; } + /// @brief Return true if this bounding box is empty, e.g. uninitialized + __hostdev__ bool empty() const { return mCoord[0][0] > mCoord[1][0] || + mCoord[0][1] > mCoord[1][1] || + mCoord[0][2] > mCoord[1][2]; } + /// @brief Convert this BBox to boolean true if it is not empty + __hostdev__ operator bool() const { return mCoord[0][0] <= mCoord[1][0] && + mCoord[0][1] <= mCoord[1][1] && + mCoord[0][2] <= mCoord[1][2]; } + __hostdev__ CoordT dim() const { return *this ? this->max() - this->min() + Coord(1) : Coord(0); } + __hostdev__ uint64_t volume() const + { + auto d = this->dim(); + return uint64_t(d[0]) * uint64_t(d[1]) * uint64_t(d[2]); + } + __hostdev__ bool isInside(const CoordT& p) const { return !(CoordT::lessThan(p, this->min()) || CoordT::lessThan(this->max(), p)); } + /// @brief Return @c true if the given bounding box is inside this bounding box. 
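+    /// @details Illustrative sketch (added): both corners of @c b must lie inside this box,
+    /// with min and max treated as inclusive:
+    /// @code
+    /// nanovdb::CoordBBox outer(nanovdb::math::Coord(0),  nanovdb::math::Coord(100));
+    /// nanovdb::CoordBBox inner(nanovdb::math::Coord(10), nanovdb::math::Coord(20));
+    /// bool contained = outer.isInside(inner); // true
+    /// @endcode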
+ __hostdev__ bool isInside(const BBox& b) const + { + return !(CoordT::lessThan(b.min(), this->min()) || CoordT::lessThan(this->max(), b.max())); + } + + /// @brief Return @c true if the given bounding box overlaps with this bounding box. + __hostdev__ bool hasOverlap(const BBox& b) const + { + return !(CoordT::lessThan(this->max(), b.min()) || CoordT::lessThan(b.max(), this->min())); + } + + /// @warning This converts a CoordBBox into a floating-point bounding box which implies that max += 1 ! + template + __hostdev__ BBox> asReal() const + { + static_assert(util::is_floating_point::value, "CoordBBox::asReal: Expected a floating point coordinate"); + return BBox>(Vec3(RealT(mCoord[0][0]), RealT(mCoord[0][1]), RealT(mCoord[0][2])), + Vec3(RealT(mCoord[1][0] + 1), RealT(mCoord[1][1] + 1), RealT(mCoord[1][2] + 1))); + } + /// @brief Return a new instance that is expanded by the specified padding. + __hostdev__ BBox expandBy(typename CoordT::ValueType padding) const + { + return BBox(mCoord[0].offsetBy(-padding), mCoord[1].offsetBy(padding)); + } + + /// @brief @brief transform this coordinate bounding box by the specified map + /// @param map mapping of index to world coordinates + /// @return world bounding box + template + __hostdev__ auto transform(const Map& map) const + { + using Vec3T = Vec3; + const Vec3T tmp = map.applyMap(Vec3T(mCoord[0][0], mCoord[0][1], mCoord[0][2])); + BBox bbox(tmp, tmp);// return value + bbox.expand(map.applyMap(Vec3T(mCoord[0][0], mCoord[0][1], mCoord[1][2]))); + bbox.expand(map.applyMap(Vec3T(mCoord[0][0], mCoord[1][1], mCoord[0][2]))); + bbox.expand(map.applyMap(Vec3T(mCoord[1][0], mCoord[0][1], mCoord[0][2]))); + bbox.expand(map.applyMap(Vec3T(mCoord[1][0], mCoord[1][1], mCoord[0][2]))); + bbox.expand(map.applyMap(Vec3T(mCoord[1][0], mCoord[0][1], mCoord[1][2]))); + bbox.expand(map.applyMap(Vec3T(mCoord[0][0], mCoord[1][1], mCoord[1][2]))); + bbox.expand(map.applyMap(Vec3T(mCoord[1][0], mCoord[1][1], mCoord[1][2]))); + return bbox; + } + +#if defined(__CUDACC__) // the following functions only run on the GPU! + __device__ inline BBox& expandAtomic(const CoordT& ijk) + { + mCoord[0].minComponentAtomic(ijk); + mCoord[1].maxComponentAtomic(ijk); + return *this; + } + __device__ inline BBox& expandAtomic(const BBox& bbox) + { + mCoord[0].minComponentAtomic(bbox[0]); + mCoord[1].maxComponentAtomic(bbox[1]); + return *this; + } + __device__ inline BBox& intersectAtomic(const BBox& bbox) + { + mCoord[0].maxComponentAtomic(bbox[0]); + mCoord[1].minComponentAtomic(bbox[1]); + return *this; + } +#endif +}; // BBox + +// --------------------------> Rgba8 <------------------------------------ + +/// @brief 8-bit red, green, blue, alpha packed into 32 bit unsigned int +class Rgba8 +{ + union + { + uint8_t c[4]; // 4 integer color channels of red, green, blue and alpha components. 
+ uint32_t packed; // 32 bit packed representation + } mData; + +public: + static const int SIZE = 4; + using ValueType = uint8_t; + + /// @brief Default copy constructor + Rgba8(const Rgba8&) = default; + + /// @brief Default move constructor + Rgba8(Rgba8&&) = default; + + /// @brief Default move assignment operator + /// @return non-const reference to this instance + Rgba8& operator=(Rgba8&&) = default; + + /// @brief Default copy assignment operator + /// @return non-const reference to this instance + Rgba8& operator=(const Rgba8&) = default; + + /// @brief Default ctor initializes all channels to zero + __hostdev__ Rgba8() + : mData{{0, 0, 0, 0}} + { + static_assert(sizeof(uint32_t) == sizeof(Rgba8), "Unexpected sizeof"); + } + + /// @brief integer r,g,b,a ctor where alpha channel defaults to opaque + /// @note all values should be in the range 0u to 255u + __hostdev__ Rgba8(uint8_t r, uint8_t g, uint8_t b, uint8_t a = 255u) + : mData{{r, g, b, a}} + { + } + + /// @brief @brief ctor where all channels are initialized to the same value + /// @note value should be in the range 0u to 255u + explicit __hostdev__ Rgba8(uint8_t v) + : mData{{v, v, v, v}} + { + } + + /// @brief floating-point r,g,b,a ctor where alpha channel defaults to opaque + /// @note all values should be in the range 0.0f to 1.0f + __hostdev__ Rgba8(float r, float g, float b, float a = 1.0f) + : mData{{static_cast(0.5f + r * 255.0f), // round floats to nearest integers + static_cast(0.5f + g * 255.0f), // double {{}} is needed due to union + static_cast(0.5f + b * 255.0f), + static_cast(0.5f + a * 255.0f)}} + { + } + + /// @brief Vec3f r,g,b ctor (alpha channel it set to 1) + /// @note all values should be in the range 0.0f to 1.0f + __hostdev__ Rgba8(const Vec3& rgb) + : Rgba8(rgb[0], rgb[1], rgb[2]) + { + } + + /// @brief Vec4f r,g,b,a ctor + /// @note all values should be in the range 0.0f to 1.0f + __hostdev__ Rgba8(const Vec4& rgba) + : Rgba8(rgba[0], rgba[1], rgba[2], rgba[3]) + { + } + + __hostdev__ bool operator< (const Rgba8& rhs) const { return mData.packed < rhs.mData.packed; } + __hostdev__ bool operator==(const Rgba8& rhs) const { return mData.packed == rhs.mData.packed; } + __hostdev__ float lengthSqr() const + { + return 0.0000153787005f * (float(mData.c[0]) * mData.c[0] + + float(mData.c[1]) * mData.c[1] + + float(mData.c[2]) * mData.c[2]); //1/255^2 + } + __hostdev__ float length() const { return sqrtf(this->lengthSqr()); } + /// @brief return n'th color channel as a float in the range 0 to 1 + __hostdev__ float asFloat(int n) const { return 0.003921569f*float(mData.c[n]); }// divide by 255 + __hostdev__ const uint8_t& operator[](int n) const { return mData.c[n]; } + __hostdev__ uint8_t& operator[](int n) { return mData.c[n]; } + __hostdev__ const uint32_t& packed() const { return mData.packed; } + __hostdev__ uint32_t& packed() { return mData.packed; } + __hostdev__ const uint8_t& r() const { return mData.c[0]; } + __hostdev__ const uint8_t& g() const { return mData.c[1]; } + __hostdev__ const uint8_t& b() const { return mData.c[2]; } + __hostdev__ const uint8_t& a() const { return mData.c[3]; } + __hostdev__ uint8_t& r() { return mData.c[0]; } + __hostdev__ uint8_t& g() { return mData.c[1]; } + __hostdev__ uint8_t& b() { return mData.c[2]; } + __hostdev__ uint8_t& a() { return mData.c[3]; } + __hostdev__ operator Vec3() const { + return Vec3(this->asFloat(0), this->asFloat(1), this->asFloat(2)); + } + __hostdev__ operator Vec4() const { + return Vec4(this->asFloat(0), this->asFloat(1), 
this->asFloat(2), this->asFloat(3)); + } +}; // Rgba8 + +using Vec3d = Vec3; +using Vec3f = Vec3; +using Vec3i = Vec3; +using Vec3u = Vec3; +using Vec3u8 = Vec3; +using Vec3u16 = Vec3; + +using Vec4R = Vec4; +using Vec4d = Vec4; +using Vec4f = Vec4; +using Vec4i = Vec4; + +}// namespace math =============================================================== + +using Rgba8 [[deprecated("Use math::Rgba8 instead.")]] = math::Rgba8; +using math::Coord; + +using Vec3d = math::Vec3; +using Vec3f = math::Vec3; +using Vec3i = math::Vec3; +using Vec3u = math::Vec3; +using Vec3u8 = math::Vec3; +using Vec3u16 = math::Vec3; + +using Vec4R = math::Vec4; +using Vec4d = math::Vec4; +using Vec4f = math::Vec4; +using Vec4i = math::Vec4; + +using CoordBBox = math::BBox; +using Vec3dBBox = math::BBox; +using BBoxR [[deprecated("Use Vec3dBBox instead.")]] = math::BBox; + +} // namespace nanovdb =================================================================== + +#endif // end of NANOVDB_MATH_MATH_H_HAS_BEEN_INCLUDED diff --git a/external/nanovdb/math/Ray.h b/external/nanovdb/math/Ray.h new file mode 100644 index 00000000..236982f3 --- /dev/null +++ b/external/nanovdb/math/Ray.h @@ -0,0 +1,557 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +/// @file Ray.h +/// +/// @author Ken Museth +/// +/// @brief A Ray class. + +#ifndef NANOVDB_MATH_RAY_H_HAS_BEEN_INCLUDED +#define NANOVDB_MATH_RAY_H_HAS_BEEN_INCLUDED + +#include // for Vec3 +namespace nanovdb {// =================================================== + +namespace math {// ====================================================== + +template +class Ray +{ +public: + using RealType = RealT; + using Vec3Type = Vec3; + using Vec3T = Vec3Type; + + struct TimeSpan + { + RealT t0, t1; + /// @brief Default constructor + __hostdev__ TimeSpan() {} + /// @brief Constructor + __hostdev__ TimeSpan(RealT _t0, RealT _t1) + : t0(_t0) + , t1(_t1) + { + } + /// @brief Set both times + __hostdev__ void set(RealT _t0, RealT _t1) + { + t0 = _t0; + t1 = _t1; + } + /// @brief Get both times + __hostdev__ void get(RealT& _t0, RealT& _t1) const + { + _t0 = t0; + _t1 = t1; + } + /// @brief Return @c true if t1 is larger than t0 by at least eps. + __hostdev__ bool valid(RealT eps = Delta::value()) const { return (t1 - t0) > eps; } + /// @brief Return the midpoint of the ray. 
+ __hostdev__ RealT mid() const { return 0.5 * (t0 + t1); } + /// @brief Multiplies both times + __hostdev__ void scale(RealT s) + { + assert(s > 0); + t0 *= s; + t1 *= s; + } + /// @brief Return @c true if time is inclusive + __hostdev__ bool test(RealT t) const { return (t >= t0 && t <= t1); } + }; + + __hostdev__ Ray(const Vec3Type& eye = Vec3Type(0, 0, 0), + const Vec3Type& direction = Vec3Type(1, 0, 0), + RealT t0 = Delta::value(), + RealT t1 = Maximum::value()) + : mEye(eye) + , mDir(direction) + , mInvDir(1 / mDir[0], 1 / mDir[1], 1 / mDir[2]) + , mTimeSpan(t0, t1) + , mSign{mInvDir[0] < 0, mInvDir[1] < 0, mInvDir[2] < 0} + { + } + + __hostdev__ Ray& offsetEye(RealT offset) + { + mEye[0] += offset; + mEye[1] += offset; + mEye[2] += offset; + return *this; + } + + __hostdev__ Ray& setEye(const Vec3Type& eye) + { + mEye = eye; + return *this; + } + + __hostdev__ Ray& setDir(const Vec3Type& dir) + { + mDir = dir; + mInvDir[0] = 1.0 / mDir[0]; + mInvDir[1] = 1.0 / mDir[1]; + mInvDir[2] = 1.0 / mDir[2]; + mSign[0] = mInvDir[0] < 0; + mSign[1] = mInvDir[1] < 0; + mSign[2] = mInvDir[2] < 0; + return *this; + } + + __hostdev__ Ray& setMinTime(RealT t0) + { + mTimeSpan.t0 = t0; + return *this; + } + + __hostdev__ Ray& setMaxTime(RealT t1) + { + mTimeSpan.t1 = t1; + return *this; + } + + __hostdev__ Ray& setTimes( + RealT t0 = Delta::value(), + RealT t1 = Maximum::value()) + { + assert(t0 > 0 && t1 > 0); + mTimeSpan.set(t0, t1); + return *this; + } + + __hostdev__ Ray& scaleTimes(RealT scale) + { + mTimeSpan.scale(scale); + return *this; + } + + __hostdev__ Ray& reset( + const Vec3Type& eye, + const Vec3Type& direction, + RealT t0 = Delta::value(), + RealT t1 = Maximum::value()) + { + this->setEye(eye); + this->setDir(direction); + this->setTimes(t0, t1); + return *this; + } + + __hostdev__ const Vec3T& eye() const { return mEye; } + + __hostdev__ const Vec3T& dir() const { return mDir; } + + __hostdev__ const Vec3T& invDir() const { return mInvDir; } + + __hostdev__ RealT t0() const { return mTimeSpan.t0; } + + __hostdev__ RealT t1() const { return mTimeSpan.t1; } + + __hostdev__ int sign(int i) const { return mSign[i]; } + + /// @brief Return the position along the ray at the specified time. + __hostdev__ Vec3T operator()(RealT time) const + { +#if 1 + return Vec3T(fmaf(time, mDir[0], mEye[0]), + fmaf(time, mDir[1], mEye[1]), + fmaf(time, mDir[2], mEye[2])); +#else + return mEye + mDir * time; +#endif + } + + /// @brief Return the starting point of the ray. + __hostdev__ Vec3T start() const { return (*this)(mTimeSpan.t0); } + + /// @brief Return the endpoint of the ray. + __hostdev__ Vec3T end() const { return (*this)(mTimeSpan.t1); } + + /// @brief Return the midpoint of the ray. + __hostdev__ Vec3T mid() const { return (*this)(mTimeSpan.mid()); } + + /// @brief Return @c true if t1 is larger than t0 by at least eps. + __hostdev__ bool valid(RealT eps = Delta::value()) const { return mTimeSpan.valid(eps); } + + /// @brief Return @c true if @a time is within t0 and t1, both inclusive. + __hostdev__ bool test(RealT time) const { return mTimeSpan.test(time); } + + /// @brief Return a new Ray that is transformed with the specified map. + /// + /// @param map the map from which to construct the new Ray. + /// + /// @warning Assumes a linear map and a normalized direction. + /// + /// @details The requirement that the direction is normalized + /// follows from the transformation of t0 and t1 - and that fact that + /// we want applyMap and applyInverseMap to be inverse operations. 
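+    /// @details Illustrative sketch (added, not upstream documentation); @c map is assumed to
+    /// be a nanovdb map object exposing applyMap()/applyJacobian(), and @c eye / @c dir are
+    /// assumed to be index-space Vec3 values:
+    /// @code
+    /// nanovdb::math::Ray<double> indexRay(eye, dir);
+    /// auto worldRay = indexRay.applyMap(map); // the same ray expressed in world space
+    /// @endcode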
+ template + __hostdev__ Ray applyMap(const MapType& map) const + { + const Vec3T eye = map.applyMap(mEye); + const Vec3T dir = map.applyJacobian(mDir); + const RealT length = dir.length(), invLength = RealT(1) / length; + RealT t1 = mTimeSpan.t1; + if (mTimeSpan.t1 < Maximum::value()) { + t1 *= length; + } + return Ray(eye, dir * invLength, length * mTimeSpan.t0, t1); + } + template + __hostdev__ Ray applyMapF(const MapType& map) const + { + const Vec3T eye = map.applyMapF(mEye); + const Vec3T dir = map.applyJacobianF(mDir); + const RealT length = dir.length(), invLength = RealT(1) / length; + RealT t1 = mTimeSpan.t1; + if (mTimeSpan.t1 < Maximum::value()) { + t1 *= length; + } + return Ray(eye, dir * invLength, length * mTimeSpan.t0, t1); + } + + /// @brief Return a new Ray that is transformed with the inverse of the specified map. + /// + /// @param map the map from which to construct the new Ray by inverse mapping. + /// + /// @warning Assumes a linear map and a normalized direction. + /// + /// @details The requirement that the direction is normalized + /// follows from the transformation of t0 and t1 - and that fact that + /// we want applyMap and applyInverseMap to be inverse operations. + template + __hostdev__ Ray applyInverseMap(const MapType& map) const + { + const Vec3T eye = map.applyInverseMap(mEye); + const Vec3T dir = map.applyInverseJacobian(mDir); + const RealT length = dir.length(), invLength = RealT(1) / length; + return Ray(eye, dir * invLength, length * mTimeSpan.t0, length * mTimeSpan.t1); + } + template + __hostdev__ Ray applyInverseMapF(const MapType& map) const + { + const Vec3T eye = map.applyInverseMapF(mEye); + const Vec3T dir = map.applyInverseJacobianF(mDir); + const RealT length = dir.length(), invLength = RealT(1) / length; + return Ray(eye, dir * invLength, length * mTimeSpan.t0, length * mTimeSpan.t1); + } + + /// @brief Return a new ray in world space, assuming the existing + /// ray is represented in the index space of the specified grid. + template + __hostdev__ Ray indexToWorldF(const GridType& grid) const + { + const Vec3T eye = grid.indexToWorldF(mEye); + const Vec3T dir = grid.indexToWorldDirF(mDir); + const RealT length = dir.length(), invLength = RealT(1) / length; + RealT t1 = mTimeSpan.t1; + if (mTimeSpan.t1 < Maximum::value()) { + t1 *= length; + } + return Ray(eye, dir * invLength, length * mTimeSpan.t0, t1); + } + + /// @brief Return a new ray in index space, assuming the existing + /// ray is represented in the world space of the specified grid. + template + __hostdev__ Ray worldToIndexF(const GridType& grid) const + { + const Vec3T eye = grid.worldToIndexF(mEye); + const Vec3T dir = grid.worldToIndexDirF(mDir); + const RealT length = dir.length(), invLength = RealT(1) / length; + RealT t1 = mTimeSpan.t1; + if (mTimeSpan.t1 < Maximum::value()) { + t1 *= length; + } + return Ray(eye, dir * invLength, length * mTimeSpan.t0, t1); + } + + /// @brief Return true if this ray intersects the specified sphere. + /// + /// @param center The center of the sphere in the same space as this ray. + /// @param radius The radius of the sphere in the same units as this ray. + /// @param t0 The first intersection point if an intersection exists. + /// @param t1 The second intersection point if an intersection exists. + /// + /// @note If the return value is true, i.e. a hit, and t0 = + /// this->t0() or t1 == this->t1() only one true intersection exist. 
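+    /// @details Minimal usage sketch (added for illustration); @c ray is assumed to be a
+    /// nanovdb::math::Ray<float>:
+    /// @code
+    /// float t0, t1;
+    /// if (ray.intersects(nanovdb::math::Vec3f(0.0f), 10.0f, t0, t1)) {
+    ///     // the ray enters the sphere at ray(t0) and leaves it at ray(t1)
+    /// }
+    /// @endcode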
+ __hostdev__ bool intersects(const Vec3T& center, RealT radius, RealT& t0, RealT& t1) const + { + const Vec3T origin = mEye - center; + const RealT A = mDir.lengthSqr(); + const RealT B = 2 * mDir.dot(origin); + const RealT C = origin.lengthSqr() - radius * radius; + const RealT D = B * B - 4 * A * C; + + if (D < 0) { + return false; + } + const RealT Q = RealT(-0.5) * (B < 0 ? (B + Sqrt(D)) : (B - Sqrt(D))); + + t0 = Q / A; + t1 = C / Q; + + if (t0 > t1) { + RealT tmp = t0; + t0 = t1; + t1 = tmp; + } + if (t0 < mTimeSpan.t0) { + t0 = mTimeSpan.t0; + } + if (t1 > mTimeSpan.t1) { + t1 = mTimeSpan.t1; + } + return t0 <= t1; + } + + /// @brief Return true if this ray intersects the specified sphere. + /// + /// @param center The center of the sphere in the same space as this ray. + /// @param radius The radius of the sphere in the same units as this ray. + __hostdev__ bool intersects(const Vec3T& center, RealT radius) const + { + RealT t0, t1; + return this->intersects(center, radius, t0, t1) > 0; + } + + /// @brief Return true if this ray intersects the specified sphere. + /// + /// @note For intersection this ray is clipped to the two intersection points. + /// + /// @param center The center of the sphere in the same space as this ray. + /// @param radius The radius of the sphere in the same units as this ray. + __hostdev__ bool clip(const Vec3T& center, RealT radius) + { + RealT t0, t1; + const bool hit = this->intersects(center, radius, t0, t1); + if (hit) { + mTimeSpan.set(t0, t1); + } + return hit; + } +#if 0 + /// @brief Return true if the Ray intersects the specified + /// axisaligned bounding box. + /// + /// @param bbox Axis-aligned bounding box in the same space as the Ray. + /// @param t0 If an intersection is detected this is assigned + /// the time for the first intersection point. + /// @param t1 If an intersection is detected this is assigned + /// the time for the second intersection point. + template + __hostdev__ bool intersects(const BBoxT& bbox, RealT& t0, RealT& t1) const + { + t0 = (bbox[ mSign[0]][0] - mEye[0]) * mInvDir[0]; + RealT t2 = (bbox[1-mSign[1]][1] - mEye[1]) * mInvDir[1]; + if (t0 > t2) return false; + t1 = (bbox[1-mSign[0]][0] - mEye[0]) * mInvDir[0]; + RealT t3 = (bbox[ mSign[1]][1] - mEye[1]) * mInvDir[1]; + if (t3 > t1) return false; + if (t3 > t0) t0 = t3; + if (t2 < t1) t1 = t2; + t3 = (bbox[ mSign[2]][2] - mEye[2]) * mInvDir[2]; + if (t3 > t1) return false; + t2 = (bbox[1-mSign[2]][2] - mEye[2]) * mInvDir[2]; + if (t0 > t2) return false; + if (t3 > t0) t0 = t3; + if (mTimeSpan.t1 < t0) return false; + if (t2 < t1) t1 = t2; + if (mTimeSpan.t0 > t1) return false; + if (mTimeSpan.t0 > t0) t0 = mTimeSpan.t0; + if (mTimeSpan.t1 < t1) t1 = mTimeSpan.t1; + return true; + /* + mTimeSpan.get(_t0, _t1); + double t0 = _t0, t1 = _t1; + for (int i = 0; i < 3; ++i) { + //if (abs(mDir[i])<1e-3) continue; + double a = (double(bbox.min()[i]) - mEye[i]) * mInvDir[i]; + double b = (double(bbox.max()[i]) - mEye[i]) * mInvDir[i]; + if (a > b) { + double tmp = a; + a = b; + b = tmp; + } + if (a > t0) t0 = a; + if (b < t1) t1 = b; + if (t0 > t1) { + //if (gVerbose) printf("Missed BBOX: (%i,%i,%i) -> (%i,%i,%i) t0=%f t1=%f\n", + // bbox.min()[0], bbox.min()[1], bbox.min()[2], + // bbox.max()[0], bbox.max()[1], bbox.max()[2], t0, t1); + return false; + } + } + _t0 = t0; _t1 = t1; + return true; + */ + } +#else + /// @brief Returns true if this ray intersects an index bounding box. 
+ /// If the return value is true t0 and t1 are set to the intersection + /// times along the ray. + /// + /// @warning Intersection with a CoordBBox internally converts to a floating-point bbox + /// which imples that the max is padded with one voxel, i.e. bbox.max += 1! This + /// avoids gaps between neighboring CoordBBox'es, say from neighboring tree nodes. + __hostdev__ bool intersects(const CoordBBox& bbox, RealT& t0, RealT& t1) const + { + mTimeSpan.get(t0, t1); + for (int i = 0; i < 3; ++i) { + RealT a = RealT(bbox.min()[i]), b = RealT(bbox.max()[i] + 1); + if (a >= b) { // empty bounding box + return false; + } + a = (a - mEye[i]) * mInvDir[i]; + b = (b - mEye[i]) * mInvDir[i]; + if (a > b) { + RealT tmp = a; + a = b; + b = tmp; + } + if (a > t0) { + t0 = a; + } + if (b < t1) { + t1 = b; + } + if (t0 > t1) { + return false; + } + } + return true; + } + /// @brief Returns true if this ray intersects a floating-point bounding box. + /// If the return value is true t0 and t1 are set to the intersection + /// times along the ray. + template + __hostdev__ bool intersects(const BBox& bbox, RealT& t0, RealT& t1) const + { + static_assert(util::is_floating_point::value, "Ray::intersects: Expected a floating point coordinate"); + mTimeSpan.get(t0, t1); + for (int i = 0; i < 3; ++i) { + RealT a = RealT(bbox.min()[i]), b = RealT(bbox.max()[i]); + if (a >= b) { // empty bounding box + return false; + } + a = (a - mEye[i]) * mInvDir[i]; + b = (b - mEye[i]) * mInvDir[i]; + if (a > b) { + RealT tmp = a; + a = b; + b = tmp; + } + if (a > t0) { + t0 = a; + } + if (b < t1) { + t1 = b; + } + if (t0 > t1) { + return false; + } + } + return true; + } +#endif + + /// @brief Return true if this ray intersects the specified bounding box. + /// + /// @param bbox Axis-aligned bounding box in the same space as this ray. + /// + /// @warning If @a bbox is of the type CoordBBox it is converted to a floating-point + /// bounding box, which imples that the max is padded with one voxel, i.e. + /// bbox.max += 1! This avoids gaps between neighboring CoordBBox'es, say + /// from neighboring tree nodes. + template + __hostdev__ bool intersects(const BBoxT& bbox) const + { +#if 1 + RealT t0, t1; + return this->intersects(bbox, t0, t1); +#else + //BBox bbox(Vec3T(_bbox[0][0]-1e-4,_bbox[0][1]-1e-4,_bbox[0][2]-1e-4), + // Vec3T(_bbox[1][0]+1e-4,_bbox[1][1]+1e-4,_bbox[1][2]+1e-4)); + RealT t0 = (bbox[mSign[0]][0] - mEye[0]) * mInvDir[0]; + RealT t2 = (bbox[1 - mSign[1]][1] - mEye[1]) * mInvDir[1]; + if (t0 > t2) return false; + RealT t1 = (bbox[1 - mSign[0]][0] - mEye[0]) * mInvDir[0]; + RealT t3 = (bbox[mSign[1]][1] - mEye[1]) * mInvDir[1]; + if (t3 > t1) return false; + if (t3 > t0) t0 = t3; + if (t2 < t1) t1 = t2; + t3 = (bbox[mSign[2]][2] - mEye[2]) * mInvDir[2]; + if (t3 > t1) return false; + t2 = (bbox[1 - mSign[2]][2] - mEye[2]) * mInvDir[2]; + if (t0 > t2) return false; + //if (t3 > t0) t0 = t3; + //if (mTimeSpan.t1 < t0) return false; + //if (t2 < t1) t1 = t2; + //return mTimeSpan.t0 < t1; + return true; +#endif + } + + /// @brief Return true if this ray intersects the specified bounding box. + /// + /// @param bbox Axis-aligned bounding box in the same space as this ray. + /// + /// @warning If @a bbox is of the type CoordBBox it is converted to a floating-point + /// bounding box, which imples that the max is padded with one voxel, i.e. + /// bbox.max += 1! This avoids gaps between neighboring CoordBBox'es, say + /// from neighboring tree nodes. 
+ /// + /// @note For intersection this ray is clipped to the two intersection points. + template + __hostdev__ bool clip(const BBoxT& bbox) + { + RealT t0, t1; + const bool hit = this->intersects(bbox, t0, t1); + if (hit) { + mTimeSpan.set(t0, t1); + } + return hit; + } + + /// @brief Return true if the Ray intersects the plane specified + /// by a normal and distance from the origin. + /// + /// @param normal Normal of the plane. + /// @param distance Distance of the plane to the origin. + /// @param t Time of intersection, if one exists. + __hostdev__ bool intersects(const Vec3T& normal, RealT distance, RealT& t) const + { + const RealT cosAngle = mDir.dot(normal); + if (isApproxZero(cosAngle)) { + return false; // ray is parallel to plane + } + t = (distance - mEye.dot(normal)) / cosAngle; + return this->test(t); + } + + /// @brief Return true if the Ray intersects the plane specified + /// by a normal and point. + /// + /// @param normal Normal of the plane. + /// @param point Point in the plane. + /// @param t Time of intersection, if one exists. + __hostdev__ bool intersects(const Vec3T& normal, const Vec3T& point, RealT& t) const + { + return this->intersects(normal, point.dot(normal), t); + } + +private: + Vec3T mEye, mDir, mInvDir; + TimeSpan mTimeSpan; + int mSign[3]; +}; // end of Ray class + +} // namespace math ========================================================= + +template +using Ray [[deprecated("Use nanovdb::math::Ray instead")]] = math::Ray; + +} // namespace nanovdb ======================================================= + +#endif // NANOVDB_MATH_RAY_HAS_BEEN_INCLUDED diff --git a/external/nanovdb/math/SampleFromVoxels.h b/external/nanovdb/math/SampleFromVoxels.h new file mode 100644 index 00000000..d183f74a --- /dev/null +++ b/external/nanovdb/math/SampleFromVoxels.h @@ -0,0 +1,996 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +////////////////////////////////////////////////////////////////////////// +/// +/// @file SampleFromVoxels.h +/// +/// @brief NearestNeighborSampler, TrilinearSampler, TriquadraticSampler and TricubicSampler +/// +/// @note These interpolators employ internal caching for better performance when used repeatedly +/// in the same voxel location, so try to reuse an instance of these classes more than once. +/// +/// @warning While all the interpolators defined below work with both scalars and vectors +/// values (e.g. float and Vec3) TrilinarSampler::zeroCrossing and +/// Trilinear::gradient will only compile with floating point value types. 
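+///
+/// @note The samplers below expect index-space coordinates. A common pattern
+///       (sketch; the grid and the world-space point wxyz are placeholders) is:
+/// @code
+/// auto acc = grid.getAccessor();
+/// auto smp = nanovdb::math::createSampler<1>(acc);
+/// float v = smp(grid.worldToIndexF(wxyz)); // convert to index space before sampling
+/// @endcode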
+/// +/// @author Ken Museth +/// +/////////////////////////////////////////////////////////////////////////// + +#ifndef NANOVDB_SAMPLE_FROM_VOXELS_H_HAS_BEEN_INCLUDED +#define NANOVDB_SAMPLE_FROM_VOXELS_H_HAS_BEEN_INCLUDED + +// Only define __hostdev__ when compiling as NVIDIA CUDA +#if defined(__CUDACC__) || defined(__HIP__) +#define __hostdev__ __host__ __device__ +#else +#include // for floor +#define __hostdev__ +#endif + +#include + +namespace nanovdb { + +namespace math { + +// Forward declaration of sampler with specific polynomial orders +template +class SampleFromVoxels; + +/// @brief Factory free-function for a sampler of specific polynomial orders +/// +/// @details This allows for the compact syntax: +/// @code +/// auto acc = grid.getAccessor(); +/// auto smp = nanovdb::math::createSampler<1>( acc ); +/// @endcode +template +__hostdev__ SampleFromVoxels createSampler(const TreeOrAccT& acc) +{ + return SampleFromVoxels(acc); +} + +/// @brief Utility function that returns the Coord of the round-down of @a xyz +/// and redefined @xyz as the fractional part, ie xyz-in = return-value + xyz-out +template class Vec3T> +__hostdev__ inline CoordT Floor(Vec3T& xyz); + +/// @brief Template specialization of Floor for Vec3 +template class Vec3T> +__hostdev__ inline CoordT Floor(Vec3T& xyz) +{ + const float ijk[3] = {floorf(xyz[0]), floorf(xyz[1]), floorf(xyz[2])}; + xyz[0] -= ijk[0]; + xyz[1] -= ijk[1]; + xyz[2] -= ijk[2]; + return CoordT(int32_t(ijk[0]), int32_t(ijk[1]), int32_t(ijk[2])); +} + +/// @brief Template specialization of Floor for Vec3 +template class Vec3T> +__hostdev__ inline CoordT Floor(Vec3T& xyz) +{ + const double ijk[3] = {floor(xyz[0]), floor(xyz[1]), floor(xyz[2])}; + xyz[0] -= ijk[0]; + xyz[1] -= ijk[1]; + xyz[2] -= ijk[2]; + return CoordT(int32_t(ijk[0]), int32_t(ijk[1]), int32_t(ijk[2])); +} + +// ------------------------------> NearestNeighborSampler <-------------------------------------- + +/// @brief Nearest neighbor, i.e. zero order, interpolator with caching +template +class SampleFromVoxels +{ +public: + using ValueT = typename TreeOrAccT::ValueType; + using CoordT = typename TreeOrAccT::CoordType; + + static const int ORDER = 0; + /// @brief Construction from a Tree or ReadAccessor + __hostdev__ SampleFromVoxels(const TreeOrAccT& acc) + : mAcc(acc) + , mPos(CoordT::max()) + { + } + + __hostdev__ const TreeOrAccT& accessor() const { return mAcc; } + + /// @note xyz is in index space space + template + inline __hostdev__ ValueT operator()(const Vec3T& xyz) const; + + inline __hostdev__ ValueT operator()(const CoordT& ijk) const; + +private: + const TreeOrAccT& mAcc; + mutable CoordT mPos; + mutable ValueT mVal; // private cache +}; // SampleFromVoxels + +/// @brief Nearest neighbor, i.e. 
zero order, interpolator without caching +template +class SampleFromVoxels +{ +public: + using ValueT = typename TreeOrAccT::ValueType; + using CoordT = typename TreeOrAccT::CoordType; + static const int ORDER = 0; + + /// @brief Construction from a Tree or ReadAccessor + __hostdev__ SampleFromVoxels(const TreeOrAccT& acc) + : mAcc(acc) + { + } + + __hostdev__ const TreeOrAccT& accessor() const { return mAcc; } + + /// @note xyz is in index space space + template + inline __hostdev__ ValueT operator()(const Vec3T& xyz) const; + + inline __hostdev__ ValueT operator()(const CoordT& ijk) const { return mAcc.getValue(ijk);} + +private: + const TreeOrAccT& mAcc; +}; // SampleFromVoxels + +template +template +__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels::operator()(const Vec3T& xyz) const +{ + const CoordT ijk = math::Round(xyz); + if (ijk != mPos) { + mPos = ijk; + mVal = mAcc.getValue(mPos); + } + return mVal; +} + +template +__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels::operator()(const CoordT& ijk) const +{ + if (ijk != mPos) { + mPos = ijk; + mVal = mAcc.getValue(mPos); + } + return mVal; +} + +template +template +__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels::operator()(const Vec3T& xyz) const +{ + return mAcc.getValue(math::Round(xyz)); +} + +// ------------------------------> TrilinearSampler <-------------------------------------- + +/// @brief Tri-linear sampler, i.e. first order, interpolator +template +class TrilinearSampler +{ +protected: + const TreeOrAccT& mAcc; + +public: + using ValueT = typename TreeOrAccT::ValueType; + using CoordT = typename TreeOrAccT::CoordType; + static const int ORDER = 1; + + /// @brief Protected constructor from a Tree or ReadAccessor + __hostdev__ TrilinearSampler(const TreeOrAccT& acc) : mAcc(acc) {} + + __hostdev__ const TreeOrAccT& accessor() const { return mAcc; } + + /// @brief Extract the stencil of 8 values + inline __hostdev__ void stencil(CoordT& ijk, ValueT (&v)[2][2][2]) const; + + template class Vec3T> + static inline __hostdev__ ValueT sample(const Vec3T &uvw, const ValueT (&v)[2][2][2]); + + template class Vec3T> + static inline __hostdev__ Vec3T gradient(const Vec3T &uvw, const ValueT (&v)[2][2][2]); + + static inline __hostdev__ bool zeroCrossing(const ValueT (&v)[2][2][2]); +}; // TrilinearSamplerBase + +template +__hostdev__ void TrilinearSampler::stencil(CoordT& ijk, ValueT (&v)[2][2][2]) const +{ + v[0][0][0] = mAcc.getValue(ijk); // i, j, k + + ijk[2] += 1; + v[0][0][1] = mAcc.getValue(ijk); // i, j, k + 1 + + ijk[1] += 1; + v[0][1][1] = mAcc.getValue(ijk); // i, j+1, k + 1 + + ijk[2] -= 1; + v[0][1][0] = mAcc.getValue(ijk); // i, j+1, k + + ijk[0] += 1; + ijk[1] -= 1; + v[1][0][0] = mAcc.getValue(ijk); // i+1, j, k + + ijk[2] += 1; + v[1][0][1] = mAcc.getValue(ijk); // i+1, j, k + 1 + + ijk[1] += 1; + v[1][1][1] = mAcc.getValue(ijk); // i+1, j+1, k + 1 + + ijk[2] -= 1; + v[1][1][0] = mAcc.getValue(ijk); // i+1, j+1, k +} + +template +template class Vec3T> +__hostdev__ typename TreeOrAccT::ValueType TrilinearSampler::sample(const Vec3T &uvw, const ValueT (&v)[2][2][2]) +{ +#if 0 + auto lerp = [](ValueT a, ValueT b, ValueT w){ return fma(w, b-a, a); };// = w*(b-a) + a + //auto lerp = [](ValueT a, ValueT b, ValueT w){ return fma(w, b, fma(-w, a, a));};// = (1-w)*a + w*b +#else + auto lerp = [](ValueT a, ValueT b, RealT w) { return a + ValueT(w) * (b - a); }; +#endif + return lerp(lerp(lerp(v[0][0][0], v[0][0][1], uvw[2]), lerp(v[0][1][0], v[0][1][1], uvw[2]), uvw[1]), + 
lerp(lerp(v[1][0][0], v[1][0][1], uvw[2]), lerp(v[1][1][0], v[1][1][1], uvw[2]), uvw[1]), + uvw[0]); +} + +template +template class Vec3T> +__hostdev__ Vec3T TrilinearSampler::gradient(const Vec3T &uvw, const ValueT (&v)[2][2][2]) +{ + static_assert(util::is_floating_point::value, "TrilinearSampler::gradient requires a floating-point type"); +#if 0 + auto lerp = [](ValueT a, ValueT b, ValueT w){ return fma(w, b-a, a); };// = w*(b-a) + a + //auto lerp = [](ValueT a, ValueT b, ValueT w){ return fma(w, b, fma(-w, a, a));};// = (1-w)*a + w*b +#else + auto lerp = [](ValueT a, ValueT b, RealT w) { return a + ValueT(w) * (b - a); }; +#endif + + ValueT D[4] = {v[0][0][1] - v[0][0][0], v[0][1][1] - v[0][1][0], v[1][0][1] - v[1][0][0], v[1][1][1] - v[1][1][0]}; + + // Z component + Vec3T grad(0, 0, lerp(lerp(D[0], D[1], uvw[1]), lerp(D[2], D[3], uvw[1]), uvw[0])); + + const ValueT w = ValueT(uvw[2]); + D[0] = v[0][0][0] + D[0] * w; + D[1] = v[0][1][0] + D[1] * w; + D[2] = v[1][0][0] + D[2] * w; + D[3] = v[1][1][0] + D[3] * w; + + // X component + grad[0] = lerp(D[2], D[3], uvw[1]) - lerp(D[0], D[1], uvw[1]); + + // Y component + grad[1] = lerp(D[1] - D[0], D[3] - D[2], uvw[0]); + + return grad; +} + +template +__hostdev__ bool TrilinearSampler::zeroCrossing(const ValueT (&v)[2][2][2]) +{ + static_assert(util::is_floating_point::value, "TrilinearSampler::zeroCrossing requires a floating-point type"); + const bool less = v[0][0][0] < ValueT(0); + return (less ^ (v[0][0][1] < ValueT(0))) || + (less ^ (v[0][1][1] < ValueT(0))) || + (less ^ (v[0][1][0] < ValueT(0))) || + (less ^ (v[1][0][0] < ValueT(0))) || + (less ^ (v[1][0][1] < ValueT(0))) || + (less ^ (v[1][1][1] < ValueT(0))) || + (less ^ (v[1][1][0] < ValueT(0))); +} + +/// @brief Template specialization that does not use caching of stencil points +template +class SampleFromVoxels : public TrilinearSampler +{ + using BaseT = TrilinearSampler; + using ValueT = typename TreeOrAccT::ValueType; + using CoordT = typename TreeOrAccT::CoordType; + +public: + + /// @brief Construction from a Tree or ReadAccessor + __hostdev__ SampleFromVoxels(const TreeOrAccT& acc) : BaseT(acc) {} + + /// @note xyz is in index space space + template class Vec3T> + inline __hostdev__ ValueT operator()(Vec3T xyz) const; + + /// @note ijk is in index space space + __hostdev__ ValueT operator()(const CoordT &ijk) const {return BaseT::mAcc.getValue(ijk);} + + /// @brief Return the gradient in index space. + /// + /// @warning Will only compile with floating point value types + template class Vec3T> + inline __hostdev__ Vec3T gradient(Vec3T xyz) const; + + /// @brief Return true if the tr-linear stencil has a zero crossing at the specified index position. 
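+    ///
+    /// Sketch (acc is a read-accessor of a float grid and xyz an index-space
+    /// position; both are placeholders):
+    /// @code
+    /// nanovdb::math::SampleFromVoxels<decltype(acc), 1, false> smp(acc); // non-caching variant
+    /// bool hit = smp.zeroCrossing(xyz);
+    /// @endcode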
+ /// + /// @warning Will only compile with floating point value types + template class Vec3T> + inline __hostdev__ bool zeroCrossing(Vec3T xyz) const; + +}; // SampleFromVoxels + +/// @brief Template specialization with caching of stencil values +template +class SampleFromVoxels : public TrilinearSampler +{ + using BaseT = TrilinearSampler; + using ValueT = typename TreeOrAccT::ValueType; + using CoordT = typename TreeOrAccT::CoordType; + + mutable CoordT mPos; + mutable ValueT mVal[2][2][2]; + + template class Vec3T> + __hostdev__ void cache(Vec3T& xyz) const; +public: + + /// @brief Construction from a Tree or ReadAccessor + __hostdev__ SampleFromVoxels(const TreeOrAccT& acc) : BaseT(acc), mPos(CoordT::max()){} + + /// @note xyz is in index space space + template class Vec3T> + inline __hostdev__ ValueT operator()(Vec3T xyz) const; + + // @note ijk is in index space space + __hostdev__ ValueT operator()(const CoordT &ijk) const; + + /// @brief Return the gradient in index space. + /// + /// @warning Will only compile with floating point value types + template class Vec3T> + inline __hostdev__ Vec3T gradient(Vec3T xyz) const; + + /// @brief Return true if the tr-linear stencil has a zero crossing at the specified index position. + /// + /// @warning Will only compile with floating point value types + template class Vec3T> + inline __hostdev__ bool zeroCrossing(Vec3T xyz) const; + + /// @brief Return true if the cached tri-linear stencil has a zero crossing. + /// + /// @warning Will only compile with floating point value types + __hostdev__ bool zeroCrossing() const { return BaseT::zeroCrossing(mVal); } + +}; // SampleFromVoxels + +template +template class Vec3T> +__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels::operator()(Vec3T xyz) const +{ + this->cache(xyz); + return BaseT::sample(xyz, mVal); +} + +template +__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels::operator()(const CoordT &ijk) const +{ + return ijk == mPos ? 
mVal[0][0][0] : BaseT::mAcc.getValue(ijk); +} + +template +template class Vec3T> +__hostdev__ Vec3T SampleFromVoxels::gradient(Vec3T xyz) const +{ + this->cache(xyz); + return BaseT::gradient(xyz, mVal); +} + +template +template class Vec3T> +__hostdev__ bool SampleFromVoxels::zeroCrossing(Vec3T xyz) const +{ + this->cache(xyz); + return BaseT::zeroCrossing(mVal); +} + +template +template class Vec3T> +__hostdev__ void SampleFromVoxels::cache(Vec3T& xyz) const +{ + CoordT ijk = Floor(xyz); + if (ijk != mPos) { + mPos = ijk; + BaseT::stencil(ijk, mVal); + } +} + +#if 0 + +template +template class Vec3T> +__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels::operator()(Vec3T xyz) const +{ + ValueT val[2][2][2]; + CoordT ijk = Floor(xyz); + BaseT::stencil(ijk, val); + return BaseT::sample(xyz, val); +} + +#else + +template +template class Vec3T> +__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels::operator()(Vec3T xyz) const +{ + auto lerp = [](ValueT a, ValueT b, RealT w) { return a + ValueT(w) * (b - a); }; + + CoordT coord = Floor(xyz); + + ValueT vx, vx1, vy, vy1, vz, vz1; + + vz = BaseT::mAcc.getValue(coord); + coord[2] += 1; + vz1 = BaseT::mAcc.getValue(coord); + vy = lerp(vz, vz1, xyz[2]); + + coord[1] += 1; + + vz1 = BaseT::mAcc.getValue(coord); + coord[2] -= 1; + vz = BaseT::mAcc.getValue(coord); + vy1 = lerp(vz, vz1, xyz[2]); + + vx = lerp(vy, vy1, xyz[1]); + + coord[0] += 1; + + vz = BaseT::mAcc.getValue(coord); + coord[2] += 1; + vz1 = BaseT::mAcc.getValue(coord); + vy1 = lerp(vz, vz1, xyz[2]); + + coord[1] -= 1; + + vz1 = BaseT::mAcc.getValue(coord); + coord[2] -= 1; + vz = BaseT::mAcc.getValue(coord); + vy = lerp(vz, vz1, xyz[2]); + + vx1 = lerp(vy, vy1, xyz[1]); + + return lerp(vx, vx1, xyz[0]); +} +#endif + + +template +template class Vec3T> +__hostdev__ inline Vec3T SampleFromVoxels::gradient(Vec3T xyz) const +{ + ValueT val[2][2][2]; + CoordT ijk = Floor(xyz); + BaseT::stencil(ijk, val); + return BaseT::gradient(xyz, val); +} + +template +template class Vec3T> +__hostdev__ bool SampleFromVoxels::zeroCrossing(Vec3T xyz) const +{ + ValueT val[2][2][2]; + CoordT ijk = Floor(xyz); + BaseT::stencil(ijk, val); + return BaseT::zeroCrossing(val); +} + +// ------------------------------> TriquadraticSampler <-------------------------------------- + +/// @brief Tri-quadratic sampler, i.e. 
second order, interpolator +template +class TriquadraticSampler +{ +protected: + const TreeOrAccT& mAcc; + +public: + using ValueT = typename TreeOrAccT::ValueType; + using CoordT = typename TreeOrAccT::CoordType; + static const int ORDER = 1; + + /// @brief Protected constructor from a Tree or ReadAccessor + __hostdev__ TriquadraticSampler(const TreeOrAccT& acc) : mAcc(acc) {} + + __hostdev__ const TreeOrAccT& accessor() const { return mAcc; } + + /// @brief Extract the stencil of 27 values + inline __hostdev__ void stencil(const CoordT &ijk, ValueT (&v)[3][3][3]) const; + + template class Vec3T> + static inline __hostdev__ ValueT sample(const Vec3T &uvw, const ValueT (&v)[3][3][3]); + + static inline __hostdev__ bool zeroCrossing(const ValueT (&v)[3][3][3]); +}; // TriquadraticSamplerBase + +template +__hostdev__ void TriquadraticSampler::stencil(const CoordT &ijk, ValueT (&v)[3][3][3]) const +{ + CoordT p(ijk[0] - 1, 0, 0); + for (int dx = 0; dx < 3; ++dx, ++p[0]) { + p[1] = ijk[1] - 1; + for (int dy = 0; dy < 3; ++dy, ++p[1]) { + p[2] = ijk[2] - 1; + for (int dz = 0; dz < 3; ++dz, ++p[2]) { + v[dx][dy][dz] = mAcc.getValue(p);// extract the stencil of 27 values + } + } + } +} + +template +template class Vec3T> +__hostdev__ typename TreeOrAccT::ValueType TriquadraticSampler::sample(const Vec3T &uvw, const ValueT (&v)[3][3][3]) +{ + auto kernel = [](const ValueT* value, double weight)->ValueT { + return weight * (weight * (0.5f * (value[0] + value[2]) - value[1]) + + 0.5f * (value[2] - value[0])) + value[1]; + }; + + ValueT vx[3]; + for (int dx = 0; dx < 3; ++dx) { + ValueT vy[3]; + for (int dy = 0; dy < 3; ++dy) { + vy[dy] = kernel(&v[dx][dy][0], uvw[2]); + }//loop over y + vx[dx] = kernel(vy, uvw[1]); + }//loop over x + return kernel(vx, uvw[0]); +} + +template +__hostdev__ bool TriquadraticSampler::zeroCrossing(const ValueT (&v)[3][3][3]) +{ + static_assert(util::is_floating_point::value, "TrilinearSampler::zeroCrossing requires a floating-point type"); + const bool less = v[0][0][0] < ValueT(0); + for (int dx = 0; dx < 3; ++dx) { + for (int dy = 0; dy < 3; ++dy) { + for (int dz = 0; dz < 3; ++dz) { + if (less ^ (v[dx][dy][dz] < ValueT(0))) return true; + } + } + } + return false; +} + +/// @brief Template specialization that does not use caching of stencil points +template +class SampleFromVoxels : public TriquadraticSampler +{ + using BaseT = TriquadraticSampler; + using ValueT = typename TreeOrAccT::ValueType; + using CoordT = typename TreeOrAccT::CoordType; +public: + + /// @brief Construction from a Tree or ReadAccessor + __hostdev__ SampleFromVoxels(const TreeOrAccT& acc) : BaseT(acc) {} + + /// @note xyz is in index space space + template class Vec3T> + inline __hostdev__ ValueT operator()(Vec3T xyz) const; + + __hostdev__ ValueT operator()(const CoordT &ijk) const {return BaseT::mAcc.getValue(ijk);} + + /// @brief Return true if the tr-linear stencil has a zero crossing at the specified index position. 
+ /// + /// @warning Will only compile with floating point value types + template class Vec3T> + inline __hostdev__ bool zeroCrossing(Vec3T xyz) const; + +}; // SampleFromVoxels + +/// @brief Template specialization with caching of stencil values +template +class SampleFromVoxels : public TriquadraticSampler +{ + using BaseT = TriquadraticSampler; + using ValueT = typename TreeOrAccT::ValueType; + using CoordT = typename TreeOrAccT::CoordType; + + mutable CoordT mPos; + mutable ValueT mVal[3][3][3]; + + template class Vec3T> + __hostdev__ void cache(Vec3T& xyz) const; +public: + + /// @brief Construction from a Tree or ReadAccessor + __hostdev__ SampleFromVoxels(const TreeOrAccT& acc) : BaseT(acc), mPos(CoordT::max()){} + + /// @note xyz is in index space space + template class Vec3T> + inline __hostdev__ ValueT operator()(Vec3T xyz) const; + + inline __hostdev__ ValueT operator()(const CoordT &ijk) const; + + /// @brief Return true if the tr-linear stencil has a zero crossing at the specified index position. + /// + /// @warning Will only compile with floating point value types + template class Vec3T> + inline __hostdev__ bool zeroCrossing(Vec3T xyz) const; + + /// @brief Return true if the cached tri-linear stencil has a zero crossing. + /// + /// @warning Will only compile with floating point value types + __hostdev__ bool zeroCrossing() const { return BaseT::zeroCrossing(mVal); } + +}; // SampleFromVoxels + +template +template class Vec3T> +__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels::operator()(Vec3T xyz) const +{ + this->cache(xyz); + return BaseT::sample(xyz, mVal); +} + +template +__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels::operator()(const CoordT &ijk) const +{ + return ijk == mPos ? mVal[1][1][1] : BaseT::mAcc.getValue(ijk); +} + +template +template class Vec3T> +__hostdev__ bool SampleFromVoxels::zeroCrossing(Vec3T xyz) const +{ + this->cache(xyz); + return BaseT::zeroCrossing(mVal); +} + +template +template class Vec3T> +__hostdev__ void SampleFromVoxels::cache(Vec3T& xyz) const +{ + CoordT ijk = Floor(xyz); + if (ijk != mPos) { + mPos = ijk; + BaseT::stencil(ijk, mVal); + } +} + +template +template class Vec3T> +__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels::operator()(Vec3T xyz) const +{ + ValueT val[3][3][3]; + CoordT ijk = Floor(xyz); + BaseT::stencil(ijk, val); + return BaseT::sample(xyz, val); +} + +template +template class Vec3T> +__hostdev__ bool SampleFromVoxels::zeroCrossing(Vec3T xyz) const +{ + ValueT val[3][3][3]; + CoordT ijk = Floor(xyz); + BaseT::stencil(ijk, val); + return BaseT::zeroCrossing(val); +} + +// ------------------------------> TricubicSampler <-------------------------------------- + +/// @brief Tri-cubic sampler, i.e. third order, interpolator. +/// +/// @details See the following paper for implementation details: +/// Lekien, F. and Marsden, J.: Tricubic interpolation in three dimensions. +/// In: International Journal for Numerical Methods +/// in Engineering (2005), No. 63, p. 
455-471 + +template +class TricubicSampler +{ +protected: + using ValueT = typename TreeOrAccT::ValueType; + using CoordT = typename TreeOrAccT::CoordType; + + const TreeOrAccT& mAcc; + +public: + /// @brief Construction from a Tree or ReadAccessor + __hostdev__ TricubicSampler(const TreeOrAccT& acc) + : mAcc(acc) + { + } + + __hostdev__ const TreeOrAccT& accessor() const { return mAcc; } + + /// @brief Extract the stencil of 8 values + inline __hostdev__ void stencil(const CoordT& ijk, ValueT (&c)[64]) const; + + template class Vec3T> + static inline __hostdev__ ValueT sample(const Vec3T &uvw, const ValueT (&c)[64]); +}; // TricubicSampler + +template +__hostdev__ void TricubicSampler::stencil(const CoordT& ijk, ValueT (&C)[64]) const +{ + auto fetch = [&](int i, int j, int k) -> ValueT& { return C[((i + 1) << 4) + ((j + 1) << 2) + k + 1]; }; + + // fetch 64 point stencil values + for (int i = -1; i < 3; ++i) { + for (int j = -1; j < 3; ++j) { + fetch(i, j, -1) = mAcc.getValue(ijk + CoordT(i, j, -1)); + fetch(i, j, 0) = mAcc.getValue(ijk + CoordT(i, j, 0)); + fetch(i, j, 1) = mAcc.getValue(ijk + CoordT(i, j, 1)); + fetch(i, j, 2) = mAcc.getValue(ijk + CoordT(i, j, 2)); + } + } + const ValueT half(0.5), quarter(0.25), eighth(0.125); + const ValueT X[64] = {// values of f(x,y,z) at the 8 corners (each from 1 stencil value). + fetch(0, 0, 0), + fetch(1, 0, 0), + fetch(0, 1, 0), + fetch(1, 1, 0), + fetch(0, 0, 1), + fetch(1, 0, 1), + fetch(0, 1, 1), + fetch(1, 1, 1), + // values of df/dx at the 8 corners (each from 2 stencil values). + half * (fetch(1, 0, 0) - fetch(-1, 0, 0)), + half * (fetch(2, 0, 0) - fetch(0, 0, 0)), + half * (fetch(1, 1, 0) - fetch(-1, 1, 0)), + half * (fetch(2, 1, 0) - fetch(0, 1, 0)), + half * (fetch(1, 0, 1) - fetch(-1, 0, 1)), + half * (fetch(2, 0, 1) - fetch(0, 0, 1)), + half * (fetch(1, 1, 1) - fetch(-1, 1, 1)), + half * (fetch(2, 1, 1) - fetch(0, 1, 1)), + // values of df/dy at the 8 corners (each from 2 stencil values). + half * (fetch(0, 1, 0) - fetch(0, -1, 0)), + half * (fetch(1, 1, 0) - fetch(1, -1, 0)), + half * (fetch(0, 2, 0) - fetch(0, 0, 0)), + half * (fetch(1, 2, 0) - fetch(1, 0, 0)), + half * (fetch(0, 1, 1) - fetch(0, -1, 1)), + half * (fetch(1, 1, 1) - fetch(1, -1, 1)), + half * (fetch(0, 2, 1) - fetch(0, 0, 1)), + half * (fetch(1, 2, 1) - fetch(1, 0, 1)), + // values of df/dz at the 8 corners (each from 2 stencil values). + half * (fetch(0, 0, 1) - fetch(0, 0, -1)), + half * (fetch(1, 0, 1) - fetch(1, 0, -1)), + half * (fetch(0, 1, 1) - fetch(0, 1, -1)), + half * (fetch(1, 1, 1) - fetch(1, 1, -1)), + half * (fetch(0, 0, 2) - fetch(0, 0, 0)), + half * (fetch(1, 0, 2) - fetch(1, 0, 0)), + half * (fetch(0, 1, 2) - fetch(0, 1, 0)), + half * (fetch(1, 1, 2) - fetch(1, 1, 0)), + // values of d2f/dxdy at the 8 corners (each from 4 stencil values). 
+ quarter * (fetch(1, 1, 0) - fetch(-1, 1, 0) - fetch(1, -1, 0) + fetch(-1, -1, 0)), + quarter * (fetch(2, 1, 0) - fetch(0, 1, 0) - fetch(2, -1, 0) + fetch(0, -1, 0)), + quarter * (fetch(1, 2, 0) - fetch(-1, 2, 0) - fetch(1, 0, 0) + fetch(-1, 0, 0)), + quarter * (fetch(2, 2, 0) - fetch(0, 2, 0) - fetch(2, 0, 0) + fetch(0, 0, 0)), + quarter * (fetch(1, 1, 1) - fetch(-1, 1, 1) - fetch(1, -1, 1) + fetch(-1, -1, 1)), + quarter * (fetch(2, 1, 1) - fetch(0, 1, 1) - fetch(2, -1, 1) + fetch(0, -1, 1)), + quarter * (fetch(1, 2, 1) - fetch(-1, 2, 1) - fetch(1, 0, 1) + fetch(-1, 0, 1)), + quarter * (fetch(2, 2, 1) - fetch(0, 2, 1) - fetch(2, 0, 1) + fetch(0, 0, 1)), + // values of d2f/dxdz at the 8 corners (each from 4 stencil values). + quarter * (fetch(1, 0, 1) - fetch(-1, 0, 1) - fetch(1, 0, -1) + fetch(-1, 0, -1)), + quarter * (fetch(2, 0, 1) - fetch(0, 0, 1) - fetch(2, 0, -1) + fetch(0, 0, -1)), + quarter * (fetch(1, 1, 1) - fetch(-1, 1, 1) - fetch(1, 1, -1) + fetch(-1, 1, -1)), + quarter * (fetch(2, 1, 1) - fetch(0, 1, 1) - fetch(2, 1, -1) + fetch(0, 1, -1)), + quarter * (fetch(1, 0, 2) - fetch(-1, 0, 2) - fetch(1, 0, 0) + fetch(-1, 0, 0)), + quarter * (fetch(2, 0, 2) - fetch(0, 0, 2) - fetch(2, 0, 0) + fetch(0, 0, 0)), + quarter * (fetch(1, 1, 2) - fetch(-1, 1, 2) - fetch(1, 1, 0) + fetch(-1, 1, 0)), + quarter * (fetch(2, 1, 2) - fetch(0, 1, 2) - fetch(2, 1, 0) + fetch(0, 1, 0)), + // values of d2f/dydz at the 8 corners (each from 4 stencil values). + quarter * (fetch(0, 1, 1) - fetch(0, -1, 1) - fetch(0, 1, -1) + fetch(0, -1, -1)), + quarter * (fetch(1, 1, 1) - fetch(1, -1, 1) - fetch(1, 1, -1) + fetch(1, -1, -1)), + quarter * (fetch(0, 2, 1) - fetch(0, 0, 1) - fetch(0, 2, -1) + fetch(0, 0, -1)), + quarter * (fetch(1, 2, 1) - fetch(1, 0, 1) - fetch(1, 2, -1) + fetch(1, 0, -1)), + quarter * (fetch(0, 1, 2) - fetch(0, -1, 2) - fetch(0, 1, 0) + fetch(0, -1, 0)), + quarter * (fetch(1, 1, 2) - fetch(1, -1, 2) - fetch(1, 1, 0) + fetch(1, -1, 0)), + quarter * (fetch(0, 2, 2) - fetch(0, 0, 2) - fetch(0, 2, 0) + fetch(0, 0, 0)), + quarter * (fetch(1, 2, 2) - fetch(1, 0, 2) - fetch(1, 2, 0) + fetch(1, 0, 0)), + // values of d3f/dxdydz at the 8 corners (each from 8 stencil values). 
+ eighth * (fetch(1, 1, 1) - fetch(-1, 1, 1) - fetch(1, -1, 1) + fetch(-1, -1, 1) - fetch(1, 1, -1) + fetch(-1, 1, -1) + fetch(1, -1, -1) - fetch(-1, -1, -1)), + eighth * (fetch(2, 1, 1) - fetch(0, 1, 1) - fetch(2, -1, 1) + fetch(0, -1, 1) - fetch(2, 1, -1) + fetch(0, 1, -1) + fetch(2, -1, -1) - fetch(0, -1, -1)), + eighth * (fetch(1, 2, 1) - fetch(-1, 2, 1) - fetch(1, 0, 1) + fetch(-1, 0, 1) - fetch(1, 2, -1) + fetch(-1, 2, -1) + fetch(1, 0, -1) - fetch(-1, 0, -1)), + eighth * (fetch(2, 2, 1) - fetch(0, 2, 1) - fetch(2, 0, 1) + fetch(0, 0, 1) - fetch(2, 2, -1) + fetch(0, 2, -1) + fetch(2, 0, -1) - fetch(0, 0, -1)), + eighth * (fetch(1, 1, 2) - fetch(-1, 1, 2) - fetch(1, -1, 2) + fetch(-1, -1, 2) - fetch(1, 1, 0) + fetch(-1, 1, 0) + fetch(1, -1, 0) - fetch(-1, -1, 0)), + eighth * (fetch(2, 1, 2) - fetch(0, 1, 2) - fetch(2, -1, 2) + fetch(0, -1, 2) - fetch(2, 1, 0) + fetch(0, 1, 0) + fetch(2, -1, 0) - fetch(0, -1, 0)), + eighth * (fetch(1, 2, 2) - fetch(-1, 2, 2) - fetch(1, 0, 2) + fetch(-1, 0, 2) - fetch(1, 2, 0) + fetch(-1, 2, 0) + fetch(1, 0, 0) - fetch(-1, 0, 0)), + eighth * (fetch(2, 2, 2) - fetch(0, 2, 2) - fetch(2, 0, 2) + fetch(0, 0, 2) - fetch(2, 2, 0) + fetch(0, 2, 0) + fetch(2, 0, 0) - fetch(0, 0, 0))}; + + // 4Kb of static table (int8_t has a range of -127 -> 127 which suffices) + static const int8_t A[64][64] = { + {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {-3, 3, 0, 0, 0, 0, 0, 0, -2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {2, -2, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {-3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, -3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {9, -9, -9, 9, 0, 0, 0, 0, 6, 3, -6, -3, 0, 0, 0, 0, 6, -6, 3, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {-6, 6, 6, -6, 0, 0, 0, 0, 
-3, -3, 3, 3, 0, 0, 0, 0, -4, 4, -2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -2, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {2, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 2, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {-6, 6, 6, -6, 0, 0, 0, 0, -4, -2, 4, 2, 0, 0, 0, 0, -3, 3, -3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -1, -2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {4, -4, -4, 4, 0, 0, 0, 0, 2, 2, -2, -2, 0, 0, 0, 0, 2, -2, 2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 3, 0, 0, 0, 0, 0, 0, -2, -1, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, -2, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -1, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, -9, -9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 3, -6, -3, 0, 0, 0, 0, 6, -6, 3, -3, 0, 0, 0, 0, 4, 2, 2, 1, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -6, 6, 6, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, -3, 3, 3, 0, 0, 0, 0, -4, 4, -2, 2, 0, 0, 0, 0, -2, -2, -1, -1, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -6, 6, 6, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -2, 4, 2, 0, 0, 0, 0, -3, 3, -3, 3, 0, 0, 0, 0, -2, -1, -2, -1, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, -4, -4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, -2, -2, 0, 0, 0, 0, 2, -2, 2, -2, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0}, + {-3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, -3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {9, -9, 0, 0, -9, 9, 0, 0, 6, 3, 0, 0, -6, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, -6, 0, 0, 3, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 2, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {-6, 6, 0, 0, 6, -6, 0, 0, -3, -3, 0, 0, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, 4, 0, 0, -2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -2, 0, 0, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, 0, 0, -1, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, -9, 0, 0, -9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 3, 0, 0, -6, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, -6, 0, 0, 3, -3, 0, 0, 4, 2, 0, 0, 2, 1, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -6, 6, 0, 0, 6, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, -3, 0, 0, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, 4, 0, 0, -2, 2, 0, 0, -2, -2, 0, 0, -1, -1, 0, 0}, + {9, 0, -9, 0, -9, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 3, 0, -6, 0, -3, 0, 6, 0, -6, 0, 3, 0, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 2, 0, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 9, 0, -9, 0, -9, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 3, 0, -6, 0, -3, 0, 6, 0, -6, 0, 3, 0, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 2, 0, 2, 0, 1, 0}, + {-27, 27, 27, -27, 27, -27, -27, 27, -18, -9, 18, 9, 18, 9, -18, -9, -18, 18, -9, 9, 18, -18, 9, -9, -18, 18, 18, -18, -9, 9, 9, -9, -12, -6, -6, -3, 12, 6, 6, 3, -12, -6, 12, 6, -6, -3, 6, 3, -12, 12, -6, 6, -6, 6, -3, 3, -8, -4, -4, -2, -4, -2, -2, -1}, + {18, -18, -18, 18, -18, 18, 18, -18, 9, 9, -9, -9, -9, -9, 9, 9, 12, -12, 6, -6, -12, 12, -6, 6, 12, -12, -12, 12, 6, -6, -6, 6, 6, 6, 3, 3, -6, -6, -3, -3, 6, 6, -6, -6, 3, 3, -3, -3, 8, -8, 4, -4, 4, -4, 2, -2, 4, 4, 2, 2, 2, 2, 1, 1}, + {-6, 0, 6, 0, 6, 0, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 0, -3, 0, 3, 0, 3, 0, -4, 0, 4, 0, -2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -2, 0, -1, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, -6, 0, 6, 0, 6, 0, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 0, -3, 0, 3, 0, 3, 0, -4, 0, 4, 0, -2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -2, 0, -1, 0, -1, 0}, + {18, -18, -18, 
18, -18, 18, 18, -18, 12, 6, -12, -6, -12, -6, 12, 6, 9, -9, 9, -9, -9, 9, -9, 9, 12, -12, -12, 12, 6, -6, -6, 6, 6, 3, 6, 3, -6, -3, -6, -3, 8, 4, -8, -4, 4, 2, -4, -2, 6, -6, 6, -6, 3, -3, 3, -3, 4, 2, 4, 2, 2, 1, 2, 1}, + {-12, 12, 12, -12, 12, -12, -12, 12, -6, -6, 6, 6, 6, 6, -6, -6, -6, 6, -6, 6, 6, -6, 6, -6, -8, 8, 8, -8, -4, 4, 4, -4, -3, -3, -3, -3, 3, 3, 3, 3, -4, -4, 4, 4, -2, -2, 2, 2, -4, 4, -4, 4, -2, 2, -2, 2, -2, -2, -2, -2, -1, -1, -1, -1}, + {2, 0, 0, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {-6, 6, 0, 0, 6, -6, 0, 0, -4, -2, 0, 0, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 3, 0, 0, -3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -1, 0, 0, -2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {4, -4, 0, 0, -4, 4, 0, 0, 2, 2, 0, 0, -2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, -2, 0, 0, 2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -6, 6, 0, 0, 6, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -2, 0, 0, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 3, 0, 0, -3, 3, 0, 0, -2, -1, 0, 0, -2, -1, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, -4, 0, 0, -4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, -2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, -2, 0, 0, 2, -2, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0}, + {-6, 0, 6, 0, 6, 0, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, 0, -2, 0, 4, 0, 2, 0, -3, 0, 3, 0, -3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -1, 0, -2, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, -6, 0, 6, 0, 6, 0, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, 0, -2, 0, 4, 0, 2, 0, -3, 0, 3, 0, -3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -1, 0, -2, 0, -1, 0}, + {18, -18, -18, 18, -18, 18, 18, -18, 12, 6, -12, -6, -12, -6, 12, 6, 12, -12, 6, -6, -12, 12, -6, 6, 9, -9, -9, 9, 9, -9, -9, 9, 8, 4, 4, 2, -8, -4, -4, -2, 6, 3, -6, -3, 6, 3, -6, -3, 6, -6, 3, -3, 6, -6, 3, -3, 4, 2, 2, 1, 4, 2, 2, 1}, + {-12, 12, 12, -12, 12, -12, -12, 12, -6, -6, 6, 6, 6, 6, -6, -6, -8, 8, -4, 4, 8, -8, 4, -4, -6, 6, 6, -6, -6, 6, 6, -6, -4, -4, -2, -2, 4, 4, 2, 2, -3, -3, 3, 3, -3, -3, 3, 3, -4, 4, -2, 2, -4, 4, -2, 2, -2, -2, -1, -1, -2, -2, -1, -1}, + {4, 0, -4, 0, -4, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, -2, 0, -2, 0, 2, 0, -2, 0, 2, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 4, 0, -4, 0, -4, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, -2, 0, -2, 0, 2, 0, -2, 0, 2, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0}, + {-12, 12, 12, -12, 12, -12, -12, 12, -8, -4, 8, 4, 8, 4, -8, -4, -6, 6, -6, 6, 6, -6, 6, -6, -6, 6, 6, -6, -6, 6, 6, -6, -4, -2, -4, -2, 4, 2, 4, 2, -4, -2, 4, 2, -4, -2, 4, 2, -3, 3, -3, 
3, -3, 3, -3, 3, -2, -1, -2, -1, -2, -1, -2, -1}, + {8, -8, -8, 8, -8, 8, 8, -8, 4, 4, -4, -4, -4, -4, 4, 4, 4, -4, 4, -4, -4, 4, -4, 4, 4, -4, -4, 4, 4, -4, -4, 4, 2, 2, 2, 2, -2, -2, -2, -2, 2, 2, -2, -2, 2, 2, -2, -2, 2, -2, 2, -2, 2, -2, 2, -2, 1, 1, 1, 1, 1, 1, 1, 1}}; + + for (int i = 0; i < 64; ++i) { // C = A * X + C[i] = ValueT(0); +#if 0 + for (int j = 0; j < 64; j += 4) { + C[i] = fma(A[i][j], X[j], fma(A[i][j+1], X[j+1], fma(A[i][j+2], X[j+2], fma(A[i][j+3], X[j+3], C[i])))); + } +#else + for (int j = 0; j < 64; j += 4) { + C[i] += A[i][j] * X[j] + A[i][j + 1] * X[j + 1] + A[i][j + 2] * X[j + 2] + A[i][j + 3] * X[j + 3]; + } +#endif + } +} + +template +template class Vec3T> +__hostdev__ typename TreeOrAccT::ValueType TricubicSampler::sample(const Vec3T &xyz, const ValueT (&C)[64]) +{ + ValueT zPow(1), sum(0); + for (int k = 0, n = 0; k < 4; ++k) { + ValueT yPow(1); + for (int j = 0; j < 4; ++j, n += 4) { +#if 0 + sum = fma( yPow, zPow * fma(xyz[0], fma(xyz[0], fma(xyz[0], C[n + 3], C[n + 2]), C[n + 1]), C[n]), sum); +#else + sum += yPow * zPow * (C[n] + xyz[0] * (C[n + 1] + xyz[0] * (C[n + 2] + xyz[0] * C[n + 3]))); +#endif + yPow *= xyz[1]; + } + zPow *= xyz[2]; + } + return sum; +} + +template +class SampleFromVoxels : public TricubicSampler +{ + using BaseT = TricubicSampler; + using ValueT = typename TreeOrAccT::ValueType; + using CoordT = typename TreeOrAccT::CoordType; + + mutable CoordT mPos; + mutable ValueT mC[64]; + + template class Vec3T> + __hostdev__ void cache(Vec3T& xyz) const; + +public: + /// @brief Construction from a Tree or ReadAccessor + __hostdev__ SampleFromVoxels(const TreeOrAccT& acc) + : BaseT(acc) + { + } + + /// @note xyz is in index space space + template class Vec3T> + inline __hostdev__ ValueT operator()(Vec3T xyz) const; + + // @brief Return value at the coordinate @a ijk in index space space + __hostdev__ ValueT operator()(const CoordT &ijk) const {return BaseT::mAcc.getValue(ijk);} + +}; // SampleFromVoxels + +template +template class Vec3T> +__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels::operator()(Vec3T xyz) const +{ + this->cache(xyz); + return BaseT::sample(xyz, mC); +} + +template +template class Vec3T> +__hostdev__ void SampleFromVoxels::cache(Vec3T& xyz) const +{ + CoordT ijk = Floor(xyz); + if (ijk != mPos) { + mPos = ijk; + BaseT::stencil(ijk, mC); + } +} + +template +class SampleFromVoxels : public TricubicSampler +{ + using BaseT = TricubicSampler; + using ValueT = typename TreeOrAccT::ValueType; + using CoordT = typename TreeOrAccT::CoordType; + +public: + /// @brief Construction from a Tree or ReadAccessor + __hostdev__ SampleFromVoxels(const TreeOrAccT& acc) + : BaseT(acc) + { + } + + /// @note xyz is in index space space + template class Vec3T> + inline __hostdev__ ValueT operator()(Vec3T xyz) const; + + __hostdev__ ValueT operator()(const CoordT &ijk) const {return BaseT::mAcc.getValue(ijk);} + +}; // SampleFromVoxels + +template +template class Vec3T> +__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels::operator()(Vec3T xyz) const +{ + ValueT C[64]; + CoordT ijk = Floor(xyz); + BaseT::stencil(ijk, C); + return BaseT::sample(xyz, C); +} + +}// namespace math + +template +[[deprecated("Use nanovdb::math::createSampler instead")]] +__hostdev__ math::SampleFromVoxels createSampler(const TreeOrAccT& acc) +{ + return math::SampleFromVoxels(acc); +} + +} // namespace nanovdb + +#endif // NANOVDB_SAMPLE_FROM_VOXELS_H_HAS_BEEN_INCLUDED diff --git a/external/nanovdb/math/Stencils.h 
b/external/nanovdb/math/Stencils.h new file mode 100644 index 00000000..e4663810 --- /dev/null +++ b/external/nanovdb/math/Stencils.h @@ -0,0 +1,1032 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 +// +/// @author Ken Museth +/// +/// @date April 9, 2021 +/// +/// @file Stencils.h +/// +/// @brief Defines various finite-difference stencils that allow for the +/// computation of gradients of order 1 to 5, mean curvatures, +/// gaussian curvatures, principal curvatures, tri-linear interpolation, +/// zero-crossing, laplacian, and closest point transform. + +#ifndef NANOVDB_MATH_STENCILS_HAS_BEEN_INCLUDED +#define NANOVDB_MATH_STENCILS_HAS_BEEN_INCLUDED + +#include // for __hostdev__, Vec3, Min, Max, Pow2, Pow3, Pow4 + +namespace nanovdb { + +namespace math { + +// ---------------------------- WENO5 ---------------------------- + +/// @brief Implementation of nominally fifth-order finite-difference WENO +/// @details This function returns the numerical flux. See "High Order Finite Difference and +/// Finite Volume WENO Schemes and Discontinuous Galerkin Methods for CFD" - Chi-Wang Shu +/// ICASE Report No 2001-11 (page 6). Also see ICASE No 97-65 for a more complete reference +/// (Shu, 1997). +/// Given v1 = f(x-2dx), v2 = f(x-dx), v3 = f(x), v4 = f(x+dx) and v5 = f(x+2dx), +/// return an interpolated value f(x+dx/2) with the special property that +/// ( f(x+dx/2) - f(x-dx/2) ) / dx = df/dx (x) + error, +/// where the error is fifth-order in smooth regions: O(dx) <= error <=O(dx^5) +template +__hostdev__ inline ValueType +WENO5(const ValueType& v1, + const ValueType& v2, + const ValueType& v3, + const ValueType& v4, + const ValueType& v5, + RealT scale2 = 1.0)// openvdb uses scale2 = 0.01 +{ + static const RealT C = 13.0 / 12.0; + // WENO is formulated for non-dimensional equations, here the optional scale2 + // is a reference value (squared) for the function being interpolated. For + // example if 'v' is of order 1000, then scale2 = 10^6 is ok. But in practice + // leave scale2 = 1. 
+ const RealT eps = RealT(1.0e-6) * scale2; + // {\tilde \omega_k} = \gamma_k / ( \beta_k + \epsilon)^2 in Shu's ICASE report) + const RealT A1 = RealT(0.1)/Pow2(C*Pow2(v1-2*v2+v3)+RealT(0.25)*Pow2(v1-4*v2+3*v3)+eps), + A2 = RealT(0.6)/Pow2(C*Pow2(v2-2*v3+v4)+RealT(0.25)*Pow2(v2-v4)+eps), + A3 = RealT(0.3)/Pow2(C*Pow2(v3-2*v4+v5)+RealT(0.25)*Pow2(3*v3-4*v4+v5)+eps); + + return static_cast((A1*(2*v1 - 7*v2 + 11*v3) + + A2*(5*v3 - v2 + 2*v4) + + A3*(2*v3 + 5*v4 - v5))/(6*(A1+A2+A3))); +} + +// ---------------------------- GodunovsNormSqrd ---------------------------- + +template +__hostdev__ inline RealT +GodunovsNormSqrd(bool isOutside, + RealT dP_xm, RealT dP_xp, + RealT dP_ym, RealT dP_yp, + RealT dP_zm, RealT dP_zp) +{ + RealT dPLen2; + if (isOutside) { // outside + dPLen2 = Max(Pow2(Max(dP_xm, RealT(0))), Pow2(Min(dP_xp, RealT(0)))); // (dP/dx)2 + dPLen2 += Max(Pow2(Max(dP_ym, RealT(0))), Pow2(Min(dP_yp, RealT(0)))); // (dP/dy)2 + dPLen2 += Max(Pow2(Max(dP_zm, RealT(0))), Pow2(Min(dP_zp, RealT(0)))); // (dP/dz)2 + } else { // inside + dPLen2 = Max(Pow2(Min(dP_xm, RealT(0))), Pow2(Max(dP_xp, RealT(0)))); // (dP/dx)2 + dPLen2 += Max(Pow2(Min(dP_ym, RealT(0))), Pow2(Max(dP_yp, RealT(0)))); // (dP/dy)2 + dPLen2 += Max(Pow2(Min(dP_zm, RealT(0))), Pow2(Max(dP_zp, RealT(0)))); // (dP/dz)2 + } + return dPLen2; // |\nabla\phi|^2 +} + +template +__hostdev__ inline RealT +GodunovsNormSqrd(bool isOutside, + const Vec3& gradient_m, + const Vec3& gradient_p) +{ + return GodunovsNormSqrd(isOutside, + gradient_m[0], gradient_p[0], + gradient_m[1], gradient_p[1], + gradient_m[2], gradient_p[2]); +} + +// ---------------------------- BaseStencil ---------------------------- + +// BaseStencil uses curiously recurring template pattern (CRTP) +template +class BaseStencil +{ +public: + using ValueType = typename GridT::ValueType; + using GridType = GridT; + using TreeType = typename GridT::TreeType; + using AccessorType = typename GridT::AccessorType;// ReadAccessor; + + /// @brief Initialize the stencil buffer with the values of voxel (i, j, k) + /// and its neighbors. + /// @param ijk Index coordinates of stencil center + __hostdev__ inline void moveTo(const Coord& ijk) + { + mCenter = ijk; + mValues[0] = mAcc.getValue(ijk); + static_cast(*this).init(mCenter); + } + + /// @brief Initialize the stencil buffer with the values of voxel (i, j, k) + /// and its neighbors. The method also takes a value of the center + /// element of the stencil, assuming it is already known. + /// @param ijk Index coordinates of stencil center + /// @param centerValue Value of the center element of the stencil + __hostdev__ inline void moveTo(const Coord& ijk, const ValueType& centerValue) + { + mCenter = ijk; + mValues[0] = centerValue; + static_cast(*this).init(mCenter); + } + + /// @brief Initialize the stencil buffer with the values of voxel + /// (x, y, z) and its neighbors. + /// + /// @note This version is slightly faster than the one above, since + /// the center voxel's value is read directly from the iterator. + template + __hostdev__ inline void moveTo(const IterType& iter) + { + mCenter = iter.getCoord(); + mValues[0] = *iter; + static_cast(*this).init(mCenter); + } + + /// @brief Initialize the stencil buffer with the values of voxel (x, y, z) + /// and its neighbors. + /// @param xyz Floating point voxel coordinates of stencil center + /// @details This method will check to see if it is necessary to + /// update the stencil based on the cached index coordinates of + /// the center point. 
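+    ///
+    /// Sketch (grid is a nanovdb::FloatGrid and xyz an index-space position;
+    /// both are placeholders), using the 7-point GradStencil defined later in this file:
+    /// @code
+    /// nanovdb::math::GradStencil<nanovdb::FloatGrid> stencil(grid);
+    /// stencil.moveTo(xyz);            // (re)populates the stencil buffer if needed
+    /// auto grad = stencil.gradient(); // second-order central-difference gradient
+    /// @endcode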
+ template + __hostdev__ inline void moveTo(const Vec3& xyz) + { + Coord ijk = RoundDown(xyz); + if (ijk != mCenter) this->moveTo(ijk); + } + + /// @brief Return the value from the stencil buffer with linear + /// offset pos. + /// + /// @note The default (@a pos = 0) corresponds to the first element + /// which is typically the center point of the stencil. + __hostdev__ inline const ValueType& getValue(unsigned int pos = 0) const + { + NANOVDB_ASSERT(pos < SIZE); + return mValues[pos]; + } + + /// @brief Return the value at the specified location relative to the center of the stencil + template + __hostdev__ inline const ValueType& getValue() const + { + return mValues[static_cast(*this).template pos()]; + } + + /// @brief Set the value at the specified location relative to the center of the stencil + template + __hostdev__ inline void setValue(const ValueType& value) + { + mValues[static_cast(*this).template pos()] = value; + } + + /// @brief Return the size of the stencil buffer. + __hostdev__ static int size() { return SIZE; } + + /// @brief Return the mean value of the current stencil. + __hostdev__ inline ValueType mean() const + { + ValueType sum = 0.0; + for (int i = 0; i < SIZE; ++i) sum += mValues[i]; + return sum / ValueType(SIZE); + } + + /// @brief Return the smallest value in the stencil buffer. + __hostdev__ inline ValueType min() const + { + ValueType v = mValues[0]; + for (int i=1; i v) v = mValues[i]; + } + return v; + } + + /// @brief Return the coordinates of the center point of the stencil. + __hostdev__ inline const Coord& getCenterCoord() const { return mCenter; } + + /// @brief Return the value at the center of the stencil + __hostdev__ inline const ValueType& getCenterValue() const { return mValues[0]; } + + /// @brief Return true if the center of the stencil intersects the + /// iso-contour specified by the isoValue + __hostdev__ inline bool intersects(const ValueType &isoValue = ValueType(0) ) const + { + const bool less = this->getValue< 0, 0, 0>() < isoValue; + return (less ^ (this->getValue<-1, 0, 0>() < isoValue)) || + (less ^ (this->getValue< 1, 0, 0>() < isoValue)) || + (less ^ (this->getValue< 0,-1, 0>() < isoValue)) || + (less ^ (this->getValue< 0, 1, 0>() < isoValue)) || + (less ^ (this->getValue< 0, 0,-1>() < isoValue)) || + (less ^ (this->getValue< 0, 0, 1>() < isoValue)) ; + } + struct Mask { + uint8_t bits; + __hostdev__ Mask() : bits(0u) {} + __hostdev__ void set(int i) { bits |= (1 << i); } + __hostdev__ bool test(int i) const { return bits & (1 << i); } + __hostdev__ bool any() const { return bits > 0u; } + __hostdev__ bool all() const { return bits == 255u; } + __hostdev__ bool none() const { return bits == 0u; } + __hostdev__ int count() const { return util::countOn(bits); } + };// Mask + + /// @brief Return true a bit-mask where the 6 lower bits indicates if the + /// center of the stencil intersects the iso-contour specified by the isoValue. + /// + /// @note There are 2^6 = 64 different possible cases, including no intersections! + /// + /// @details The ordering of bit mask is ( -x, +x, -y, +y, -z, +z ), so to + /// check if there is an intersection in -y use (mask & (1u<<2)) where mask is + /// ther return value from this function. To check if there are any + /// intersections use mask!=0u, and for no intersections use mask==0u. + /// To count the number of intersections use __builtin_popcount(mask). 
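+    ///
+    /// Sketch (the stencil has already been moved to a voxel of a signed-distance grid):
+    /// @code
+    /// auto mask = stencil.intersectionMask(0.0f);
+    /// if (mask.any()) {
+    ///     int n = mask.count(); // number of the six face neighbors that cross the iso-value
+    /// }
+    /// @endcode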
+ __hostdev__ inline Mask intersectionMask(ValueType isoValue = ValueType(0)) const + { + Mask mask; + const bool less = this->getValue< 0, 0, 0>() < isoValue; + if (less ^ (this->getValue<-1, 0, 0>() < isoValue)) mask.set(0);// |= 1u; + if (less ^ (this->getValue< 1, 0, 0>() < isoValue)) mask.set(1);// |= 2u; + if (less ^ (this->getValue< 0,-1, 0>() < isoValue)) mask.set(2);// |= 4u; + if (less ^ (this->getValue< 0, 1, 0>() < isoValue)) mask.set(3);// |= 8u; + if (less ^ (this->getValue< 0, 0,-1>() < isoValue)) mask.set(4);// |= 16u; + if (less ^ (this->getValue< 0, 0, 1>() < isoValue)) mask.set(5);// |= 32u; + return mask; + } + + /// @brief Return a const reference to the grid from which this + /// stencil was constructed. + __hostdev__ inline const GridType& grid() const { return *mGrid; } + + /// @brief Return a const reference to the ValueAccessor + /// associated with this Stencil. + __hostdev__ inline const AccessorType& accessor() const { return mAcc; } + +protected: + // Constructor is protected to prevent direct instantiation. + __hostdev__ BaseStencil(const GridType& grid) + : mGrid(&grid) + , mAcc(grid) + , mCenter(Coord::max()) + { + } + + const GridType* mGrid; + AccessorType mAcc; + ValueType mValues[SIZE]; + Coord mCenter; + +}; // BaseStencil class + + +// ---------------------------- BoxStencil ---------------------------- + + +namespace { // anonymous namespace for stencil-layout map + + // the eight point box stencil + template struct BoxPt {}; + template<> struct BoxPt< 0, 0, 0> { enum { idx = 0 }; }; + template<> struct BoxPt< 0, 0, 1> { enum { idx = 1 }; }; + template<> struct BoxPt< 0, 1, 1> { enum { idx = 2 }; }; + template<> struct BoxPt< 0, 1, 0> { enum { idx = 3 }; }; + template<> struct BoxPt< 1, 0, 0> { enum { idx = 4 }; }; + template<> struct BoxPt< 1, 0, 1> { enum { idx = 5 }; }; + template<> struct BoxPt< 1, 1, 1> { enum { idx = 6 }; }; + template<> struct BoxPt< 1, 1, 0> { enum { idx = 7 }; }; + +} + +template +class BoxStencil: public BaseStencil, 8, GridT> +{ + using SelfT = BoxStencil; + using BaseType = BaseStencil; +public: + using GridType = GridT; + using TreeType = typename GridT::TreeType; + using ValueType = typename GridT::ValueType; + + static constexpr int SIZE = 8; + + __hostdev__ BoxStencil(const GridType& grid) : BaseType(grid) {} + + /// Return linear offset for the specified stencil point relative to its center + template + __hostdev__ unsigned int pos() const { return BoxPt::idx; } + + /// @brief Return true if the center of the stencil intersects the + /// iso-contour specified by the isoValue + __hostdev__ inline bool intersects(ValueType isoValue = ValueType(0)) const + { + const bool less = mValues[0] < isoValue; + return (less ^ (mValues[1] < isoValue)) || + (less ^ (mValues[2] < isoValue)) || + (less ^ (mValues[3] < isoValue)) || + (less ^ (mValues[4] < isoValue)) || + (less ^ (mValues[5] < isoValue)) || + (less ^ (mValues[6] < isoValue)) || + (less ^ (mValues[7] < isoValue)) ; + } + + /// @brief Return the trilinear interpolation at the normalized position. + /// @param xyz Floating point coordinate position. Index space and NOT world space. + /// @warning It is assumed that the stencil has already been moved + /// to the relevant voxel position, e.g. using moveTo(xyz). 
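+    ///
+    /// Sketch (grid is a nanovdb::FloatGrid and xyz an index-space position inside
+    /// the current cell; both are placeholders):
+    /// @code
+    /// nanovdb::math::BoxStencil<nanovdb::FloatGrid> stencil(grid);
+    /// stencil.moveTo(xyz);                      // populate the 8-point stencil buffer
+    /// float v = stencil.interpolation(xyz);     // trilinear interpolation at xyz
+    /// @endcode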
+ /// @note Trilinear interpolation kernal reads as: + /// v000 (1-u)(1-v)(1-w) + v001 (1-u)(1-v)w + v010 (1-u)v(1-w) + v011 (1-u)vw + /// + v100 u(1-v)(1-w) + v101 u(1-v)w + v110 uv(1-w) + v111 uvw + __hostdev__ inline ValueType interpolation(const Vec3& xyz) const + { + const ValueType u = xyz[0] - mCenter[0]; + const ValueType v = xyz[1] - mCenter[1]; + const ValueType w = xyz[2] - mCenter[2]; + + NANOVDB_ASSERT(u>=0 && u<=1); + NANOVDB_ASSERT(v>=0 && v<=1); + NANOVDB_ASSERT(w>=0 && w<=1); + + ValueType V = BaseType::template getValue<0,0,0>(); + ValueType A = V + (BaseType::template getValue<0,0,1>() - V) * w; + V = BaseType::template getValue< 0, 1, 0>(); + ValueType B = V + (BaseType::template getValue<0,1,1>() - V) * w; + ValueType C = A + (B - A) * v; + + V = BaseType::template getValue<1,0,0>(); + A = V + (BaseType::template getValue<1,0,1>() - V) * w; + V = BaseType::template getValue<1,1,0>(); + B = V + (BaseType::template getValue<1,1,1>() - V) * w; + ValueType D = A + (B - A) * v; + + return C + (D - C) * u; + } + + /// @brief Return the gradient in world space of the trilinear interpolation kernel. + /// @param xyz Floating point coordinate position. + /// @warning It is assumed that the stencil has already been moved + /// to the relevant voxel position, e.g. using moveTo(xyz). + /// @note Computed as partial derivatives of the trilinear interpolation kernel: + /// v000 (1-u)(1-v)(1-w) + v001 (1-u)(1-v)w + v010 (1-u)v(1-w) + v011 (1-u)vw + /// + v100 u(1-v)(1-w) + v101 u(1-v)w + v110 uv(1-w) + v111 uvw + __hostdev__ inline Vec3 gradient(const Vec3& xyz) const + { + const ValueType u = xyz[0] - mCenter[0]; + const ValueType v = xyz[1] - mCenter[1]; + const ValueType w = xyz[2] - mCenter[2]; + + NANOVDB_ASSERT(u>=0 && u<=1); + NANOVDB_ASSERT(v>=0 && v<=1); + NANOVDB_ASSERT(w>=0 && w<=1); + + ValueType D[4]={BaseType::template getValue<0,0,1>()-BaseType::template getValue<0,0,0>(), + BaseType::template getValue<0,1,1>()-BaseType::template getValue<0,1,0>(), + BaseType::template getValue<1,0,1>()-BaseType::template getValue<1,0,0>(), + BaseType::template getValue<1,1,1>()-BaseType::template getValue<1,1,0>()}; + + // Z component + ValueType A = D[0] + (D[1]- D[0]) * v; + ValueType B = D[2] + (D[3]- D[2]) * v; + Vec3 grad(0, 0, A + (B - A) * u); + + D[0] = BaseType::template getValue<0,0,0>() + D[0] * w; + D[1] = BaseType::template getValue<0,1,0>() + D[1] * w; + D[2] = BaseType::template getValue<1,0,0>() + D[2] * w; + D[3] = BaseType::template getValue<1,1,0>() + D[3] * w; + + // X component + A = D[0] + (D[1] - D[0]) * v; + B = D[2] + (D[3] - D[2]) * v; + + grad[0] = B - A; + + // Y component + A = D[1] - D[0]; + B = D[3] - D[2]; + + grad[1] = A + (B - A) * u; + + return BaseType::mGrid->map().applyIJT(grad); + } + +private: + __hostdev__ inline void init(const Coord& ijk) + { + mValues[ 1] = mAcc.getValue(ijk.offsetBy( 0, 0, 1)); + mValues[ 2] = mAcc.getValue(ijk.offsetBy( 0, 1, 1)); + mValues[ 3] = mAcc.getValue(ijk.offsetBy( 0, 1, 0)); + mValues[ 4] = mAcc.getValue(ijk.offsetBy( 1, 0, 0)); + mValues[ 5] = mAcc.getValue(ijk.offsetBy( 1, 0, 1)); + mValues[ 6] = mAcc.getValue(ijk.offsetBy( 1, 1, 1)); + mValues[ 7] = mAcc.getValue(ijk.offsetBy( 1, 1, 0)); + } + + template friend class BaseStencil; // allow base class to call init() + using BaseType::mAcc; + using BaseType::mValues; + using BaseType::mCenter; +};// BoxStencil class + + +// ---------------------------- GradStencil ---------------------------- + +namespace { // anonymous namespace for stencil-layout map + + 
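As with BoxPt above, the anonymous namespace just opened holds a compile-time layout map (GradPt follows): each specialization ties an (i,j,k) offset to the linear slot that init() fills, and BaseStencil::getValue<i,j,k>() resolves it through the derived class's pos<i,j,k>() via the CRTP cast. The idiom in isolation, using a hypothetical two-point map that is not part of the header:

\code
// Hypothetical layout map, illustrating the BoxPt/GradPt/WenoPt/CurvPt idiom.
template<int i, int j, int k> struct DemoPt {};            // primary template: unmapped offsets do not compile
template<> struct DemoPt<0, 0, 0> { enum { idx = 0 }; };   // center       -> mValues[0]
template<> struct DemoPt<0, 0, 1> { enum { idx = 1 }; };   // +z neighbour -> mValues[1]
// A stencil then exposes the map through its pos() member, so every lookup
// folds to a compile-time constant index:
//   template<int i, int j, int k> unsigned int pos() const { return DemoPt<i, j, k>::idx; }
\endcode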
template struct GradPt {}; + template<> struct GradPt< 0, 0, 0> { enum { idx = 0 }; }; + template<> struct GradPt< 1, 0, 0> { enum { idx = 2 }; }; + template<> struct GradPt< 0, 1, 0> { enum { idx = 4 }; }; + template<> struct GradPt< 0, 0, 1> { enum { idx = 6 }; }; + template<> struct GradPt<-1, 0, 0> { enum { idx = 1 }; }; + template<> struct GradPt< 0,-1, 0> { enum { idx = 3 }; }; + template<> struct GradPt< 0, 0,-1> { enum { idx = 5 }; }; +} + +/// This is a simple 7-point nearest neighbor stencil that supports +/// gradient by second-order central differencing, first-order upwinding, +/// Laplacian, closest-point transform and zero-crossing test. +/// +/// @note For optimal random access performance this class +/// includes its own grid accessor. +template +class GradStencil : public BaseStencil, 7, GridT> +{ + using SelfT = GradStencil; + using BaseType = BaseStencil; +public: + using GridType = GridT; + using TreeType = typename GridT::TreeType; + using ValueType = typename GridT::ValueType; + + static constexpr int SIZE = 7; + + __hostdev__ GradStencil(const GridType& grid) + : BaseType(grid) + , mInv2Dx(ValueType(0.5 / grid.voxelSize()[0])) + , mInvDx2(ValueType(4.0 * mInv2Dx * mInv2Dx)) + { + } + + __hostdev__ GradStencil(const GridType& grid, double dx) + : BaseType(grid) + , mInv2Dx(ValueType(0.5 / dx)) + , mInvDx2(ValueType(4.0 * mInv2Dx * mInv2Dx)) + { + } + + /// @brief Return the norm square of the single-sided upwind gradient + /// (computed via Godunov's scheme) at the previously buffered location. + /// + /// @note This method should not be called until the stencil + /// buffer has been populated via a call to moveTo(ijk). + __hostdev__ inline ValueType normSqGrad() const + { + return mInvDx2 * GodunovsNormSqrd(mValues[0] > ValueType(0), + mValues[0] - mValues[1], + mValues[2] - mValues[0], + mValues[0] - mValues[3], + mValues[4] - mValues[0], + mValues[0] - mValues[5], + mValues[6] - mValues[0]); + } + + /// @brief Return the gradient computed at the previously buffered + /// location by second order central differencing. + /// + /// @note This method should not be called until the stencil + /// buffer has been populated via a call to moveTo(ijk). + __hostdev__ inline Vec3 gradient() const + { + return Vec3(mValues[2] - mValues[1], + mValues[4] - mValues[3], + mValues[6] - mValues[5])*mInv2Dx; + } + /// @brief Return the first-order upwind gradient corresponding to the direction V. + /// + /// @note This method should not be called until the stencil + /// buffer has been populated via a call to moveTo(ijk). + __hostdev__ inline Vec3 gradient(const Vec3& V) const + { + return Vec3( + V[0]>0 ? mValues[0] - mValues[1] : mValues[2] - mValues[0], + V[1]>0 ? mValues[0] - mValues[3] : mValues[4] - mValues[0], + V[2]>0 ? mValues[0] - mValues[5] : mValues[6] - mValues[0])*2*mInv2Dx; + } + + /// Return the Laplacian computed at the previously buffered + /// location by second-order central differencing. + __hostdev__ inline ValueType laplacian() const + { + return mInvDx2 * (mValues[1] + mValues[2] + + mValues[3] + mValues[4] + + mValues[5] + mValues[6] - 6*mValues[0]); + } + + /// Return @c true if the sign of the value at the center point of the stencil + /// is different from the signs of any of its six nearest neighbors. + __hostdev__ inline bool zeroCrossing() const + { + return (mValues[0]>0 ? 
(mValues[1]<0 || mValues[2]<0 || mValues[3]<0 || mValues[4]<0 || mValues[5]<0 || mValues[6]<0) + : (mValues[1]>0 || mValues[2]>0 || mValues[3]>0 || mValues[4]>0 || mValues[5]>0 || mValues[6]>0)); + } + + /// @brief Compute the closest-point transform to a level set. + /// @return the closest point in index space to the surface + /// from which the level set was derived. + /// + /// @note This method assumes that the grid represents a level set + /// with distances in world units and a simple affine transfrom + /// with uniform scaling. + __hostdev__ inline Vec3 cpt() + { + const Coord& ijk = BaseType::getCenterCoord(); + const ValueType d = ValueType(mValues[0] * 0.5 * mInvDx2); // distance in voxels / (2dx^2) + const auto value = Vec3(ijk[0] - d*(mValues[2] - mValues[1]), + ijk[1] - d*(mValues[4] - mValues[3]), + ijk[2] - d*(mValues[6] - mValues[5])); + return value; + } + + /// Return linear offset for the specified stencil point relative to its center + template + __hostdev__ unsigned int pos() const { return GradPt::idx; } + +private: + + __hostdev__ inline void init(const Coord& ijk) + { + mValues[ 1] = mAcc.getValue(ijk.offsetBy(-1, 0, 0)); + mValues[ 2] = mAcc.getValue(ijk.offsetBy( 1, 0, 0)); + + mValues[ 3] = mAcc.getValue(ijk.offsetBy( 0,-1, 0)); + mValues[ 4] = mAcc.getValue(ijk.offsetBy( 0, 1, 0)); + + mValues[ 5] = mAcc.getValue(ijk.offsetBy( 0, 0,-1)); + mValues[ 6] = mAcc.getValue(ijk.offsetBy( 0, 0, 1)); + } + + template friend class BaseStencil; // allow base class to call init() + using BaseType::mAcc; + using BaseType::mValues; + const ValueType mInv2Dx, mInvDx2; +}; // GradStencil class + + +// ---------------------------- WenoStencil ---------------------------- + +namespace { // anonymous namespace for stencil-layout map + + template struct WenoPt {}; + template<> struct WenoPt< 0, 0, 0> { enum { idx = 0 }; }; + + template<> struct WenoPt<-3, 0, 0> { enum { idx = 1 }; }; + template<> struct WenoPt<-2, 0, 0> { enum { idx = 2 }; }; + template<> struct WenoPt<-1, 0, 0> { enum { idx = 3 }; }; + template<> struct WenoPt< 1, 0, 0> { enum { idx = 4 }; }; + template<> struct WenoPt< 2, 0, 0> { enum { idx = 5 }; }; + template<> struct WenoPt< 3, 0, 0> { enum { idx = 6 }; }; + + template<> struct WenoPt< 0,-3, 0> { enum { idx = 7 }; }; + template<> struct WenoPt< 0,-2, 0> { enum { idx = 8 }; }; + template<> struct WenoPt< 0,-1, 0> { enum { idx = 9 }; }; + template<> struct WenoPt< 0, 1, 0> { enum { idx =10 }; }; + template<> struct WenoPt< 0, 2, 0> { enum { idx =11 }; }; + template<> struct WenoPt< 0, 3, 0> { enum { idx =12 }; }; + + template<> struct WenoPt< 0, 0,-3> { enum { idx =13 }; }; + template<> struct WenoPt< 0, 0,-2> { enum { idx =14 }; }; + template<> struct WenoPt< 0, 0,-1> { enum { idx =15 }; }; + template<> struct WenoPt< 0, 0, 1> { enum { idx =16 }; }; + template<> struct WenoPt< 0, 0, 2> { enum { idx =17 }; }; + template<> struct WenoPt< 0, 0, 3> { enum { idx =18 }; }; + +} + +/// @brief This is a special 19-point stencil that supports optimal fifth-order WENO +/// upwinding, second-order central differencing, Laplacian, and zero-crossing test. +/// +/// @note For optimal random access performance this class +/// includes its own grid accessor. 
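Before the 19-point WenoStencil below, a usage sketch for the 7-point GradStencil just defined: one moveTo() per voxel, after which the cached values feed the central-difference gradient, the Godunov norm, the Laplacian and the closest-point transform. Grid and coordinates are the same illustrative assumptions as in the earlier sketches:

\code
// Sketch: typical level-set queries with GradStencil (not part of the header).
nanovdb::math::GradStencil<nanovdb::FloatGrid> grad(*grid);
grad.moveTo(nanovdb::Coord(10, 20, 30));
const nanovdb::Vec3f n   = grad.gradient();   // second-order central-difference gradient
const float          nsq = grad.normSqGrad(); // |grad(phi)|^2 via Godunov upwinding
const float          lap = grad.laplacian();  // 7-point Laplacian
if (grad.zeroCrossing()) {                    // surface passes between the center and a neighbour
    const nanovdb::Vec3f p = grad.cpt();      // closest point on the zero level set, in index space
}
\endcode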
+template +class WenoStencil: public BaseStencil, 19, GridT> +{ + using SelfT = WenoStencil; + using BaseType = BaseStencil; +public: + using GridType = GridT; + using TreeType = typename GridT::TreeType; + using ValueType = typename GridT::ValueType; + + static constexpr int SIZE = 19; + + __hostdev__ WenoStencil(const GridType& grid) + : BaseType(grid) + , mDx2(ValueType(Pow2(grid.voxelSize()[0]))) + , mInv2Dx(ValueType(0.5 / grid.voxelSize()[0])) + , mInvDx2(ValueType(1.0 / mDx2)) + { + } + + __hostdev__ WenoStencil(const GridType& grid, double dx) + : BaseType(grid) + , mDx2(ValueType(dx * dx)) + , mInv2Dx(ValueType(0.5 / dx)) + , mInvDx2(ValueType(1.0 / mDx2)) + { + } + + /// @brief Return the norm-square of the WENO upwind gradient (computed via + /// WENO upwinding and Godunov's scheme) at the previously buffered location. + /// + /// @note This method should not be called until the stencil + /// buffer has been populated via a call to moveTo(ijk). + __hostdev__ inline ValueType normSqGrad(ValueType isoValue = ValueType(0)) const + { + const ValueType* v = mValues; + const RealT + dP_xm = WENO5(v[ 2]-v[ 1],v[ 3]-v[ 2],v[ 0]-v[ 3],v[ 4]-v[ 0],v[ 5]-v[ 4],mDx2), + dP_xp = WENO5(v[ 6]-v[ 5],v[ 5]-v[ 4],v[ 4]-v[ 0],v[ 0]-v[ 3],v[ 3]-v[ 2],mDx2), + dP_ym = WENO5(v[ 8]-v[ 7],v[ 9]-v[ 8],v[ 0]-v[ 9],v[10]-v[ 0],v[11]-v[10],mDx2), + dP_yp = WENO5(v[12]-v[11],v[11]-v[10],v[10]-v[ 0],v[ 0]-v[ 9],v[ 9]-v[ 8],mDx2), + dP_zm = WENO5(v[14]-v[13],v[15]-v[14],v[ 0]-v[15],v[16]-v[ 0],v[17]-v[16],mDx2), + dP_zp = WENO5(v[18]-v[17],v[17]-v[16],v[16]-v[ 0],v[ 0]-v[15],v[15]-v[14],mDx2); + return mInvDx2*static_cast( + GodunovsNormSqrd(v[0]>isoValue, dP_xm, dP_xp, dP_ym, dP_yp, dP_zm, dP_zp)); + } + + /// Return the optimal fifth-order upwind gradient corresponding to the + /// direction V. + /// + /// @note This method should not be called until the stencil + /// buffer has been populated via a call to moveTo(ijk). + __hostdev__ inline Vec3 gradient(const Vec3& V) const + { + const ValueType* v = mValues; + return 2*mInv2Dx * Vec3( + V[0]>0 ? WENO5(v[ 2]-v[ 1],v[ 3]-v[ 2],v[ 0]-v[ 3], v[ 4]-v[ 0],v[ 5]-v[ 4],mDx2) + : WENO5(v[ 6]-v[ 5],v[ 5]-v[ 4],v[ 4]-v[ 0], v[ 0]-v[ 3],v[ 3]-v[ 2],mDx2), + V[1]>0 ? WENO5(v[ 8]-v[ 7],v[ 9]-v[ 8],v[ 0]-v[ 9], v[10]-v[ 0],v[11]-v[10],mDx2) + : WENO5(v[12]-v[11],v[11]-v[10],v[10]-v[ 0], v[ 0]-v[ 9],v[ 9]-v[ 8],mDx2), + V[2]>0 ? WENO5(v[14]-v[13],v[15]-v[14],v[ 0]-v[15], v[16]-v[ 0],v[17]-v[16],mDx2) + : WENO5(v[18]-v[17],v[17]-v[16],v[16]-v[ 0], v[ 0]-v[15],v[15]-v[14],mDx2)); + } + /// Return the gradient computed at the previously buffered + /// location by second-order central differencing. + /// + /// @note This method should not be called until the stencil + /// buffer has been populated via a call to moveTo(ijk). + __hostdev__ inline Vec3 gradient() const + { + return mInv2Dx * Vec3(mValues[ 4] - mValues[ 3], + mValues[10] - mValues[ 9], + mValues[16] - mValues[15]); + } + + /// Return the Laplacian computed at the previously buffered + /// location by second-order central differencing. + /// + /// @note This method should not be called until the stencil + /// buffer has been populated via a call to moveTo(ijk). 
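The payoff of caching 19 values is the one-sided fifth-order WENO reconstruction used by normSqGrad() and gradient(V): per axis, the sign of the supplied direction selects which one-sided derivative is used. A hedged sketch, with the same grid assumptions as above and an illustrative velocity:

\code
// Sketch: fifth-order WENO upwinding with WenoStencil (not part of the header).
nanovdb::math::WenoStencil<nanovdb::FloatGrid> weno(*grid);
weno.moveTo(nanovdb::Coord(10, 20, 30));
const nanovdb::Vec3f vel(1.0f, 0.0f, -2.0f);     // advection velocity, illustrative only
const nanovdb::Vec3f dphi = weno.gradient(vel);  // per axis, the sign of vel picks the upwind WENO derivative
const float          nsq  = weno.normSqGrad();   // Godunov norm built from the six one-sided WENO derivatives
\endcode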
+ __hostdev__ inline ValueType laplacian() const + { + return mInvDx2 * ( + mValues[ 3] + mValues[ 4] + + mValues[ 9] + mValues[10] + + mValues[15] + mValues[16] - 6*mValues[0]); + } + + /// Return @c true if the sign of the value at the center point of the stencil + /// differs from the sign of any of its six nearest neighbors + __hostdev__ inline bool zeroCrossing() const + { + const ValueType* v = mValues; + return (v[ 0]>0 ? (v[ 3]<0 || v[ 4]<0 || v[ 9]<0 || v[10]<0 || v[15]<0 || v[16]<0) + : (v[ 3]>0 || v[ 4]>0 || v[ 9]>0 || v[10]>0 || v[15]>0 || v[16]>0)); + } + + /// Return linear offset for the specified stencil point relative to its center + template + __hostdev__ unsigned int pos() const { return WenoPt::idx; } + +private: + __hostdev__ inline void init(const Coord& ijk) + { + mValues[ 1] = mAcc.getValue(ijk.offsetBy(-3, 0, 0)); + mValues[ 2] = mAcc.getValue(ijk.offsetBy(-2, 0, 0)); + mValues[ 3] = mAcc.getValue(ijk.offsetBy(-1, 0, 0)); + mValues[ 4] = mAcc.getValue(ijk.offsetBy( 1, 0, 0)); + mValues[ 5] = mAcc.getValue(ijk.offsetBy( 2, 0, 0)); + mValues[ 6] = mAcc.getValue(ijk.offsetBy( 3, 0, 0)); + + mValues[ 7] = mAcc.getValue(ijk.offsetBy( 0, -3, 0)); + mValues[ 8] = mAcc.getValue(ijk.offsetBy( 0, -2, 0)); + mValues[ 9] = mAcc.getValue(ijk.offsetBy( 0, -1, 0)); + mValues[10] = mAcc.getValue(ijk.offsetBy( 0, 1, 0)); + mValues[11] = mAcc.getValue(ijk.offsetBy( 0, 2, 0)); + mValues[12] = mAcc.getValue(ijk.offsetBy( 0, 3, 0)); + + mValues[13] = mAcc.getValue(ijk.offsetBy( 0, 0, -3)); + mValues[14] = mAcc.getValue(ijk.offsetBy( 0, 0, -2)); + mValues[15] = mAcc.getValue(ijk.offsetBy( 0, 0, -1)); + mValues[16] = mAcc.getValue(ijk.offsetBy( 0, 0, 1)); + mValues[17] = mAcc.getValue(ijk.offsetBy( 0, 0, 2)); + mValues[18] = mAcc.getValue(ijk.offsetBy( 0, 0, 3)); + } + + template friend class BaseStencil; // allow base class to call init() + using BaseType::mAcc; + using BaseType::mValues; + const ValueType mDx2, mInv2Dx, mInvDx2; +}; // WenoStencil class + + +// ---------------------------- CurvatureStencil ---------------------------- + +namespace { // anonymous namespace for stencil-layout map + + template struct CurvPt {}; + template<> struct CurvPt< 0, 0, 0> { enum { idx = 0 }; }; + + template<> struct CurvPt<-1, 0, 0> { enum { idx = 1 }; }; + template<> struct CurvPt< 1, 0, 0> { enum { idx = 2 }; }; + + template<> struct CurvPt< 0,-1, 0> { enum { idx = 3 }; }; + template<> struct CurvPt< 0, 1, 0> { enum { idx = 4 }; }; + + template<> struct CurvPt< 0, 0,-1> { enum { idx = 5 }; }; + template<> struct CurvPt< 0, 0, 1> { enum { idx = 6 }; }; + + template<> struct CurvPt<-1,-1, 0> { enum { idx = 7 }; }; + template<> struct CurvPt< 1,-1, 0> { enum { idx = 8 }; }; + template<> struct CurvPt<-1, 1, 0> { enum { idx = 9 }; }; + template<> struct CurvPt< 1, 1, 0> { enum { idx =10 }; }; + + template<> struct CurvPt<-1, 0,-1> { enum { idx =11 }; }; + template<> struct CurvPt< 1, 0,-1> { enum { idx =12 }; }; + template<> struct CurvPt<-1, 0, 1> { enum { idx =13 }; }; + template<> struct CurvPt< 1, 0, 1> { enum { idx =14 }; }; + + template<> struct CurvPt< 0,-1,-1> { enum { idx =15 }; }; + template<> struct CurvPt< 0, 1,-1> { enum { idx =16 }; }; + template<> struct CurvPt< 0,-1, 1> { enum { idx =17 }; }; + template<> struct CurvPt< 0, 1, 1> { enum { idx =18 }; }; + +} + +template +class CurvatureStencil: public BaseStencil, 19, GridT> +{ + using SelfT = CurvatureStencil; + using BaseType = BaseStencil; +public: + using GridType = GridT; + using TreeType = typename GridT::TreeType; + using 
ValueType = typename GridT::ValueType; + + static constexpr int SIZE = 19; + + __hostdev__ CurvatureStencil(const GridType& grid) + : BaseType(grid) + , mInv2Dx(ValueType(0.5 / grid.voxelSize()[0])) + , mInvDx2(ValueType(4.0 * mInv2Dx * mInv2Dx)) + { + } + + __hostdev__ CurvatureStencil(const GridType& grid, double dx) + : BaseType(grid) + , mInv2Dx(ValueType(0.5 / dx)) + , mInvDx2(ValueType(4.0 * mInv2Dx * mInv2Dx)) + { + } + + /// @brief Return the mean curvature at the previously buffered location. + /// + /// @note This method should not be called until the stencil + /// buffer has been populated via a call to moveTo(ijk). + __hostdev__ inline ValueType meanCurvature() const + { + RealT alpha, normGrad; + return this->meanCurvature(alpha, normGrad) ? + ValueType(alpha*mInv2Dx/Pow3(normGrad)) : 0; + } + + /// @brief Return the Gaussian curvature at the previously buffered location. + /// + /// @note This method should not be called until the stencil + /// buffer has been populated via a call to moveTo(ijk). + __hostdev__ inline ValueType gaussianCurvature() const + { + RealT alpha, normGrad; + return this->gaussianCurvature(alpha, normGrad) ? + ValueType(alpha*mInvDx2/Pow4(normGrad)) : 0; + } + + /// @brief Return both the mean and the Gaussian curvature at the + /// previously buffered location. + /// + /// @note This method should not be called until the stencil + /// buffer has been populated via a call to moveTo(ijk). + __hostdev__ inline void curvatures(ValueType &mean, ValueType& gauss) const + { + RealT alphaM, alphaG, normGrad; + if (this->curvatures(alphaM, alphaG, normGrad)) { + mean = ValueType(alphaM*mInv2Dx/Pow3(normGrad)); + gauss = ValueType(alphaG*mInvDx2/Pow4(normGrad)); + } else { + mean = gauss = 0; + } + } + + /// Return the mean curvature multiplied by the norm of the + /// central-difference gradient. This method is very useful for + /// mean-curvature flow of level sets! + /// + /// @note This method should not be called until the stencil + /// buffer has been populated via a call to moveTo(ijk). + __hostdev__ inline ValueType meanCurvatureNormGrad() const + { + RealT alpha, normGrad; + return this->meanCurvature(alpha, normGrad) ? + ValueType(alpha*mInvDx2/(2*Pow2(normGrad))) : 0; + } + + /// Return the mean Gaussian multiplied by the norm of the + /// central-difference gradient. + /// + /// @note This method should not be called until the stencil + /// buffer has been populated via a call to moveTo(ijk). + __hostdev__ inline ValueType gaussianCurvatureNormGrad() const + { + RealT alpha, normGrad; + return this->gaussianCurvature(alpha, normGrad) ? + ValueType(2*alpha*mInv2Dx*mInvDx2/Pow3(normGrad)) : 0; + } + + /// @brief Return both the mean and the Gaussian curvature at the + /// previously buffered location. + /// + /// @note This method should not be called until the stencil + /// buffer has been populated via a call to moveTo(ijk). + __hostdev__ inline void curvaturesNormGrad(ValueType &mean, ValueType& gauss) const + { + RealT alphaM, alphaG, normGrad; + if (this->curvatures(alphaM, alphaG, normGrad)) { + mean = ValueType(alphaM*mInvDx2/(2*Pow2(normGrad))); + gauss = ValueType(2*alphaG*mInv2Dx*mInvDx2/Pow3(normGrad)); + } else { + mean = gauss = 0; + } + } + + /// @brief Computes the minimum and maximum principal curvature at the + /// previously buffered location. + /// + /// @note This method should not be called until the stencil + /// buffer has been populated via a call to moveTo(ijk). 
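The curvature accessors above (and principalCurvatures(), whose implementation follows) are typically used together on a signed-distance field; note that the principal curvatures relate to the mean and Gaussian curvature as kappa = H -/+ sqrt(H*H - K), which is exactly what principalCurvatures() evaluates. A hedged sketch with the same grid assumptions as the earlier examples:

\code
// Sketch: curvature queries on a level set with CurvatureStencil (not part of the header).
nanovdb::math::CurvatureStencil<nanovdb::FloatGrid> curv(*grid);
curv.moveTo(nanovdb::Coord(10, 20, 30));
const float H = curv.meanCurvature();              // mean curvature
const float K = curv.gaussianCurvature();          // Gaussian curvature
float kMin, kMax;
curv.principalCurvatures(kMin, kMax);              // kMin/kMax = H -/+ sqrt(H*H - K)
const float speed = curv.meanCurvatureNormGrad();  // H * |grad(phi)|, the usual mean-curvature-flow speed
\endcode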
+ __hostdev__ inline void principalCurvatures(ValueType &min, ValueType &max) const + { + min = max = 0; + RealT alphaM, alphaG, normGrad; + if (this->curvatures(alphaM, alphaG, normGrad)) { + const RealT mean = alphaM*mInv2Dx/Pow3(normGrad); + const RealT tmp = Sqrt(mean*mean - alphaG*mInvDx2/Pow4(normGrad)); + min = ValueType(mean - tmp); + max = ValueType(mean + tmp); + } + } + + /// Return the Laplacian computed at the previously buffered + /// location by second-order central differencing. + /// + /// @note This method should not be called until the stencil + /// buffer has been populated via a call to moveTo(ijk). + __hostdev__ inline ValueType laplacian() const + { + return mInvDx2 * ( + mValues[1] + mValues[2] + + mValues[3] + mValues[4] + + mValues[5] + mValues[6] - 6*mValues[0]); + } + + /// Return the gradient computed at the previously buffered + /// location by second-order central differencing. + /// + /// @note This method should not be called until the stencil + /// buffer has been populated via a call to moveTo(ijk). + __hostdev__ inline Vec3 gradient() const + { + return Vec3( + mValues[2] - mValues[1], + mValues[4] - mValues[3], + mValues[6] - mValues[5])*mInv2Dx; + } + + /// Return linear offset for the specified stencil point relative to its center + template + __hostdev__ unsigned int pos() const { return CurvPt::idx; } + +private: + __hostdev__ inline void init(const Coord &ijk) + { + mValues[ 1] = mAcc.getValue(ijk.offsetBy(-1, 0, 0)); + mValues[ 2] = mAcc.getValue(ijk.offsetBy( 1, 0, 0)); + + mValues[ 3] = mAcc.getValue(ijk.offsetBy( 0, -1, 0)); + mValues[ 4] = mAcc.getValue(ijk.offsetBy( 0, 1, 0)); + + mValues[ 5] = mAcc.getValue(ijk.offsetBy( 0, 0, -1)); + mValues[ 6] = mAcc.getValue(ijk.offsetBy( 0, 0, 1)); + + mValues[ 7] = mAcc.getValue(ijk.offsetBy(-1, -1, 0)); + mValues[ 8] = mAcc.getValue(ijk.offsetBy( 1, -1, 0)); + mValues[ 9] = mAcc.getValue(ijk.offsetBy(-1, 1, 0)); + mValues[10] = mAcc.getValue(ijk.offsetBy( 1, 1, 0)); + + mValues[11] = mAcc.getValue(ijk.offsetBy(-1, 0, -1)); + mValues[12] = mAcc.getValue(ijk.offsetBy( 1, 0, -1)); + mValues[13] = mAcc.getValue(ijk.offsetBy(-1, 0, 1)); + mValues[14] = mAcc.getValue(ijk.offsetBy( 1, 0, 1)); + + mValues[15] = mAcc.getValue(ijk.offsetBy( 0, -1, -1)); + mValues[16] = mAcc.getValue(ijk.offsetBy( 0, 1, -1)); + mValues[17] = mAcc.getValue(ijk.offsetBy( 0, -1, 1)); + mValues[18] = mAcc.getValue(ijk.offsetBy( 0, 1, 1)); + } + + __hostdev__ inline RealT Dx() const { return 0.5*(mValues[2] - mValues[1]); }// * 1/dx + __hostdev__ inline RealT Dy() const { return 0.5*(mValues[4] - mValues[3]); }// * 1/dx + __hostdev__ inline RealT Dz() const { return 0.5*(mValues[6] - mValues[5]); }// * 1/dx + __hostdev__ inline RealT Dxx() const { return mValues[2] - 2 * mValues[0] + mValues[1]; }// * 1/dx2 + __hostdev__ inline RealT Dyy() const { return mValues[4] - 2 * mValues[0] + mValues[3]; }// * 1/dx2} + __hostdev__ inline RealT Dzz() const { return mValues[6] - 2 * mValues[0] + mValues[5]; }// * 1/dx2 + __hostdev__ inline RealT Dxy() const { return 0.25 * (mValues[10] - mValues[ 8] + mValues[ 7] - mValues[ 9]); }// * 1/dx2 + __hostdev__ inline RealT Dxz() const { return 0.25 * (mValues[14] - mValues[12] + mValues[11] - mValues[13]); }// * 1/dx2 + __hostdev__ inline RealT Dyz() const { return 0.25 * (mValues[18] - mValues[16] + mValues[15] - mValues[17]); }// * 1/dx2 + + __hostdev__ inline bool meanCurvature(RealT& alpha, RealT& normGrad) const + { + // For performance all finite differences are unscaled wrt dx + const RealT Dx 
= this->Dx(), Dy = this->Dy(), Dz = this->Dz(), + Dx2 = Dx*Dx, Dy2 = Dy*Dy, Dz2 = Dz*Dz, normGrad2 = Dx2 + Dy2 + Dz2; + if (normGrad2 <= Tolerance::value()) { + alpha = normGrad = 0; + return false; + } + const RealT Dxx = this->Dxx(), Dyy = this->Dyy(), Dzz = this->Dzz(); + alpha = Dx2*(Dyy + Dzz) + Dy2*(Dxx + Dzz) + Dz2*(Dxx + Dyy) - + 2*(Dx*(Dy*this->Dxy() + Dz*this->Dxz()) + Dy*Dz*this->Dyz());// * 1/dx^4 + normGrad = Sqrt(normGrad2); // * 1/dx + return true; + } + + __hostdev__ inline bool gaussianCurvature(RealT& alpha, RealT& normGrad) const + { + // For performance all finite differences are unscaled wrt dx + const RealT Dx = this->Dx(), Dy = this->Dy(), Dz = this->Dz(), + Dx2 = Dx*Dx, Dy2 = Dy*Dy, Dz2 = Dz*Dz, normGrad2 = Dx2 + Dy2 + Dz2; + if (normGrad2 <= Tolerance::value()) { + alpha = normGrad = 0; + return false; + } + const RealT Dxx = this->Dxx(), Dyy = this->Dyy(), Dzz = this->Dzz(), + Dxy = this->Dxy(), Dxz = this->Dxz(), Dyz = this->Dyz(); + alpha = Dx2*(Dyy*Dzz - Dyz*Dyz) + Dy2*(Dxx*Dzz - Dxz*Dxz) + Dz2*(Dxx*Dyy - Dxy*Dxy) + + 2*( Dy*Dz*(Dxy*Dxz - Dyz*Dxx) + Dx*Dz*(Dxy*Dyz - Dxz*Dyy) + Dx*Dy*(Dxz*Dyz - Dxy*Dzz) );// * 1/dx^6 + normGrad = Sqrt(normGrad2); // * 1/dx + return true; + } + + __hostdev__ inline bool curvatures(RealT& alphaM, RealT& alphaG, RealT& normGrad) const + { + // For performance all finite differences are unscaled wrt dx + const RealT Dx = this->Dx(), Dy = this->Dy(), Dz = this->Dz(), + Dx2 = Dx*Dx, Dy2 = Dy*Dy, Dz2 = Dz*Dz, normGrad2 = Dx2 + Dy2 + Dz2; + if (normGrad2 <= Tolerance::value()) { + alphaM = alphaG =normGrad = 0; + return false; + } + const RealT Dxx = this->Dxx(), Dyy = this->Dyy(), Dzz = this->Dzz(), + Dxy = this->Dxy(), Dxz = this->Dxz(), Dyz = this->Dyz(); + alphaM = Dx2*(Dyy + Dzz) + Dy2*(Dxx + Dzz) + Dz2*(Dxx + Dyy) - + 2*(Dx*(Dy*Dxy + Dz*Dxz) + Dy*Dz*Dyz);// *1/dx^4 + alphaG = Dx2*(Dyy*Dzz - Dyz*Dyz) + Dy2*(Dxx*Dzz - Dxz*Dxz) + Dz2*(Dxx*Dyy - Dxy*Dxy) + + 2*( Dy*Dz*(Dxy*Dxz - Dyz*Dxx) + Dx*Dz*(Dxy*Dyz - Dxz*Dyy) + Dx*Dy*(Dxz*Dyz - Dxy*Dzz) );// *1/dx^6 + normGrad = Sqrt(normGrad2); // * 1/dx + return true; + } + + template friend class BaseStencil; // allow base class to call init() + using BaseType::mAcc; + using BaseType::mValues; + const ValueType mInv2Dx, mInvDx2; +}; // CurvatureStencil class + +}// namespace math + +} // end nanovdb namespace + +#endif // NANOVDB_MATH_STENCILS_HAS_BEEN_INCLUDED diff --git a/external/nanovdb/tools/CreateNanoGrid.h b/external/nanovdb/tools/CreateNanoGrid.h new file mode 100644 index 00000000..6f1ce040 --- /dev/null +++ b/external/nanovdb/tools/CreateNanoGrid.h @@ -0,0 +1,2073 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +/*! + \file nanovdb/tools/CreateNanoGrid.h + + \author Ken Museth + + \date June 26, 2020 + + \note In the examples below we assume that @c srcGrid is a exiting grid of type + SrcGridT = @c openvdb::FloatGrid, @c openvdb::FloatGrid or @c nanovdb::tools::build::FloatGrid. + + \brief Convert any grid to a nanovdb grid of the same type, e.g. float->float + \code + auto handle = nanovdb::tools::createNanoGrid(srcGrid); + auto *dstGrid = handle.grid(); + \endcode + + \brief Convert a grid to a nanovdb grid of a different type, e.g. 
float->half + \code + auto handle = nanovdb::tools::createNanoGrid(srcGrid); + auto *dstGrid = handle.grid(); + \endcode + + \brief Convert a grid to a nanovdb grid of the same type but using a CUDA buffer + \code + auto handle = nanovdb::tools::createNanoGrid(srcGrid); + auto *dstGrid = handle.grid(); + \endcode + + \brief Create a nanovdb grid that indices values in an existing source grid of any type. + If DstBuildT = nanovdb::ValueIndex both active and in-active values are indexed + and if DstBuildT = nanovdb::ValueOnIndex only active values are indexed. + \code + using DstBuildT = nanovdb::ValueIndex;// index both active an inactive values + auto handle = nanovdb::tools::createNanoGridSrcGridT,DstBuildT>(srcGrid,0,false,false);//no blind data, tile values or stats + auto *dstGrid = handle.grid(); + \endcode + + \brief Create a NanoVDB grid from scratch + \code +#if defined(NANOVDB_USE_OPENVDB) && !defined(__CUDACC__) + using SrcGridT = openvdb::FloatGrid; +#else + using SrcGridT = nanovdb::tools::build::FloatGrid; +#endif + SrcGridT srcGrid(0.0f);// create an empty source grid + auto srcAcc = srcGrid.getAccessor();// create an accessor + srcAcc.setValue(nanovdb::Coord(1,2,3), 1.0f);// set a voxel value + + auto handle = nanovdb::tools::createNanoGrid(srcGrid);// convert source grid to a grid handle + auto dstGrid = handle.grid();// get a pointer to the destination grid + \endcode + + \brief Convert a base-pointer to an openvdb grid, denoted srcGrid, to a nanovdb + grid of the same type, e.g. float -> float or openvdb::Vec3f -> nanovdb::Vec3f + \code + auto handle = nanovdb::openToNanoVDB(*srcGrid);// convert source grid to a grid handle + auto dstGrid = handle.grid();// get a pointer to the destination grid + \endcode + + \brief Converts any existing grid to a NanoVDB grid, for example: + nanovdb::tools::build::Grid -> nanovdb::Grid + nanovdb::Grid -> nanovdb::Grid + nanovdb::Grid -> nanovdb::Grid + openvdb::Grid -> nanovdb::Grid + openvdb::Grid -> nanovdb::Grid + openvdb::Grid -> nanovdb::Grid + openvdb::Grid -> nanovdb::Grid + + \note This files replaces GridBuilder.h, IndexGridBuilder.h and OpenToNanoVDB.h +*/ + +#ifndef NANOVDB_TOOLS_CREATENANOGRID_H_HAS_BEEN_INCLUDED +#define NANOVDB_TOOLS_CREATENANOGRID_H_HAS_BEEN_INCLUDED + +#if defined(NANOVDB_USE_OPENVDB) && !defined(__CUDACC__) +#include +#include +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include // for nanovdb::math::DitherLUT + +#include +#include +#include +#include // for memcpy +#include + +namespace nanovdb {// ============================================================================ + +namespace tools {// ============================================================================== + +// Forward declarations (defined below) +template class CreateNanoGrid; +class AbsDiff; +template struct MapToNano; + +//================================================================================================ + +#if defined(NANOVDB_USE_OPENVDB) && !defined(__CUDACC__) +/// @brief Forward declaration of free-standing function that converts an OpenVDB GridBase into a NanoVDB GridHandle +/// @tparam BufferT Type of the buffer used to allocate the destination grid +/// @param base Shared pointer to a base openvdb grid to be converted +/// @param sMode Mode for computing statistics of the destination grid +/// @param cMode Mode for computing checksums of the destination grid +/// @param verbose Mode of verbosity +/// @return Handle to the destination 
NanoGrid +template +GridHandle +openToNanoVDB(const openvdb::GridBase::Ptr& base, + StatsMode sMode = StatsMode::Default, + CheckMode cMode = CheckMode::Default, + int verbose = 0); +#endif + +//================================================================================================ + +/// @brief Freestanding function that creates a NanoGrid from any source grid +/// @tparam SrcGridT Type of in input (source) grid, e.g. openvdb::Grid or nanovdb::Grid +/// @tparam DstBuildT Type of values in the output (destination) nanovdb Grid, e.g. float or nanovdb::Fp16 +/// @tparam BufferT Type of the buffer used ti allocate the destination grid +/// @param srcGrid Input (source) grid to be converted +/// @param sMode Mode for computing statistics of the destination grid +/// @param cMode Mode for computing checksums of the destination grid +/// @param verbose Mode of verbosity +/// @param buffer Instance of a buffer used for allocation +/// @return Handle to the destination NanoGrid +template::type, + typename BufferT = HostBuffer> +typename util::disable_if::is_index || BuildTraits::is_Fp, GridHandle>::type +createNanoGrid(const SrcGridT &srcGrid, + StatsMode sMode = StatsMode::Default, + CheckMode cMode = CheckMode::Default, + int verbose = 0, + const BufferT &buffer = BufferT()); + +//================================================================================================ + +/// @brief Freestanding function that creates a NanoGrid or NanoGrid from any source grid +/// @tparam SrcGridT Type of in input (source) grid, e.g. openvdb::Grid or nanovdb::Grid +/// @tparam DstBuildT If ValueIndex all (active and inactive) values are indexed and if +/// it is ValueOnIndex only active values are indexed. +/// @tparam BufferT BufferT Type of the buffer used ti allocate the destination grid +/// @param channels If non-zero the values (active or all) in @c srcGrid are encoded as blind +/// data in the output index grid. @c channels indicates the number of copies +/// of these blind data +/// @param includeStats If true all tree nodes will includes indices for stats, i.e. min/max/avg/std-div +/// @param includeTiles If false on values in leaf nodes are indexed +/// @param verbose Mode of verbosity +/// @param buffer Instance of a buffer used for allocation +/// @return Handle to the destination NanoGrid where T = ValueIndex or ValueOnIndex +template::type, + typename BufferT = HostBuffer> +typename util::enable_if::is_index, GridHandle>::type +createNanoGrid(const SrcGridT &srcGrid, + uint32_t channels = 0u, + bool includeStats = true, + bool includeTiles = true, + int verbose = 0, + const BufferT &buffer = BufferT()); + +//================================================================================================ + +/// @brief Freestanding function to create a NanoGrid from any source grid +/// @tparam SrcGridT Type of in input (source) grid, e.g. openvdb::Grid or nanovdb::Grid +/// @tparam DstBuildT = FpN, i.e. variable bit-width of the output grid +/// @tparam OracleT Type of the oracle used to determine the local bit-width, i.e. 
N in FpN +/// @tparam BufferT Type of the buffer used to allocate the destination grid +/// @param srcGrid Input (source) grid to be converted +/// @param ditherOn switch to enable or disable dithering of quantization error +/// @param sMode Mode for computing statistics of the destination grid +/// @param cMode Mode for computing checksums of the destination grid +/// @param verbose Mode of verbosity +/// @param oracle Instance of a oracle used to determine the local bit-width, i.e. N in FpN +/// @param buffer Instance of a buffer used for allocation +/// @return Handle to the destination NanoGrid +template::type, + typename OracleT = AbsDiff, + typename BufferT = HostBuffer> +typename util::enable_if::value, GridHandle>::type +createNanoGrid(const SrcGridT &srcGrid, + StatsMode sMode = StatsMode::Default, + CheckMode cMode = CheckMode::Default, + bool ditherOn = false, + int verbose = 0, + const OracleT &oracle = OracleT(), + const BufferT &buffer = BufferT()); + +//================================================================================================ + +/// @brief Freestanding function to create a NanoGrid from any source grid, X=4,8,16 +/// @tparam SrcGridT Type of in input (source) grid, e.g. openvdb::Grid or nanovdb::Grid +/// @tparam DstBuildT = Fp4, Fp8 or Fp16, i.e. quantization bit-width of the output grid +/// @tparam BufferT Type of the buffer used to allocate the destination grid +/// @param srcGrid Input (source) grid to be converted +/// @param ditherOn switch to enable or disable dithering of quantization error +/// @param sMode Mode for computing statistics of the destination grid +/// @param cMode Mode for computing checksums of the destination grid +/// @param verbose Mode of verbosity +/// @param buffer Instance of a buffer used for allocation +/// @return Handle to the destination NanoGrid +template::type, + typename BufferT = HostBuffer> +typename util::enable_if::is_FpX, GridHandle>::type +createNanoGrid(const SrcGridT &srcGrid, + StatsMode sMode = StatsMode::Default, + CheckMode cMode = CheckMode::Default, + bool ditherOn = false, + int verbose = 0, + const BufferT &buffer = BufferT()); + +//================================================================================================ + +/// @brief Compression oracle based on absolute difference +class AbsDiff +{ + float mTolerance;// absolute error tolerance +public: + /// @note The default value of -1 means it's un-initialized! + AbsDiff(float tolerance = -1.0f) : mTolerance(tolerance) {} + AbsDiff(const AbsDiff&) = default; + ~AbsDiff() = default; + operator bool() const {return mTolerance>=0.0f;} + void init(nanovdb::GridClass gClass, float background) { + if (gClass == GridClass::LevelSet) { + static const float halfWidth = 3.0f; + mTolerance = 0.1f * background / halfWidth;// range of ls: [-3dx; 3dx] + } else if (gClass == GridClass::FogVolume) { + mTolerance = 0.01f;// range of FOG volumes: [0;1] + } else { + mTolerance = 0.0f; + } + } + void setTolerance(float tolerance) { mTolerance = tolerance; } + float getTolerance() const { return mTolerance; } + /// @brief Return true if the approximate value is within the accepted + /// absolute error bounds of the exact value. 
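The FpN overload above is paired with this AbsDiff oracle (or the RelDiff variant further down): for each leaf node the oracle decides whether a candidate bit-width keeps the quantization error acceptable. A hedged sketch of a variable bit-rate conversion — SrcGridT/srcGrid are the same assumptions as in the examples at the top of this file, and it uses the CreateNanoGrid converter class declared above and defined later in the file rather than the free function, so no statistics/checksum arguments need to be spelled out:

\code
// Sketch: FpN conversion driven by an AbsDiff tolerance (not part of the header).
nanovdb::tools::AbsDiff oracle(0.01f);                 // accept approximations within +/-0.01 of the exact value
nanovdb::tools::CreateNanoGrid<SrcGridT> converter(srcGrid);
converter.enableDithering(true);                       // optional: randomize the quantization error
auto  handle  = converter.getHandle<nanovdb::FpN>(oracle);
auto* dstGrid = handle.grid<nanovdb::FpN>();
\endcode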
+ /// + /// @details Required member method + bool operator()(float exact, float approx) const + { + return math::Abs(exact - approx) <= mTolerance; + } +};// AbsDiff + +inline std::ostream& operator<<(std::ostream& os, const AbsDiff& diff) +{ + os << "Absolute tolerance: " << diff.getTolerance(); + return os; +} + +//================================================================================================ + +/// @brief Compression oracle based on relative difference +class RelDiff +{ + float mTolerance;// relative error tolerance +public: + /// @note The default value of -1 means it's un-initialized! + RelDiff(float tolerance = -1.0f) : mTolerance(tolerance) {} + RelDiff(const RelDiff&) = default; + ~RelDiff() = default; + operator bool() const {return mTolerance>=0.0f;} + void setTolerance(float tolerance) { mTolerance = tolerance; } + float getTolerance() const { return mTolerance; } + /// @brief Return true if the approximate value is within the accepted + /// relative error bounds of the exact value. + /// + /// @details Required member method + bool operator()(float exact, float approx) const + { + return math::Abs(exact - approx)/math::Max(math::Abs(exact), math::Abs(approx)) <= mTolerance; + } +};// RelDiff + +inline std::ostream& operator<<(std::ostream& os, const RelDiff& diff) +{ + os << "Relative tolerance: " << diff.getTolerance(); + return os; +} + +//================================================================================================ + +/// @brief The NodeAccessor provides a uniform API for accessing nodes got NanoVDB, OpenVDB and build Grids +/// +/// @note General implementation that works with nanovdb::tools::build::Grid +template +class NodeAccessor +{ +public: + static constexpr bool IS_OPENVDB = false; + static constexpr bool IS_NANOVDB = false; + using BuildType = typename GridT::BuildType; + using ValueType = typename GridT::ValueType; + using GridType = GridT; + using TreeType = typename GridT::TreeType; + using RootType = typename TreeType::RootNodeType; + template + using NodeType = typename NodeTrait::type; + NodeAccessor(const GridT &grid) : mMgr(const_cast(grid)) {} + const GridType& grid() const {return mMgr.grid();} + const TreeType& tree() const {return mMgr.tree();} + const RootType& root() const {return mMgr.root();} + uint64_t nodeCount(int level) const { return mMgr.nodeCount(level); } + template + const NodeType& node(uint32_t i) const {return mMgr.template node(i); } + const std::string& getName() const {return this->grid().getName();}; + bool hasLongGridName() const {return this->grid().getName().length() >= GridData::MaxNameSize;} + const nanovdb::Map& map() const {return this->grid().map();} + GridClass gridClass() const {return this->grid().gridClass();} +private: + build::NodeManager mMgr; +};// NodeAccessor + +//================================================================================================ + +/// @brief Template specialization for nanovdb::Grid which is special since its NodeManage +/// uses a handle in order to support node access on the GPU! 
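All NodeAccessor specializations (the generic one above, the NanoVDB one that follows, and the OpenVDB ones further down) expose the same small surface — grid(), tree(), root(), nodeCount(level), node<LEVEL>(i), getName(), map(), gridClass() — and that surface is all CreateNanoGrid relies on. Code written against it is therefore source-grid agnostic; printTreeShape below is a hypothetical helper, not part of this header, and requires <iostream>:

\code
// Sketch: a source-grid-agnostic helper written against the NodeAccessor surface.
template<typename SrcGridT>
void printTreeShape(const SrcGridT& srcGrid)
{
    nanovdb::tools::NodeAccessor<SrcGridT> acc(srcGrid);
    std::cout << acc.getName()    << ": "
              << acc.nodeCount(2) << " upper, "
              << acc.nodeCount(1) << " lower, "
              << acc.nodeCount(0) << " leaf nodes\n";
}
\endcode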
+template +class NodeAccessor< NanoGrid > +{ +public: + static constexpr bool IS_OPENVDB = false; + static constexpr bool IS_NANOVDB = true; + using BuildType = BuildT; + using BufferType = HostBuffer; + using GridType = NanoGrid; + using ValueType = typename GridType::ValueType; + using TreeType = typename GridType::TreeType; + using RootType = typename TreeType::RootType; + template + using NodeType = typename NodeTrait::type; + NodeAccessor(const GridType &grid) + : mHandle(createNodeManager(grid)) + , mMgr(*(mHandle.template mgr())) {} + const GridType& grid() const {return mMgr.grid();} + const TreeType& tree() const {return mMgr.tree();} + const RootType& root() const {return mMgr.root();} + uint64_t nodeCount(int level) const { return mMgr.nodeCount(level); } + template + const NodeType& node(uint32_t i) const {return mMgr.template node(i); } + std::string getName() const {return std::string(this->grid().gridName());}; + bool hasLongGridName() const {return this->grid().hasLongGridName();} + const nanovdb::Map& map() const {return this->grid().map();} + GridClass gridClass() const {return this->grid().gridClass();} +private: + NodeManagerHandle mHandle; + const NodeManager &mMgr; +};// NodeAccessor + +//================================================================================================ + +/// @brief Trait that maps any type to the corresponding nanovdb type +/// @tparam T Type to be mapped +template +struct MapToNano { using type = T; }; + +#if defined(NANOVDB_USE_OPENVDB) && !defined(__CUDACC__) + +template<> +struct MapToNano {using type = nanovdb::ValueMask;}; +template +struct MapToNano>{using type = nanovdb::math::Vec3;}; +template +struct MapToNano>{using type = nanovdb::math::Vec4;}; +template<> +struct MapToNano {using type = uint32_t;}; +template<> +struct MapToNano {using type = uint32_t;}; + +/// Templated Grid with default 32->16->8 configuration +template +using OpenLeaf = openvdb::tree::LeafNode; +template +using OpenLower = openvdb::tree::InternalNode,4>; +template +using OpenUpper = openvdb::tree::InternalNode,5>; +template +using OpenRoot = openvdb::tree::RootNode>; +template +using OpenTree = openvdb::tree::Tree>; +template +using OpenGrid = openvdb::Grid>; + +//================================================================================================ + +/// @brief Template specialization for openvdb::Grid +template +class NodeAccessor> +{ +public: + static constexpr bool IS_OPENVDB = true; + static constexpr bool IS_NANOVDB = false; + using BuildType = BuildT; + using GridType = OpenGrid; + using ValueType = typename GridType::ValueType; + using TreeType = OpenTree; + using RootType = OpenRoot; + template + using NodeType = typename NodeTrait::type; + NodeAccessor(const GridType &grid) : mMgr(const_cast(grid)) { + const auto mat4 = this->grid().transform().baseMap()->getAffineMap()->getMat4(); + mMap.set(mat4, mat4.inverse()); + } + const GridType& grid() const {return mMgr.grid();} + const TreeType& tree() const {return mMgr.tree();} + const RootType& root() const {return mMgr.root();} + uint64_t nodeCount(int level) const { return mMgr.nodeCount(level); } + template + const NodeType& node(uint32_t i) const {return mMgr.template node(i); } + std::string getName() const { return this->grid().getName(); }; + bool hasLongGridName() const {return this->grid().getName().length() >= GridData::MaxNameSize;} + const nanovdb::Map& map() const {return mMap;} + GridClass gridClass() const { + switch (this->grid().getGridClass()) { + case 
openvdb::GRID_LEVEL_SET: + if (!util::is_floating_point::value) OPENVDB_THROW(openvdb::ValueError, "processGrid: Level sets are expected to be floating point types"); + return GridClass::LevelSet; + case openvdb::GRID_FOG_VOLUME: + return GridClass::FogVolume; + case openvdb::GRID_STAGGERED: + return GridClass::Staggered; + default: + return GridClass::Unknown; + } + } +private: + build::NodeManager mMgr; + nanovdb::Map mMap; +};// NodeAccessor> + +//================================================================================================ + +/// @brief Template specialization for openvdb::tools::PointIndexGrid +template <> +class NodeAccessor +{ +public: + static constexpr bool IS_OPENVDB = true; + static constexpr bool IS_NANOVDB = false; + using BuildType = openvdb::PointIndex32; + using GridType = openvdb::tools::PointIndexGrid; + using TreeType = openvdb::tools::PointIndexTree; + using RootType = typename TreeType::RootNodeType; + using ValueType = typename GridType::ValueType; + template + using NodeType = typename NodeTrait::type; + NodeAccessor(const GridType &grid) : mMgr(const_cast(grid)) { + const auto mat4 = this->grid().transform().baseMap()->getAffineMap()->getMat4(); + mMap.set(mat4, mat4.inverse()); + } + const GridType& grid() const {return mMgr.grid();} + const TreeType& tree() const {return mMgr.tree();} + const RootType& root() const {return mMgr.root();} + uint64_t nodeCount(int level) const { return mMgr.nodeCount(level); } + template + const NodeType& node(uint32_t i) const {return mMgr.template node(i); } + std::string getName() const { return this->grid().getName(); }; + bool hasLongGridName() const {return this->grid().getName().length() >= GridData::MaxNameSize;} + const nanovdb::Map& map() const {return mMap;} + GridClass gridClass() const {return GridClass::PointIndex;} +private: + build::NodeManager mMgr; + nanovdb::Map mMap; +};// NodeAccessor + +//================================================================================================ + +// @brief Template specialization for openvdb::points::PointDataGrid +template <> +class NodeAccessor +{ +public: + static constexpr bool IS_OPENVDB = true; + static constexpr bool IS_NANOVDB = false; + using BuildType = openvdb::PointDataIndex32; + using GridType = openvdb::points::PointDataGrid; + using TreeType = openvdb::points::PointDataTree; + using RootType = typename TreeType::RootNodeType; + using ValueType = typename GridType::ValueType; + template + using NodeType = typename NodeTrait::type; + NodeAccessor(const GridType &grid) : mMgr(const_cast(grid)) { + const auto mat4 = this->grid().transform().baseMap()->getAffineMap()->getMat4(); + mMap.set(mat4, mat4.inverse()); + } + const GridType& grid() const {return mMgr.grid();} + const TreeType& tree() const {return mMgr.tree();} + const RootType& root() const {return mMgr.root();} + uint64_t nodeCount(int level) const { return mMgr.nodeCount(level); } + template + const NodeType& node(uint32_t i) const {return mMgr.template node(i); } + std::string getName() const { return this->grid().getName(); }; + bool hasLongGridName() const {return this->grid().getName().length() >= GridData::MaxNameSize;} + const nanovdb::Map& map() const {return mMap;} + GridClass gridClass() const {return GridClass::PointData;} +private: + build::NodeManager mMgr; + nanovdb::Map mMap; +};// NodeAccessor + +#endif + +//================================================================================================ + +/// @brief Creates any nanovdb Grid from any source grid 
(certain combinations are obviously not allowed) +template +class CreateNanoGrid +{ +public: + // SrcGridT can be either openvdb::Grid, nanovdb::Grid or nanovdb::tools::build::Grid + using SrcNodeAccT = NodeAccessor; + using SrcBuildT = typename SrcNodeAccT::BuildType; + using SrcValueT = typename SrcNodeAccT::ValueType; + using SrcTreeT = typename SrcNodeAccT::TreeType; + using SrcRootT = typename SrcNodeAccT::RootType; + template + using SrcNodeT = typename NodeTrait::type; + + /// @brief Constructor from a source grid + /// @param srcGrid Source grid of type SrcGridT + CreateNanoGrid(const SrcGridT &srcGrid); + + /// @brief Constructor from a source node accessor (defined above) + /// @param srcNodeAcc Source node accessor of type SrcNodeAccT + CreateNanoGrid(const SrcNodeAccT &srcNodeAcc); + + /// @brief Set the level of verbosity + /// @param mode level of verbosity, mode=0 means quiet + void setVerbose(int mode = 1) { mVerbose = mode; } + + /// @brief Enable or disable dithering, i.e. randomization of the quantization error. + /// @param on enable or disable dithering + /// @warning Dithering only has an affect when DstBuildT = {Fp4, Fp8, Fp16, FpN} + void enableDithering(bool on = true) { mDitherOn = on; } + + /// @brief Set the mode used for computing statistics of the destination grid + /// @param mode specify the mode of statistics + void setStats(StatsMode mode = StatsMode::Default) { mStats = mode; } + + /// @brief Set the mode used for computing checksums of the destination grid + /// @param mode specify the mode of checksum + void setChecksum(CheckMode mode = CheckMode::Default) { mChecksum = mode; } + + /// @brief Converts the source grid into a nanovdb grid with the specified destination build type + /// @tparam DstBuildT build type of the destination, output, grid + /// @tparam BufferT Type of the buffer used for allocating the destination grid + /// @param buffer instance of the buffer use for allocation + /// @return Return an instance of a GridHandle (invoking move semantics) + /// @note This version is when DstBuildT != {FpN, ValueIndex, ValueOnIndex} + template::type, typename BufferT = HostBuffer> + typename util::disable_if::value || + BuildTraits::is_index, GridHandle>::type + getHandle(const BufferT &buffer = BufferT()); + + /// @brief Converts the source grid into a nanovdb grid with variable bit quantization + /// @tparam DstBuildT FpN, i.e. the destination grid uses variable bit quantization + /// @tparam OracleT Type of oracle used to determine the N in FpN + /// @tparam BufferT Type of the buffer used for allocating the destination grid + /// @param oracle Instance of the oracle used to determine the N in FpN + /// @param buffer instance of the buffer use for allocation + /// @return Return an instance of a GridHandle (invoking move semantics) + /// @note This version assumes DstBuildT == FpN + template::type, typename OracleT = AbsDiff, typename BufferT = HostBuffer> + typename util::enable_if::value, GridHandle>::type + getHandle(const OracleT &oracle = OracleT(), + const BufferT &buffer = BufferT()); + + /// @brief Converts the source grid into a nanovdb grid with indices to external arrays of values + /// @tparam DstBuildT ValueIndex or ValueOnIndex, i.e. 
index all or just active values + /// @tparam BufferT Type of the buffer used for allocating the destination grid + /// @param channels Number of copies of values encoded as blind data in the destination grid + /// @param includeStats Specify if statics should be indexed + /// @param includeTiles Specify if tile values, i.e. non-leaf-node-values, should be indexed + /// @param buffer instance of the buffer use for allocation + /// @return Return an instance of a GridHandle (invoking move semantics) + template::type, typename BufferT = HostBuffer> + typename util::enable_if::is_index, GridHandle>::type + getHandle(uint32_t channels = 0u, + bool includeStats = true, + bool includeTiles = true, + const BufferT &buffer = BufferT()); + + /// @brief Add blind data to the destination grid + /// @param name String name of the blind data + /// @param dataSemantic Semantics of the blind data + /// @param dataClass Class of the blind data + /// @param dataType Type of the blind data + /// @param count Element count of the blind data + /// @param size Size of each element of the blind data + /// @return Return the index used to access the blind data + uint64_t addBlindData(const std::string& name, + GridBlindDataSemantic dataSemantic, + GridBlindDataClass dataClass, + GridType dataType, + size_t count, size_t size) + { + const size_t order = mBlindMetaData.size(); + mBlindMetaData.emplace(name, dataSemantic, dataClass, dataType, order, count, size); + return order; + } + + /// @brief This method only has affect when getHandle was called with DstBuildT = ValueIndex or ValueOnIndex + /// @return Return the number of indexed values. If called before getHandle was called with + /// DstBuildT = ValueIndex or ValueOnIndex the return value is zero. Else it is a value larger than zero. + uint64_t valueCount() const {return mValIdx[0].empty() ? 0u : mValIdx[0].back();} + + /// @brief Copy values from the source grid into a provided buffer + /// @tparam DstBuildT Must be ValueIndex or ValueOnIndex, i.e. 
a index grid + /// @param buffer point in which to write values + template + typename util::enable_if::is_index>::type + copyValues(SrcValueT *buffer); + +private: + + // ========================================================= + + template + typename util::enable_if::value&&LEVEL==0), typename NodeTrait, LEVEL>::type*>::type + dstNode(uint64_t i) const { + static_assert(LEVEL==0 || LEVEL==1 || LEVEL==2, "Expected LEVEL== {0,1,2}"); + using NodeT = typename NodeTrait, LEVEL>::type; + return util::PtrAdd(mBufferPtr, mOffset[5-LEVEL]) + i; + } + template + typename util::enable_if::value && LEVEL==0, NanoLeaf*>::type + dstNode(uint64_t i) const {return util::PtrAdd>(mBufferPtr, mCodec[i].offset);} + + template NanoRoot* dstRoot() const {return util::PtrAdd>(mBufferPtr, mOffset.root);} + template NanoTree* dstTree() const {return util::PtrAdd>(mBufferPtr, mOffset.tree);} + template NanoGrid* dstGrid() const {return util::PtrAdd>(mBufferPtr, mOffset.grid);} + GridBlindMetaData* dstMeta(uint32_t i) const { return util::PtrAdd(mBufferPtr, mOffset.meta) + i;}; + + // ========================================================= + + template + typename util::disable_if::value || BuildTraits::is_index>::type + preProcess(); + + template + typename util::enable_if::is_index>::type + preProcess(uint32_t channels); + + template + typename util::enable_if::value>::type + preProcess(OracleT oracle); + + // ========================================================= + + // Below are private methods use to serialize nodes into NanoVDB + template + GridHandle initHandle(const BufferT& buffer); + + // ========================================================= + + template + inline typename util::enable_if::is_index>::type + postProcess(uint32_t channels); + + template + inline typename util::disable_if::is_index>::type + postProcess(); + + // ======================================================== + + template + typename util::disable_if::is_special>::type + processLeafs(); + + template + typename util::enable_if::is_index>::type + processLeafs(); + + template + typename util::enable_if::is_FpX>::type + processLeafs(); + + template + typename util::enable_if::value>::type + processLeafs(); + + template + typename util::enable_if::value>::type + processLeafs(); + + template + typename util::enable_if::value>::type + processLeafs(); + + // ========================================================= + + template + typename util::enable_if::is_index>::type + processInternalNodes(); + + template + typename util::enable_if::is_index>::type + processInternalNodes(); + + // ========================================================= + + template + typename util::enable_if::is_index>::type + processRoot(); + + template + typename util::enable_if::is_index>::type + processRoot(); + + // ========================================================= + + template + void processTree(); + + template + void processGrid(); + + template + typename util::enable_if::is_index, uint64_t>::type + countTileValues(uint64_t valueCount); + + template + typename util::enable_if::is_index, uint64_t>::type + countValues(); + +#if defined(NANOVDB_USE_OPENVDB) && !defined(__CUDACC__) + template + typename util::disable_if::value || + util::is_same::value, uint64_t>::type + countPoints() const; + + template + typename util::enable_if::value || + util::is_same::value, uint64_t>::type + countPoints() const; + + template + typename util::enable_if::value>::type + copyPointAttribute(size_t attIdx, AttT *attPtr); +#else + uint64_t countPoints() const 
{return 0u;} +#endif + + void* mBufferPtr;// pointer to the beginning of the destination nanovdb grid buffer + struct BufferOffsets { + uint64_t grid, tree, root, upper, lower, leaf, meta, blind, size; + uint64_t operator[](int i) const { return *(reinterpret_cast(this)+i); } + } mOffset; + int mVerbose; + uint64_t mLeafNodeSize;// non-trivial when DstBuiltT = FpN + + std::unique_ptr mSrcNodeAccPtr;// placeholder for potential local instance + const SrcNodeAccT &mSrcNodeAcc; + struct BlindMetaData; // forward declaration + std::set mBlindMetaData; // sorted according to BlindMetaData.order + struct Codec { float min, max; uint64_t offset; uint8_t log2; };// used for adaptive bit-rate quantization + std::unique_ptr mCodec;// defines a codec per leaf node when DstBuildT = FpN + StatsMode mStats; + CheckMode mChecksum; + bool mDitherOn, mIncludeStats, mIncludeTiles; + std::vector mValIdx[3];// store id of first value in node +}; // CreateNanoGrid + +//================================================================================================ + +template +CreateNanoGrid::CreateNanoGrid(const SrcGridT &srcGrid) + : mVerbose(0) + , mSrcNodeAccPtr(new SrcNodeAccT(srcGrid)) + , mSrcNodeAcc(*mSrcNodeAccPtr) + , mStats(StatsMode::Default) + , mChecksum(CheckMode::Default) + , mDitherOn(false) + , mIncludeStats(true) + , mIncludeTiles(true) +{ +} + +//================================================================================================ + +template +CreateNanoGrid::CreateNanoGrid(const SrcNodeAccT &srcNodeAcc) + : mVerbose(0) + , mSrcNodeAccPtr(nullptr) + , mSrcNodeAcc(srcNodeAcc) + , mStats(StatsMode::Default) + , mChecksum(CheckMode::Default) + , mDitherOn(false) + , mIncludeStats(true) + , mIncludeTiles(true) +{ +} + +//================================================================================================ + +template +struct CreateNanoGrid::BlindMetaData +{ + BlindMetaData(const std::string& name,// name + used to derive GridBlindDataSemantic + const std::string& type,// used to derive GridType of blind data + GridBlindDataClass dataClass, + size_t i, size_t valueCount, size_t valueSize) + : metaData(reinterpret_cast(new char[sizeof(GridBlindMetaData)])) + , order(i)// sorted id of meta data + , size(math::AlignUp(valueCount * valueSize)) + { + util::memzero(metaData, sizeof(GridBlindMetaData));// zero out all meta data + if (name.length()>=GridData::MaxNameSize) throw std::runtime_error("blind data name exceeds limit"); + std::memcpy(metaData->mName, name.c_str(), name.length() + 1); + metaData->mValueCount = valueCount; + metaData->mSemantic = BlindMetaData::mapToSemantics(name); + metaData->mDataClass = dataClass; + metaData->mDataType = BlindMetaData::mapToType(type); + metaData->mValueSize = valueSize; + NANOVDB_ASSERT(metaData->isValid()); + } + BlindMetaData(const std::string& name,// only name + GridBlindDataSemantic dataSemantic, + GridBlindDataClass dataClass, + GridType dataType, + size_t i, size_t valueCount, size_t valueSize) + : metaData(reinterpret_cast(new char[sizeof(GridBlindMetaData)])) + , order(i)// sorted id of meta data + , size(math::AlignUp(valueCount * valueSize)) + { + std::memset(metaData, 0, sizeof(GridBlindMetaData));// zero out all meta data + if (name.length()>=GridData::MaxNameSize) throw std::runtime_error("blind data name exceeds character limit"); + std::memcpy(metaData->mName, name.c_str(), name.length() + 1); + metaData->mValueCount = valueCount; + metaData->mSemantic = dataSemantic; + metaData->mDataClass = dataClass; + 
metaData->mDataType = dataType; + metaData->mValueSize = valueSize; + NANOVDB_ASSERT(metaData->isValid()); + } + ~BlindMetaData(){ delete [] reinterpret_cast(metaData); } + bool operator<(const BlindMetaData& other) const { return order < other.order; } // required by std::set + static GridType mapToType(const std::string& name) + { + GridType type = GridType::Unknown; + if ("uint32_t" == name) { + type = GridType::UInt32; + } else if ("float" == name) { + type = GridType::Float; + } else if ("vec3s"== name) { + type = GridType::Vec3f; + } else if ("int32" == name) { + type = GridType::Int32; + } else if ("int64" == name) { + type = GridType::Int64; + } + return type; + } + static GridBlindDataSemantic mapToSemantics(const std::string& name) + { + GridBlindDataSemantic semantic = GridBlindDataSemantic::Unknown; + if ("P" == name) { + semantic = GridBlindDataSemantic::PointPosition; + } else if ("V" == name) { + semantic = GridBlindDataSemantic::PointVelocity; + } else if ("Cd" == name) { + semantic = GridBlindDataSemantic::PointColor; + } else if ("N" == name) { + semantic = GridBlindDataSemantic::PointNormal; + } else if ("id" == name) { + semantic = GridBlindDataSemantic::PointId; + } + return semantic; + } + GridBlindMetaData *metaData; + const size_t order, size; +}; // CreateNanoGrid::BlindMetaData + +//================================================================================================ + +template +template +typename util::disable_if::value || + BuildTraits::is_index, GridHandle>::type +CreateNanoGrid::getHandle(const BufferT& pool) +{ + this->template preProcess(); + auto handle = this->template initHandle(pool); + this->template postProcess(); + return handle; +} // CreateNanoGrid::getHandle + +//================================================================================================ + +template +template +typename util::enable_if::value, GridHandle>::type +CreateNanoGrid::getHandle(const OracleT& oracle, const BufferT& pool) +{ + this->template preProcess(oracle); + auto handle = this->template initHandle(pool); + this->template postProcess(); + return handle; +} // CreateNanoGrid::getHandle + +//================================================================================================ + +template +template +typename util::enable_if::is_index, GridHandle>::type +CreateNanoGrid::getHandle(uint32_t channels, + bool includeStats, + bool includeTiles, + const BufferT &pool) +{ + mIncludeStats = includeStats; + mIncludeTiles = includeTiles; + this->template preProcess(channels); + auto handle = this->template initHandle(pool); + this->template postProcess(channels); + return handle; +}// CreateNanoGrid::getHandle + +//================================================================================================ + +template +template +GridHandle CreateNanoGrid::initHandle(const BufferT& pool) +{ + mOffset.grid = 0;// grid is always stored at the start of the buffer! 
+ mOffset.tree = NanoGrid::memUsage(); // grid ends and tree begins + mOffset.root = mOffset.tree + NanoTree::memUsage(); // tree ends and root node begins + mOffset.upper = mOffset.root + NanoRoot::memUsage(mSrcNodeAcc.root().getTableSize()); // root node ends and upper internal nodes begin + mOffset.lower = mOffset.upper + NanoUpper::memUsage()*mSrcNodeAcc.nodeCount(2); // upper internal nodes ends and lower internal nodes begin + mOffset.leaf = mOffset.lower + NanoLower::memUsage()*mSrcNodeAcc.nodeCount(1); // lower internal nodes ends and leaf nodes begin + mOffset.meta = mOffset.leaf + mLeafNodeSize;// leaf nodes end and blind meta data begins + mOffset.blind = mOffset.meta + sizeof(GridBlindMetaData)*mBlindMetaData.size(); // meta data ends and blind data begins + mOffset.size = mOffset.blind;// end of buffer + for (const auto& b : mBlindMetaData) mOffset.size += b.size; // accumulate all the blind data + + auto buffer = BufferT::create(mOffset.size, &pool); + mBufferPtr = buffer.data(); + + // Concurrent processing of all tree levels! + util::invoke( [&](){this->template processLeafs();}, + [&](){this->template processInternalNodes();}, + [&](){this->template processInternalNodes();}, + [&](){this->template processRoot();}, + [&](){this->template processTree();}, + [&](){this->template processGrid();} ); + + return GridHandle(std::move(buffer)); +} // CreateNanoGrid::initHandle + +//================================================================================================ + +template +template +inline typename util::disable_if::value || BuildTraits::is_index>::type +CreateNanoGrid::preProcess() +{ + if (const uint64_t pointCount = this->countPoints()) { +#if defined(NANOVDB_USE_OPENVDB) && !defined(__CUDACC__) + if constexpr(util::is_same::value) { + if (!mBlindMetaData.empty()) throw std::runtime_error("expected no blind meta data"); + this->addBlindData("index", + GridBlindDataSemantic::PointId, + GridBlindDataClass::IndexArray, + GridType::UInt32, + pointCount, + sizeof(uint32_t)); + } else if constexpr(util::is_same::value) { + if (!mBlindMetaData.empty()) throw std::runtime_error("expected no blind meta data"); + auto &srcLeaf = mSrcNodeAcc.template node<0>(0); + const auto& attributeSet = srcLeaf.attributeSet(); + const auto& descriptor = attributeSet.descriptor(); + const auto& nameMap = descriptor.map(); + for (auto it = nameMap.begin(); it != nameMap.end(); ++it) { + const size_t index = it->second; + auto& attArray = srcLeaf.constAttributeArray(index); + mBlindMetaData.emplace(it->first, // name used to derive semantics + descriptor.valueType(index), // type + it->first == "id" ? 
GridBlindDataClass::IndexArray : GridBlindDataClass::AttributeArray, // class + index, // order + pointCount, // element count + attArray.valueTypeSize()); // element size + } + } +#endif + } + if (mSrcNodeAcc.hasLongGridName()) { + this->addBlindData("grid name", + GridBlindDataSemantic::Unknown, + GridBlindDataClass::GridName, + GridType::Unknown, + mSrcNodeAcc.getName().length() + 1, 1); + } + mLeafNodeSize = mSrcNodeAcc.nodeCount(0)*NanoLeaf::DataType::memUsage(); +}// CreateNanoGrid::preProcess + +//================================================================================================ + +template +template +inline typename util::enable_if::value>::type +CreateNanoGrid::preProcess(OracleT oracle) +{ + static_assert(util::is_same::value, "preProcess: expected SrcValueT == float"); + + const size_t leafCount = mSrcNodeAcc.nodeCount(0); + if (leafCount==0) { + mLeafNodeSize = 0u; + return; + } + mCodec.reset(new Codec[leafCount]); + + if constexpr(util::is_same::value) { + if (!oracle) oracle.init(mSrcNodeAcc.gridClass(), mSrcNodeAcc.root().background()); + } + + math::DitherLUT lut(mDitherOn); + util::forEach(0, leafCount, 4, [&](const util::Range1D &r) { + for (auto i=r.begin(); i!=r.end(); ++i) { + const auto &srcLeaf = mSrcNodeAcc.template node<0>(i); + float &min = mCodec[i].min = std::numeric_limits::max(); + float &max = mCodec[i].max = -min; + for (int j=0; j<512; ++j) { + float v = srcLeaf.getValue(j); + if (vmax) max = v; + } + const float range = max - min; + uint8_t &logBitWidth = mCodec[i].log2 = 0;// 0,1,2,3,4 => 1,2,4,8,16 bits + while (range > 0.0f && logBitWidth < 4u) { + const uint32_t mask = (uint32_t(1) << (uint32_t(1) << logBitWidth)) - 1u; + const float encode = mask/range; + const float decode = range/mask; + int j = 0; + do { + const float exact = srcLeaf.getValue(j);//data[j];// exact value + const uint32_t code = uint32_t(encode*(exact - min) + lut(j)); + const float approx = code * decode + min;// approximate value + j += oracle(exact, approx) ? 1 : 513; + } while(j < 512); + if (j == 512) break; + ++logBitWidth; + } + } + }); + + auto getOffset = [&](size_t i){ + --i; + return mCodec[i].offset + NanoLeaf::DataType::memUsage(1u << mCodec[i].log2); + }; + mCodec[0].offset = NanoGrid::memUsage() + + NanoTree::memUsage() + + NanoRoot::memUsage(mSrcNodeAcc.root().getTableSize()) + + NanoUpper::memUsage()*mSrcNodeAcc.nodeCount(2) + + NanoLower::memUsage()*mSrcNodeAcc.nodeCount(1); + for (size_t i=1; iaddBlindData("grid name", + GridBlindDataSemantic::Unknown, + GridBlindDataClass::GridName, + GridType::Unknown, + mSrcNodeAcc.getName().length() + 1, 1); + } +}// CreateNanoGrid::preProcess + +//================================================================================================ + +template +template +inline typename util::enable_if::is_index, uint64_t>::type +CreateNanoGrid::countTileValues(uint64_t valueCount) +{ + const uint64_t stats = mIncludeStats ? 
4u : 0u;// minimum, maximum, average, and deviation + mValIdx[LEVEL].clear(); + mValIdx[LEVEL].resize(mSrcNodeAcc.nodeCount(LEVEL) + 1, stats);// minimum 1 entry + util::forEach(1, mValIdx[LEVEL].size(), 8, [&](const util::Range1D& r){ + for (auto i = r.begin(); i!=r.end(); ++i) { + auto &srcNode = mSrcNodeAcc.template node(i-1); + if constexpr(BuildTraits::is_onindex) {// resolved at compile time + mValIdx[LEVEL][i] += srcNode.getValueMask().countOn(); + } else { + static const uint64_t maxTileCount = uint64_t(1u) << 3*srcNode.LOG2DIM; + mValIdx[LEVEL][i] += maxTileCount - srcNode.getChildMask().countOn(); + } + } + }); + mValIdx[LEVEL][0] = valueCount; + for (size_t i=1; i + +//================================================================================================ + +template +template +inline typename util::enable_if::is_index, uint64_t>::type +CreateNanoGrid::countValues() +{ + const uint64_t stats = mIncludeStats ? 4u : 0u;// minimum, maximum, average, and deviation + uint64_t valueCount = 1u;// offset 0 corresponds to the background value + if (mIncludeTiles) { + if constexpr(BuildTraits::is_onindex) { + for (auto it = mSrcNodeAcc.root().cbeginValueOn(); it; ++it) ++valueCount; + } else { + for (auto it = mSrcNodeAcc.root().cbeginValueAll(); it; ++it) ++valueCount; + } + valueCount += stats;// optionally append stats for the root node + valueCount = countTileValues(valueCount); + valueCount = countTileValues(valueCount); + } + mValIdx[0].clear(); + mValIdx[0].resize(mSrcNodeAcc.nodeCount(0) + 1, 512u + stats);// minimum 1 entry + if constexpr(BuildTraits::is_onindex) { + util::forEach(1, mValIdx[0].size(), 8, [&](const util::Range1D& r) { + for (auto i = r.begin(); i != r.end(); ++i) { + mValIdx[0][i] = stats; + mValIdx[0][i] += mSrcNodeAcc.template node<0>(i-1).getValueMask().countOn(); + } + }); + } + mValIdx[0][0] = valueCount; + util::prefixSum(mValIdx[0], true);// inclusive prefix sum + return mValIdx[0].back(); +}// CreateNanoGrid::countValues() + +//================================================================================================ + +template +template +inline typename util::enable_if::is_index>::type +CreateNanoGrid::preProcess(uint32_t channels) +{ + const uint64_t valueCount = this->template countValues(); + mLeafNodeSize = mSrcNodeAcc.nodeCount(0)*NanoLeaf::DataType::memUsage(); + + uint32_t order = mBlindMetaData.size(); + char str[16]; + for (uint32_t i=0; i()), + GridBlindDataClass::AttributeArray, + order++, + valueCount, + sizeof(SrcValueT)); + } + if (mSrcNodeAcc.hasLongGridName()) { + this->addBlindData("grid name", + GridBlindDataSemantic::Unknown, + GridBlindDataClass::GridName, + GridType::Unknown, + mSrcNodeAcc.getName().length() + 1, 1); + } +}// preProcess + +//================================================================================================ + +template +template +inline typename util::disable_if::is_special>::type +CreateNanoGrid::processLeafs() +{ + using DstDataT = typename NanoLeaf::DataType; + using DstValueT = typename DstDataT::ValueType; + static_assert(DstDataT::FIXED_SIZE, "Expected destination LeafNode to have fixed size"); + util::forEach(0, mSrcNodeAcc.nodeCount(0), 8, [&](const util::Range1D& r) { + auto *dstLeaf = this->template dstNode(r.begin()); + for (auto i = r.begin(); i != r.end(); ++i, ++dstLeaf) { + auto &srcLeaf = mSrcNodeAcc.template node<0>(i); + if (DstDataT::padding()>0u) { + util::memzero(dstLeaf, DstDataT::memUsage()); + } else { + dstLeaf->mBBoxDif[0] = dstLeaf->mBBoxDif[1] = 
dstLeaf->mBBoxDif[2] = 0u; + dstLeaf->mFlags = 0u;// enable rendering, no bbox, no stats + dstLeaf->mMinimum = dstLeaf->mMaximum = typename DstDataT::ValueType(); + dstLeaf->mAverage = dstLeaf->mStdDevi = 0; + } + dstLeaf->mBBoxMin = srcLeaf.origin(); // copy origin of node + dstLeaf->mValueMask = srcLeaf.getValueMask(); // copy value mask + DstValueT *dst = dstLeaf->mValues; + if constexpr(util::is_same::value && SrcNodeAccT::IS_OPENVDB) { + const SrcValueT *src = srcLeaf.buffer().data(); + for (auto *end = dst + 512u; dst != end; dst += 4, src += 4) { + dst[0] = src[0]; // copy *all* voxel values in sets of four, i.e. loop-unrolling + dst[1] = src[1]; + dst[2] = src[2]; + dst[3] = src[3]; + } + } else { + for (uint32_t j=0; j<512u; ++j) *dst++ = static_cast(srcLeaf.getValue(j)); + } + } + }); +} // CreateNanoGrid::processLeafs + +//================================================================================================ + +template +template +inline typename util::enable_if::is_index>::type +CreateNanoGrid::processLeafs() +{ + using DstDataT = typename NanoLeaf::DataType; + static_assert(DstDataT::FIXED_SIZE, "Expected destination LeafNode to have fixed size"); + static_assert(DstDataT::padding()==0u, "Expected leaf nodes to have no padding"); + + util::forEach(0, mSrcNodeAcc.nodeCount(0), 8, [&](const util::Range1D& r) { + const uint8_t flags = mIncludeStats ? 16u : 0u;// 4th bit indicates stats + DstDataT *dstLeaf = this->template dstNode(r.begin());// fixed size + for (auto i = r.begin(); i != r.end(); ++i, ++dstLeaf) { + auto &srcLeaf = mSrcNodeAcc.template node<0>(i); + dstLeaf->mBBoxMin = srcLeaf.origin(); // copy origin of node + dstLeaf->mBBoxDif[0] = dstLeaf->mBBoxDif[1] = dstLeaf->mBBoxDif[2] = 0u; + dstLeaf->mFlags = flags; + dstLeaf->mValueMask = srcLeaf.getValueMask(); // copy value mask + dstLeaf->mOffset = mValIdx[0][i]; + if constexpr(BuildTraits::is_onindex) { + const uint64_t *w = dstLeaf->mValueMask.words(); +#ifdef USE_OLD_VALUE_ON_INDEX + int32_t sum = CountOn(*w++); + uint8_t *p = reinterpret_cast(&dstLeaf->mPrefixSum), *q = p + 7; + for (int j=0; j<7; ++j) { + *p++ = sum & 255u; + *q |= (sum >> 8) << j; + sum += CountOn(*w++); + } +#else + uint64_t &prefixSum = dstLeaf->mPrefixSum, sum = util::countOn(*w++); + prefixSum = sum; + for (int n = 9; n < 55; n += 9) {// n=i*9 where i=1,2,..6 + sum += util::countOn(*w++); + prefixSum |= sum << n;// each pre-fixed sum is encoded in 9 bits + } +#endif + } else { + dstLeaf->mPrefixSum = 0u; + } + if constexpr(BuildTraits::is_indexmask) dstLeaf->mMask = dstLeaf->mValueMask; + } + }); +} // CreateNanoGrid::processLeafs + +//================================================================================================ + +template +template +inline typename util::enable_if::value>::type +CreateNanoGrid::processLeafs() +{ + using DstDataT = typename NanoLeaf::DataType; + static_assert(DstDataT::FIXED_SIZE, "Expected destination LeafNode to have fixed size"); + util::forEach(0, mSrcNodeAcc.nodeCount(0), 8, [&](const util::Range1D& r) { + auto *dstLeaf = this->template dstNode(r.begin()); + for (auto i = r.begin(); i != r.end(); ++i, ++dstLeaf) { + auto &srcLeaf = mSrcNodeAcc.template node<0>(i); + if (DstDataT::padding()>0u) { + util::memzero(dstLeaf, DstDataT::memUsage()); + } else { + dstLeaf->mBBoxDif[0] = dstLeaf->mBBoxDif[1] = dstLeaf->mBBoxDif[2] = 0u; + dstLeaf->mFlags = 0u;// enable rendering, no bbox, no stats + dstLeaf->mPadding[0] = dstLeaf->mPadding[1] = 0u; + } + dstLeaf->mBBoxMin = srcLeaf.origin(); // copy 
origin of node + dstLeaf->mValueMask = srcLeaf.getValueMask(); // copy value mask + } + }); +} // CreateNanoGrid::processLeafs + +//================================================================================================ + +template +template +inline typename util::enable_if::value>::type +CreateNanoGrid::processLeafs() +{ + using DstDataT = typename NanoLeaf::DataType; + static_assert(DstDataT::FIXED_SIZE, "Expected destination LeafNode to have fixed size"); + util::forEach(0, mSrcNodeAcc.nodeCount(0), 8, [&](const util::Range1D& r) { + auto *dstLeaf = this->template dstNode(r.begin()); + for (auto i = r.begin(); i != r.end(); ++i, ++dstLeaf) { + auto &srcLeaf = mSrcNodeAcc.template node<0>(i); + if (DstDataT::padding()>0u) { + util::memzero(dstLeaf, DstDataT::memUsage()); + } else { + dstLeaf->mBBoxDif[0] = dstLeaf->mBBoxDif[1] = dstLeaf->mBBoxDif[2] = 0u; + dstLeaf->mFlags = 0u;// enable rendering, no bbox, no stats + } + dstLeaf->mBBoxMin = srcLeaf.origin(); // copy origin of node + dstLeaf->mValueMask = srcLeaf.getValueMask(); // copy value mask + if constexpr(!util::is_same::value) { + for (int j=0; j<512; ++j) dstLeaf->mValues.set(j, static_cast(srcLeaf.getValue(j))); + } else if constexpr(SrcNodeAccT::IS_OPENVDB) { + dstLeaf->mValues = *reinterpret_cast*>(srcLeaf.buffer().data()); + } else if constexpr(SrcNodeAccT::IS_NANOVDB) { + dstLeaf->mValues = srcLeaf.data()->mValues; + } else {// tools::Leaf + dstLeaf->mValues = srcLeaf.mValues; // copy value mask + } + } + }); +} // CreateNanoGrid::processLeafs + +//================================================================================================ + +template +template +inline typename util::enable_if::is_FpX>::type +CreateNanoGrid::processLeafs() +{ + using DstDataT = typename NanoLeaf::DataType; + static_assert(DstDataT::FIXED_SIZE, "Expected destination LeafNode to have fixed size"); + using ArrayT = typename DstDataT::ArrayType; + static_assert(util::is_same::value, "Expected ValueT == float"); + using FloatT = typename std::conditional=16, double, float>::type;// 16 compression and higher requires double + static constexpr FloatT UNITS = FloatT((1 << DstDataT::bitWidth()) - 1);// # of unique non-zero values + math::DitherLUT lut(mDitherOn); + + util::forEach(0, mSrcNodeAcc.nodeCount(0), 8, [&](const util::Range1D& r) { + auto *dstLeaf = this->template dstNode(r.begin()); + for (auto i = r.begin(); i != r.end(); ++i, ++dstLeaf) { + auto &srcLeaf = mSrcNodeAcc.template node<0>(i); + if (DstDataT::padding()>0u) { + util::memzero(dstLeaf, DstDataT::memUsage()); + } else { + dstLeaf->mFlags = dstLeaf->mBBoxDif[2] = dstLeaf->mBBoxDif[1] = dstLeaf->mBBoxDif[0] = 0u; + dstLeaf->mDev = dstLeaf->mAvg = dstLeaf->mMax = dstLeaf->mMin = 0u; + } + dstLeaf->mBBoxMin = srcLeaf.origin(); // copy origin of node + dstLeaf->mValueMask = srcLeaf.getValueMask(); // copy value mask + // compute extrema values + float min = std::numeric_limits::max(), max = -min; + for (uint32_t j=0; j<512u; ++j) { + const float v = srcLeaf.getValue(j); + if (v < min) min = v; + if (v > max) max = v; + } + dstLeaf->init(min, max, DstDataT::bitWidth()); + // perform quantization relative to the values in the current leaf node + const FloatT encode = UNITS/(max-min); + uint32_t offset = 0; + auto quantize = [&]()->ArrayT{ + const ArrayT tmp = static_cast(encode * (srcLeaf.getValue(offset) - min) + lut(offset)); + ++offset; + return tmp; + }; + auto *code = reinterpret_cast(dstLeaf->mCode); + if (util::is_same::value) {// resolved at compile-time + for 
(uint32_t j=0; j<128u; ++j) { + auto tmp = quantize(); + *code++ = quantize() << 4 | tmp; + tmp = quantize(); + *code++ = quantize() << 4 | tmp; + } + } else { + for (uint32_t j=0; j<128u; ++j) { + *code++ = quantize(); + *code++ = quantize(); + *code++ = quantize(); + *code++ = quantize(); + } + } + } + }); +} // CreateNanoGrid::processLeafs + +//================================================================================================ + +template +template +inline typename util::enable_if::value>::type +CreateNanoGrid::processLeafs() +{ + static_assert(util::is_same::value, "Expected SrcValueT == float"); + math::DitherLUT lut(mDitherOn); + util::forEach(0, mSrcNodeAcc.nodeCount(0), 8, [&](const util::Range1D& r) { + for (auto i = r.begin(); i != r.end(); ++i) { + auto &srcLeaf = mSrcNodeAcc.template node<0>(i); + auto *dstLeaf = this->template dstNode(i); + dstLeaf->mBBoxMin = srcLeaf.origin(); // copy origin of node + dstLeaf->mBBoxDif[0] = dstLeaf->mBBoxDif[1] = dstLeaf->mBBoxDif[2] = 0u; + const uint8_t logBitWidth = mCodec[i].log2; + dstLeaf->mFlags = logBitWidth << 5;// pack logBitWidth into 3 MSB of mFlag + dstLeaf->mValueMask = srcLeaf.getValueMask(); // copy value mask + const float min = mCodec[i].min, max = mCodec[i].max; + dstLeaf->init(min, max, uint8_t(1) << logBitWidth); + // perform quantization relative to the values in the current leaf node + uint32_t offset = 0; + float encode = 0.0f; + auto quantize = [&]()->uint8_t{ + const uint8_t tmp = static_cast(encode * (srcLeaf.getValue(offset) - min) + lut(offset)); + ++offset; + return tmp; + }; + auto *dst = reinterpret_cast(dstLeaf+1); + switch (logBitWidth) { + case 0u: {// 1 bit + encode = 1.0f/(max - min); + for (int j=0; j<64; ++j) { + uint8_t a = 0; + for (int k=0; k<8; ++k) a |= quantize() << k; + *dst++ = a; + } + } + break; + case 1u: {// 2 bits + encode = 3.0f/(max - min); + for (int j=0; j<128; ++j) { + auto a = quantize(); + a |= quantize() << 2; + a |= quantize() << 4; + *dst++ = quantize() << 6 | a; + } + } + break; + case 2u: {// 4 bits + encode = 15.0f/(max - min); + for (int j=0; j<128; ++j) { + auto a = quantize(); + *dst++ = quantize() << 4 | a; + a = quantize(); + *dst++ = quantize() << 4 | a; + } + } + break; + case 3u: {// 8 bits + encode = 255.0f/(max - min); + for (int j=0; j<128; ++j) { + *dst++ = quantize(); + *dst++ = quantize(); + *dst++ = quantize(); + *dst++ = quantize(); + } + } + break; + default: {// 16 bits - special implementation using higher bit-precision + auto *dst = reinterpret_cast(dstLeaf+1); + const double encode = 65535.0/(max - min);// note that double is required! 
+ for (int j=0; j<128; ++j) { + *dst++ = uint16_t(encode * (srcLeaf.getValue(offset) - min) + lut(offset)); ++offset; + *dst++ = uint16_t(encode * (srcLeaf.getValue(offset) - min) + lut(offset)); ++offset; + *dst++ = uint16_t(encode * (srcLeaf.getValue(offset) - min) + lut(offset)); ++offset; + *dst++ = uint16_t(encode * (srcLeaf.getValue(offset) - min) + lut(offset)); ++offset; + } + } + }// end switch + } + });// kernel +} // CreateNanoGrid::processLeafs + +//================================================================================================ + +template +template +inline typename util::enable_if::is_index>::type +CreateNanoGrid::processInternalNodes() +{ + using DstNodeT = typename NanoNode::type; + using DstValueT = typename DstNodeT::ValueType; + using DstChildT = typename NanoNode::type; + static_assert(LEVEL == 1 || LEVEL == 2, "Expected internal node"); + + const uint64_t nodeCount = mSrcNodeAcc.nodeCount(LEVEL); + if (nodeCount > 0) {// compute and temporarily encode IDs of child nodes + uint64_t childCount = 0; + auto *dstNode = this->template dstNode(0); + for (uint64_t i=0; i(static_cast(i)).getChildMask().countOn(); + } + } + + util::forEach(0, nodeCount, 4, [&](const util::Range1D& r) { + auto *dstNode = this->template dstNode(r.begin()); + for (auto i = r.begin(); i != r.end(); ++i, ++dstNode) { + auto &srcNode = mSrcNodeAcc.template node(i); + uint64_t childID = dstNode->mFlags; + if (DstNodeT::DataType::padding()>0u) { + util::memzero(dstNode, DstNodeT::memUsage()); + } else { + dstNode->mFlags = 0;// enable rendering, no bbox, no stats + dstNode->mMinimum = dstNode->mMaximum = typename DstNodeT::ValueType(); + dstNode->mAverage = dstNode->mStdDevi = 0; + } + dstNode->mBBox[0] = srcNode.origin(); // copy origin of node + dstNode->mValueMask = srcNode.getValueMask(); // copy value mask + dstNode->mChildMask = srcNode.getChildMask(); // copy child mask + for (auto it = srcNode.cbeginChildAll(); it; ++it) { + SrcValueT value{}; // default initialization + if (it.probeChild(value)) { + DstChildT *dstChild = this->template dstNode(childID++);// might be Leaf + dstNode->setChild(it.pos(), dstChild); + } else { + dstNode->setValue(it.pos(), static_cast(value)); + } + } + } + }); +} // CreateNanoGrid::processInternalNodes + +//================================================================================================ + +template +template +inline typename util::enable_if::is_index>::type +CreateNanoGrid::processInternalNodes() +{ + using DstNodeT = typename NanoNode::type; + using DstChildT = typename NanoNode::type; + static_assert(LEVEL == 1 || LEVEL == 2, "Expected internal node"); + static_assert(DstNodeT::DataType::padding()==0u, "Expected internal nodes to have no padding"); + + const uint64_t nodeCount = mSrcNodeAcc.nodeCount(LEVEL); + if (nodeCount > 0) {// compute and temporarily encode IDs of child nodes + uint64_t childCount = 0; + auto *dstNode = this->template dstNode(0); + for (uint64_t i=0; i(i).getChildMask().countOn(); + } + } + + util::forEach(0, nodeCount, 4, [&](const util::Range1D& r) { + auto *dstNode = this->template dstNode(r.begin()); + for (auto i = r.begin(); i != r.end(); ++i, ++dstNode) { + auto &srcNode = mSrcNodeAcc.template node(i); + uint64_t childID = dstNode->mFlags; + dstNode->mFlags = 0u; + dstNode->mBBox[0] = srcNode.origin(); // copy origin of node + dstNode->mValueMask = srcNode.getValueMask(); // copy value mask + dstNode->mChildMask = srcNode.getChildMask(); // copy child mask + uint64_t n = mIncludeTiles ? 
mValIdx[LEVEL][i] : 0u; + for (auto it = srcNode.cbeginChildAll(); it; ++it) { + SrcValueT value; + if (it.probeChild(value)) { + DstChildT *dstChild = this->template dstNode(childID++);// might be Leaf + dstNode->setChild(it.pos(), dstChild); + } else { + uint64_t m = 0u; + if (mIncludeTiles && !((BuildTraits::is_onindex) && dstNode->mValueMask.isOff(it.pos()))) m = n++; + dstNode->setValue(it.pos(), m); + } + } + if (mIncludeTiles && mIncludeStats) {// stats are always placed after the tile values + dstNode->mMinimum = n++; + dstNode->mMaximum = n++; + dstNode->mAverage = n++; + dstNode->mStdDevi = n++; + } else {// if not tiles or stats set stats to the background offset + dstNode->mMinimum = 0u; + dstNode->mMaximum = 0u; + dstNode->mAverage = 0u; + dstNode->mStdDevi = 0u; + } + } + }); +} // CreateNanoGrid::processInternalNodes + +//================================================================================================ + +template +template +inline typename util::enable_if::is_index>::type +CreateNanoGrid::processRoot() +{ + using DstRootT = NanoRoot; + using DstValueT = typename DstRootT::ValueType; + auto &srcRoot = mSrcNodeAcc.root(); + auto *dstRoot = this->template dstRoot(); + const uint32_t tableSize = srcRoot.getTableSize(); + if (DstRootT::DataType::padding()>0) util::memzero(dstRoot, DstRootT::memUsage(tableSize)); + dstRoot->mTableSize = tableSize; + dstRoot->mMinimum = dstRoot->mMaximum = dstRoot->mBackground = srcRoot.background(); + dstRoot->mBBox = CoordBBox(); // // set to an empty bounding box + if (tableSize==0) return; + auto *dstChild = this->template dstNode(0);// fixed size and linear in memory + auto *dstTile = dstRoot->tile(0);// fixed size and linear in memory + for (auto it = srcRoot.cbeginChildAll(); it; ++it, ++dstTile) { + SrcValueT value; + if (it.probeChild(value)) { + dstTile->setChild(it.getCoord(), dstChild++, dstRoot); + } else { + dstTile->setValue(it.getCoord(), it.isValueOn(), static_cast(value)); + } + } +} // CreateNanoGrid::processRoot + +//================================================================================================ + +template +template +inline typename util::enable_if::is_index>::type +CreateNanoGrid::processRoot() +{ + using DstRootT = NanoRoot; + auto &srcRoot = mSrcNodeAcc.root(); + auto *dstRoot = this->template dstRoot(); + const uint32_t tableSize = srcRoot.getTableSize(); + if (DstRootT::DataType::padding()>0) util::memzero(dstRoot, DstRootT::memUsage(tableSize)); + dstRoot->mTableSize = tableSize; + dstRoot->mBackground = 0u; + uint64_t valueCount = 0u;// the first entry is always the background value + dstRoot->mBBox = CoordBBox(); // set to an empty/invalid bounding box + + if (tableSize>0) { + auto *dstChild = this->template dstNode(0);// fixed size and linear in memory + auto *dstTile = dstRoot->tile(0);// fixed size and linear in memory + for (auto it = srcRoot.cbeginChildAll(); it; ++it, ++dstTile) { + SrcValueT tmp; + if (it.probeChild(tmp)) { + dstTile->setChild(it.getCoord(), dstChild++, dstRoot); + } else { + dstTile->setValue(it.getCoord(), it.isValueOn(), 0u); + if (mIncludeTiles && !((BuildTraits::is_onindex) && !dstTile->state)) dstTile->value = ++valueCount; + } + } + } + if (mIncludeTiles && mIncludeStats) {// stats are always placed after the tile values + dstRoot->mMinimum = ++valueCount; + dstRoot->mMaximum = ++valueCount; + dstRoot->mAverage = ++valueCount; + dstRoot->mStdDevi = ++valueCount; + } else if (dstRoot->padding()==0) { + dstRoot->mMinimum = 0u; + dstRoot->mMaximum = 0u; + 
dstRoot->mAverage = 0u; + dstRoot->mStdDevi = 0u; + } +} // CreateNanoGrid::processRoot + +//================================================================================================ + +template +template +void CreateNanoGrid::processTree() +{ + const uint64_t nodeCount[3] = {mSrcNodeAcc.nodeCount(0), mSrcNodeAcc.nodeCount(1), mSrcNodeAcc.nodeCount(2)}; + auto *dstTree = this->template dstTree(); + dstTree->setRoot( this->template dstRoot() ); + dstTree->setFirstNode(nodeCount[2] ? this->template dstNode(0) : nullptr); + dstTree->setFirstNode(nodeCount[1] ? this->template dstNode(0) : nullptr); + dstTree->setFirstNode(nodeCount[0] ? this->template dstNode(0) : nullptr); + + dstTree->mNodeCount[0] = static_cast(nodeCount[0]); + dstTree->mNodeCount[1] = static_cast(nodeCount[1]); + dstTree->mNodeCount[2] = static_cast(nodeCount[2]); + + // Count number of active leaf level tiles + dstTree->mTileCount[0] = util::reduce(util::Range1D(0,nodeCount[1]), uint32_t(0), [&](util::Range1D &r, uint32_t sum){ + for (auto i=r.begin(); i!=r.end(); ++i) sum += mSrcNodeAcc.template node<1>(i).getValueMask().countOn(); + return sum;}, std::plus()); + + // Count number of active lower internal node tiles + dstTree->mTileCount[1] = util::reduce(util::Range1D(0,nodeCount[2]), uint32_t(0), [&](util::Range1D &r, uint32_t sum){ + for (auto i=r.begin(); i!=r.end(); ++i) sum += mSrcNodeAcc.template node<2>(i).getValueMask().countOn(); + return sum;}, std::plus()); + + // Count number of active upper internal node tiles + dstTree->mTileCount[2] = 0; + for (auto it = mSrcNodeAcc.root().cbeginValueOn(); it; ++it) dstTree->mTileCount[2] += 1; + + // Count number of active voxels + dstTree->mVoxelCount = util::reduce(util::Range1D(0, nodeCount[0]), uint64_t(0), [&](util::Range1D &r, uint64_t sum){ + for (auto i=r.begin(); i!=r.end(); ++i) sum += mSrcNodeAcc.template node<0>(i).getValueMask().countOn(); + return sum;}, std::plus()); + + dstTree->mVoxelCount += uint64_t(dstTree->mTileCount[0]) << 9;// = 3 * 3 + dstTree->mVoxelCount += uint64_t(dstTree->mTileCount[1]) << 21;// = 3 * (3+4) + dstTree->mVoxelCount += uint64_t(dstTree->mTileCount[2]) << 36;// = 3 * (3+4+5) + +} // CreateNanoGrid::processTree + +//================================================================================================ + +template +template +void CreateNanoGrid::processGrid() +{ + auto* dstGrid = this->template dstGrid(); + dstGrid->init({GridFlags::IsBreadthFirst}, mOffset.size, mSrcNodeAcc.map(), + toGridType(), toGridClass(mSrcNodeAcc.gridClass())); + dstGrid->mBlindMetadataCount = static_cast(mBlindMetaData.size()); + dstGrid->mData1 = this->valueCount(); + +// if (!isValid(dstGrid->mGridType, dstGrid->mGridClass)) { +//#if 1 +// char str[30]; +// fprintf(stderr,"Warning: Strange combination of GridType(\"%s\") and GridClass(\"%s\"). Consider changing GridClass to \"Unknown\"\n", +// toStr(str, dstGrid->mGridType), toStr(str + 15, dstGrid->mGridClass)); +//#else +// throw std::runtime_error("Invalid combination of GridType("+std::to_string(int(dstGrid->mGridType))+ +// ") and GridClass("+std::to_string(int(dstGrid->mGridClass))+"). 
See NanoVDB.h for details!"); +//#endif +// } + util::memzero(dstGrid->mGridName, GridData::MaxNameSize);// initialize mGridName to zero + strncpy(dstGrid->mGridName, mSrcNodeAcc.getName().c_str(), GridData::MaxNameSize-1); + if (mSrcNodeAcc.hasLongGridName()) dstGrid->setLongGridNameOn();// grid name is long so store it as blind data + + // Partially process blind meta data - they will be complete in postProcess + if (mBlindMetaData.size()>0) { + auto *metaData = this->dstMeta(0); + dstGrid->mBlindMetadataOffset = util::PtrDiff(metaData, dstGrid); + dstGrid->mBlindMetadataCount = static_cast(mBlindMetaData.size()); + char *blindData = util::PtrAdd(mBufferPtr, mOffset.blind); + for (const auto &b : mBlindMetaData) { + std::memcpy(metaData, b.metaData, sizeof(GridBlindMetaData)); + metaData->setBlindData(blindData);// sets metaData.mOffset + if (metaData->mDataClass == GridBlindDataClass::GridName) strcpy(blindData, mSrcNodeAcc.getName().c_str()); + ++metaData; + blindData += b.size; + } + mBlindMetaData.clear(); + } +} // CreateNanoGrid::processGrid + +//================================================================================================ + +template +template +inline typename util::disable_if::is_index>::type +CreateNanoGrid::postProcess() +{ + if constexpr(util::is_same::value) mCodec.reset(); + auto *dstGrid = this->template dstGrid(); + updateGridStats(dstGrid, mStats); +#if defined(NANOVDB_USE_OPENVDB) && !defined(__CUDACC__) + auto *metaData = this->dstMeta(0); + if constexpr(util::is_same::value || + util::is_same::value) { + static_assert(util::is_same::value, "expected DstBuildT==uint32_t"); + auto *dstData0 = this->template dstNode(0)->data(); + dstData0->mMinimum = 0; // start of prefix sum + dstData0->mMaximum = dstData0->mValues[511u]; + for (uint64_t i=1, n=mSrcNodeAcc.nodeCount(0); imMinimum = dstData0->mMinimum + dstData0->mMaximum; + dstData1->mMaximum = dstData1->mValues[511u]; + dstData0 = dstData1; + } + for (size_t i = 0, n = dstGrid->blindDataCount(); i < n; ++i, ++metaData) { + if constexpr(util::is_same::value) { + if (metaData->mDataClass != GridBlindDataClass::IndexArray) continue; + if (metaData->mDataType == GridType::UInt32) { + uint32_t *blindData = const_cast(metaData->template getBlindData()); + util::forEach(0, mSrcNodeAcc.nodeCount(0), 16, [&](const auto& r) { + auto *dstLeaf = this->template dstNode(r.begin()); + for (auto j = r.begin(); j != r.end(); ++j, ++dstLeaf) { + uint32_t* p = blindData + dstLeaf->mMinimum; + for (uint32_t idx : mSrcNodeAcc.template node<0>(j).indices()) *p++ = idx; + } + }); + } + } else {// if constexpr(util::is_same::value) + if (metaData->mDataClass != GridBlindDataClass::AttributeArray) continue; + if (auto *blindData = dstGrid->template getBlindData(i)) { + this->template copyPointAttribute(i, blindData); + } else if (auto *blindData = dstGrid->template getBlindData(i)) { + this->template copyPointAttribute(i, reinterpret_cast(blindData)); + } else if (auto *blindData = dstGrid->template getBlindData(i)) { + this->template copyPointAttribute(i, blindData); + } else if (auto *blindData = dstGrid->template getBlindData(i)) { + this->template copyPointAttribute(i, blindData); + } else { + char str[16]; + std::cerr << "unsupported point attribute \"" << toStr(str, metaData->mDataType) << "\"\n"; + } + }// if + }// loop + } else { // if + (void)metaData; + } +#endif + updateChecksum(dstGrid, mChecksum); +}// CreateNanoGrid::postProcess + 
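+// A minimal sketch of the adaptive FpN path implemented above, assuming OpenVDB
+// support is enabled, that "srcGrid" is a hypothetical openvdb::FloatGrid, and
+// that an absolute-difference tolerance oracle such as AbsDiff (declared earlier
+// in this header) is used:
+//
+//   nanovdb::tools::CreateNanoGrid<openvdb::FloatGrid> converter(srcGrid);
+//   converter.enableDithering(true);// optional: dither during quantization
+//   auto handle = converter.getHandle<nanovdb::FpN>(nanovdb::tools::AbsDiff(0.01f));
+//
+// preProcess(oracle) picks the smallest per-leaf bit width accepted by the oracle,
+// initHandle() lays out the destination buffer, and postProcess() updates the
+// grid statistics and checksum.
+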
+//================================================================================================ + +template +template +inline typename util::enable_if::is_index>::type +CreateNanoGrid::postProcess(uint32_t channels) +{ + char str[16]; + const std::string typeName = toStr(str, toGridType()); + const uint64_t valueCount = this->valueCount(); + auto *dstGrid = this->template dstGrid(); + for (uint32_t i=0; ifindBlindData(name.c_str()); + if (j<0) throw std::runtime_error("missing " + name); + auto *metaData = this->dstMeta(j);// partially set in processGrid + metaData->mDataClass = GridBlindDataClass::ChannelArray; + metaData->mDataType = toGridType(); + SrcValueT *blindData = const_cast(metaData->template getBlindData()); + if (i>0) {// concurrent copy from previous channel + util::forEach(0,valueCount,1024,[&](const util::Range1D &r){ + SrcValueT *dst=blindData+r.begin(), *end=dst+r.size(), *src=dst-valueCount; + while(dst!=end) *dst++ = *src++; + }); + } else { + this->template copyValues(blindData); + } + }// loop over channels + updateGridStats(this->template dstGrid(), std::min(StatsMode::BBox, mStats)); + updateChecksum(dstGrid, mChecksum); +}// CreateNanoGrid::postProcess + +//================================================================================================ + +template +template +typename util::enable_if::is_index>::type +CreateNanoGrid::copyValues(SrcValueT *buffer) +{// copy values from the source grid into the provided buffer + assert(mBufferPtr && buffer); + using StatsT = typename FloatTraits::FloatType; + + if (this->valueCount()==0) this->template countValues(); + + auto copyNodeValues = [&](const auto &node, SrcValueT *v) { + if constexpr(BuildTraits::is_onindex) { + for (auto it = node.cbeginValueOn(); it; ++it) *v++ = *it; + } else { + for (auto it = node.cbeginValueAll(); it; ++it) *v++ = *it; + } + if (mIncludeStats) { + if constexpr(SrcNodeAccT::IS_NANOVDB) {// resolved at compile time + *v++ = node.minimum(); + *v++ = node.maximum(); + if constexpr(util::is_same::value) { + *v++ = node.average(); + *v++ = node.stdDeviation(); + } else {// eg when SrcValueT=Vec3f and StatsT=float + *v++ = SrcValueT(node.average()); + *v++ = SrcValueT(node.stdDeviation()); + } + } else {// openvdb and nanovdb::tools::build::Grid have no stats + *v++ = buffer[0];// background + *v++ = buffer[0];// background + *v++ = buffer[0];// background + *v++ = buffer[0];// background + } + } + };// copyNodeValues + + const SrcRootT &root = mSrcNodeAcc.root(); + buffer[0] = root.background();// Value array always starts with the background value + if (mIncludeTiles) { + copyNodeValues(root, buffer + 1u); + util::forEach(0, mSrcNodeAcc.nodeCount(2), 1, [&](const util::Range1D& r) { + for (auto i = r.begin(); i!=r.end(); ++i) { + copyNodeValues(mSrcNodeAcc.template node<2>(i), buffer + mValIdx[2][i]); + } + }); + util::forEach(0, mSrcNodeAcc.nodeCount(1), 1, [&](const util::Range1D& r) { + for (auto i = r.begin(); i!=r.end(); ++i) { + copyNodeValues(mSrcNodeAcc.template node<1>(i), buffer + mValIdx[1][i]); + } + }); + } + util::forEach(0, mSrcNodeAcc.nodeCount(0), 4, [&](const util::Range1D& r) { + for (auto i = r.begin(); i!=r.end(); ++i) { + copyNodeValues(mSrcNodeAcc.template node<0>(i), buffer + mValIdx[0][i]); + } + }); +}// CreateNanoGrid::copyValues + + +//================================================================================================ + +#if defined(NANOVDB_USE_OPENVDB) && !defined(__CUDACC__) + +template +template +typename util::disable_if::value || + 
util::is_same::value, uint64_t>::type +CreateNanoGrid::countPoints() const +{ + static_assert(util::is_same::value, "expected default template parameter"); + return 0u; +}// CreateNanoGrid::countPoints + +template +template +typename util::enable_if::value || + util::is_same::value, uint64_t>::type +CreateNanoGrid::countPoints() const +{ + static_assert(util::is_same::value, "expected default template parameter"); + return util::reduce(0, mSrcNodeAcc.nodeCount(0), 8, uint64_t(0), [&](auto &r, uint64_t sum) { + for (auto i=r.begin(); i!=r.end(); ++i) sum += mSrcNodeAcc.template node<0>(i).getLastValue(); + return sum;}, std::plus()); +}// CreateNanoGrid::countPoints + +template +template +typename util::enable_if::value>::type +CreateNanoGrid::copyPointAttribute(size_t attIdx, AttT *attPtr) +{ + static_assert(util::is_same::value, "Expected default parameter"); + using HandleT = openvdb::points::AttributeHandle; + util::forEach(0, mSrcNodeAcc.nodeCount(0), 16, [&](const auto& r) { + auto *dstLeaf = this->template dstNode(r.begin()); + for (auto i = r.begin(); i != r.end(); ++i, ++dstLeaf) { + auto& srcLeaf = mSrcNodeAcc.template node<0>(i); + HandleT handle(srcLeaf.constAttributeArray(attIdx)); + AttT *p = attPtr + dstLeaf->mMinimum; + for (auto iter = srcLeaf.beginIndexOn(); iter; ++iter) *p++ = handle.get(*iter); + } + }); +}// CreateNanoGrid::copyPointAttribute + +#endif + +//================================================================================================ + +template +typename util::disable_if::is_index || BuildTraits::is_Fp, GridHandle>::type +createNanoGrid(const SrcGridT &srcGrid, + StatsMode sMode, + CheckMode cMode, + int verbose, + const BufferT &buffer) +{ + CreateNanoGrid converter(srcGrid); + converter.setStats(sMode); + converter.setChecksum(cMode); + converter.setVerbose(verbose); + return converter.template getHandle(buffer); +}// createNanoGrid + +//================================================================================================ + +template +typename util::enable_if::is_index, GridHandle>::type +createNanoGrid(const SrcGridT &srcGrid, + uint32_t channels, + bool includeStats, + bool includeTiles, + int verbose, + const BufferT &buffer) +{ + CreateNanoGrid converter(srcGrid); + converter.setVerbose(verbose); + return converter.template getHandle(channels, includeStats, includeTiles, buffer); +} + +//================================================================================================ + +template +typename util::enable_if::value, GridHandle>::type +createNanoGrid(const SrcGridT &srcGrid, + StatsMode sMode, + CheckMode cMode, + bool ditherOn, + int verbose, + const OracleT &oracle, + const BufferT &buffer) +{ + CreateNanoGrid converter(srcGrid); + converter.setStats(sMode); + converter.setChecksum(cMode); + converter.enableDithering(ditherOn); + converter.setVerbose(verbose); + return converter.template getHandle(oracle, buffer); +}// createNanoGrid + +//================================================================================================ + +template +typename util::enable_if::is_FpX, GridHandle>::type +createNanoGrid(const SrcGridT &srcGrid, + StatsMode sMode, + CheckMode cMode, + bool ditherOn, + int verbose, + const BufferT &buffer) +{ + CreateNanoGrid converter(srcGrid); + converter.setStats(sMode); + converter.setChecksum(cMode); + converter.enableDithering(ditherOn); + converter.setVerbose(verbose); + return converter.template getHandle(buffer); +}// createNanoGrid + 
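+// Minimal usage sketches for the factory functions above, assuming the defaulted
+// arguments declared with the forward declarations earlier in this header and a
+// hypothetical openvdb::FloatGrid named "srcGrid":
+//
+//   // float -> float conversion with default stats, checksum and host buffer
+//   auto handle = nanovdb::tools::createNanoGrid(srcGrid);
+//
+//   // float -> ValueIndex conversion with one value channel stored as blind data
+//   auto index = nanovdb::tools::createNanoGrid<openvdb::FloatGrid,
+//                                               nanovdb::ValueIndex>(srcGrid, 1u);
+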
+//================================================================================================ + +#if defined(NANOVDB_USE_OPENVDB) && !defined(__CUDACC__) +template +GridHandle +openToNanoVDB(const openvdb::GridBase::Ptr& base, + StatsMode sMode, + CheckMode cMode, + int verbose) +{ + // We need to define these types because they are not defined in OpenVDB + using openvdb_Vec4fTree = typename openvdb::tree::Tree4::Type; + using openvdb_Vec4dTree = typename openvdb::tree::Tree4::Type; + using openvdb_Vec4fGrid = openvdb::Grid; + using openvdb_Vec4dGrid = openvdb::Grid; + using openvdb_UInt32Grid = openvdb::Grid; + + if (auto grid = openvdb::GridBase::grid(base)) { + return createNanoGrid(*grid, sMode, cMode, verbose); + } else if (auto grid = openvdb::GridBase::grid(base)) { + return createNanoGrid(*grid, sMode, cMode, verbose); + } else if (auto grid = openvdb::GridBase::grid(base)) { + return createNanoGrid(*grid, sMode, cMode, verbose); + } else if (auto grid = openvdb::GridBase::grid(base)) { + return createNanoGrid(*grid, sMode, cMode, verbose); + } else if (auto grid = openvdb::GridBase::grid(base)) { + return createNanoGrid(*grid, sMode, cMode, verbose); + } else if (auto grid = openvdb::GridBase::grid(base)) { + return createNanoGrid(*grid, sMode, cMode, verbose); + } else if (auto grid = openvdb::GridBase::grid(base)) { + return createNanoGrid(*grid, sMode, cMode, verbose); + } else if (auto grid = openvdb::GridBase::grid(base)) { + return createNanoGrid(*grid, sMode, cMode, verbose); + } else if (auto grid = openvdb::GridBase::grid(base)) { + return createNanoGrid(*grid, sMode, cMode, verbose); + } else if (auto grid = openvdb::GridBase::grid(base)) { + return createNanoGrid(*grid, sMode, cMode, verbose); + } else if (auto grid = openvdb::GridBase::grid(base)) { + return createNanoGrid(*grid, sMode, cMode, verbose); + } else if (auto grid = openvdb::GridBase::grid(base)) { + return createNanoGrid(*grid, sMode, cMode, verbose); + } else if (auto grid = openvdb::GridBase::grid(base)) { + return createNanoGrid(*grid, sMode, cMode, verbose); + } else { + OPENVDB_THROW(openvdb::RuntimeError, "Unrecognized OpenVDB grid type"); + } +}// openToNanoVDB +#endif + +}// namespace tools =============================================================================== + +} // namespace nanovdb + +#endif // NANOVDB_TOOLS_CREATENANOGRID_H_HAS_BEEN_INCLUDED diff --git a/external/nanovdb/tools/CreatePrimitives.h b/external/nanovdb/tools/CreatePrimitives.h new file mode 100644 index 00000000..95b84918 --- /dev/null +++ b/external/nanovdb/tools/CreatePrimitives.h @@ -0,0 +1,1752 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +/*! + \file nanovdb/tools/CreatePrimitives.h + + \author Ken Museth + + \date June 26, 2020 + + \brief Generates volumetric primitives, e.g. sphere, torus etc, as NanoVDB grid. + + \note This has no dependency on openvdb. 
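+
+  \par Example:
+  A minimal sketch; the defaulted arguments are documented with each factory
+  function declared below.
+  \code
+  // narrow-band level set of a sphere with a radius of 100 world units
+  auto handle = nanovdb::tools::createLevelSetSphere<float>(100.0);
+  const nanovdb::FloatGrid* grid = handle.grid<float>();
+  \endcode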
+*/ + +#ifndef NANOVDB_TOOLS_PRIMITIVES_H_HAS_BEEN_INCLUDED +#define NANOVDB_TOOLS_PRIMITIVES_H_HAS_BEEN_INCLUDED + +#define NANOVDB_PARALLEL_PRIMITIVES + +#include +#include +#include // for util::forEach and util::Range + +namespace nanovdb { + +namespace tools {// =================================================== + +/// @brief Returns a handle to a narrow-band level set of a sphere +/// +/// @param radius Radius of sphere in world units +/// @param center Center of sphere in world units +/// @param voxelSize Size of a voxel in world units +/// @param halfWidth Half-width of narrow band in voxel units +/// @param origin Origin of grid in world units +/// @param name Name of the grid +/// @param sMode Mode of computation for the statistics. +/// @param cMode Mode of computation for the checksum. +/// @param tolerance Global error tolerance use when VoxelT = FpN +/// @param ditherOn If true dithering will be applied when VoxelT = {Fp4,Fp8,Fp16,FpN} +/// @param buffer Buffer used for memory allocation by the handle +/// +/// @details The @c BuildT template parameter must be one of the following: +/// float (default), double, Fp4, Fp8, Fp16 or FpN. The @c tolerance +/// argument is only used when BuildT is set to FpN. +template +typename util::enable_if::value, GridHandle>::type +createLevelSetSphere(double radius = 100.0, + const Vec3d& center = Vec3d(0), + double voxelSize = 1.0, + double halfWidth = 3.0, + const Vec3d& origin = Vec3d(0), + const std::string& name = "sphere_ls", + StatsMode sMode = StatsMode::Default, + CheckMode cMode = CheckMode::Default, + const BufferT& buffer = BufferT()); + +template +typename util::enable_if::value, GridHandle>::type +createLevelSetSphere(double radius = 100.0, + const Vec3d& center = Vec3d(0), + double voxelSize = 1.0, + double halfWidth = 3.0, + const Vec3d& origin = Vec3d(0), + const std::string& name = "sphere_ls", + StatsMode sMode = StatsMode::Default, + CheckMode cMode = CheckMode::Default, + bool ditherOn = false, + const BufferT& buffer = BufferT()); + +template +typename util::enable_if::value, GridHandle>::type +createLevelSetSphere(double radius = 100.0, + const Vec3d& center = Vec3d(0), + double voxelSize = 1.0, + double halfWidth = 3.0, + const Vec3d& origin = Vec3d(0), + const std::string& name = "sphere_ls_FpN", + StatsMode sMode = StatsMode::Default, + CheckMode cMode = CheckMode::Default, + float tolerance = -1.0f, + bool ditherOn = false, + const BufferT& buffer = BufferT()); + +//================================================================================================ + +/// @brief Returns a handle to a sparse fog volume of a sphere such +/// that the exterior is 0 and inactive, the interior is active +/// with values varying smoothly from 0 at the surface of the +/// sphere to 1 at the halfWidth and interior of the sphere. +/// +/// @param radius Radius of sphere in world units +/// @param center Center of sphere in world units +/// @param voxelSize Size of a voxel in world units +/// @param halfWidth Half-width of narrow band in voxel units +/// @param origin Origin of grid in world units +/// @param name Name of the grid +/// @param sMode Mode of computation for the statistics. +/// @param cMode Mode of computation for the checksum. 
+/// @param tolerance Global error tolerance use when VoxelT = FpN +/// @param ditherOn If true dithering will be applied when BuildT = {Fp4,Fp8,Fp16,FpN} +/// @param buffer Buffer used for memory allocation by the handle +/// +/// @details The @c BuildT template parameter must be one of the following: +/// float (default), double, Fp4, Fp8, Fp16 or FpN. The @c tolerance +/// argument is only used when BuildT is set to FpN. +template +typename util::disable_if::value, GridHandle>::type +createFogVolumeSphere(double radius = 100.0, + const Vec3d& center = Vec3d(0.0), + double voxelSize = 1.0, + double halfWidth = 3.0, + const Vec3d& origin = Vec3d(0.0), + const std::string& name = "sphere_fog", + StatsMode sMode = StatsMode::Default, + CheckMode cMode = CheckMode::Default, + const BufferT& buffer = BufferT()); + +template +typename util::enable_if::value, GridHandle>::type +createFogVolumeSphere(double radius = 100.0, + const Vec3d& center = Vec3d(0.0), + double voxelSize = 1.0, + double halfWidth = 3.0, + const Vec3d& origin = Vec3d(0.0), + const std::string& name = "sphere_fog", + StatsMode sMode = StatsMode::Default, + CheckMode cMode = CheckMode::Default, + float tolerance = -1.0f, + bool ditherOn = false, + const BufferT& buffer = BufferT()); + +//================================================================================================ + +/// @brief Returns a handle to a PointDataGrid containing points scattered +/// on the surface of a sphere. +/// +/// @param pointsPerVoxel Number of point per voxel on on the surface +/// @param radius Radius of sphere in world units +/// @param center Center of sphere in world units +/// @param voxelSize Size of a voxel in world units +/// @param origin Origin of grid in world units +/// @param name Name of the grid +/// @param mode Mode of computation for the checksum. +/// @param buffer Buffer used for memory allocation by the handle +/// +/// @details The @c BuildT template parameter must be float (default) or double. +template +typename util::disable_if::value, GridHandle>::type +createPointSphere(int pointsPerVoxel = 1, + double radius = 100.0, + const Vec3d& center = Vec3d(0.0), + double voxelSize = 1.0, + const Vec3d& origin = Vec3d(0.0), + const std::string& name = "sphere_points", + CheckMode mode = CheckMode::Default, + const BufferT& buffer = BufferT()); + +//================================================================================================ + +/// @brief Returns a handle to a narrow-band level set of a torus in the xz-plane +/// +/// @param majorRadius Major radius of torus in world units +/// @param minorRadius Minor radius of torus in world units +/// @param center Center of torus in world units +/// @param voxelSize Size of a voxel in world units +/// @param halfWidth Half-width of narrow band in voxel units +/// @param origin Origin of grid in world units +/// @param name Name of the grid +/// @param sMode Mode of computation for the statistics. +/// @param cMode Mode of computation for the checksum. +/// @param tolerance Global error tolerance use when VoxelT = FpN +/// @param ditherOn If true dithering will be applied when VoxelT = {Fp4,Fp8,Fp16,FpN} +/// @param buffer Buffer used for memory allocation by the handle +/// +/// @details The @c BuildT template parameter must be one of the following: +/// float (default), double, Fp4, Fp8, Fp16 or FpN. The @c tolerance +/// argument is only used when BuildT is set to FpN. 
+template +typename util::disable_if::value, GridHandle>::type +createLevelSetTorus(double majorRadius = 100.0, + double minorRadius = 50.0, + const Vec3d& center = Vec3d(0.0), + double voxelSize = 1.0, + double halfWidth = 3.0, + const Vec3d& origin = Vec3d(0.0), + const std::string& name = "torus_ls", + StatsMode sMode = StatsMode::Default, + CheckMode cMode = CheckMode::Default, + const BufferT& buffer = BufferT()); + +template +typename util::enable_if::value, GridHandle>::type +createLevelSetTorus(double majorRadius = 100.0, + double minorRadius = 50.0, + const Vec3d& center = Vec3d(0.0), + double voxelSize = 1.0, + double halfWidth = 3.0, + const Vec3d& origin = Vec3d(0.0), + const std::string& name = "torus_ls", + StatsMode sMode = StatsMode::Default, + CheckMode cMode = CheckMode::Default, + float tolerance = -1.0f, + bool ditherOn = false, + const BufferT& buffer = BufferT()); + +//================================================================================================ + +/// @brief Returns a handle to a sparse fog volume of a torus in the xz-plane such +/// that the exterior is 0 and inactive, the interior is active +/// with values varying smoothly from 0 at the surface of the +/// torus to 1 at the halfWidth and interior of the torus. +/// +/// @param majorRadius Major radius of torus in world units +/// @param minorRadius Minor radius of torus in world units +/// @param center Center of torus in world units +/// @param voxelSize Size of a voxel in world units +/// @param halfWidth Half-width of narrow band in voxel units +/// @param origin Origin of grid in world units +/// @param name Name of the grid +/// @param sMode Mode of computation for the statistics. +/// @param cMode Mode of computation for the checksum. +/// @param tolerance Global error tolerance use when VoxelT = FpN +/// @param ditherOn If true dithering will be applied when VoxelT = {Fp4,Fp8,Fp16,FpN} +/// @param buffer Buffer used for memory allocation by the handle +/// +/// @details The @c BuildT template parameter must be one of the following: +/// float (default), double, Fp4, Fp8, Fp16 or FpN. The @c tolerance +/// argument is only used when BuildT is set to FpN. +template +typename util::disable_if::value, GridHandle>::type +createFogVolumeTorus(double majorRadius = 100.0, + double minorRadius = 50.0, + const Vec3d& center = Vec3d(0.0), + double voxelSize = 1.0, + double halfWidth = 3.0, + const Vec3d& origin = Vec3d(0.0), + const std::string& name = "torus_fog", + StatsMode sMode = StatsMode::Default, + CheckMode cMode = CheckMode::Default, + const BufferT& buffer = BufferT()); + +template +typename util::enable_if::value, GridHandle>::type +createFogVolumeTorus(double majorRadius = 100.0, + double minorRadius = 50.0, + const Vec3d& center = Vec3d(0.0), + double voxelSize = 1.0, + double halfWidth = 3.0, + const Vec3d& origin = Vec3d(0.0), + const std::string& name = "torus_fog_FpN", + StatsMode sMode = StatsMode::Default, + CheckMode cMode = CheckMode::Default, + float tolerance = -1.0f, + bool ditherOn = false, + const BufferT& buffer = BufferT()); + +//================================================================================================ + +/// @brief Returns a handle to a PointDataGrid containing points scattered +/// on the surface of a torus. 
+/// +/// @param pointsPerVoxel Number of point per voxel on on the surface +/// @param majorRadius Major radius of torus in world units +/// @param minorRadius Minor radius of torus in world units +/// @param center Center of torus in world units +/// @param voxelSize Size of a voxel in world units +/// @param origin Origin of grid in world units +/// @param name Name of the grid +/// @param cMode Mode of computation for the checksum. +/// @param buffer Buffer used for memory allocation by the handle +// +/// @details The @c BuildT template parameter must be float (default) or double. +template +typename util::disable_if::value, GridHandle>::type +createPointTorus(int pointsPerVoxel = 1, // half-width of narrow band in voxel units + double majorRadius = 100.0, // major radius of torus in world units + double minorRadius = 50.0, // minor radius of torus in world units + const Vec3d& center = Vec3d(0.0), // center of torus in world units + double voxelSize = 1.0, // size of a voxel in world units + const Vec3d& origin = Vec3d(0.0f), // origin of grid in world units + const std::string& name = "torus_points", // name of grid + CheckMode cMode = CheckMode::Default, + const BufferT& buffer = BufferT()); + +//================================================================================================ + +/// @brief Returns a handle to a narrow-band level set of a box +/// +/// @param width Width of box in world units +/// @param height Height of box in world units +/// @param depth Depth of box in world units +/// @param center Center of box in world units +/// @param voxelSize Size of a voxel in world units +/// @param halfWidth Half-width of narrow band in voxel units +/// @param origin Origin of grid in world units +/// @param name Name of the grid +/// @param sMode Mode of computation for the statistics. +/// @param cMode Mode of computation for the checksum. +/// @param tolerance Global error tolerance use when VoxelT = FpN +/// @param ditherOn If true dithering will be applied when VoxelT = {Fp4,Fp8,Fp16,FpN} +/// @param buffer Buffer used for memory allocation by the handle +/// +/// @details The @c BuildT template parameter must be one of the following: +/// float (default), double, Fp4, Fp8, Fp16 or FpN. The @c tolerance +/// argument is only used when BuildT is set to FpN. 
+template +typename util::disable_if::value, GridHandle>::type +createLevelSetBox(double width = 40.0, + double height = 60.0, + double depth = 100.0, + const Vec3d& center = Vec3d(0.0), + double voxelSize = 1.0, + double halfWidth = 3.0, + const Vec3d& origin = Vec3d(0.0), + const std::string& name = "box_ls", + StatsMode sMode = StatsMode::Default, + CheckMode cMode = CheckMode::Default, + const BufferT& buffer = BufferT()); + +template +typename util::enable_if::value, GridHandle>::type +createLevelSetBox(double width = 40.0, + double height = 60.0, + double depth = 100.0, + const Vec3d& center = Vec3d(0.0), + double voxelSize = 1.0, + double halfWidth = 3.0, + const Vec3d& origin = Vec3d(0.0), + const std::string& name = "box_ls_FpN", + StatsMode sMode = StatsMode::Default, + CheckMode cMode = CheckMode::Default, + float tolerance = -1.0f, + bool ditherOn = false, + const BufferT& buffer = BufferT()); + +//================================================================================================ + +/// @brief Returns a handle to a sparse fog volume of a box such +/// that the exterior is 0 and inactive, the interior is active +/// with values varying smoothly from 0 at the surface of the +/// box to 1 at the halfWidth and interior of the box. +/// +/// @param width Width of box in world units +/// @param height Height of box in world units +/// @param depth Depth of box in world units +/// @param center Center of box in world units +/// @param voxelSize Size of a voxel in world units +/// @param halfWidth Half-width of narrow band in voxel units +/// @param origin Origin of grid in world units +/// @param name Name of the grid +/// @param sMode Mode of computation for the statistics. +/// @param cMode Mode of computation for the checksum. +/// @param tolerance Global error tolerance use when VoxelT = FpN +/// @param ditherOn If true dithering will be applied when VoxelT = {Fp4,Fp8,Fp16,FpN} +/// @param buffer Buffer used for memory allocation by the handle +/// +/// @details The @c BuildT template parameter must be one of the following: +/// float (default), double, Fp4, Fp8, Fp16 or FpN. The @c tolerance +/// argument is only used when BuildT is set to FpN. 
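+///
+/// @par Example
+/// An illustrative sketch of the fog-volume value range described above (assumes the
+/// default float build type, HostBuffer, and the default unit voxel size and origin):
+/// @code
+/// auto handle = nanovdb::tools::createFogVolumeBox(40.0, 60.0, 100.0);
+/// const auto* grid = handle.grid<float>();
+/// auto acc = grid->getAccessor();
+/// // Deep inside the box the density saturates at 1; outside it reads the inactive 0 background.
+/// float inside  = acc.getValue(nanovdb::Coord(0, 0, 0));   // ~1.0f at the box center
+/// float outside = acc.getValue(nanovdb::Coord(100, 0, 0)); // 0.0f, well outside the box
+/// @endcode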
+template +typename util::disable_if::value, GridHandle>::type +createFogVolumeBox(double width = 40.0, + double height = 60.0, + double depth = 100.0, + const Vec3d& center = Vec3d(0.0), + double voxelSize = 1.0, + double halfWidth = 3.0, + const Vec3d& origin = Vec3d(0.0), + const std::string& name = "box_fog", + StatsMode sMode = StatsMode::Default, + CheckMode cMode = CheckMode::Default, + const BufferT& buffer = BufferT()); + +template +typename util::enable_if::value, GridHandle>::type +createFogVolumeBox(double width = 40.0, + double height = 60.0, + double depth = 100.0, + const Vec3d& center = Vec3d(0.0), + double voxelSize = 1.0, + double halfWidth = 3.0, + const Vec3d& origin = Vec3d(0.0), + const std::string& name = "box_fog_FpN", + StatsMode sMode = StatsMode::Default, + CheckMode cMode = CheckMode::Default, + float tolerance = -1.0f, + bool ditherOn = false, + const BufferT& buffer = BufferT()); + +//================================================================================================ + +/// @brief Returns a handle to a narrow-band level set of a octahedron +/// +/// @param scale Scale of octahedron in world units +/// @param center Center of octahedron in world units +/// @param voxelSize Size of a voxel in world units +/// @param halfWidth Half-width of narrow band in voxel units +/// @param origin Origin of grid in world units +/// @param name Name of the grid +/// @param sMode Mode of computation for the statistics. +/// @param cMode Mode of computation for the checksum. +/// @param tolerance Global error tolerance use when VoxelT = FpN +/// @param ditherOn If true dithering will be applied when VoxelT = {Fp4,Fp8,Fp16,FpN} +/// @param buffer Buffer used for memory allocation by the handle +/// +/// @details The @c BuildT template parameter must be one of the following: +/// float (default), double, Fp4, Fp8, Fp16 or FpN. The @c tolerance +/// argument is only used when BuildT is set to FpN. +template +typename util::disable_if::value, GridHandle>::type +createLevelSetOctahedron(double scale = 100.0, + const Vec3d& center = Vec3d(0.0), + double voxelSize = 1.0, + double halfWidth = 3.0, + const Vec3d& origin = Vec3d(0.0), + const std::string& name = "octadedron_ls", + StatsMode sMode = StatsMode::Default, + CheckMode cMode = CheckMode::Default, + const BufferT& buffer = BufferT()); + +template +typename util::enable_if::value, GridHandle>::type +createLevelSetOctahedron(double scale = 100.0, + const Vec3d& center = Vec3d(0.0), + double voxelSize = 1.0, + double halfWidth = 3.0, + const Vec3d& origin = Vec3d(0.0), + const std::string& name = "octadedron_ls_FpN", + StatsMode sMode = StatsMode::Default, + CheckMode cMode = CheckMode::Default, + float tolerance = -1.0f, + bool ditherOn = false, + const BufferT& buffer = BufferT()); + +//================================================================================================ + +/// @brief Returns a handle to a sparse fog volume of an octahedron such +/// that the exterior is 0 and inactive, the interior is active +/// with values varying smoothly from 0 at the surface of the +/// octahedron to 1 at the halfWidth and interior of the octahedron. +/// +/// @param scale Scale of octahedron in world units +/// @param center Center of box in world units +/// @param voxelSize Size of a voxel in world units +/// @param halfWidth Half-width of narrow band in voxel units +/// @param origin Origin of grid in world units +/// @param name Name of the grid +/// @param sMode Mode of computation for the statistics. 
+/// @param cMode Mode of computation for the checksum. +/// @param tolerance Global error tolerance use when VoxelT = FpN +/// @param ditherOn If true dithering will be applied when VoxelT = {Fp4,Fp8,Fp16,FpN} +/// @param buffer Buffer used for memory allocation by the handle +/// +/// @details The @c BuildT template parameter must be one of the following: +/// float (default), double, Fp4, Fp8, Fp16 or FpN. The @c tolerance +/// argument is only used when BuildT is set to FpN. +template +typename util::disable_if::value, GridHandle>::type +createFogVolumeOctahedron(double scale = 100.0, + const Vec3d& center = Vec3d(0.0), + double voxelSize = 1.0, + double halfWidth = 3.0, + const Vec3d& origin = Vec3d(0.0), + const std::string& name = "octadedron_fog", + StatsMode sMode = StatsMode::Default, + CheckMode cMode = CheckMode::Default, + const BufferT& buffer = BufferT()); + +template +typename util::enable_if::value, GridHandle>::type +createFogVolumeOctahedron(double scale = 100.0, + const Vec3d& center = Vec3d(0.0), + double voxelSize = 1.0, + double halfWidth = 3.0, + const Vec3d& origin = Vec3d(0.0), + const std::string& name = "octadedron_fog_FpN", + StatsMode sMode = StatsMode::Default, + CheckMode cMode = CheckMode::Default, + float tolerance = -1.0f, + bool ditherOn = false, + const BufferT& buffer = BufferT()); + +//================================================================================================ + +/// @brief Returns a handle to a narrow-band level set of a bounding-box (= wireframe of a box) +/// +/// @param width Width of box in world units +/// @param height Height of box in world units +/// @param depth Depth of box in world units +/// @param thickness Thickness of the wire in world units +/// @param center Center of bbox in world units +/// @param voxelSize Size of a voxel in world units +/// @param halfWidth Half-width of narrow band in voxel units +/// @param origin Origin of grid in world units +/// @param name Name of the grid +/// @param sMode Mode of computation for the statistics. +/// @param cMode Mode of computation for the checksum. +/// @param tolerance Global error tolerance use when VoxelT = FpN +/// @param ditherOn If true dithering will be applied when VoxelT = {Fp4,Fp8,Fp16,FpN} +/// @param buffer Buffer used for memory allocation by the handle +/// +/// @details The @c BuildT template parameter must be one of the following: +/// float (default), double, Fp4, Fp8, Fp16 or FpN. The @c tolerance +/// argument is only used when BuildT is set to FpN. 
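+///
+/// @par Example
+/// An illustrative sketch (assumes the default float build type and HostBuffer; the
+/// voxel size is chosen so the wire thickness stays above the ~1.5 voxel minimum
+/// enforced by the implementation):
+/// @code
+/// // Wireframe of a 40 x 60 x 100 box with 2 world-unit thick wires and 0.5 world-unit voxels.
+/// auto handle = nanovdb::tools::createLevelSetBBox(40.0, 60.0, 100.0, 2.0,
+///                                                  nanovdb::Vec3d(0.0), 0.5);
+/// @endcode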
+template +typename util::disable_if::value, GridHandle>::type +createLevelSetBBox(double width = 40.0, + double height = 60.0, + double depth = 100.0, + double thickness = 10.0, + const Vec3d& center = Vec3d(0.0), + double voxelSize = 1.0, + double halfWidth = 3.0, + const Vec3d& origin = Vec3d(0.0), + const std::string& name = "bbox_ls", + StatsMode sMode = StatsMode::Default, + CheckMode cMode = CheckMode::Default, + const BufferT& buffer = BufferT()); + +template +typename util::enable_if::value, GridHandle>::type +createLevelSetBBox(double width = 40.0, + double height = 60.0, + double depth = 100.0, + double thickness = 10.0, + const Vec3d& center = Vec3d(0.0), + double voxelSize = 1.0, + double halfWidth = 3.0, + const Vec3d& origin = Vec3d(0.0), + const std::string& name = "bbox_ls_FpN", + StatsMode sMode = StatsMode::Default, + CheckMode cMode = CheckMode::Default, + float tolerance = -1.0f, + bool ditherOn = false, + const BufferT& buffer = BufferT()); + + +//================================================================================================ + +/// @brief Returns a handle to a PointDataGrid containing points scattered +/// on the surface of a box. +/// +/// @param pointsPerVoxel Number of point per voxel on on the surface +/// @param width Width of box in world units +/// @param height Height of box in world units +/// @param depth Depth of box in world units +/// @param center Center of box in world units +/// @param voxelSize Size of a voxel in world units +/// @param origin Origin of grid in world units +/// @param name Name of the grid +/// @param mode Mode of computation for the checksum. +/// @param buffer Buffer used for memory allocation by the handle +template +typename util::disable_if::value, GridHandle>::type +createPointBox(int pointsPerVoxel = 1, // half-width of narrow band in voxel units + double width = 40.0, // width of box in world units + double height = 60.0, // height of box in world units + double depth = 100.0, // depth of box in world units + const Vec3d& center = Vec3d(0.0), // center of box in world units + double voxelSize = 1.0, // size of a voxel in world units + const Vec3d& origin = Vec3d(0.0), // origin of grid in world units + const std::string& name = "box_points", // name of grid + CheckMode mode = CheckMode::Default, + const BufferT& buffer = BufferT()); + +//================================================================================================ + +/// @brief Given an input NanoVDB voxel grid this methods returns a GridHandle to another NanoVDB +/// PointDataGrid with points scattered in the active leaf voxels of in input grid. Note, the +/// coordinates of the points are encoded as blind data in world-space. +/// +/// @param srcGrid Const input grid used to determine the active voxels to scatter points into +/// @param pointsPerVoxel Number of point per voxel on on the surface +/// @param name Name of the grid +/// @param mode Mode of computation for the checksum. 
+/// @param buffer Buffer used for memory allocation by the handle +template +inline GridHandle +createPointScatter(const NanoGrid& srcGrid, // source grid used to scatter points into + int pointsPerVoxel = 1, // half-width of narrow band in voxel units + const std::string& name = "point_scatter", // name of grid + CheckMode mode = CheckMode::Default, + const BufferT& buffer = BufferT()); + +//================================================================================================ + +namespace { + +/// @brief Returns a shared pointer to a build::Grid containing a narrow-band SDF values for a sphere +/// +/// @brief Note, this is not (yet) a valid level set SDF field since values inside sphere (and outside +/// the narrow band) are still undefined. Call builder::sdfToLevelSet() to set those +/// values or alternatively call builder::levelSetToFog to generate a FOG volume. +/// +/// @details The @c BuildT template parameter must be one of the following: +/// float (default), double, Fp4, Fp8, Fp16 or FpN. +template +std::shared_ptr> +initSphere(double radius, // radius of sphere in world units + const Vec3d& center, // center of sphere in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin) // origin of grid in world units +{ + using GridT = build::Grid; + using ValueT = typename BuildToValueMap::type; + static_assert(util::is_floating_point::value, "initSphere: expect floating point"); + if (!(radius > 0)) + throw std::runtime_error("Sphere: radius must be positive!"); + if (!(voxelSize > 0)) + throw std::runtime_error("Sphere: voxelSize must be positive!"); + if (!(halfWidth > 0)) + throw std::runtime_error("Sphere: halfWidth must be positive!"); + + auto grid = std::make_shared(ValueT(halfWidth * voxelSize)); + grid->setTransform(voxelSize, origin); + + // Define radius of sphere with narrow-band in voxel units + const ValueT r0 = radius / ValueT(voxelSize), rmax = r0 + ValueT(halfWidth); + + // Radius below the Nyquist frequency + if (r0 < ValueT(1.5f)) return grid; + + // Define center of sphere in voxel units + const math::Vec3 c(ValueT(center[0] - origin[0]) / ValueT(voxelSize), + ValueT(center[1] - origin[1]) / ValueT(voxelSize), + ValueT(center[2] - origin[2]) / ValueT(voxelSize)); + + // Define bounds of the voxel coordinates + const int imin = math::Floor(c[0] - rmax), imax = math::Ceil(c[0] + rmax); + const int jmin = math::Floor(c[1] - rmax), jmax = math::Ceil(c[1] + rmax); + const int kmin = math::Floor(c[2] - rmax), kmax = math::Ceil(c[2] + rmax); + + const util::Range<1,int> range(imin, imax+1, 32); + + auto kernel = [&](const util::Range<1,int> &r) { + auto acc = grid->getWriteAccessor(); + Coord ijk; + int &i = ijk[0], &j = ijk[1], &k = ijk[2], m = 1; + // Compute signed distances to sphere using leapfrogging in k + for (i = r.begin(); i < r.end(); ++i) { + const auto x2 = math::Pow2(ValueT(i) - c[0]); + for (j = jmin; j <= jmax; ++j) { + const auto x2y2 = math::Pow2(ValueT(j) - c[1]) + x2; + for (k = kmin; k <= kmax; k += m) { + m = 1; + const auto v = math::Sqrt(x2y2 + math::Pow2(ValueT(k) - c[2])) - r0; // Distance in voxel units + const auto d = v < 0 ? 
-v : v; + if (d < halfWidth) { // inside narrow band + acc.setValue(ijk, ValueT(voxelSize) * v); // distance in world units + } else { // outside narrow band + m += math::Floor(d - halfWidth); // leapfrog + } + } //end leapfrog over k + } //end loop over j + } //end loop over i + };// kernel +#ifdef NANOVDB_PARALLEL_PRIMITIVES + util::forEach(range, kernel); +#else + kernel(range); +#endif + return grid; +} // initSphere + +template +std::shared_ptr> +initTorus(double radius1, // major radius of torus in world units + double radius2, // minor radius of torus in world units + const Vec3d& center, // center of torus in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin) // origin of grid in world units +{ + using GridT = build::Grid; + using ValueT = typename BuildToValueMap::type; + static_assert(util::is_floating_point::value, "initTorus: expect floating point"); + if (!(radius2 > 0)) + throw std::runtime_error("Torus: radius2 must be positive!"); + if (!(radius1 > radius2)) + throw std::runtime_error("Torus: radius1 must be larger than radius2!"); + if (!(voxelSize > 0)) + throw std::runtime_error("Torus: voxelSize must be positive!"); + if (!(halfWidth > 0)) + throw std::runtime_error("Torus: halfWidth must be positive!"); + + auto grid = std::make_shared(ValueT(halfWidth * voxelSize)); + grid->setTransform(voxelSize, origin); + + // Define size of torus with narrow-band in voxel units + const ValueT r1 = radius1 / ValueT(voxelSize), r2 = radius2 / ValueT(voxelSize), rmax1 = r1 + r2 + ValueT(halfWidth), rmax2 = r2 + ValueT(halfWidth); + + // Radius below the Nyquist frequency + if (r2 < ValueT(1.5)) return grid; + + // Define center of torus in voxel units + const math::Vec3 c(ValueT(center[0] - origin[0]) / ValueT(voxelSize), + ValueT(center[1] - origin[1]) / ValueT(voxelSize), + ValueT(center[2] - origin[2]) / ValueT(voxelSize)); + + // Define bounds of the voxel coordinates + const int imin = math::Floor(c[0] - rmax1), imax = math::Ceil(c[0] + rmax1); + const int jmin = math::Floor(c[1] - rmax2), jmax = math::Ceil(c[1] + rmax2); + const int kmin = math::Floor(c[2] - rmax1), kmax = math::Ceil(c[2] + rmax1); + + const util::Range<1,int> range(imin, imax+1, 32); + auto kernel = [&](const util::Range<1,int> &r) { + auto acc = grid->getWriteAccessor(); + Coord ijk; + int &i = ijk[0], &j = ijk[1], &k = ijk[2], m = 1; + // Compute signed distances to torus using leapfrogging in k + for (i = r.begin(); i < r.end(); ++i) { + const auto x2 = math::Pow2(ValueT(i) - c[0]); + for (k = kmin; k <= kmax; ++k) { + const auto x2z2 = math::Pow2(math::Sqrt(math::Pow2(ValueT(k) - c[2]) + x2) - r1); + for (j = jmin; j <= jmax; j += m) { + m = 1; + const auto v = math::Sqrt(x2z2 + math::Pow2(ValueT(j) - c[1])) - r2; // Distance in voxel units + const auto d = v < 0 ? 
-v : v; + if (d < halfWidth) { // inside narrow band + acc.setValue(ijk, ValueT(voxelSize) * v); // distance in world units + } else { // outside narrow band + m += math::Floor(d - halfWidth); // leapfrog + } + } //end leapfrog over k + } //end loop over j + } //end loop over i + }; // kernel + +#ifdef NANOVDB_PARALLEL_PRIMITIVES + util::forEach(range, kernel); +#else + kernel(range); +#endif + + return grid; +} // initTorus + +template +std::shared_ptr> +initBox(double width, // major radius of torus in world units + double height, // minor radius of torus in world units + double depth, + const Vec3d& center, // center of box in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin) // origin of grid in world units +{ + using GridT = build::Grid; + using ValueT = typename BuildToValueMap::type; + static_assert(util::is_floating_point::value, "initBox: expect floating point"); + using Vec3T = math::Vec3; + if (!(width > 0)) + throw std::runtime_error("Box: width must be positive!"); + if (!(height > 0)) + throw std::runtime_error("Box: height must be positive!"); + if (!(depth > 0)) + throw std::runtime_error("Box: depth must be positive!"); + + if (!(voxelSize > 0)) + throw std::runtime_error("Box: voxelSize must be positive!"); + if (!(halfWidth > 0)) + throw std::runtime_error("Box: halfWidth must be positive!"); + + auto grid = std::make_shared(ValueT(halfWidth * voxelSize)); + grid->setTransform(voxelSize, origin); + + // Define size of box with narrow-band in voxel units + const Vec3T r(width / (2 * ValueT(voxelSize)), + height / (2 * ValueT(voxelSize)), + depth / (2 * ValueT(voxelSize))); + + // Below the Nyquist frequency + if (r.min() < ValueT(1.5)) return grid; + + // Define center of box in voxel units + const Vec3T c(ValueT(center[0] - origin[0]) / ValueT(voxelSize), + ValueT(center[1] - origin[1]) / ValueT(voxelSize), + ValueT(center[2] - origin[2]) / ValueT(voxelSize)); + + // Define utility functions + auto Pos = [](ValueT x) { return x > 0 ? x : 0; }; + auto Neg = [](ValueT x) { return x < 0 ? 
x : 0; }; + + // Define bounds of the voxel coordinates + const math::BBox b(c - r - Vec3T(ValueT(halfWidth)), c + r + Vec3T(ValueT(halfWidth))); + const CoordBBox bbox(Coord(math::Floor(b[0][0]), math::Floor(b[0][1]), math::Floor(b[0][2])), + Coord(math::Ceil(b[1][0]), math::Ceil(b[1][1]), math::Ceil(b[1][2]))); + const util::Range<1,int> range(bbox[0][0], bbox[1][0]+1, 32); + + // Compute signed distances to box using leapfrogging in k + auto kernel = [&](const util::Range<1,int> &ra) { + auto acc = grid->getWriteAccessor(); + int m = 1; + for (Coord p(ra.begin(),bbox[0][1],bbox[0][2]); p[0] < ra.end(); ++p[0]) { + const auto q1 = math::Abs(ValueT(p[0]) - c[0]) - r[0]; + const auto x2 = math::Pow2(Pos(q1)); + for (p[1] = bbox[0][1]; p[1] <= bbox[1][1]; ++p[1]) { + const auto q2 = math::Abs(ValueT(p[1]) - c[1]) - r[1]; + const auto q0 = math::Max(q1, q2); + const auto x2y2 = x2 + math::Pow2(Pos(q2)); + for (p[2] = bbox[0][2]; p[2] <= bbox[1][2]; p[2] += m) { + m = 1; + const auto q3 = math::Abs(ValueT(p[2]) - c[2]) - r[2]; + const auto v = math::Sqrt(x2y2 + math::Pow2(Pos(q3))) + Neg(math::Max(q0, q3)); // Distance in voxel units + const auto d = math::Abs(v); + if (d < halfWidth) { // inside narrow band + acc.setValue(p, ValueT(voxelSize) * v); // distance in world units + } else { // outside narrow band + m += math::Floor(d - halfWidth); // leapfrog + } + } //end leapfrog over k + } //end loop over j + } //end loop over i + }; // kernel +#ifdef NANOVDB_PARALLEL_PRIMITIVES + util::forEach(range, kernel); +#else + kernel(range); +#endif + return grid; +} // initBox + +template +std::shared_ptr> +initBBox(double width, // width of the bbox in world units + double height, // height of the bbox in world units + double depth, // depth of the bbox in world units + double thickness, // thickness of the wire in world units + const Vec3d& center, // center of bbox in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin) // origin of grid in world units +{ + using GridT = build::Grid; + using ValueT = typename BuildToValueMap::type; + static_assert(util::is_floating_point::value, "initBBox: expect floating point"); + using Vec3T = math::Vec3; + if (!(width > 0)) + throw std::runtime_error("BBox: width must be positive!"); + if (!(height > 0)) + throw std::runtime_error("BBox: height must be positive!"); + if (!(depth > 0)) + throw std::runtime_error("BBox: depth must be positive!"); + if (!(thickness > 0)) + throw std::runtime_error("BBox: thickness must be positive!"); + if (!(voxelSize > 0.0)) + throw std::runtime_error("BBox: voxelSize must be positive!"); + + + auto grid = std::make_shared(ValueT(halfWidth * voxelSize)); + grid->setTransform(voxelSize, origin); + + // Define size of bbox with narrow-band in voxel units + const Vec3T r(width / (2 * ValueT(voxelSize)), + height / (2 * ValueT(voxelSize)), + depth / (2 * ValueT(voxelSize))); + const ValueT e = thickness / ValueT(voxelSize); + + // Below the Nyquist frequency + if (r.min() < ValueT(1.5) || e < ValueT(1.5)) return grid; + + // Define center of bbox in voxel units + const Vec3T c(ValueT(center[0] - origin[0]) / ValueT(voxelSize), + ValueT(center[1] - origin[1]) / ValueT(voxelSize), + ValueT(center[2] - origin[2]) / ValueT(voxelSize)); + + // Define utility functions + auto Pos = [](ValueT x) { return x > 0 ? x : 0; }; + auto Neg = [](ValueT x) { return x < 0 ? 
x : 0; }; + + // Define bounds of the voxel coordinates + const math::BBox b(c - r - Vec3T(e + ValueT(halfWidth)), c + r + Vec3T(e + ValueT(halfWidth))); + const CoordBBox bbox(Coord(math::Floor(b[0][0]), math::Floor(b[0][1]), math::Floor(b[0][2])), + Coord(math::Ceil(b[1][0]), math::Ceil(b[1][1]), math::Ceil(b[1][2]))); + const util::Range<1,int> range(bbox[0][0], bbox[1][0]+1, 32); + + // Compute signed distances to bbox using leapfrogging in k + auto kernel = [&](const util::Range<1,int> &ra) { + auto acc = grid->getWriteAccessor(); + int m = 1; + for (Coord p(ra.begin(),bbox[0][1],bbox[0][2]); p[0] < ra.end(); ++p[0]) { + const ValueT px = math::Abs(ValueT(p[0]) - c[0]) - r[0]; + const ValueT qx = math::Abs(ValueT(px) + e) - e; + const ValueT px2 = math::Pow2(Pos(px)); + const ValueT qx2 = math::Pow2(Pos(qx)); + for (p[1] = bbox[0][1]; p[1] <= bbox[1][1]; ++p[1]) { + const ValueT py = math::Abs(ValueT(p[1]) - c[1]) - r[1]; + const ValueT qy = math::Abs(ValueT(py) + e) - e; + const ValueT qy2 = math::Pow2(Pos(qy)); + const ValueT px2qy2 = px2 + qy2; + const ValueT qx2py2 = qx2 + math::Pow2(Pos(py)); + const ValueT qx2qy2 = qx2 + qy2; + const ValueT a[3] = {math::Max(px, qy), math::Max(qx, py), math::Max(qx, qy)}; + for (p[2] = bbox[0][2]; p[2] <= bbox[1][2]; p[2] += m) { + m = 1; + const ValueT pz = math::Abs(ValueT(p[2]) - c[2]) - r[2]; + const ValueT qz = math::Abs(ValueT(pz) + e) - e; + const ValueT qz2 = math::Pow2(Pos(qz)); + const ValueT s1 = math::Sqrt(px2qy2 + qz2) + Neg(math::Max(a[0], qz)); + const ValueT s2 = math::Sqrt(qx2py2 + qz2) + Neg(math::Max(a[1], qz)); + const ValueT s3 = math::Sqrt(qx2qy2 + math::Pow2(Pos(pz))) + Neg(math::Max(a[2], pz)); + const ValueT v = math::Min(s1, math::Min(s2, s3)); // Distance in voxel units + const ValueT d = math::Abs(v); + if (d < halfWidth) { // inside narrow band + acc.setValue(p, ValueT(voxelSize) * v); // distance in world units + } else { // outside narrow band + m += math::Floor(d - halfWidth); // leapfrog + } + } //end leapfrog over k + } //end loop over j + } //end loop over i + }; //kernel +#ifdef NANOVDB_PARALLEL_PRIMITIVES + util::forEach(range, kernel); +#else + kernel(range); +#endif + + return grid; +} // initBBox + +template +std::shared_ptr> +initOctahedron(double scale, // scale of the octahedron in world units + const Vec3d& center, // center of octahedron in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin) // origin of grid in world units +{ + using GridT = build::Grid; + using ValueT = typename BuildToValueMap::type; + using Vec3T = math::Vec3; + static_assert(util::is_floating_point::value, "initOctahedron: expect floating point"); + + if (!(scale > 0)) throw std::runtime_error("Octahedron: width must be positive!"); + if (!(voxelSize > 0)) throw std::runtime_error("Octahedron: voxelSize must be positive!"); + + auto grid = std::make_shared(ValueT(halfWidth * voxelSize)); + grid->setTransform(voxelSize, origin); + + // Define size of octahedron with narrow-band in voxel units + const ValueT s = scale / (2 * ValueT(voxelSize)); + + // Below the Nyquist frequency + if ( s < ValueT(1.5) ) return grid; + + // Define center of octahedron in voxel units + const Vec3T c(ValueT(center[0] - origin[0]) / ValueT(voxelSize), + ValueT(center[1] - origin[1]) / ValueT(voxelSize), + ValueT(center[2] - origin[2]) / ValueT(voxelSize)); + + // Define utility functions + auto sdf = [&s](ValueT x, ValueT y, ValueT z) { + const ValueT d 
= ValueT(0.5)*(z - y + s); + if (d < ValueT(0)) { + return Vec3T(x, y - s, z).length(); + } else if (d > s) { + return Vec3T(x, y, z - s).length(); + } + return Vec3T(x, y - s + d, z - d).length(); + }; + + // Define bounds of the voxel coordinates + const math::BBox b(c - Vec3T(s + ValueT(halfWidth)), c + Vec3T(s + ValueT(halfWidth))); + const CoordBBox bbox(Coord(math::Floor(b[0][0]), math::Floor(b[0][1]), math::Floor(b[0][2])), + Coord(math::Ceil(b[1][0]), math::Ceil(b[1][1]), math::Ceil(b[1][2]))); + const util::Range<1,int> range(bbox[0][0], bbox[1][0]+1, 32); + + // Compute signed distances to octahedron using leapfrogging in k + auto kernel = [&](const util::Range<1,int> &ra) { + auto acc = grid->getWriteAccessor(); + int m = 1; + static const ValueT a = math::Sqrt(ValueT(1)/ValueT(3)); + for (Coord p(ra.begin(),bbox[0][1],bbox[0][2]); p[0] < ra.end(); ++p[0]) { + const ValueT px = math::Abs(ValueT(p[0]) - c[0]); + for (p[1] = bbox[0][1]; p[1] <= bbox[1][1]; ++p[1]) { + const ValueT py = math::Abs(ValueT(p[1]) - c[1]); + for (p[2] = bbox[0][2]; p[2] <= bbox[1][2]; p[2] += m) { + m = 1; + const ValueT pz = math::Abs(ValueT(p[2]) - c[2]); + ValueT d = px + py + pz - s; + ValueT v; + if (ValueT(3)*px < d) { + v = sdf(px, py, pz); + } else if (ValueT(3)*py < d) { + v = sdf(py, pz, px); + } else if (ValueT(3)*pz < d) { + v = sdf(pz, px, py); + } else { + v = a * d; + } + d = math::Abs(v); + if (d < halfWidth) { // inside narrow band + acc.setValue(p, ValueT(voxelSize) * v); // distance in world units + } else { // outside narrow band + m += math::Floor(d - halfWidth); // leapfrog + } + } //end leapfrog over k + } //end loop over j + } //end loop over i + };// kernel +#ifdef NANOVDB_PARALLEL_PRIMITIVES + util::forEach(range, kernel); +#else + kernel(range); +#endif + return grid; +} // initOctahedron + +} // unnamed namespace + +//================================================================================================ + +template +typename util::enable_if::value, GridHandle>::type +createLevelSetSphere(double radius, // radius of sphere in world units + const Vec3d& center, // center of sphere in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + CheckMode cMode, // mode of computation for the checksum + const BufferT& buffer) +{ + using GridT = build::Grid; + auto grid = initSphere(radius, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + auto handle = converter.template getHandle(buffer); + assert(handle); + return handle; +} // createLevelSetSphere + +//================================================================================================ + +template +typename util::enable_if::value, GridHandle>::type +createLevelSetSphere(double radius, // radius of sphere in world units + const Vec3d& center, // center of sphere in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + CheckMode cMode, // mode of computation for the checksum + 
bool ditherOn, + const BufferT& buffer) +{ + using GridT = build::Grid; + auto grid = initSphere(radius, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + converter.enableDithering(ditherOn); + auto handle = converter.template getHandle(buffer); + assert(handle); + return handle; +} // createLevelSetSphere + +//================================================================================================ + +template +typename util::enable_if::value, GridHandle>::type +createLevelSetSphere(double radius, // radius of sphere in world units + const Vec3d& center, // center of sphere in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + CheckMode cMode, // mode of computation for the checksum + float tolerance,// only used if VoxelT = FpN + bool ditherOn, + const BufferT& buffer) +{ + using GridT = build::Grid; + auto grid = initSphere(radius, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + converter.enableDithering(ditherOn); + AbsDiff oracle(tolerance); + auto handle = converter.template getHandle(oracle, buffer); + assert(handle); + return handle; +} // createLevelSetSphere + +//================================================================================================ + +template +typename util::disable_if::value, GridHandle>::type +createFogVolumeSphere(double radius, // radius of sphere in world units + const Vec3d& center, // center of sphere in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + CheckMode cMode, // mode of computation for the checksum + const BufferT& buffer) +{ + using GridT = build::Grid; + auto grid = initSphere(radius, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + build::levelSetToFog(mgr, false); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + auto handle = converter.template getHandle(buffer); + assert(handle); + return handle; +} // createFogVolumeSphere + +//================================================================================================ + +template +typename util::enable_if::value, GridHandle>::type +createFogVolumeSphere(double radius, // radius of sphere in world units + const Vec3d& center, // center of sphere in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + CheckMode cMode, // mode of computation for the checksum + float tolerance,// only used if VoxelT = FpN + bool ditherOn, + const BufferT& buffer) +{ + using GridT = build::Grid; + auto grid = 
initSphere(radius, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + build::levelSetToFog(mgr, false); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + converter.enableDithering(ditherOn); + AbsDiff oracle(tolerance); + auto handle = converter.template getHandle(oracle, buffer); + assert(handle); + return handle; +} // createFogVolumeSphere + +//================================================================================================ + +template +typename util::disable_if::value, GridHandle>::type +createPointSphere(int pointsPerVoxel, // number of points to be scattered in each active voxel + double radius, // radius of sphere in world units + const Vec3d& center, // center of sphere in world units + double voxelSize, // size of a voxel in world units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + CheckMode cMode, // mode of computation for the checksum + const BufferT& buffer) +{ + auto sphereHandle = createLevelSetSphere(radius, center, voxelSize, 0.5, origin, "dummy", + StatsMode::BBox, CheckMode::Disable, buffer); + assert(sphereHandle); + auto* sphereGrid = sphereHandle.template grid(); + assert(sphereGrid); + auto pointHandle = createPointScatter(*sphereGrid, pointsPerVoxel, name, cMode, buffer); + assert(pointHandle); + return pointHandle; +} // createPointSphere + +//================================================================================================ + +template +typename util::disable_if::value, GridHandle>::type +createLevelSetTorus(double majorRadius, // major radius of torus in world units + double minorRadius, // minor radius of torus in world units + const Vec3d& center, // center of torus in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + CheckMode cMode, // mode of computation for the checksum + const BufferT& buffer) +{ + using GridT = build::Grid; + auto grid = initTorus(majorRadius, minorRadius, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + auto handle = converter.template getHandle(buffer); + assert(handle); + return handle; +} // createLevelSetTorus + +//================================================================================================ + +template +typename util::enable_if::value, GridHandle>::type +createLevelSetTorus(double majorRadius, // major radius of torus in world units + double minorRadius, // minor radius of torus in world units + const Vec3d& center, // center of torus in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + CheckMode cMode, // mode of computation for the checksum + float tolerance, + bool ditherOn, + const BufferT& buffer) +{ + using GridT = build::Grid; + auto grid = initTorus(majorRadius, minorRadius, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager 
mgr(*grid); + build::sdfToLevelSet(mgr); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + converter.enableDithering(ditherOn); + AbsDiff oracle(tolerance); + auto handle = converter.template getHandle(oracle, buffer); + assert(handle); + return handle; +} // createLevelSetTorus + +//================================================================================================ + +template +typename util::disable_if::value, GridHandle>::type +createFogVolumeTorus(double majorRadius, // major radius of torus in world units + double minorRadius, // minor radius of torus in world units + const Vec3d& center, // center of torus in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + CheckMode cMode, // mode of computation for the checksum + const BufferT& buffer) +{ + using GridT = build::Grid; + auto grid = initTorus(majorRadius, minorRadius, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + build::levelSetToFog(mgr, false); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + auto handle = converter.template getHandle(buffer); + assert(handle); + return handle; +} // createFogVolumeTorus + +//================================================================================================ + +template +typename util::enable_if::value, GridHandle>::type +createFogVolumeTorus(double majorRadius, // major radius of torus in world units + double minorRadius, // minor radius of torus in world units + const Vec3d& center, // center of torus in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + CheckMode cMode, // mode of computation for the checksum + float tolerance, + bool ditherOn, + const BufferT& buffer) +{ + using GridT = build::Grid; + auto grid = initTorus(majorRadius, minorRadius, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + build::levelSetToFog(mgr, false); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + converter.enableDithering(ditherOn); + AbsDiff oracle(tolerance); + auto handle = converter.template getHandle(oracle, buffer); + assert(handle); + return handle; +} // createFogVolumeTorus + +//================================================================================================ + +template +typename util::disable_if::value, GridHandle>::type +createPointTorus(int pointsPerVoxel, // number of points to be scattered in each active voxel + double majorRadius, // major radius of torus in world units + double minorRadius, // minor radius of torus in world units + const Vec3d& center, // center of torus in world units + double voxelSize, // size of a voxel in world units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + CheckMode cMode, // mode of computation for the checksum + const BufferT& buffer) +{ + auto torusHandle = createLevelSetTorus(majorRadius, minorRadius, 
center, voxelSize, 0.5f, origin, + "dummy", StatsMode::BBox, CheckMode::Disable, buffer); + assert(torusHandle); + auto* torusGrid = torusHandle.template grid(); + assert(torusGrid); + auto pointHandle = createPointScatter(*torusGrid, pointsPerVoxel, name, cMode, buffer); + assert(pointHandle); + return pointHandle; +} // createPointTorus + +//================================================================================================ + +template +typename util::disable_if::value, GridHandle>::type +createLevelSetBox(double width, // width of box in world units + double height, // height of box in world units + double depth, // depth of box in world units + const Vec3d& center, // center of box in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + CheckMode cMode, // mode of computation for the checksum + const BufferT& buffer) +{ + using GridT = build::Grid; + auto grid = initBox(width, height, depth, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + auto handle = converter.template getHandle(buffer); + assert(handle); + return handle; +} // createLevelSetBox + +//================================================================================================ + +template +typename util::enable_if::value, GridHandle>::type +createLevelSetBox(double width, // width of box in world units + double height, // height of box in world units + double depth, // depth of box in world units + const Vec3d& center, // center of box in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + CheckMode cMode, // mode of computation for the checksum + float tolerance, + bool ditherOn, + const BufferT& buffer) +{ + using GridT = build::Grid; + auto grid = initBox(width, height, depth, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + converter.enableDithering(ditherOn); + AbsDiff oracle(tolerance); + auto handle = converter.template getHandle(oracle, buffer); + assert(handle); + return handle; +} // createLevelSetBox + +//================================================================================================ + +template +typename util::disable_if::value, GridHandle>::type +createLevelSetOctahedron(double scale, // scale of the octahedron in world units + const Vec3d& center, // center of box in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + CheckMode cMode, // mode of computation for the checksum + const BufferT& buffer) +{ + using GridT = build::Grid; + auto grid = initOctahedron(scale, center, voxelSize, halfWidth, origin); + grid->mName = name; + 
build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + auto handle = converter.template getHandle(buffer); + assert(handle); + return handle; +} // createLevelSetOctahedron + +//================================================================================================ + +template +typename util::enable_if::value, GridHandle>::type +createLevelSetOctahedron(double scale, // scale of the octahedron in world units + const Vec3d& center, // center of box in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + CheckMode cMode, // mode of computation for the checksum + float tolerance, + bool ditherOn, + const BufferT& buffer) +{ + using GridT = build::Grid; + auto grid = initOctahedron(scale, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + converter.enableDithering(ditherOn); + AbsDiff oracle(tolerance); + auto handle = converter.template getHandle(oracle, buffer); + assert(handle); + return handle; +} // createLevelSetOctahedron + +//================================================================================================ + +template +typename util::disable_if::value, GridHandle>::type +createLevelSetBBox(double width, // width of bbox in world units + double height, // height of bbox in world units + double depth, // depth of bbox in world units + double thickness, // thickness of the wire in world units + const Vec3d& center, // center of bbox in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + CheckMode cMode, // mode of computation for the checksum + const BufferT& buffer) +{ + using GridT = build::Grid; + auto grid = initBBox(width, height, depth, thickness, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + auto handle = converter.template getHandle(buffer); + assert(handle); + return handle; +} // createLevelSetBBox + +//================================================================================================ + +template +typename util::enable_if::value, GridHandle>::type +createLevelSetBBox(double width, // width of bbox in world units + double height, // height of bbox in world units + double depth, // depth of bbox in world units + double thickness, // thickness of the wire in world units + const Vec3d& center, // center of bbox in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + CheckMode cMode, // mode of computation for the checksum + float tolerance, + bool ditherOn, + const BufferT& buffer) +{ + using GridT = 
build::Grid; + auto grid = initBBox(width, height, depth, thickness, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + converter.enableDithering(ditherOn); + AbsDiff oracle(tolerance); + auto handle = converter.template getHandle(oracle, buffer); + assert(handle); + return handle; +} // createLevelSetBBox + +//================================================================================================ + +template +typename util::disable_if::value, GridHandle>::type +createFogVolumeBox(double width, // width of box in world units + double height, // height of box in world units + double depth, // depth of box in world units + const Vec3d& center, // center of box in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + CheckMode cMode, // mode of computation for the checksum + const BufferT& buffer) +{ + using GridT = build::Grid; + auto grid = initBox(width, height, depth, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + build::levelSetToFog(mgr, false); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + auto handle = converter.template getHandle(buffer); + assert(handle); + return handle; +} // createFogVolumeBox + +//================================================================================================ + +template +typename util::enable_if::value, GridHandle>::type +createFogVolumeBox(double width, // width of box in world units + double height, // height of box in world units + double depth, // depth of box in world units + const Vec3d& center, // center of box in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + CheckMode cMode, // mode of computation for the checksum + float tolerance, + bool ditherOn, + const BufferT& buffer) +{ + using GridT = build::Grid; + auto grid = initBox(width, height, depth, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + build::levelSetToFog(mgr, false); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + converter.enableDithering(ditherOn); + AbsDiff oracle(tolerance); + auto handle = converter.template getHandle(oracle, buffer); + assert(handle); + return handle; +} // createFogVolumeBox + +//================================================================================================ + +template +typename util::disable_if::value, GridHandle>::type +createFogVolumeOctahedron(double scale, // scale of octahedron in world units + const Vec3d& center, // center of box in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + 
CheckMode cMode, // mode of computation for the checksum + const BufferT& buffer) +{ + using GridT = build::Grid; + auto grid = initOctahedron(scale, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + build::levelSetToFog(mgr, false); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + auto handle = converter.template getHandle(buffer); + assert(handle); + return handle; +} // createFogVolumeOctahedron + +//================================================================================================ + +template +typename util::enable_if::value, GridHandle>::type +createFogVolumeOctahedron(double scale, // scale of octahedron in world units + const Vec3d& center, // center of box in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + CheckMode cMode, // mode of computation for the checksum + float tolerance, + bool ditherOn, + const BufferT& buffer) +{ + using GridT = build::Grid; + auto grid = initOctahedron(scale, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + build::levelSetToFog(mgr, false); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + converter.enableDithering(ditherOn); + AbsDiff oracle(tolerance); + auto handle = converter.template getHandle(oracle, buffer); + assert(handle); + return handle; +} // createFogVolumeOctahedron + +//================================================================================================ + +template +typename util::disable_if::value, GridHandle>::type +createPointBox(int pointsPerVoxel, // number of points to be scattered in each active voxel + double width, // width of box in world units + double height, // height of box in world units + double depth, // depth of box in world units + const Vec3d& center, // center of box in world units + double voxelSize, // size of a voxel in world units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + CheckMode cMode, // mode of computation for the checksum + const BufferT& buffer) +{ + auto boxHandle = createLevelSetBox(width, height, depth, center, voxelSize, 0.5, origin, "dummy", + StatsMode::BBox, CheckMode::Disable, buffer); + assert(boxHandle); + auto* boxGrid = boxHandle.template grid(); + assert(boxGrid); + auto pointHandle = createPointScatter(*boxGrid, pointsPerVoxel, name, cMode, buffer); + assert(pointHandle); + return pointHandle; +} // createPointBox + +//================================================================================================ + +template +inline GridHandle +createPointScatter(const NanoGrid& srcGrid, // origin of grid in world units + int pointsPerVoxel, // number of points to be scattered in each active voxel + const std::string& name, // name of grid + CheckMode cMode, // mode of computation for the checksum + const BufferT& buffer) +{ + using ValueT = typename BuildToValueMap::type; + static_assert(util::is_floating_point::value, "createPointScatter: expect floating point"); + using Vec3T = math::Vec3; + if (pointsPerVoxel < 1) { + throw std::runtime_error("createPointScatter: Expected at least one point per voxel"); + } + if 
(!srcGrid.isLevelSet()) { + throw std::runtime_error("createPointScatter: Expected a level set grid"); + } + if (!srcGrid.hasBBox()) { + throw std::runtime_error("createPointScatter: ActiveVoxelCount is required"); + } + const uint64_t pointCount = pointsPerVoxel * srcGrid.activeVoxelCount(); + if (pointCount == 0) { + throw std::runtime_error("createPointScatter: No particles to scatter"); + } + std::vector xyz; + xyz.reserve(pointCount); + using DstGridT = build::Grid; + DstGridT dstGrid(std::numeric_limits::max(), name, GridClass::PointData); + dstGrid.mMap = srcGrid.map(); + auto dstAcc = dstGrid.getAccessor(); + std::srand(1234); + const ValueT s = 1 / (1 + ValueT(RAND_MAX)); // scale so s*rand() is in ] 0, 1 [ + // return a point with random local voxel coordinates (-0.5 to +0.5) + auto randomPoint = [&s](){return s * Vec3T(rand(), rand(), rand()) - Vec3T(0.5);}; + const auto& srcTree = srcGrid.tree(); + auto srcMgrHandle = createNodeManager(srcGrid); + auto *srcMgr = srcMgrHandle.template mgr(); + assert(srcMgr); + for (uint32_t i = 0, end = srcTree.nodeCount(0); i < end; ++i) { + auto& srcLeaf = srcMgr->leaf(i); + auto* dstLeaf = dstAcc.setValue(srcLeaf.origin(), pointsPerVoxel); // allocates leaf node + dstLeaf->mValueMask = srcLeaf.valueMask(); + for (uint32_t j = 0, m = 0; j < 512; ++j) { + if (dstLeaf->mValueMask.isOn(j)) { + const Vec3f ijk = dstLeaf->offsetToGlobalCoord(j).asVec3s();// floating-point representatrion of index coorindates + for (int n = 0; n < pointsPerVoxel; ++n) xyz.push_back(srcGrid.indexToWorld(randomPoint() + ijk)); + m += pointsPerVoxel; + }// active voxels + dstLeaf->mValues[j] = m; + }// loop over all voxels + }// loop over leaf nodes + assert(pointCount == xyz.size()); + CreateNanoGrid converter(dstGrid); + converter.setStats(StatsMode::MinMax); + converter.setChecksum(CheckMode::Disable); + + converter.addBlindData(name, + GridBlindDataSemantic::WorldCoords, + GridBlindDataClass::AttributeArray, + toGridType(), + pointCount, + sizeof(Vec3T)); + auto handle = converter.template getHandle(buffer); + assert(handle); + + auto* grid = handle.template grid(); + assert(grid && grid->template isSequential<0>()); + auto &tree = grid->tree(); + if (tree.nodeCount(0) == 0) throw std::runtime_error("Expect leaf nodes!"); + auto *leafData = tree.getFirstLeaf()->data(); + leafData[0].mMinimum = 0; // start of prefix sum + for (uint32_t i = 1, n = tree.nodeCount(0); i < n; ++i) { + leafData[i].mMinimum = leafData[i - 1].mMinimum + leafData[i - 1].mMaximum; + } + if (Vec3T *blindData = grid->template getBlindData(0)) { + memcpy(blindData, xyz.data(), xyz.size() * sizeof(Vec3T)); + } else { + throw std::runtime_error("Blind data pointer was NULL"); + } + updateChecksum(grid, cMode); + return handle; +} // createPointScatter + +}// namespace tools + +} // namespace nanovdb + +#endif // NANOVDB_TOOLS_PRIMITIVES_H_HAS_BEEN_INCLUDED diff --git a/external/nanovdb/tools/GridBuilder.h b/external/nanovdb/tools/GridBuilder.h new file mode 100644 index 00000000..428215ba --- /dev/null +++ b/external/nanovdb/tools/GridBuilder.h @@ -0,0 +1,2315 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +/*! + \file nanovdb/tools/GridBuilder.h + + \author Ken Museth + + \date June 26, 2020 + + \brief This file defines a minimum set of tree nodes and tools that + can be used (instead of OpenVDB) to build nanovdb grids on the CPU. 
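+
+    \par Example
+    A minimal, illustrative sketch of the intended workflow (identifiers follow the
+    usage found elsewhere in these headers; the final conversion step relies on the
+    CreateNanoGrid tool, which is assumed to be available):
+    \code{.cpp}
+    using SrcGridT = nanovdb::tools::build::Grid<float>;
+    SrcGridT grid(0.0f, "example");                  // background value and grid name
+    grid.setTransform(1.0, nanovdb::Vec3d(0.0));     // voxel size and origin in world units
+    auto acc = grid.getWriteAccessor();
+    acc.setValue(nanovdb::Coord(1, 2, 3), 1.0f);     // activate and set a single voxel
+    nanovdb::tools::CreateNanoGrid<SrcGridT> converter(grid);
+    auto handle = converter.getHandle<float>();      // GridHandle owning the compact NanoVDB grid
+    \endcode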
+*/ + +#ifndef NANOVDB_TOOLS_BUILD_GRIDBUILDER_H_HAS_BEEN_INCLUDED +#define NANOVDB_TOOLS_BUILD_GRIDBUILDER_H_HAS_BEEN_INCLUDED + +#include + +#include +#include +#include // for stringstream +#include +#include // for memcpy +#include +#include +#include + +#include +#include +#include + +namespace nanovdb { + +namespace tools::build { + +// ----------------------------> Froward decelerations of random access methods <-------------------------------------- + +template struct GetValue; +template struct SetValue; +template struct TouchLeaf; +template struct GetState; +template struct ProbeValue; + +// ----------------------------> RootNode <-------------------------------------- + +template +struct RootNode +{ + using ValueType = typename ChildT::ValueType; + using BuildType = typename ChildT::BuildType; + using ChildNodeType = ChildT; + using LeafNodeType = typename ChildT::LeafNodeType; + static constexpr uint32_t LEVEL = 1 + ChildT::LEVEL; // level 0 = leaf + struct Tile { + Tile(ChildT* c = nullptr) : child(c) {} + Tile(const ValueType& v, bool s) : child(nullptr), value(v), state(s) {} + bool isChild() const { return child!=nullptr; } + bool isValue() const { return child==nullptr; } + bool isActive() const { return child==nullptr && state; } + ChildT* child; + ValueType value; + bool state; + }; + using MapT = std::map; + MapT mTable; + ValueType mBackground; + + Tile* probeTile(const Coord &ijk) { + auto iter = mTable.find(CoordToKey(ijk)); + return iter == mTable.end() ? nullptr : &(iter->second); + } + + const Tile* probeTile(const Coord &ijk) const { + auto iter = mTable.find(CoordToKey(ijk)); + return iter == mTable.end() ? nullptr : &(iter->second); + } + + class ChildIterator + { + const RootNode *mParent; + typename MapT::const_iterator mIter; + public: + ChildIterator() : mParent(nullptr), mIter() {} + ChildIterator(const RootNode *parent) : mParent(parent), mIter(parent->mTable.begin()) { + while (mIter!=parent->mTable.end() && mIter->second.child==nullptr) ++mIter; + } + ChildIterator& operator=(const ChildIterator&) = default; + ChildT& operator*() const {NANOVDB_ASSERT(*this); return *mIter->second.child;} + ChildT* operator->() const {NANOVDB_ASSERT(*this); return mIter->second.child;} + Coord getOrigin() const { NANOVDB_ASSERT(*this); return mIter->first;} + Coord getCoord() const { NANOVDB_ASSERT(*this); return mIter->first;} + operator bool() const {return mParent && mIter!=mParent->mTable.end();} + ChildIterator& operator++() { + NANOVDB_ASSERT(mParent); + ++mIter; + while (mIter!=mParent->mTable.end() && mIter->second.child==nullptr) ++mIter; + return *this; + } + ChildIterator operator++(int) { + auto tmp = *this; + ++(*this); + return tmp; + } + uint32_t pos() const { + NANOVDB_ASSERT(mParent); + return uint32_t(std::distance(mParent->mTable.begin(), mIter)); + } + }; // Member class ChildIterator + + ChildIterator cbeginChild() const {return ChildIterator(this);} + ChildIterator cbeginChildOn() const {return ChildIterator(this);}// match openvdb + + class ValueIterator + { + const RootNode *mParent; + typename MapT::const_iterator mIter; + public: + ValueIterator() : mParent(nullptr), mIter() {} + ValueIterator(const RootNode *parent) : mParent(parent), mIter(parent->mTable.begin()) { + while (mIter!=parent->mTable.end() && mIter->second.child!=nullptr) ++mIter; + } + ValueIterator& operator=(const ValueIterator&) = default; + ValueType operator*() const {NANOVDB_ASSERT(*this); return mIter->second.value;} + bool isActive() const {NANOVDB_ASSERT(*this); return 
mIter->second.state;} + Coord getOrigin() const { NANOVDB_ASSERT(*this); return mIter->first;} + Coord getCoord() const { NANOVDB_ASSERT(*this); return mIter->first;} + operator bool() const {return mParent && mIter!=mParent->mTable.end();} + ValueIterator& operator++() { + NANOVDB_ASSERT(mParent); + ++mIter; + while (mIter!=mParent->mTable.end() && mIter->second.child!=nullptr) ++mIter; + return *this;; + } + ValueIterator operator++(int) { + auto tmp = *this; + ++(*this); + return tmp; + } + uint32_t pos() const { + NANOVDB_ASSERT(mParent); + return uint32_t(std::distance(mParent->mTable.begin(), mIter)); + } + }; // Member class ValueIterator + + ValueIterator beginValue() {return ValueIterator(this);} + ValueIterator cbeginValueAll() const {return ValueIterator(this);} + + class ValueOnIterator + { + const RootNode *mParent; + typename MapT::const_iterator mIter; + public: + ValueOnIterator() : mParent(nullptr), mIter() {} + ValueOnIterator(const RootNode *parent) : mParent(parent), mIter(parent->mTable.begin()) { + while (mIter!=parent->mTable.end() && (mIter->second.child!=nullptr || !mIter->second.state)) ++mIter; + } + ValueOnIterator& operator=(const ValueOnIterator&) = default; + ValueType operator*() const {NANOVDB_ASSERT(*this); return mIter->second.value;} + Coord getOrigin() const { NANOVDB_ASSERT(*this); return mIter->first;} + Coord getCoord() const { NANOVDB_ASSERT(*this); return mIter->first;} + operator bool() const {return mParent && mIter!=mParent->mTable.end();} + ValueOnIterator& operator++() { + NANOVDB_ASSERT(mParent); + ++mIter; + while (mIter!=mParent->mTable.end() && (mIter->second.child!=nullptr || !mIter->second.state)) ++mIter; + return *this;; + } + ValueOnIterator operator++(int) { + auto tmp = *this; + ++(*this); + return tmp; + } + uint32_t pos() const { + NANOVDB_ASSERT(mParent); + return uint32_t(std::distance(mParent->mTable.begin(), mIter)); + } + }; // Member class ValueOnIterator + + ValueOnIterator beginValueOn() {return ValueOnIterator(this);} + ValueOnIterator cbeginValueOn() const {return ValueOnIterator(this);} + + class TileIterator + { + const RootNode *mParent; + typename MapT::const_iterator mIter; + public: + TileIterator() : mParent(nullptr), mIter() {} + TileIterator(const RootNode *parent) : mParent(parent), mIter(parent->mTable.begin()) { + NANOVDB_ASSERT(mParent); + } + TileIterator& operator=(const TileIterator&) = default; + const Tile& operator*() const {NANOVDB_ASSERT(*this); return mIter->second;} + const Tile* operator->() const {NANOVDB_ASSERT(*this); return &(mIter->second);} + Coord getOrigin() const { NANOVDB_ASSERT(*this); return mIter->first;} + Coord getCoord() const { NANOVDB_ASSERT(*this); return mIter->first;} + operator bool() const {return mParent && mIter!=mParent->mTable.end();} + const ChildT* probeChild(ValueType &value) { + NANOVDB_ASSERT(*this); + const ChildT *child = mIter->second.child; + if (child==nullptr) value = mIter->second.value; + return child; + } + bool isValueOn() const {return mIter->second.child==nullptr && mIter->second.state;} + TileIterator& operator++() { + NANOVDB_ASSERT(mParent); + ++mIter; + return *this; + } + TileIterator operator++(int) { + auto tmp = *this; + ++(*this); + return tmp; + } + uint32_t pos() const { + NANOVDB_ASSERT(mParent); + return uint32_t(std::distance(mParent->mTable.begin(), mIter)); + } + }; // Member class TileIterator + + TileIterator beginTile() {return TileIterator(this);} + TileIterator cbeginChildAll() const {return TileIterator(this);} + + //class 
DenseIterator : public TileIterator + + RootNode(const ValueType& background) : mBackground(background) {} + RootNode(const RootNode&) = delete; // disallow copy-construction + RootNode(RootNode&&) = default; // allow move construction + RootNode& operator=(const RootNode&) = delete; // disallow copy assignment + RootNode& operator=(RootNode&&) = default; // allow move assignment + + ~RootNode() { this->clear(); } + + uint32_t tileCount() const { return uint32_t(mTable.size()); } + uint32_t getTableSize() const { return uint32_t(mTable.size()); }// match openvdb + const ValueType& background() const {return mBackground;} + + void nodeCount(std::array &count) const + { + for (auto it = this->cbeginChild(); it; ++it) { + count[ChildT::LEVEL] += 1; + it->nodeCount(count); + } + } + + bool empty() const { return mTable.empty(); } + + void clear() + { + for (auto iter = mTable.begin(); iter != mTable.end(); ++iter) delete iter->second.child; + mTable.clear(); + } + + static Coord CoordToKey(const Coord& ijk) { return ijk & ~ChildT::MASK; } + +#ifdef NANOVDB_NEW_ACCESSOR_METHODS + template + auto get(const Coord& ijk, ArgsT&&... args) const + { + if (const Tile *tile = this->probeTile(ijk)) { + if (auto *child = tile->child) return child->template get(ijk, args...); + return OpT::get(*tile, args...); + } + return OpT::get(*this, args...); + } + template + auto set(const Coord& ijk, ArgsT&&... args) + { + ChildT* child = nullptr; + const Coord key = CoordToKey(ijk); + auto iter = mTable.find(key); + if (iter == mTable.end()) { + child = new ChildT(ijk, mBackground, false); + mTable[key] = Tile(child); + } else if (iter->second.child != nullptr) { + child = iter->second.child; + } else { + child = new ChildT(ijk, iter->second.value, iter->second.state); + iter->second.child = child; + } + NANOVDB_ASSERT(child); + return child->template set(ijk, args...); + } + template + auto getAndCache(const Coord& ijk, const AccT& acc, ArgsT&&... args) const + { + if (const Tile *tile = this->probeTile(ijk)) { + if (auto *child = tile->child) { + acc.insert(ijk, child); + return child->template get(ijk, args...); + } + return OpT::get(*tile, args...); + } + return OpT::get(*this, args...); + } + + template + auto setAndCache(const Coord& ijk, const AccT& acc, ArgsT&&... args) + { + ChildT* child = nullptr; + const Coord key = CoordToKey(ijk); + auto iter = mTable.find(key); + if (iter == mTable.end()) { + child = new ChildT(ijk, mBackground, false); + mTable[key] = Tile(child); + } else if (iter->second.child != nullptr) { + child = iter->second.child; + } else { + child = new ChildT(ijk, iter->second.value, iter->second.state); + iter->second.child = child; + } + NANOVDB_ASSERT(child); + acc.insert(ijk, child); + return child->template setAndCache(ijk, acc, args...); + } + ValueType getValue(const Coord& ijk) const {return this->template get>(ijk);} + ValueType getValue(int i, int j, int k) const {return this->template get>(Coord(i,j,k));} + ValueType operator()(const Coord& ijk) const {return this->template get>(ijk);} + ValueType operator()(int i, int j, int k) const {return this->template get>(Coord(i,j,k));} + void setValue(const Coord& ijk, const ValueType& value) {this->template set>(ijk, value);} + bool probeValue(const Coord& ijk, ValueType& value) const {return this->template get>(ijk, value);} + bool isActive(const Coord& ijk) const {return this->template get>(ijk);} +#else + ValueType getValue(const Coord& ijk) const + { +#if 1 + if (auto *tile = this->probeTile(ijk)) return tile->child ? 
tile->child->getValue(ijk) : tile->value; + return mBackground; +#else + auto iter = mTable.find(CoordToKey(ijk)); + if (iter == mTable.end()) { + return mBackground; + } else if (iter->second.child) { + return iter->second.child->getValue(ijk); + } else { + return iter->second.value; + } +#endif + } + ValueType getValue(int i, int j, int k) const {return this->getValue(Coord(i,j,k));} + + void setValue(const Coord& ijk, const ValueType& value) + { + ChildT* child = nullptr; + const Coord key = CoordToKey(ijk); + auto iter = mTable.find(key); + if (iter == mTable.end()) { + child = new ChildT(ijk, mBackground, false); + mTable[key] = Tile(child); + } else if (iter->second.child != nullptr) { + child = iter->second.child; + } else { + child = new ChildT(ijk, iter->second.value, iter->second.state); + iter->second.child = child; + } + NANOVDB_ASSERT(child); + child->setValue(ijk, value); + } + + template + bool isActiveAndCache(const Coord& ijk, AccT& acc) const + { + auto iter = mTable.find(CoordToKey(ijk)); + if (iter == mTable.end()) + return false; + if (iter->second.child) { + acc.insert(ijk, iter->second.child); + return iter->second.child->isActiveAndCache(ijk, acc); + } + return iter->second.state; + } + + template + ValueType getValueAndCache(const Coord& ijk, AccT& acc) const + { + auto iter = mTable.find(CoordToKey(ijk)); + if (iter == mTable.end()) + return mBackground; + if (iter->second.child) { + acc.insert(ijk, iter->second.child); + return iter->second.child->getValueAndCache(ijk, acc); + } + return iter->second.value; + } + + template + void setValueAndCache(const Coord& ijk, const ValueType& value, AccT& acc) + { + ChildT* child = nullptr; + const Coord key = CoordToKey(ijk); + auto iter = mTable.find(key); + if (iter == mTable.end()) { + child = new ChildT(ijk, mBackground, false); + mTable[key] = Tile(child); + } else if (iter->second.child != nullptr) { + child = iter->second.child; + } else { + child = new ChildT(ijk, iter->second.value, iter->second.state); + iter->second.child = child; + } + NANOVDB_ASSERT(child); + acc.insert(ijk, child); + child->setValueAndCache(ijk, value, acc); + } + template + void setValueOnAndCache(const Coord& ijk, AccT& acc) + { + ChildT* child = nullptr; + const Coord key = CoordToKey(ijk); + auto iter = mTable.find(key); + if (iter == mTable.end()) { + child = new ChildT(ijk, mBackground, false); + mTable[key] = Tile(child); + } else if (iter->second.child != nullptr) { + child = iter->second.child; + } else { + child = new ChildT(ijk, iter->second.value, iter->second.state); + iter->second.child = child; + } + NANOVDB_ASSERT(child); + acc.insert(ijk, child); + child->setValueOnAndCache(ijk, acc); + } + template + void touchLeafAndCache(const Coord &ijk, AccT& acc) + { + ChildT* child = nullptr; + const Coord key = CoordToKey(ijk); + auto iter = mTable.find(key); + if (iter == mTable.end()) { + child = new ChildT(ijk, mBackground, false); + mTable[key] = Tile(child); + } else if (iter->second.child != nullptr) { + child = iter->second.child; + } else { + child = new ChildT(ijk, iter->second.value, iter->second.state); + iter->second.child = child; + } + acc.insert(ijk, child); + child->touchLeafAndCache(ijk, acc); + } +#endif// NANOVDB_NEW_ACCESSOR_METHODS + + template + uint32_t nodeCount() const + { + static_assert(util::is_same::value, "Root::getNodes: Invalid type"); + static_assert(NodeT::LEVEL < LEVEL, "Root::getNodes: LEVEL error"); + uint32_t sum = 0; + for (auto iter = mTable.begin(); iter != mTable.end(); ++iter) { + if 
(iter->second.child == nullptr) continue; // skip tiles + if constexpr(util::is_same::value) { //resolved at compile-time + ++sum; + } else { + sum += iter->second.child->template nodeCount(); + } + } + return sum; + } + + template + void getNodes(std::vector& array) + { + static_assert(util::is_same::value, "Root::getNodes: Invalid type"); + static_assert(NodeT::LEVEL < LEVEL, "Root::getNodes: LEVEL error"); + for (auto iter = mTable.begin(); iter != mTable.end(); ++iter) { + if (iter->second.child == nullptr) + continue; + if constexpr(util::is_same::value) { //resolved at compile-time + array.push_back(reinterpret_cast(iter->second.child)); + } else { + iter->second.child->getNodes(array); + } + } + } + + void addChild(ChildT*& child) + { + NANOVDB_ASSERT(child); + const Coord key = CoordToKey(child->mOrigin); + auto iter = mTable.find(key); + if (iter != mTable.end() && iter->second.child != nullptr) { // existing child node + delete iter->second.child; + iter->second.child = child; + } else { + mTable[key] = Tile(child); + } + child = nullptr; + } + + /// @brief Add a tile containing voxel (i, j, k) at the specified tree level, + /// creating a new branch if necessary. Delete any existing lower-level nodes + /// that contain (x, y, z). + /// @tparam level tree level at which the tile is inserted. Must be 1, 2 or 3. + /// @param ijk Index coordinate that map to the tile being inserted + /// @param value Value of the tile + /// @param state Binary state of the tile + template + void addTile(const Coord& ijk, const ValueType& value, bool state) + { + static_assert(level > 0 && level <= LEVEL, "invalid template value of level"); + const Coord key = CoordToKey(ijk); + auto iter = mTable.find(key); + if constexpr(level == LEVEL) { + if (iter == mTable.end()) { + mTable[key] = Tile(value, state); + } else if (iter->second.child == nullptr) { + iter->second.value = value; + iter->second.state = state; + } else { + delete iter->second.child; + iter->second.child = nullptr; + iter->second.value = value; + iter->second.state = state; + } + } else if constexpr(level < LEVEL) { + ChildT* child = nullptr; + if (iter == mTable.end()) { + child = new ChildT(ijk, mBackground, false); + mTable[key] = Tile(child); + } else if (iter->second.child != nullptr) { + child = iter->second.child; + } else { + child = new ChildT(ijk, iter->second.value, iter->second.state); + iter->second.child = child; + } + child->template addTile(ijk, value, state); + } + } + + template + void addNode(NodeT*& node) + { + if constexpr(util::is_same::value) { //resolved at compile-time + this->addChild(reinterpret_cast(node)); + } else { + ChildT* child = nullptr; + const Coord key = CoordToKey(node->mOrigin); + auto iter = mTable.find(key); + if (iter == mTable.end()) { + child = new ChildT(node->mOrigin, mBackground, false); + mTable[key] = Tile(child); + } else if (iter->second.child != nullptr) { + child = iter->second.child; + } else { + child = new ChildT(node->mOrigin, iter->second.value, iter->second.state); + iter->second.child = child; + } + child->addNode(node); + } + } + + void merge(RootNode &other) + { + for (auto iter1 = other.mTable.begin(); iter1 != other.mTable.end(); ++iter1) { + if (iter1->second.child == nullptr) continue;// ignore input tiles + auto iter2 = mTable.find(iter1->first); + if (iter2 == mTable.end() || iter2->second.child == nullptr) { + mTable[iter1->first] = Tile(iter1->second.child); + iter1->second.child = nullptr; + } else { + iter2->second.child->merge(*iter1->second.child); + } + } + 
other.clear(); + } + + template + typename util::enable_if::value>::type + signedFloodFill(T outside); + +}; // tools::build::RootNode + +//================================================================================================ + +template +template +inline typename util::enable_if::value>::type +RootNode::signedFloodFill(T outside) +{ + std::map nodeKeys; + for (auto iter = mTable.begin(); iter != mTable.end(); ++iter) { + if (iter->second.child == nullptr) + continue; + nodeKeys.insert(std::pair(iter->first, iter->second.child)); + } + + // We employ a simple z-scanline algorithm that inserts inactive tiles with + // the inside value if they are sandwiched between inside child nodes only! + auto b = nodeKeys.begin(), e = nodeKeys.end(); + if (b == e) + return; + for (auto a = b++; b != e; ++a, ++b) { + Coord d = b->first - a->first; // delta of neighboring coordinates + if (d[0] != 0 || d[1] != 0 || d[2] == int(ChildT::DIM)) + continue; // not same z-scanline or neighbors + const ValueType fill[] = {a->second->getLastValue(), b->second->getFirstValue()}; + if (!(fill[0] < 0) || !(fill[1] < 0)) + continue; // scanline isn't inside + Coord c = a->first + Coord(0u, 0u, ChildT::DIM); + for (; c[2] != b->first[2]; c[2] += ChildT::DIM) { + const Coord key = RootNode::CoordToKey(c); + mTable[key] = typename RootNode::Tile(-outside, false); // inactive tile + } + } +} // tools::build::RootNode::signedFloodFill + +// ----------------------------> InternalNode <-------------------------------------- + +template +struct InternalNode +{ + using ValueType = typename ChildT::ValueType; + using BuildType = typename ChildT::BuildType; + using ChildNodeType = ChildT; + using LeafNodeType = typename ChildT::LeafNodeType; + static constexpr uint32_t LOG2DIM = ChildT::LOG2DIM + 1; + static constexpr uint32_t TOTAL = LOG2DIM + ChildT::TOTAL; //dimension in index space + static constexpr uint32_t DIM = 1u << TOTAL; + static constexpr uint32_t SIZE = 1u << (3 * LOG2DIM); //number of tile values (or child pointers) + static constexpr uint32_t MASK = DIM - 1; + static constexpr uint32_t LEVEL = 1 + ChildT::LEVEL; // level 0 = leaf + static constexpr uint64_t NUM_VALUES = uint64_t(1) << (3 * TOTAL); // total voxel count represented by this node + using MaskT = Mask; + template + using MaskIterT = typename MaskT::template Iterator; + using NanoNodeT = typename NanoNode::Type; + + struct Tile { + Tile(ChildT* c = nullptr) : child(c) {} + Tile(const ValueType& v) : value(v) {} + union{ + ChildT* child; + ValueType value; + }; + }; + Coord mOrigin; + MaskT mValueMask; + MaskT mChildMask; + Tile mTable[SIZE]; + + union { + NanoNodeT *mDstNode; + uint64_t mDstOffset; + }; + + /// @brief Visits child nodes of this node only + class ChildIterator : public MaskIterT + { + using BaseT = MaskIterT; + const InternalNode *mParent; + public: + ChildIterator() : BaseT(), mParent(nullptr) {} + ChildIterator(const InternalNode* parent) : BaseT(parent->mChildMask.beginOn()), mParent(parent) {} + ChildIterator& operator=(const ChildIterator&) = default; + const ChildT& operator*() const {NANOVDB_ASSERT(*this); return *mParent->mTable[BaseT::pos()].child;} + const ChildT* operator->() const {NANOVDB_ASSERT(*this); return mParent->mTable[BaseT::pos()].child;} + Coord getCoord() const { NANOVDB_ASSERT(*this); return (*this)->origin();} + }; // Member class ChildIterator + + ChildIterator beginChild() {return ChildIterator(this);} + ChildIterator cbeginChildOn() const {return ChildIterator(this);}// match openvdb + + /// 
@brief Visits all tile values in this node, i.e. both inactive and active tiles + class ValueIterator : public MaskIterT + { + using BaseT = MaskIterT; + const InternalNode *mParent; + public: + ValueIterator() : BaseT(), mParent(nullptr) {} + ValueIterator(const InternalNode* parent) : BaseT(parent->mChildMask.beginOff()), mParent(parent) {} + ValueIterator& operator=(const ValueIterator&) = default; + ValueType operator*() const {NANOVDB_ASSERT(*this); return mParent->mTable[BaseT::pos()].value;} + Coord getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(BaseT::pos());} + bool isActive() const { NANOVDB_ASSERT(*this); return mParent->mValueMask.isOn(BaseT::pos());} + }; // Member class ValueIterator + + ValueIterator beginValue() {return ValueIterator(this);} + ValueIterator cbeginValueAll() const {return ValueIterator(this);} + + /// @brief Visits active tile values of this node only + class ValueOnIterator : public MaskIterT + { + using BaseT = MaskIterT; + const InternalNode *mParent; + public: + ValueOnIterator() : BaseT(), mParent(nullptr) {} + ValueOnIterator(const InternalNode* parent) : BaseT(parent->mValueMask.beginOn()), mParent(parent) {} + ValueOnIterator& operator=(const ValueOnIterator&) = default; + ValueType operator*() const {NANOVDB_ASSERT(*this); return mParent->mTable[BaseT::pos()].value;} + Coord getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(BaseT::pos());} + }; // Member class ValueOnIterator + + ValueOnIterator beginValueOn() {return ValueOnIterator(this);} + ValueOnIterator cbeginValueOn() const {return ValueOnIterator(this);} + + /// @brief Visits all tile values and child nodes of this node + class DenseIterator : public MaskT::DenseIterator + { + using BaseT = typename MaskT::DenseIterator; + const InternalNode *mParent; + public: + DenseIterator() : BaseT(), mParent(nullptr) {} + DenseIterator(const InternalNode* parent) : BaseT(0), mParent(parent) {} + DenseIterator& operator=(const DenseIterator&) = default; + ChildT* probeChild(ValueType& value) const + { + NANOVDB_ASSERT(mParent && bool(*this)); + ChildT *child = nullptr; + if (mParent->mChildMask.isOn(BaseT::pos())) { + child = mParent->mTable[BaseT::pos()].child; + } else { + value = mParent->mTable[BaseT::pos()].value; + } + return child; + } + Coord getCoord() const { NANOVDB_ASSERT(mParent && bool(*this)); return mParent->offsetToGlobalCoord(BaseT::pos());} + }; // Member class DenseIterator + + DenseIterator beginDense() {return DenseIterator(this);} + DenseIterator cbeginChildAll() const {return DenseIterator(this);}// matches openvdb + + InternalNode(const Coord& origin, const ValueType& value, bool state) + : mOrigin(origin & ~MASK) + , mValueMask(state) + , mChildMask() + , mDstOffset(0) + { + for (uint32_t i = 0; i < SIZE; ++i) mTable[i].value = value; + } + InternalNode(const InternalNode&) = delete; // disallow copy-construction + InternalNode(InternalNode&&) = delete; // disallow move construction + InternalNode& operator=(const InternalNode&) = delete; // disallow copy assignment + InternalNode& operator=(InternalNode&&) = delete; // disallow move assignment + ~InternalNode() + { + for (auto iter = mChildMask.beginOn(); iter; ++iter) { + delete mTable[*iter].child; + } + } + const MaskT& getValueMask() const {return mValueMask;} + const MaskT& valueMask() const {return mValueMask;} + const MaskT& getChildMask() const {return mChildMask;} + const MaskT& childMask() const {return mChildMask;} + const Coord& origin() const {return 
mOrigin;} + + void nodeCount(std::array &count) const + { + count[ChildT::LEVEL] += mChildMask.countOn(); + if constexpr(ChildT::LEVEL>0) { + for (auto it = const_cast(this)->beginChild(); it; ++it) it->nodeCount(count); + } + } + + static uint32_t CoordToOffset(const Coord& ijk) + { + return (((ijk[0] & int32_t(MASK)) >> ChildT::TOTAL) << (2 * LOG2DIM)) + + (((ijk[1] & int32_t(MASK)) >> ChildT::TOTAL) << (LOG2DIM)) + + ((ijk[2] & int32_t(MASK)) >> ChildT::TOTAL); + } + + static Coord OffsetToLocalCoord(uint32_t n) + { + NANOVDB_ASSERT(n < SIZE); + const uint32_t m = n & ((1 << 2 * LOG2DIM) - 1); + return Coord(n >> 2 * LOG2DIM, m >> LOG2DIM, m & ((1 << LOG2DIM) - 1)); + } + + void localToGlobalCoord(Coord& ijk) const + { + ijk <<= ChildT::TOTAL; + ijk += mOrigin; + } + + Coord offsetToGlobalCoord(uint32_t n) const + { + Coord ijk = InternalNode::OffsetToLocalCoord(n); + this->localToGlobalCoord(ijk); + return ijk; + } + + ValueType getFirstValue() const { return mChildMask.isOn(0) ? mTable[0].child->getFirstValue() : mTable[0].value; } + ValueType getLastValue() const { return mChildMask.isOn(SIZE - 1) ? mTable[SIZE - 1].child->getLastValue() : mTable[SIZE - 1].value; } + + template + auto get(const Coord& ijk, ArgsT&&... args) const + { + const uint32_t n = CoordToOffset(ijk); + if (mChildMask.isOn(n)) return mTable[n].child->template get(ijk, args...); + return OpT::get(*this, n, args...); + } + + template + auto set(const Coord& ijk, ArgsT&&... args) + { + const uint32_t n = CoordToOffset(ijk); + ChildT* child = nullptr; + if (mChildMask.isOn(n)) { + child = mTable[n].child; + } else { + child = new ChildT(ijk, mTable[n].value, mValueMask.isOn(n)); + mTable[n].child = child; + mChildMask.setOn(n); + } + NANOVDB_ASSERT(child); + return child->template set(ijk, args...); + } + + template + auto getAndCache(const Coord& ijk, const AccT& acc, ArgsT&&... args) const + { + const uint32_t n = CoordToOffset(ijk); + if (mChildMask.isOff(n)) return OpT::get(*this, n, args...); + ChildT* child = mTable[n].child; + acc.insert(ijk, child); + if constexpr(ChildT::LEVEL == 0) { + return child->template get(ijk, args...); + } else { + return child->template getAndCache(ijk, acc, args...); + } + } + + template + auto setAndCache(const Coord& ijk, const AccT& acc, ArgsT&&... 
args) + { + const uint32_t n = CoordToOffset(ijk); + ChildT* child = nullptr; + if (mChildMask.isOn(n)) { + child = mTable[n].child; + } else { + child = new ChildT(ijk, mTable[n].value, mValueMask.isOn(n)); + mTable[n].child = child; + mChildMask.setOn(n); + } + NANOVDB_ASSERT(child); + acc.insert(ijk, child); + if constexpr(ChildT::LEVEL == 0) { + return child->template set(ijk, args...); + } else { + return child->template setAndCache(ijk, acc, args...); + } + } + +#ifdef NANOVDB_NEW_ACCESSOR_METHODS + ValueType getValue(const Coord& ijk) const {return this->template get>(ijk);} + LeafNodeType& setValue(const Coord& ijk, const ValueType& value){return this->template set>(ijk, value);} +#else + ValueType getValue(const Coord& ijk) const + { + const uint32_t n = CoordToOffset(ijk); + if (mChildMask.isOn(n)) { + return mTable[n].child->getValue(ijk); + } + return mTable[n].value; + } + void setValue(const Coord& ijk, const ValueType& value) + { + const uint32_t n = CoordToOffset(ijk); + ChildT* child = nullptr; + if (mChildMask.isOn(n)) { + child = mTable[n].child; + } else { + child = new ChildT(ijk, mTable[n].value, mValueMask.isOn(n)); + mTable[n].child = child; + mChildMask.setOn(n); + } + child->setValue(ijk, value); + } + + template + ValueType getValueAndCache(const Coord& ijk, AccT& acc) const + { + const uint32_t n = CoordToOffset(ijk); + if (mChildMask.isOn(n)) { + acc.insert(ijk, const_cast(mTable[n].child)); + return mTable[n].child->getValueAndCache(ijk, acc); + } + return mTable[n].value; + } + + template + void setValueAndCache(const Coord& ijk, const ValueType& value, AccT& acc) + { + const uint32_t n = CoordToOffset(ijk); + ChildT* child = nullptr; + if (mChildMask.isOn(n)) { + child = mTable[n].child; + } else { + child = new ChildT(ijk, mTable[n].value, mValueMask.isOn(n)); + mTable[n].child = child; + mChildMask.setOn(n); + } + acc.insert(ijk, child); + child->setValueAndCache(ijk, value, acc); + } + + template + void setValueOnAndCache(const Coord& ijk, AccT& acc) + { + const uint32_t n = CoordToOffset(ijk); + ChildT* child = nullptr; + if (mChildMask.isOn(n)) { + child = mTable[n].child; + } else { + child = new ChildT(ijk, mTable[n].value, mValueMask.isOn(n)); + mTable[n].child = child; + mChildMask.setOn(n); + } + acc.insert(ijk, child); + child->setValueOnAndCache(ijk, acc); + } + + template + void touchLeafAndCache(const Coord &ijk, AccT& acc) + { + const uint32_t n = CoordToOffset(ijk); + ChildT* child = nullptr; + if (mChildMask.isOn(n)) { + child = mTable[n].child; + } else { + child = new ChildT(ijk, mTable[n].value, mValueMask.isOn(n)); + mTable[n].child = child; + mChildMask.setOn(n); + } + acc.insert(ijk, child); + if constexpr(LEVEL>1) child->touchLeafAndCache(ijk, acc); + } + template + bool isActiveAndCache(const Coord& ijk, AccT& acc) const + { + const uint32_t n = CoordToOffset(ijk); + if (mChildMask.isOn(n)) { + acc.insert(ijk, const_cast(mTable[n].child)); + return mTable[n].child->isActiveAndCache(ijk, acc); + } + return mValueMask.isOn(n); + } +#endif + + template + uint32_t nodeCount() const + { + static_assert(util::is_same::value, "Node::getNodes: Invalid type"); + NANOVDB_ASSERT(NodeT::LEVEL < LEVEL); + uint32_t sum = 0; + if constexpr(util::is_same::value) { // resolved at compile-time + sum += mChildMask.countOn(); + } else if constexpr(LEVEL>1) { + for (auto iter = mChildMask.beginOn(); iter; ++iter) { + sum += mTable[*iter].child->template nodeCount(); + } + } + return sum; + } + + template + void getNodes(std::vector& array) + { + 
static_assert(util::is_same::value, "Node::getNodes: Invalid type"); + NANOVDB_ASSERT(NodeT::LEVEL < LEVEL); + for (auto iter = mChildMask.beginOn(); iter; ++iter) { + if constexpr(util::is_same::value) { // resolved at compile-time + array.push_back(reinterpret_cast(mTable[*iter].child)); + } else if constexpr(LEVEL>1) { + mTable[*iter].child->getNodes(array); + } + } + } + + void addChild(ChildT*& child) + { + NANOVDB_ASSERT(child && (child->mOrigin & ~MASK) == this->mOrigin); + const uint32_t n = CoordToOffset(child->mOrigin); + if (mChildMask.isOn(n)) { + delete mTable[n].child; + } else { + mChildMask.setOn(n); + } + mTable[n].child = child; + child = nullptr; + } + + /// @brief Add a tile containing voxel (i, j, k) at the specified tree level, + /// creating a new branch if necessary. Delete any existing lower-level nodes + /// that contain (x, y, z). + /// @tparam level tree level at which the tile is inserted. Must be 1 or 2. + /// @param ijk Index coordinate that map to the tile being inserted + /// @param value Value of the tile + /// @param state Binary state of the tile + template + void addTile(const Coord& ijk, const ValueType& value, bool state) + { + static_assert(level > 0 && level <= LEVEL, "invalid template value of level"); + const uint32_t n = CoordToOffset(ijk); + if constexpr(level == LEVEL) { + if (mChildMask.isOn(n)) { + delete mTable[n].child; + mTable[n] = Tile(value); + } else { + mValueMask.set(n, state); + mTable[n].value = value; + } + } else if constexpr(level < LEVEL) { + ChildT* child = nullptr; + if (mChildMask.isOn(n)) { + child = mTable[n].child; + } else { + child = new ChildT(ijk, value, state); + mTable[n].child = child; + mChildMask.setOn(n); + } + child->template addTile(ijk, value, state); + } + } + + template + void addNode(NodeT*& node) + { + if constexpr(util::is_same::value) { //resolved at compile-time + this->addChild(reinterpret_cast(node)); + } else if constexpr(LEVEL>1) { + const uint32_t n = CoordToOffset(node->mOrigin); + ChildT* child = nullptr; + if (mChildMask.isOn(n)) { + child = mTable[n].child; + } else { + child = new ChildT(node->mOrigin, mTable[n].value, mValueMask.isOn(n)); + mTable[n].child = child; + mChildMask.setOn(n); + } + child->addNode(node); + } + } + + void merge(InternalNode &other) + { + for (auto iter = other.mChildMask.beginOn(); iter; ++iter) { + const uint32_t n = *iter; + if (mChildMask.isOn(n)) { + mTable[n].child->merge(*other.mTable[n].child); + } else { + mTable[n].child = other.mTable[n].child; + other.mChildMask.setOff(n); + mChildMask.setOn(n); + } + } + } + + template + typename util::enable_if::value>::type + signedFloodFill(T outside); + +}; // tools::build::InternalNode + +//================================================================================================ + +template +template +inline typename util::enable_if::value>::type +InternalNode::signedFloodFill(T outside) +{ + const uint32_t first = *mChildMask.beginOn(); + if (first < NUM_VALUES) { + bool xInside = mTable[first].child->getFirstValue() < 0; + bool yInside = xInside, zInside = xInside; + for (uint32_t x = 0; x != (1 << LOG2DIM); ++x) { + const uint32_t x00 = x << (2 * LOG2DIM); // offset for block(x, 0, 0) + if (mChildMask.isOn(x00)) { + xInside = mTable[x00].child->getLastValue() < 0; + } + yInside = xInside; + for (uint32_t y = 0; y != (1u << LOG2DIM); ++y) { + const uint32_t xy0 = x00 + (y << LOG2DIM); // offset for block(x, y, 0) + if (mChildMask.isOn(xy0)) + yInside = mTable[xy0].child->getLastValue() < 0; + zInside = 
yInside; + for (uint32_t z = 0; z != (1 << LOG2DIM); ++z) { + const uint32_t xyz = xy0 + z; // offset for block(x, y, z) + if (mChildMask.isOn(xyz)) { + zInside = mTable[xyz].child->getLastValue() < 0; + } else { + mTable[xyz].value = zInside ? -outside : outside; + } + } + } + } + } +} // tools::build::InternalNode::signedFloodFill + +// ----------------------------> LeafNode <-------------------------------------- + +template +struct LeafNode +{ + using BuildType = BuildT; + using ValueType = typename BuildToValueMap::type; + using LeafNodeType = LeafNode; + static constexpr uint32_t LOG2DIM = 3; + static constexpr uint32_t TOTAL = LOG2DIM; // needed by parent nodes + static constexpr uint32_t DIM = 1u << TOTAL; + static constexpr uint32_t SIZE = 1u << 3 * LOG2DIM; // total number of voxels represented by this node + static constexpr uint32_t MASK = DIM - 1; // mask for bit operations + static constexpr uint32_t LEVEL = 0; // level 0 = leaf + static constexpr uint64_t NUM_VALUES = uint64_t(1) << (3 * TOTAL); // total voxel count represented by this node + using NodeMaskType = Mask; + template + using MaskIterT = typename Mask::template Iterator; + using NanoLeafT = typename NanoNode::Type; + + Coord mOrigin; + Mask mValueMask; + ValueType mValues[SIZE]; + union { + NanoLeafT *mDstNode; + uint64_t mDstOffset; + }; + + /// @brief Visits all active values in a leaf node + class ValueOnIterator : public MaskIterT + { + using BaseT = MaskIterT; + const LeafNode *mParent; + public: + ValueOnIterator() : BaseT(), mParent(nullptr) {} + ValueOnIterator(const LeafNode* parent) : BaseT(parent->mValueMask.beginOn()), mParent(parent) {} + ValueOnIterator& operator=(const ValueOnIterator&) = default; + ValueType operator*() const {NANOVDB_ASSERT(*this); return mParent->mValues[BaseT::pos()];} + Coord getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(BaseT::pos());} + }; // Member class ValueOnIterator + + ValueOnIterator beginValueOn() {return ValueOnIterator(this);} + ValueOnIterator cbeginValueOn() const {return ValueOnIterator(this);} + + /// @brief Visits all inactive values in a leaf node + class ValueOffIterator : public MaskIterT + { + using BaseT = MaskIterT; + const LeafNode *mParent; + public: + ValueOffIterator() : BaseT(), mParent(nullptr) {} + ValueOffIterator(const LeafNode* parent) : BaseT(parent->mValueMask.beginOff()), mParent(parent) {} + ValueOffIterator& operator=(const ValueOffIterator&) = default; + ValueType operator*() const {NANOVDB_ASSERT(*this); return mParent->mValues[BaseT::pos()];} + Coord getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(BaseT::pos());} + }; // Member class ValueOffIterator + + ValueOffIterator beginValueOff() {return ValueOffIterator(this);} + ValueOffIterator cbeginValueOff() const {return ValueOffIterator(this);} + + /// @brief Visits all values in a leaf node, i.e. 
both active and inactive values + class ValueIterator + { + const LeafNode *mParent; + uint32_t mPos; + public: + ValueIterator() : mParent(nullptr), mPos(1u << 3 * LOG2DIM) {} + ValueIterator(const LeafNode* parent) : mParent(parent), mPos(0) {NANOVDB_ASSERT(parent);} + ValueIterator& operator=(const ValueIterator&) = default; + ValueType operator*() const { NANOVDB_ASSERT(*this); return mParent->mValues[mPos];} + Coord getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(mPos);} + bool isActive() const { NANOVDB_ASSERT(*this); return mParent->mValueMask.isOn(mPos);} + operator bool() const {return mPos < SIZE;} + ValueIterator& operator++() {++mPos; return *this;} + ValueIterator operator++(int) { + auto tmp = *this; + ++(*this); + return tmp; + } + }; // Member class ValueIterator + + ValueIterator beginValue() {return ValueIterator(this);} + ValueIterator cbeginValueAll() const {return ValueIterator(this);} + + LeafNode(const Coord& ijk, const ValueType& value, bool state) + : mOrigin(ijk & ~MASK) + , mValueMask(state) //invalid + , mDstOffset(0) + { + ValueType* target = mValues; + uint32_t n = SIZE; + while (n--) { + *target++ = value; + } + } + LeafNode(const LeafNode&) = delete; // disallow copy-construction + LeafNode(LeafNode&&) = delete; // disallow move construction + LeafNode& operator=(const LeafNode&) = delete; // disallow copy assignment + LeafNode& operator=(LeafNode&&) = delete; // disallow move assignment + ~LeafNode() = default; + + const Mask& getValueMask() const {return mValueMask;} + const Mask& valueMask() const {return mValueMask;} + const Coord& origin() const {return mOrigin;} + + /// @brief Return the linear offset corresponding to the given coordinate + static uint32_t CoordToOffset(const Coord& ijk) + { + return ((ijk[0] & int32_t(MASK)) << (2 * LOG2DIM)) + + ((ijk[1] & int32_t(MASK)) << LOG2DIM) + + (ijk[2] & int32_t(MASK)); + } + + static Coord OffsetToLocalCoord(uint32_t n) + { + NANOVDB_ASSERT(n < SIZE); + const int32_t m = n & ((1 << 2 * LOG2DIM) - 1); + return Coord(n >> 2 * LOG2DIM, m >> LOG2DIM, m & int32_t(MASK)); + } + + void localToGlobalCoord(Coord& ijk) const + { + ijk += mOrigin; + } + + Coord offsetToGlobalCoord(uint32_t n) const + { + Coord ijk = LeafNode::OffsetToLocalCoord(n); + this->localToGlobalCoord(ijk); + return ijk; + } + + ValueType getFirstValue() const { return mValues[0]; } + ValueType getLastValue() const { return mValues[SIZE - 1]; } + const ValueType& getValue(uint32_t i) const {return mValues[i];} + const ValueType& getValue(const Coord& ijk) const {return mValues[CoordToOffset(ijk)];} + + template + auto get(const Coord& ijk, ArgsT&&... args) const {return OpT::get(*this, CoordToOffset(ijk), args...);} + + template + auto set(const Coord& ijk, ArgsT&&... 
args) {return OpT::set(*this, CoordToOffset(ijk), args...);} + +#ifndef NANOVDB_NEW_ACCESSOR_METHODS + template + const ValueType& getValueAndCache(const Coord& ijk, const AccT&) const + { + return mValues[CoordToOffset(ijk)]; + } + + template + void setValueAndCache(const Coord& ijk, const ValueType& value, const AccT&) + { + const uint32_t n = CoordToOffset(ijk); + mValueMask.setOn(n); + mValues[n] = value; + } + + template + void setValueOnAndCache(const Coord& ijk, const AccT&) + { + const uint32_t n = CoordToOffset(ijk); + mValueMask.setOn(n); + } + + template + bool isActiveAndCache(const Coord& ijk, const AccT&) const + { + return mValueMask.isOn(CoordToOffset(ijk)); + } +#endif + + void setValue(uint32_t n, const ValueType& value) + { + mValueMask.setOn(n); + mValues[n] = value; + } + void setValue(const Coord& ijk, const ValueType& value){this->setValue(CoordToOffset(ijk), value);} + + void merge(LeafNode &other) + { + other.mValueMask -= mValueMask; + for (auto iter = other.mValueMask.beginOn(); iter; ++iter) { + const uint32_t n = *iter; + mValues[n] = other.mValues[n]; + } + mValueMask |= other.mValueMask; + } + + template + typename util::enable_if::value>::type + signedFloodFill(T outside); + +}; // tools::build::LeafNode + +//================================================================================================ + +template <> +struct LeafNode +{ + using ValueType = bool; + using BuildType = ValueMask; + using LeafNodeType = LeafNode; + static constexpr uint32_t LOG2DIM = 3; + static constexpr uint32_t TOTAL = LOG2DIM; // needed by parent nodes + static constexpr uint32_t DIM = 1u << TOTAL; + static constexpr uint32_t SIZE = 1u << 3 * LOG2DIM; // total number of voxels represented by this node + static constexpr uint32_t MASK = DIM - 1; // mask for bit operations + static constexpr uint32_t LEVEL = 0; // level 0 = leaf + static constexpr uint64_t NUM_VALUES = uint64_t(1) << (3 * TOTAL); // total voxel count represented by this node + using NodeMaskType = Mask; + template + using MaskIterT = typename Mask::template Iterator; + using NanoLeafT = typename NanoNode::Type; + + Coord mOrigin; + Mask mValueMask; + union { + NanoLeafT *mDstNode; + uint64_t mDstOffset; + }; + + /// @brief Visits all active values in a leaf node + class ValueOnIterator : public MaskIterT + { + using BaseT = MaskIterT; + const LeafNode *mParent; + public: + ValueOnIterator() : BaseT(), mParent(nullptr) {} + ValueOnIterator(const LeafNode* parent) : BaseT(parent->mValueMask.beginOn()), mParent(parent) {} + ValueOnIterator& operator=(const ValueOnIterator&) = default; + bool operator*() const {NANOVDB_ASSERT(*this); return true;} + Coord getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(BaseT::pos());} + }; // Member class ValueOnIterator + + ValueOnIterator beginValueOn() {return ValueOnIterator(this);} + ValueOnIterator cbeginValueOn() const {return ValueOnIterator(this);} + + /// @brief Visits all inactive values in a leaf node + class ValueOffIterator : public MaskIterT + { + using BaseT = MaskIterT; + const LeafNode *mParent; + public: + ValueOffIterator() : BaseT(), mParent(nullptr) {} + ValueOffIterator(const LeafNode* parent) : BaseT(parent->mValueMask.beginOff()), mParent(parent) {} + ValueOffIterator& operator=(const ValueOffIterator&) = default; + bool operator*() const {NANOVDB_ASSERT(*this); return false;} + Coord getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(BaseT::pos());} + }; // Member class ValueOffIterator + + 
ValueOffIterator beginValueOff() {return ValueOffIterator(this);} + ValueOffIterator cbeginValueOff() const {return ValueOffIterator(this);} + + /// @brief Visits all values in a leaf node, i.e. both active and inactive values + class ValueIterator + { + const LeafNode *mParent; + uint32_t mPos; + public: + ValueIterator() : mParent(nullptr), mPos(1u << 3 * LOG2DIM) {} + ValueIterator(const LeafNode* parent) : mParent(parent), mPos(0) {NANOVDB_ASSERT(parent);} + ValueIterator& operator=(const ValueIterator&) = default; + bool operator*() const { NANOVDB_ASSERT(*this); return mParent->mValueMask.isOn(mPos);} + Coord getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(mPos);} + bool isActive() const { NANOVDB_ASSERT(*this); return mParent->mValueMask.isOn(mPos);} + operator bool() const {return mPos < SIZE;} + ValueIterator& operator++() {++mPos; return *this;} + ValueIterator operator++(int) { + auto tmp = *this; + ++(*this); + return tmp; + } + }; // Member class ValueIterator + + ValueIterator beginValue() {return ValueIterator(this);} + ValueIterator cbeginValueAll() const {return ValueIterator(this);} + + LeafNode(const Coord& ijk, const ValueType&, bool state) + : mOrigin(ijk & ~MASK) + , mValueMask(state) //invalid + , mDstOffset(0) + { + } + LeafNode(const LeafNode&) = delete; // disallow copy-construction + LeafNode(LeafNode&&) = delete; // disallow move construction + LeafNode& operator=(const LeafNode&) = delete; // disallow copy assignment + LeafNode& operator=(LeafNode&&) = delete; // disallow move assignment + ~LeafNode() = default; + + const Mask& valueMask() const {return mValueMask;} + const Mask& getValueMask() const {return mValueMask;} + const Coord& origin() const {return mOrigin;} + + /// @brief Return the linear offset corresponding to the given coordinate + static uint32_t CoordToOffset(const Coord& ijk) + { + return ((ijk[0] & int32_t(MASK)) << (2 * LOG2DIM)) + + ((ijk[1] & int32_t(MASK)) << LOG2DIM) + + (ijk[2] & int32_t(MASK)); + } + + static Coord OffsetToLocalCoord(uint32_t n) + { + NANOVDB_ASSERT(n < SIZE); + const int32_t m = n & ((1 << 2 * LOG2DIM) - 1); + return Coord(n >> 2 * LOG2DIM, m >> LOG2DIM, m & int32_t(MASK)); + } + + void localToGlobalCoord(Coord& ijk) const {ijk += mOrigin;} + + Coord offsetToGlobalCoord(uint32_t n) const + { + Coord ijk = LeafNode::OffsetToLocalCoord(n); + this->localToGlobalCoord(ijk); + return ijk; + } + + bool getFirstValue() const { return mValueMask.isOn(0); } + bool getLastValue() const { return mValueMask.isOn(SIZE - 1); } + bool getValue(uint32_t i) const {return mValueMask.isOn(i);} + bool getValue(const Coord& ijk) const {return mValueMask.isOn(CoordToOffset(ijk));} + + template + auto get(const Coord& ijk, ArgsT&&... args) const {return OpT::get(*this, CoordToOffset(ijk), args...);} + + template + auto set(const Coord& ijk, ArgsT&&... 
args) {return OpT::set(*this, CoordToOffset(ijk), args...);} + +#ifndef NANOVDB_NEW_ACCESSOR_METHODS + template + bool getValueAndCache(const Coord& ijk, const AccT&) const + { + return mValueMask.isOn(CoordToOffset(ijk)); + } + + template + void setValueAndCache(const Coord& ijk, bool, const AccT&) + { + const uint32_t n = CoordToOffset(ijk); + mValueMask.setOn(n); + } + + template + void setValueOnAndCache(const Coord& ijk, const AccT&) + { + const uint32_t n = CoordToOffset(ijk); + mValueMask.setOn(n); + } + + template + bool isActiveAndCache(const Coord& ijk, const AccT&) const + { + return mValueMask.isOn(CoordToOffset(ijk)); + } +#endif + + void setValue(uint32_t n, bool) {mValueMask.setOn(n);} + void setValue(const Coord& ijk) {mValueMask.setOn(CoordToOffset(ijk));} + + void merge(LeafNode &other) + { + mValueMask |= other.mValueMask; + } + +}; // tools::build::LeafNode + +//================================================================================================ + +template <> +struct LeafNode +{ + using ValueType = bool; + using BuildType = ValueMask; + using LeafNodeType = LeafNode; + static constexpr uint32_t LOG2DIM = 3; + static constexpr uint32_t TOTAL = LOG2DIM; // needed by parent nodes + static constexpr uint32_t DIM = 1u << TOTAL; + static constexpr uint32_t SIZE = 1u << 3 * LOG2DIM; // total number of voxels represented by this node + static constexpr uint32_t MASK = DIM - 1; // mask for bit operations + static constexpr uint32_t LEVEL = 0; // level 0 = leaf + static constexpr uint64_t NUM_VALUES = uint64_t(1) << (3 * TOTAL); // total voxel count represented by this node + using NodeMaskType = Mask; + template + using MaskIterT = typename Mask::template Iterator; + using NanoLeafT = typename NanoNode::Type; + + Coord mOrigin; + Mask mValueMask, mValues; + union { + NanoLeafT *mDstNode; + uint64_t mDstOffset; + }; + + /// @brief Visits all active values in a leaf node + class ValueOnIterator : public MaskIterT + { + using BaseT = MaskIterT; + const LeafNode *mParent; + public: + ValueOnIterator() : BaseT(), mParent(nullptr) {} + ValueOnIterator(const LeafNode* parent) : BaseT(parent->mValueMask.beginOn()), mParent(parent) {} + ValueOnIterator& operator=(const ValueOnIterator&) = default; + bool operator*() const {NANOVDB_ASSERT(*this); return mParent->mValues.isOn(BaseT::pos());} + Coord getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(BaseT::pos());} + }; // Member class ValueOnIterator + + ValueOnIterator beginValueOn() {return ValueOnIterator(this);} + ValueOnIterator cbeginValueOn() const {return ValueOnIterator(this);} + + /// @brief Visits all inactive values in a leaf node + class ValueOffIterator : public MaskIterT + { + using BaseT = MaskIterT; + const LeafNode *mParent; + public: + ValueOffIterator() : BaseT(), mParent(nullptr) {} + ValueOffIterator(const LeafNode* parent) : BaseT(parent->mValueMask.beginOff()), mParent(parent) {} + ValueOffIterator& operator=(const ValueOffIterator&) = default; + bool operator*() const {NANOVDB_ASSERT(*this); return mParent->mValues.isOn(BaseT::pos());} + Coord getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(BaseT::pos());} + }; // Member class ValueOffIterator + + ValueOffIterator beginValueOff() {return ValueOffIterator(this);} + ValueOffIterator cbeginValueOff() const {return ValueOffIterator(this);} + + /// @brief Visits all values in a leaf node, i.e. 
both active and inactive values + class ValueIterator + { + const LeafNode *mParent; + uint32_t mPos; + public: + ValueIterator() : mParent(nullptr), mPos(1u << 3 * LOG2DIM) {} + ValueIterator(const LeafNode* parent) : mParent(parent), mPos(0) {NANOVDB_ASSERT(parent);} + ValueIterator& operator=(const ValueIterator&) = default; + bool operator*() const { NANOVDB_ASSERT(*this); return mParent->mValues.isOn(mPos);} + Coord getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(mPos);} + bool isActive() const { NANOVDB_ASSERT(*this); return mParent->mValueMask.isOn(mPos);} + operator bool() const {return mPos < SIZE;} + ValueIterator& operator++() {++mPos; return *this;} + ValueIterator operator++(int) { + auto tmp = *this; + ++(*this); + return tmp; + } + }; // Member class ValueIterator + + ValueIterator beginValue() {return ValueIterator(this);} + ValueIterator cbeginValueAll() const {return ValueIterator(this);} + + LeafNode(const Coord& ijk, bool value, bool state) + : mOrigin(ijk & ~MASK) + , mValueMask(state) + , mValues(value) + , mDstOffset(0) + { + } + LeafNode(const LeafNode&) = delete; // disallow copy-construction + LeafNode(LeafNode&&) = delete; // disallow move construction + LeafNode& operator=(const LeafNode&) = delete; // disallow copy assignment + LeafNode& operator=(LeafNode&&) = delete; // disallow move assignment + ~LeafNode() = default; + + const Mask& valueMask() const {return mValueMask;} + const Mask& getValueMask() const {return mValueMask;} + const Coord& origin() const {return mOrigin;} + + /// @brief Return the linear offset corresponding to the given coordinate + static uint32_t CoordToOffset(const Coord& ijk) + { + return ((ijk[0] & int32_t(MASK)) << (2 * LOG2DIM)) + + ((ijk[1] & int32_t(MASK)) << LOG2DIM) + + (ijk[2] & int32_t(MASK)); + } + + static Coord OffsetToLocalCoord(uint32_t n) + { + NANOVDB_ASSERT(n < SIZE); + const int32_t m = n & ((1 << 2 * LOG2DIM) - 1); + return Coord(n >> 2 * LOG2DIM, m >> LOG2DIM, m & int32_t(MASK)); + } + + void localToGlobalCoord(Coord& ijk) const + { + ijk += mOrigin; + } + + Coord offsetToGlobalCoord(uint32_t n) const + { + Coord ijk = LeafNode::OffsetToLocalCoord(n); + this->localToGlobalCoord(ijk); + return ijk; + } + bool getFirstValue() const { return mValues.isOn(0); } + bool getLastValue() const { return mValues.isOn(SIZE - 1); } + + bool getValue(uint32_t i) const {return mValues.isOn(i);} + bool getValue(const Coord& ijk) const + { + return mValues.isOn(CoordToOffset(ijk)); + } +#ifndef NANOVDB_NEW_ACCESSOR_METHODS + template + bool isActiveAndCache(const Coord& ijk, const AccT&) const + { + return mValueMask.isOn(CoordToOffset(ijk)); + } + + template + bool getValueAndCache(const Coord& ijk, const AccT&) const + { + return mValues.isOn(CoordToOffset(ijk)); + } + + template + void setValueAndCache(const Coord& ijk, bool value, const AccT&) + { + const uint32_t n = CoordToOffset(ijk); + mValueMask.setOn(n); + mValues.setOn(n); + } + + template + void setValueOnAndCache(const Coord& ijk, const AccT&) + { + const uint32_t n = CoordToOffset(ijk); + mValueMask.setOn(n); + } +#endif + + void setValue(uint32_t n, bool value) + { + mValueMask.setOn(n); + mValues.set(n, value); + } + void setValue(const Coord& ijk, bool value) {return this->setValue(CoordToOffset(ijk), value);} + + void merge(LeafNode &other) + { + mValues |= other.mValues; + mValueMask |= other.mValueMask; + } + +}; // tools::build::LeafNode + 
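The node classes above are normally driven through tools::build::Grid and its cached accessor rather than instantiated directly. The short sketch below is inserted here for illustration only; it is not part of the upstream header being added, the template arguments are written out explicitly (the diff text shows them stripped), and the exact spellings are assumed from the surrounding code (Grid's constructor, Tree::getAccessor, ValueAccessor::setValue/getValue, Tree::nodeCount).

// Illustrative usage sketch only (not part of the vendored header).
// At this point in the file we are inside namespace nanovdb::tools::build,
// so Grid, Coord and GridClass resolve without further qualification.
inline void gridBuilderUsageSketch()
{
    Grid<float> grid(/*background=*/0.0f, "example", GridClass::FogVolume);
    auto acc = grid.getAccessor();                 // caches the node path for repeated access
    for (int i = 0; i < 8; ++i)
        acc.setValue(Coord(i, 0, 0), 1.0f);        // sets the value and activates the voxel
    const float v = acc.getValue(Coord(3, 0, 0));  // 1.0f here, background (0.0f) elsewhere
    const auto counts = grid.nodeCount();          // per-level node counts: {leaf, lower, upper}
    (void)v; (void)counts;
}

Once populated this way, such a build grid is what the CreateNanoGrid converter shown earlier in this diff consumes to produce a device-ready NanoVDB grid handle.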
+//================================================================================================ + +template +template +inline typename util::enable_if::value>::type +LeafNode::signedFloodFill(T outside) +{ + const uint32_t first = *mValueMask.beginOn(); + if (first < SIZE) { + bool xInside = mValues[first] < 0, yInside = xInside, zInside = xInside; + for (uint32_t x = 0; x != DIM; ++x) { + const uint32_t x00 = x << (2 * LOG2DIM); + if (mValueMask.isOn(x00)) + xInside = mValues[x00] < 0; // element(x, 0, 0) + yInside = xInside; + for (uint32_t y = 0; y != DIM; ++y) { + const uint32_t xy0 = x00 + (y << LOG2DIM); + if (mValueMask.isOn(xy0)) + yInside = mValues[xy0] < 0; // element(x, y, 0) + zInside = yInside; + for (uint32_t z = 0; z != (1 << LOG2DIM); ++z) { + const uint32_t xyz = xy0 + z; // element(x, y, z) + if (mValueMask.isOn(xyz)) { + zInside = mValues[xyz] < 0; + } else { + mValues[xyz] = zInside ? -outside : outside; + } + } + } + } + } +} // tools::build::LeafNode::signedFloodFill + +// ----------------------------> ValueAccessor <-------------------------------------- + +template +struct ValueAccessor +{ + using ValueType = typename BuildToValueMap::type; + using LeafT = LeafNode; + using Node1 = InternalNode; + using Node2 = InternalNode; + using RootNodeType = RootNode; + using LeafNodeType = typename RootNodeType::LeafNodeType; + + ValueAccessor(RootNodeType& root) + : mRoot(root) + , mKeys{Coord(math::Maximum::value()), Coord(math::Maximum::value()), Coord(math::Maximum::value())} + , mNode{nullptr, nullptr, nullptr} + { + } + ValueAccessor(ValueAccessor&&) = default; // allow move construction + ValueAccessor(const ValueAccessor&) = delete; // disallow copy construction + ValueType getValue(int i, int j, int k) const {return this->getValue(Coord(i,j,k));} + template + bool isCached(const Coord& ijk) const + { + return (ijk[0] & int32_t(~NodeT::MASK)) == mKeys[NodeT::LEVEL][0] && + (ijk[1] & int32_t(~NodeT::MASK)) == mKeys[NodeT::LEVEL][1] && + (ijk[2] & int32_t(~NodeT::MASK)) == mKeys[NodeT::LEVEL][2]; + } + + template + auto get(const Coord& ijk, ArgsT&&... args) const + { + if (this->template isCached(ijk)) { + return ((const LeafT*)mNode[0])->template get(ijk, args...); + } else if (this->template isCached(ijk)) { + return ((const Node1*)mNode[1])->template getAndCache(ijk, *this, args...); + } else if (this->template isCached(ijk)) { + return ((const Node2*)mNode[2])->template getAndCache(ijk, *this, args...); + } + return mRoot.template getAndCache(ijk, *this, args...); + } + + template + auto set(const Coord& ijk, ArgsT&&... 
args) const + { + if (this->template isCached(ijk)) { + return ((LeafT*)mNode[0])->template set(ijk, args...); + } else if (this->template isCached(ijk)) { + return ((Node1*)mNode[1])->template setAndCache(ijk, *this, args...); + } else if (this->template isCached(ijk)) { + return ((Node2*)mNode[2])->template setAndCache(ijk, *this, args...); + } + return mRoot.template setAndCache(ijk, *this, args...); + } + +#ifdef NANOVDB_NEW_ACCESSOR_METHODS + ValueType getValue(const Coord& ijk) const {return this->template get>(ijk);} + LeafT* setValue(const Coord& ijk, const ValueType& value) {return this->template set>(ijk, value);} + LeafT* setValueOn(const Coord& ijk) {return this->template set>(ijk);} + LeafT& touchLeaf(const Coord& ijk) {return this->template set>(ijk);} + bool isActive(const Coord& ijk) const {return this->template get>(ijk);} +#else + ValueType getValue(const Coord& ijk) const + { + if (this->template isCached(ijk)) { + return ((LeafT*)mNode[0])->getValueAndCache(ijk, *this); + } else if (this->template isCached(ijk)) { + return ((Node1*)mNode[1])->getValueAndCache(ijk, *this); + } else if (this->template isCached(ijk)) { + return ((Node2*)mNode[2])->getValueAndCache(ijk, *this); + } + return mRoot.getValueAndCache(ijk, *this); + } + + /// @brief Sets value in a leaf node and returns it. + LeafT* setValue(const Coord& ijk, const ValueType& value) + { + if (this->template isCached(ijk)) { + ((LeafT*)mNode[0])->setValueAndCache(ijk, value, *this); + } else if (this->template isCached(ijk)) { + ((Node1*)mNode[1])->setValueAndCache(ijk, value, *this); + } else if (this->template isCached(ijk)) { + ((Node2*)mNode[2])->setValueAndCache(ijk, value, *this); + } else { + mRoot.setValueAndCache(ijk, value, *this); + } + NANOVDB_ASSERT(this->isCached(ijk)); + return (LeafT*)mNode[0]; + } + void setValueOn(const Coord& ijk) + { + if (this->template isCached(ijk)) { + ((LeafT*)mNode[0])->setValueOnAndCache(ijk, *this); + } else if (this->template isCached(ijk)) { + ((Node1*)mNode[1])->setValueOnAndCache(ijk, *this); + } else if (this->template isCached(ijk)) { + ((Node2*)mNode[2])->setValueOnAndCache(ijk, *this); + } else { + mRoot.setValueOnAndCache(ijk, *this); + } + } + void touchLeaf(const Coord& ijk) const + { + if (this->template isCached(ijk)) { + return; + } else if (this->template isCached(ijk)) { + ((Node1*)mNode[1])->touchLeafAndCache(ijk, *this); + } else if (this->template isCached(ijk)) { + ((Node2*)mNode[2])->touchLeafAndCache(ijk, *this); + } else { + mRoot.touchLeafAndCache(ijk, *this); + } + } + bool isActive(const Coord& ijk) const + { + if (this->template isCached(ijk)) { + return ((LeafT*)mNode[0])->isActiveAndCache(ijk, *this); + } else if (this->template isCached(ijk)) { + return ((Node1*)mNode[1])->isActiveAndCache(ijk, *this); + } else if (this->template isCached(ijk)) { + return ((Node2*)mNode[2])->isActiveAndCache(ijk, *this); + } + return mRoot.isActiveAndCache(ijk, *this); + } +#endif + + bool isValueOn(const Coord& ijk) const { return this->isActive(ijk); } + template + void insert(const Coord& ijk, NodeT* node) const + { + mKeys[NodeT::LEVEL] = ijk & ~NodeT::MASK; + mNode[NodeT::LEVEL] = node; + } + RootNodeType& mRoot; + mutable Coord mKeys[3]; + mutable void* mNode[3]; +}; // tools::build::ValueAccessor + +// ----------------------------> Tree <-------------------------------------- + +template +struct Tree +{ + using ValueType = typename BuildToValueMap::type; + using Node0 = LeafNode; + using Node1 = InternalNode; + using Node2 = InternalNode; + using 
RootNodeType = RootNode; + using LeafNodeType = typename RootNodeType::LeafNodeType; + struct WriteAccessor; + + RootNodeType mRoot; + std::mutex mMutex; + + Tree(const ValueType &background) : mRoot(background) {} + Tree(const Tree&) = delete; // disallow copy construction + Tree(Tree&&) = delete; // disallow move construction + Tree& tree() {return *this;} + RootNodeType& root() {return mRoot;} + ValueType getValue(const Coord& ijk) const {return mRoot.getValue(ijk);} + ValueType getValue(int i, int j, int k) const {return this->getValue(Coord(i,j,k));} + void setValue(const Coord& ijk, const ValueType &value) {mRoot.setValue(ijk, value);} + std::array nodeCount() const + { + std::array count{0,0,0}; + mRoot.nodeCount(count); + return count; + } + /// @brief regular accessor for thread-safe reading and non-thread-safe writing + ValueAccessor getAccessor() { return ValueAccessor(mRoot); } + /// @brief special accessor for thread-safe writing only + WriteAccessor getWriteAccessor() { return WriteAccessor(mRoot, mMutex); } +};// tools::build::Tree + +// ----------------------------> Tree::WriteAccessor <-------------------------------------- + +template +struct Tree::WriteAccessor +{ + using AccT = ValueAccessor; + using ValueType = typename AccT::ValueType; + using LeafT = typename AccT::LeafT; + using Node1 = typename AccT::Node1; + using Node2 = typename AccT::Node2; + using RootNodeType = typename AccT::RootNodeType; + + WriteAccessor(RootNodeType& parent, std::mutex &mx) + : mParent(parent) + , mRoot(parent.mBackground) + , mAcc(mRoot) + , mMutex(mx) + { + } + WriteAccessor(const WriteAccessor&) = delete; // disallow copy construction + WriteAccessor(WriteAccessor&&) = default; // allow move construction + ~WriteAccessor() { this->merge(); } + void merge() + { + mMutex.lock(); + mParent.merge(mRoot); + mMutex.unlock(); + } + inline void setValueOn(const Coord& ijk) {mAcc.setValueOn(ijk);} + inline void setValue(const Coord& ijk, const ValueType &value) {mAcc.setValue(ijk, value);} + + RootNodeType &mParent, mRoot; + AccT mAcc; + std::mutex &mMutex; +}; // tools::build::Tree::WriteAccessor + +// ----------------------------> Grid <-------------------------------------- + +template +struct Grid : public Tree +{ + using BuildType = BuildT; + using ValueType = typename BuildToValueMap::type; + using TreeType = Tree; + using Node0 = LeafNode; + using Node1 = InternalNode; + using Node2 = InternalNode; + using RootNodeType = RootNode; + + GridClass mGridClass; + GridType mGridType; + Map mMap; + std::string mName; + + Grid(const ValueType &background, const std::string &name = "", GridClass gClass = GridClass::Unknown) + : TreeType(background) + , mGridClass(gClass) + , mGridType(toGridType()) + , mName(name) + { + mMap.set(1.0, Vec3d(0.0), 1.0); + } + TreeType& tree() {return *this;} + const GridType& gridType() const { return mGridType; } + const GridClass& gridClass() const { return mGridClass; } + const Map& map() const { return mMap; } + void setTransform(double scale=1.0, const Vec3d &translation = Vec3d(0.0)) {mMap.set(scale, translation, 1.0);} + const std::string& gridName() const { return mName; } + const std::string& getName() const { return mName; } + void setName(const std::string &name) { mName = name; } + /// @brief Sets grids values in domain of the @a bbox to those returned by the specified @a func with the + /// expected signature [](const Coord&)->ValueType. 
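+ ///
+ /// A minimal usage sketch (illustrative only; the level-set radius, voxel size and
+ /// bounding box below are arbitrary assumptions, not upstream documentation):
+ /// @code
+ /// nanovdb::tools::build::FloatGrid grid(/*background=*/3.0f, "sphere", nanovdb::GridClass::LevelSet);
+ /// grid.setTransform(/*voxel size=*/0.5);
+ /// auto sdf = [](const nanovdb::Coord &ijk) { return ijk.asVec3s().length() - 50.0f; };
+ /// grid(sdf, nanovdb::CoordBBox(nanovdb::Coord(-64), nanovdb::Coord(64)));
+ /// @endcode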
+ /// + /// @note If @a func returns a value equal to the background value of the input grid at a + /// specific voxel coordinate, then the active state of that coordinate is off! Else the value + /// value is set and the active state is on. This is done to allow for sparse grids to be generated. + /// + /// @param func Functor used to evaluate the grid values in the @a bbox + /// @param bbox Coordinate bounding-box over which the grid values will be set. + /// @param delta Specifies a lower threshold value for rendering (optional). Typically equals the voxel size + /// for level sets and otherwise it's zero. + template + void operator()(const Func& func, const CoordBBox& bbox, ValueType delta = ValueType(0)); +};// tools::build::Grid + +template +template +void Grid::operator()(const Func& func, const CoordBBox& bbox, ValueType delta) +{ + auto &root = this->tree().root(); +#if __cplusplus >= 201703L + static_assert(util::is_same::type>::value, "GridBuilder: mismatched ValueType"); +#else// invoke_result was introduced in C++17 and result_of was removed in C++20 + static_assert(util::is_same::type>::value, "GridBuilder: mismatched ValueType"); +#endif + const CoordBBox leafBBox(bbox[0] >> Node0::TOTAL, bbox[1] >> Node0::TOTAL); + std::mutex mutex; + util::forEach(leafBBox, [&](const CoordBBox& b) { + Node0* leaf = nullptr; + for (auto it = b.begin(); it; ++it) { + Coord min(*it << Node0::TOTAL), max(min + Coord(Node0::DIM - 1)); + const CoordBBox b(min.maxComponent(bbox.min()), + max.minComponent(bbox.max()));// crop + if (leaf == nullptr) { + leaf = new Node0(b[0], root.mBackground, false); + } else { + leaf->mOrigin = b[0] & ~Node0::MASK; + NANOVDB_ASSERT(leaf->mValueMask.isOff()); + } + leaf->mDstOffset = 0;// no prune + for (auto ijk = b.begin(); ijk; ++ijk) { + const auto v = func(*ijk);// call functor + if (v != root.mBackground) leaf->setValue(*ijk, v);// don't insert background values + } + if (!leaf->mValueMask.isOff()) {// has active values + if (leaf->mValueMask.isOn()) {// only active values + const auto first = leaf->getFirstValue(); + int n=1; + while (n<512) {// 8^3 = 512 + if (leaf->mValues[n++] != first) break; + } + if (n == 512) leaf->mDstOffset = 1;// prune below + } + std::lock_guard guard(mutex); + NANOVDB_ASSERT(leaf != nullptr); + root.addNode(leaf); + NANOVDB_ASSERT(leaf == nullptr); + } + }// loop over sub-part of leafBBox + if (leaf) delete leaf; + }); + + // Prune leaf and tile nodes + for (auto it2 = root.mTable.begin(); it2 != root.mTable.end(); ++it2) { + if (auto *upper = it2->second.child) {//upper level internal node + for (auto it1 = upper->mChildMask.beginOn(); it1; ++it1) { + auto *lower = upper->mTable[*it1].child;// lower level internal node + for (auto it0 = lower->mChildMask.beginOn(); it0; ++it0) { + auto *leaf = lower->mTable[*it0].child;// leaf nodes + if (leaf->mDstOffset) { + lower->mTable[*it0].value = leaf->getFirstValue(); + lower->mChildMask.setOff(*it0); + lower->mValueMask.setOn(*it0); + delete leaf; + } + }// loop over leaf nodes + if (lower->mChildMask.isOff()) {//only tiles + const auto first = lower->getFirstValue(); + int n=1; + while (n < 4096) {// 16^3 = 4096 + if (lower->mTable[n++].value != first) break; + } + if (n == 4096) {// identical tile values so prune + upper->mTable[*it1].value = first; + upper->mChildMask.setOff(*it1); + upper->mValueMask.setOn(*it1); + delete lower; + } + } + }// loop over lower internal nodes + if (upper->mChildMask.isOff()) {//only tiles + const auto first = upper->getFirstValue(); + int n=1; + 
while (n < 32768) {// 32^3 = 32768 + if (upper->mTable[n++].value != first) break; + } + if (n == 32768) {// identical tile values so prune + it2->second.value = first; + it2->second.state = upper->mValueMask.isOn(); + it2->second.child = nullptr; + delete upper; + } + } + }// is child node of the root + }// loop over root table +}// tools::build::Grid::operator() + +//================================================================================================ + +template +using BuildLeaf = LeafNode; +template +using BuildLower = InternalNode>; +template +using BuildUpper = InternalNode>; +template +using BuildRoot = RootNode>; +template +using BuildTile = typename BuildRoot::Tile; + +using FloatGrid = Grid; +using Fp4Grid = Grid; +using Fp8Grid = Grid; +using Fp16Grid = Grid; +using FpNGrid = Grid; +using DoubleGrid = Grid; +using Int32Grid = Grid; +using UInt32Grid = Grid; +using Int64Grid = Grid; +using Vec3fGrid = Grid; +using Vec3dGrid = Grid; +using Vec4fGrid = Grid; +using Vec4dGrid = Grid; +using MaskGrid = Grid; +using IndexGrid = Grid; +using OnIndexGrid = Grid; +using BoolGrid = Grid; + +// ----------------------------> NodeManager <-------------------------------------- + +// GridT can be openvdb::Grid and nanovdb::tools::build::Grid +template +class NodeManager +{ +public: + + using ValueType = typename GridT::ValueType; + using BuildType = typename GridT::BuildType; + using GridType = GridT; + using TreeType = typename GridT::TreeType; + using RootNodeType = typename TreeType::RootNodeType; + static_assert(RootNodeType::LEVEL == 3, "NodeManager expected LEVEL=3"); + using Node2 = typename RootNodeType::ChildNodeType; + using Node1 = typename Node2::ChildNodeType; + using Node0 = typename Node1::ChildNodeType; + + NodeManager(GridT &grid) : mGrid(grid) {this->init();} + void init() + { + mArray0.clear(); + mArray1.clear(); + mArray2.clear(); + auto counts = mGrid.tree().nodeCount(); + mArray0.reserve(counts[0]); + mArray1.reserve(counts[1]); + mArray2.reserve(counts[2]); + + for (auto it2 = mGrid.tree().root().cbeginChildOn(); it2; ++it2) { + Node2 &upper = const_cast(*it2); + mArray2.emplace_back(&upper); + for (auto it1 = upper.cbeginChildOn(); it1; ++it1) { + Node1 &lower = const_cast(*it1); + mArray1.emplace_back(&lower); + for (auto it0 = lower.cbeginChildOn(); it0; ++it0) { + Node0 &leaf = const_cast(*it0); + mArray0.emplace_back(&leaf); + }// loop over leaf nodes + }// loop over lower internal nodes + }// loop over root node + } + + /// @brief Return the number of tree nodes at the specified level + /// @details 0 is leaf, 1 is lower internal, and 2 is upper internal level + uint64_t nodeCount(int level) const + { + NANOVDB_ASSERT(level==0 || level==1 || level==2); + return level==0 ? mArray0.size() : level==1 ? 
mArray1.size() : mArray2.size(); + } + + template + typename util::enable_if::type node(int i) {return *mArray0[i];} + template + typename util::enable_if::type node(int i) const {return *mArray0[i];} + template + typename util::enable_if::type node(int i) {return *mArray1[i];} + template + typename util::enable_if::type node(int i) const {return *mArray1[i];} + template + typename util::enable_if::type node(int i) {return *mArray2[i];} + template + typename util::enable_if::type node(int i) const {return *mArray2[i];} + + /// @brief Return the i'th leaf node with respect to breadth-first ordering + const Node0& leaf(uint32_t i) const { return *mArray0[i]; } + Node0& leaf(uint32_t i) { return *mArray0[i]; } + uint64_t leafCount() const {return mArray0.size();} + + /// @brief Return the i'th lower internal node with respect to breadth-first ordering + const Node1& lower(uint32_t i) const { return *mArray1[i]; } + Node1& lower(uint32_t i) { return *mArray1[i]; } + uint64_t lowerCount() const {return mArray1.size();} + + /// @brief Return the i'th upper internal node with respect to breadth-first ordering + const Node2& upper(uint32_t i) const { return *mArray2[i]; } + Node2& upper(uint32_t i) { return *mArray2[i]; } + uint64_t upperCount() const {return mArray2.size();} + + RootNodeType& root() {return mGrid.tree().root();} + const RootNodeType& root() const {return mGrid.tree().root();} + + TreeType& tree() {return mGrid.tree();} + const TreeType& tree() const {return mGrid.tree();} + + GridType& grid() {return mGrid;} + const GridType& grid() const {return mGrid;} + +protected: + + GridT &mGrid; + std::vector mArray0; // leaf nodes + std::vector mArray1; // lower internal nodes + std::vector mArray2; // upper internal nodes + +};// NodeManager + +template +typename util::enable_if::value>::type +sdfToLevelSet(NodeManagerT &mgr) +{ + mgr.grid().mGridClass = GridClass::LevelSet; + // Note that the bottom-up flood filling is essential + const auto outside = mgr.root().mBackground; + util::forEach(0, mgr.leafCount(), 8, [&](const util::Range1D& r) { + for (auto i = r.begin(); i != r.end(); ++i) mgr.leaf(i).signedFloodFill(outside); + }); + util::forEach(0, mgr.lowerCount(), 1, [&](const util::Range1D& r) { + for (auto i = r.begin(); i != r.end(); ++i) mgr.lower(i).signedFloodFill(outside); + }); + util::forEach(0, mgr.upperCount(), 1, [&](const util::Range1D& r) { + for (auto i = r.begin(); i != r.end(); ++i) mgr.upper(i).signedFloodFill(outside); + }); + mgr.root().signedFloodFill(outside); +}// sdfToLevelSet + +template +void levelSetToFog(NodeManagerT &mgr, bool rebuild = true) +{ + using ValueType = typename NodeManagerT::ValueType; + mgr.grid().mGridClass = GridClass::FogVolume; + const ValueType d = -mgr.root().mBackground, w = 1.0f / d; + //std::atomic_bool prune{false}; + std::atomic prune{false}; + auto op = [&](ValueType& v) -> bool { + if (v > ValueType(0)) { + v = ValueType(0); + return false; + } + v = v > d ? 
v * w : ValueType(1); + return true; + }; + util::forEach(0, mgr.leafCount(), 8, [&](const util::Range1D& r) { + for (auto i = r.begin(); i != r.end(); ++i) { + auto& leaf = mgr.leaf(i); + for (uint32_t i = 0; i < 512u; ++i) leaf.mValueMask.set(i, op(leaf.mValues[i])); + } + }); + util::forEach(0, mgr.lowerCount(), 1, [&](const util::Range1D& r) { + for (auto i = r.begin(); i != r.end(); ++i) { + auto& node = mgr.lower(i); + for (uint32_t i = 0; i < 4096u; ++i) { + if (node.mChildMask.isOn(i)) { + auto* leaf = node.mTable[i].child; + if (leaf->mValueMask.isOff()) {// prune leaf node + node.mTable[i].value = leaf->getFirstValue(); + node.mChildMask.setOff(i); + delete leaf; + prune = true; + } + } else { + node.mValueMask.set(i, op(node.mTable[i].value)); + } + } + } + }); + util::forEach(0, mgr.upperCount(), 1, [&](const util::Range1D& r) { + for (auto i = r.begin(); i != r.end(); ++i) { + auto& node = mgr.upper(i); + for (uint32_t i = 0; i < 32768u; ++i) { + if (node.mChildMask.isOn(i)) {// prune lower internal node + auto* child = node.mTable[i].child; + if (child->mChildMask.isOff() && child->mValueMask.isOff()) { + node.mTable[i].value = child->getFirstValue(); + node.mChildMask.setOff(i); + delete child; + prune = true; + } + } else { + node.mValueMask.set(i, op(node.mTable[i].value)); + } + } + } + }); + + for (auto it = mgr.root().mTable.begin(); it != mgr.root().mTable.end(); ++it) { + auto* child = it->second.child; + if (child == nullptr) { + it->second.state = op(it->second.value); + } else if (child->mChildMask.isOff() && child->mValueMask.isOff()) { + it->second.value = child->getFirstValue(); + it->second.state = false; + it->second.child = nullptr; + delete child; + prune = true; + } + } + if (rebuild && prune) mgr.init(); +}// levelSetToFog + +// ----------------------------> Implementations of random access methods <-------------------------------------- + +template +struct TouchLeaf { + static BuildLeaf& set(BuildLeaf &leaf, uint32_t) {return leaf;} +};// TouchLeaf + +/// @brief Implements Tree::getValue(Coord), i.e. return the value associated with a specific coordinate @c ijk. +/// @tparam BuildT Build type of the grid being called +/// @details The value at a coordinate maps to the background, a tile value or a leaf value. +template +struct GetValue { + static auto get(const BuildRoot &root) {return root.mBackground;} + static auto get(const BuildTile &tile) {return tile.value;} + static auto get(const BuildUpper &node, uint32_t n) {return node.mTable[n].value;} + static auto get(const BuildLower &node, uint32_t n) {return node.mTable[n].value;} + static auto get(const BuildLeaf &leaf, uint32_t n) {return leaf.getValue(n);} +};// GetValue + +/// @brief Implements Tree::isActive(Coord) +/// @tparam T Build type of the grid being called +template +struct GetState { + static bool get(const BuildRoot&) {return false;} + static bool get(const BuildTile &tile) {return tile.state;} + static bool get(const BuildUpper &node, uint32_t n) {return node.mValueMask.isOn(n);} + static bool get(const BuildLower &node, uint32_t n) {return node.mValueMask.isOn(n);} + static bool get(const BuildLeaf &leaf, uint32_t n) {return leaf.mValueMask.isOn(n);} +};// GetState + +/// @brief Set the value and its state at the leaf level mapped to by ijk, and create the leaf node and branch if needed. 
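+///
+/// For reference, when NANOVDB_NEW_ACCESSOR_METHODS is defined the accessor convenience
+/// methods above dispatch to these functors, e.g. (illustrative sketch only):
+/// @code
+/// nanovdb::tools::build::FloatGrid grid(0.0f);
+/// auto acc = grid.getAccessor();
+/// acc.setValue(nanovdb::Coord(1, 2, 3), 1.0f);   // dispatches to SetValue<float>
+/// float v = acc.getValue(nanovdb::Coord(1, 2, 3)); // dispatches to GetValue<float>
+/// @endcode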
+/// @tparam T BuildType of the corresponding tree +template +struct SetValue { + static BuildLeaf* set(BuildLeaf &leaf, uint32_t n) { + leaf.mValueMask.setOn(n);// always set the active bit + return &leaf; + } + static BuildLeaf* set(BuildLeaf &leaf, uint32_t n, const typename BuildLeaf::ValueType &v) { + leaf.setValue(n, v); + return &leaf; + } +};// SetValue + +/// @brief Implements Tree::probeLeaf(Coord) +/// @tparam T Build type of the grid being called +template +struct ProbeValue { + using ValueT = typename BuildLeaf::ValueType; + static bool get(const BuildRoot &root, ValueT &v) { + v = root.mBackground; + return false; + } + static bool get(const BuildTile &tile, ValueT &v) { + v = tile.value; + return tile.state; + } + static bool get(const BuildUpper &node, uint32_t n, ValueT &v) { + v = node.mTable[n].value; + return node.mValueMask.isOn(n); + } + static bool get(const BuildLower &node, uint32_t n, ValueT &v) { + v = node.mTable[n].value; + return node.mValueMask.isOn(n); + } + static bool get(const BuildLeaf &leaf, uint32_t n, ValueT &v) { + v = leaf.getValue(n); + return leaf.isActive(n); + } +};// ProbeValue + +} // namespace tools::build + +} // namespace nanovdb + +#endif // NANOVDB_TOOLS_BUILD_GRIDBUILDER_H_HAS_BEEN_INCLUDED diff --git a/external/nanovdb/tools/GridChecksum.h b/external/nanovdb/tools/GridChecksum.h new file mode 100644 index 00000000..62323c30 --- /dev/null +++ b/external/nanovdb/tools/GridChecksum.h @@ -0,0 +1,427 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +/*! + \file nanovdb/tools/GridChecksum.h + + \author Ken Museth + + \brief Computes a pair of uint32_t checksums, of a Grid, by means of 32 bit Cyclic Redundancy Check (CRC32) + + \details A CRC32 is the 32 bit remainder, or residue, of binary division of a message, by a polynomial. + + + \note before v32.6.0: checksum[0] = Grid+Tree+Root, checksum[1] = nodes + after v32.6.0: checksum[0] = Grid+Tree, checksum[1] = nodes + blind data in 4K blocks + + When serialized: + [Grid,Tree][Root][ROOT TILES...][Node<5>...][Node<4>...][Leaf<3>...][BlindMeta...][BlindData...] + checksum[2] before v32.6.0: <------------- [0] ------------><-------------- [1] ---------------> + checksum[2] after v32.6.0: <---[0]---><----------------------------------------[1]----------------------------------------> +*/ + +#ifndef NANOVDB_TOOLS_GRIDCHECKSUM_H_HAS_BEEN_INCLUDED +#define NANOVDB_TOOLS_GRIDCHECKSUM_H_HAS_BEEN_INCLUDED + +#include // for std::generate +#include +#include +#include +#include // offsetof macro +#include +#include +#include // for std::unique_ptr + +#include +#include +#include + +// Define log of block size for FULL CRC32 computation. +// A value of 12 corresponds to a block size of 4KB (2^12 = 4096). +#define NANOVDB_CRC32_LOG2_BLOCK_SIZE 12 + +namespace nanovdb {// ================================================================== + +namespace tools {// ==================================================================== + +/// @brief Compute the (2 x CRC32) checksum of the specified @c gridData +/// @param gridData Base pointer to the grid from which the checksum is computed. +/// @param mode Defines the mode of computation for the checksum. 
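+///
+/// Typical round-trip through the checksum API (illustrative sketch, not upstream documentation):
+/// @code
+/// bool refreshAndVerify(nanovdb::GridData *gridData)
+/// {
+///     nanovdb::tools::updateChecksum(gridData, nanovdb::CheckMode::Full);
+///     return nanovdb::tools::validateChecksum(gridData, nanovdb::CheckMode::Full);
+/// }
+/// @endcode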
+/// @return Return the (2 x CRC32) checksum of the specified @c gridData +Checksum evalChecksum(const GridData *gridData, CheckMode mode = CheckMode::Default); + +/// @brief Extract the checksum of a grid +/// @param gridData Base pointer to grid with a checksum +/// @return Checksum encoded in the specified grid +inline Checksum getChecksum(const GridData *gridData) +{ + NANOVDB_ASSERT(gridData); + return gridData->mChecksum; +} + +/// @brief Return true if the checksum of @c gridData matches the expected +/// value already encoded into the grid's meta data. +/// @tparam BuildT Template parameter used to build NanoVDB grid. +/// @param grid Grid whose checksum is validated. +/// @param mode Defines the mode of computation for the checksum. +bool validateChecksum(const GridData *gridData, CheckMode mode = CheckMode::Default); + +/// @brief Updates the checksum of a grid +/// @param grid Grid whose checksum will be updated. +/// @param mode Defines the mode of computation for the checksum. +inline void updateChecksum(GridData *gridData, CheckMode mode) +{ + NANOVDB_ASSERT(gridData); + gridData->mChecksum = evalChecksum(gridData, mode); +} + +/// @brief Updates the checksum of a grid by preserving its mode +/// @param gridData Base pointer to grid +inline void updateChecksum(GridData *gridData) +{ + updateChecksum(gridData, gridData->mChecksum.mode()); +} + +}// namespace tools + +namespace util { + +/// @brief Initiate single entry in look-up-table for CRC32 computations +/// @param lut pointer of size 256 for look-up-table +/// @param n entry in table (assumed n < 256) +inline __hostdev__ void initCrc32Lut(uint32_t lut[256], uint32_t n) +{ + lut[n] = n; + uint32_t &cs = lut[n]; + for (int i = 0; i < 8; ++i) cs = (cs >> 1) ^ ((cs & 1) ? 0xEDB88320 : 0); +} + +/// @brief Initiate entire look-up-table for CRC32 computations +/// @param lut pointer of size 256 for look-up-table +inline __hostdev__ void initCrc32Lut(uint32_t lut[256]){for (uint32_t n = 0u; n < 256u; ++n) initCrc32Lut(lut, n);} + +/// @brief Create and initiate entire look-up-table for CRC32 computations +/// @return returns a unique pointer to the lookup table of size 256. 
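+///
+/// Usage sketch (illustrative; the message below is an arbitrary assumption):
+/// @code
+/// auto lut = nanovdb::util::createCrc32Lut();
+/// const char msg[] = "NanoVDB";
+/// const uint32_t cs = nanovdb::util::crc32(msg, sizeof(msg) - 1, lut.get());
+/// @endcode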
+inline std::unique_ptr createCrc32Lut() +{ + std::unique_ptr lut(new uint32_t[256]); + initCrc32Lut(lut.get()); + return lut; +} + +/// @brief Compute crc32 checksum of @c data of @c size bytes (without a lookup table)) +/// @param data pointer to beginning of data +/// @param size byte size of data +/// @param crc initial value of crc32 checksum +/// @return return crc32 checksum of @c data +inline __hostdev__ uint32_t crc32(const void* data, size_t size, uint32_t crc = 0) +{ + NANOVDB_ASSERT(data); + crc = ~crc; + for (auto *p = (const uint8_t*)data, *q = p + size; p != q; ++p) { + crc ^= *p; + for (int j = 0; j < 8; ++j) crc = (crc >> 1) ^ (0xEDB88320 & (-(crc & 1))); + } + return ~crc; +} + +/// @brief Compute crc32 checksum of data between @c begin and @c end +/// @param begin points to beginning of data +/// @param end points to end of @data, (exclusive) +/// @param crc initial value of crc32 checksum +/// @return return crc32 checksum +inline __hostdev__ uint32_t crc32(const void *begin, const void *end, uint32_t crc = 0) +{ + NANOVDB_ASSERT(begin && end); + NANOVDB_ASSERT(end >= begin); + return crc32(begin, (const char*)end - (const char*)begin, crc); +} + +/// @brief Compute crc32 checksum of @c data with @c size bytes using a lookup table +/// @param data pointer to begenning of data +/// @param size byte size +/// @param lut pointer to loopup table for accelerated crc32 computation +/// @param crc initial value of the checksum +/// @return crc32 checksum of @c data with @c size bytes +inline __hostdev__ uint32_t crc32(const void *data, size_t size, const uint32_t lut[256], uint32_t crc = 0) +{ + NANOVDB_ASSERT(data); + crc = ~crc; + for (auto *p = (const uint8_t*)data, *q = p + size; p != q; ++p) crc = lut[(crc ^ *p) & 0xFF] ^ (crc >> 8); + return ~crc; +} + +/// @brief Compute crc32 checksum of data between @c begin and @c end using a lookup table +/// @param begin points to beginning of data +/// @param end points to end of @data, (exclusive) +/// @param lut pointer to loopup table for accelerated crc32 computation +/// @param crc initial value of crc32 checksum +/// @return return crc32 checksum +inline __hostdev__ uint32_t crc32(const void *begin, const void *end, const uint32_t lut[256], uint32_t crc = 0) +{ + NANOVDB_ASSERT(begin && end); + NANOVDB_ASSERT(end >= begin); + return crc32(begin, (const char*)end - (const char*)begin, lut, crc); +}// uint32_t util::crc32(const void *begin, const void *end, const uint32_t lut[256], uint32_t crc = 0) + +/// @brief +/// @param data +/// @param size +/// @param lut +/// @return +inline uint32_t blockedCrc32(const void *data, size_t size, const uint32_t *lut) +{ + if (size == 0 ) return ~uint32_t(0); + const uint64_t blockCount = size >> NANOVDB_CRC32_LOG2_BLOCK_SIZE;// number of 4 KB (4096 byte) blocks + std::unique_ptr checksums(new uint32_t[blockCount]); + forEach(0, blockCount, 64, [&](const Range1D &r) { + uint32_t blockSize = 1 << NANOVDB_CRC32_LOG2_BLOCK_SIZE, *p = checksums.get() + r.begin(); + for (auto i = r.begin(); i != r.end(); ++i) { + if (i+1 == blockCount) blockSize += static_cast(size - (blockCount<...][Node<4>...][Leaf<3>...][BlindMeta...][BlindData...] 
+// checksum[2] before v32.6.0: <------------- [0] ------------><-------------- [1] ---------------> +// checksum[]2 after v32.6.0: <---[0]---><----------------------------------------[1]----------------------------------------> + +// ----------------------------> crc32Head <-------------------------------------- + +/// @brief +/// @tparam ValueT +/// @param grid +/// @param mode +/// @return +inline __hostdev__ uint32_t crc32Head(const GridData *gridData, const uint32_t *lut) +{ + NANOVDB_ASSERT(gridData); + const uint8_t *begin = (const uint8_t*)(gridData), *mid = begin + sizeof(GridData) + sizeof(TreeData); + if (gridData->mVersion <= Version(32,6,0)) mid = (const uint8_t*)(gridData->template nodePtr<2>()); + return util::crc32(begin + 16u, mid, lut);// exclude GridData::mMagic and GridData::mChecksum +}// uint32_t crc32Head(const GridData *gridData, const uint32_t *lut) + +/// @brief +/// @param gridData +/// @return +inline __hostdev__ uint32_t crc32Head(const GridData *gridData) +{ + NANOVDB_ASSERT(gridData); + const uint8_t *begin = (const uint8_t*)(gridData), *mid = begin + sizeof(GridData) + sizeof(TreeData); + if (gridData->mVersion <= Version(32,6,0)) mid = (const uint8_t*)(gridData->template nodePtr<2>()); + return util::crc32(begin + 16, mid);// exclude GridData::mMagic and GridData::mChecksum +}// uint32_t crc32Head(const GridData *gridData) + +// ----------------------------> crc32TailOld <-------------------------------------- + +// Old checksum +template +uint32_t crc32TailOld(const NanoGrid *grid, const uint32_t *lut) +{ + NANOVDB_ASSERT(grid->mVersion <= Version(32,6,0)); + const auto &tree = grid->tree(); + auto nodeMgrHandle = createNodeManager(*grid); + auto *nodeMgr = nodeMgrHandle.template mgr(); + assert(nodeMgr && isAligned(nodeMgr)); + const auto nodeCount = tree.nodeCount(0) + tree.nodeCount(1) + tree.nodeCount(2); + std::vector checksums(nodeCount, 0); + util::forEach(0, tree.nodeCount(2), 1,[&](const util::Range1D &r) {// process upper internal nodes + uint32_t *p = checksums.data() + r.begin(); + for (auto i = r.begin(); i != r.end(); ++i) { + const auto &node = nodeMgr->upper(static_cast(i)); + *p++ = util::crc32(&node, node.memUsage(), lut); + } + }); + util::forEach(0, tree.nodeCount(1), 1, [&](const util::Range1D &r) { // process lower internal nodes + uint32_t *p = checksums.data() + r.begin() + tree.nodeCount(2); + for (auto i = r.begin(); i != r.end(); ++i) { + const auto &node = nodeMgr->lower(static_cast(i)); + *p++ = util::crc32(&node, node.memUsage(), lut); + } + }); + util::forEach(0, tree.nodeCount(0), 8, [&](const util::Range1D &r) { // process leaf nodes + uint32_t *p = checksums.data() + r.begin() + tree.nodeCount(1) + tree.nodeCount(2); + for (auto i = r.begin(); i != r.end(); ++i) { + const auto &leaf = nodeMgr->leaf(static_cast(i)); + *p++ = util::crc32(&leaf, leaf.memUsage(), lut); + } + }); + return util::crc32(checksums.data(), sizeof(uint32_t)*checksums.size(), lut); +}// uint32_t crc32TailOld(const NanoGrid *grid, const uint32_t *lut) + +struct Crc32TailOld { + template + static uint32_t known(const GridData *gridData, const uint32_t *lut) + { + return crc32TailOld((const NanoGrid*)gridData, lut); + } + static uint32_t unknown(const GridData*, const uint32_t*) + { + throw std::runtime_error("Cannot call Crc32TailOld with grid of unknown type"); + return 0u;//dummy + } +};// struct Crc32TailOld + +inline uint32_t crc32Tail(const GridData *gridData, const uint32_t *lut) +{ + NANOVDB_ASSERT(gridData); + if (gridData->mVersion > 
Version(32,6,0)) { + const uint8_t *begin = (const uint8_t*)(gridData); + return util::blockedCrc32(begin + sizeof(GridData) + sizeof(TreeData), begin + gridData->mGridSize, lut); + } else { + return callNanoGrid(gridData, lut); + } +}// uint32_t crc32Tail(const GridData *gridData, const uint32_t *lut) + +template +uint32_t crc32Tail(const NanoGrid *grid, const uint32_t *lut) +{ + NANOVDB_ASSERT(grid); + if (grid->mVersion > Version(32,6,0)) { + const uint8_t *begin = (const uint8_t*)(grid); + return util::blockedCrc32(begin + sizeof(GridData) + sizeof(TreeData), begin + grid->mGridSize, lut); + } else { + return crc32TailOld(grid, lut); + } +}// uint32_t crc32Tail(const NanoGrid *gridData, const uint32_t *lut) + +// ----------------------------> evalChecksum <-------------------------------------- + +/// @brief +/// @tparam ValueT +/// @param grid +/// @param mode +/// @return +template +Checksum evalChecksum(const NanoGrid *grid, CheckMode mode) +{ + NANOVDB_ASSERT(grid); + Checksum cs; + if (mode != CheckMode::Empty) { + auto lut = util::createCrc32Lut(); + cs.head() = crc32Head(grid, lut.get()); + if (mode == CheckMode::Full) cs.tail() = crc32Tail(grid, lut.get()); + } + return cs; +}// checksum(const NanoGrid*, CheckMode) + +template +[[deprecated("Use evalChecksum(const NanoGrid *grid, CheckMode mode) instead")]] +Checksum checksum(const NanoGrid *grid, CheckMode mode){return evalChecksum(grid, mode);} + +inline Checksum evalChecksum(const GridData *gridData, CheckMode mode) +{ + NANOVDB_ASSERT(gridData); + Checksum cs; + if (mode != CheckMode::Disable) { + auto lut = util::createCrc32Lut(); + cs.head() = crc32Head(gridData, lut.get()); + if (mode == CheckMode::Full) cs.tail() = crc32Tail(gridData, lut.get()); + } + return cs; +}// evalChecksum(GridData *data, CheckMode mode) + +[[deprecated("Use evalChecksum(const NanoGrid*, CheckMode) instead")]] +inline Checksum checksum(const GridData *gridData, CheckMode mode){return evalChecksum(gridData, mode);} + +template +[[deprecated("Use checksum(const NanoGrid*, CheckMode) instead")]] +Checksum checksum(const NanoGrid &grid, CheckMode mode){return checksum(&grid, mode);} + +// ----------------------------> validateChecksum <-------------------------------------- + +/// @brief +/// @tparam ValueT +/// @param grid +/// @param mode +/// @return +template +bool validateChecksum(const NanoGrid *grid, CheckMode mode) +{ + if (grid->mChecksum.isEmpty() || mode == CheckMode::Empty) return true; + auto lut = util::createCrc32Lut(); + bool checkHead = grid->mChecksum.head() == crc32Head(grid->data(), lut.get()); + if (grid->mChecksum.isHalf() || mode == CheckMode::Half || !checkHead) { + return checkHead; + } else { + return grid->mChecksum.tail() == crc32Tail(grid, lut.get()); + } +} + +/// @brief +/// @tparam ValueT +/// @param grid +/// @param mode +/// @return +inline bool validateChecksum(const GridData *gridData, CheckMode mode) +{ + if (gridData->mChecksum.isEmpty()|| mode == CheckMode::Empty) return true; + auto lut = util::createCrc32Lut(); + bool checkHead = gridData->mChecksum.head() == crc32Head(gridData, lut.get()); + if (gridData->mChecksum.isHalf() || mode == CheckMode::Half || !checkHead) { + return checkHead; + } else { + return gridData->mChecksum.tail() == crc32Tail(gridData, lut.get()); + } +}// bool validateChecksum(const GridData *gridData, CheckMode mode) + +template +[[deprecated("Use validateChecksum(const NanoGrid*, CheckMode) instead")]] +bool validateChecksum(const NanoGrid &grid, CheckMode mode){return 
validateChecksum(&grid, mode);} + +// ----------------------------> updateChecksum <-------------------------------------- + +/// @brief +/// @tparam ValueT +/// @param grid +/// @param mode +template +void updateChecksum(NanoGrid *grid, CheckMode mode){grid->mChecksum = evalChecksum(grid, mode);} + +template +void updateChecksum(NanoGrid *grid){grid->mChecksum = evalChecksum(grid, grid->mChecksum.mode());} + +// deprecated method that takes a reference vs a pointer +template +[[deprecated("Use updateChecksum(const NanoGrid*, CheckMode) instead")]] +void updateChecksum(NanoGrid &grid, CheckMode mode){updateChecksum(&grid, mode);} + +// ----------------------------> updateGridCount <-------------------------------------- + +/// @brief Updates the ground index and count, as well as the head checksum if needed +/// @param data Pointer to grid data +/// @param gridIndex New value of the index +/// @param gridCount New value of the grid count +inline void updateGridCount(GridData *data, uint32_t gridIndex, uint32_t gridCount) +{ + NANOVDB_ASSERT(data && gridIndex < gridCount); + if (data->mGridIndex != gridIndex || data->mGridCount != gridCount) { + data->mGridIndex = gridIndex; + data->mGridCount = gridCount; + if (!data->mChecksum.isEmpty()) data->mChecksum.head() = crc32Head(data); + } +} + +} // namespace tools ====================================================================== + + +} // namespace nanovdb ==================================================================== + +#endif // NANOVDB_TOOLS_GRIDCHECKSUM_H_HAS_BEEN_INCLUDED diff --git a/external/nanovdb/tools/GridStats.h b/external/nanovdb/tools/GridStats.h new file mode 100644 index 00000000..fac54b20 --- /dev/null +++ b/external/nanovdb/tools/GridStats.h @@ -0,0 +1,877 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +/*! + \file nanovdb/tools/GridStats.h + + \author Ken Museth + + \date August 29, 2020 + + \brief Re-computes min/max/avg/var/bbox information for each node in a + pre-existing NanoVDB grid. +*/ + +#ifndef NANOVDB_TOOLS_GRIDSTATS_H_HAS_BEEN_INCLUDED +#define NANOVDB_TOOLS_GRIDSTATS_H_HAS_BEEN_INCLUDED + +#include + +#ifdef NANOVDB_USE_TBB +#include +#include +#endif + +#if defined(__CUDACC__) +#include // for cuda::std::numeric_limits +#else +#include // for std::numeric_limits +#endif + +#include +#include + +namespace nanovdb { + +namespace tools {//======================================================================= + +/// @brief Grid flags which indicate what extra information is present in the grid buffer +enum class StatsMode : uint32_t { + Disable = 0,// disable the computation of any type of statistics (obviously the FASTEST!) + BBox = 1,// only compute the bbox of active values per node and total activeVoxelCount + MinMax = 2,// additionally compute extrema values + All = 3,// compute all of the statics, i.e. bbox, min/max, average and standard deviation + Default = 3,// default computational mode for statistics + End = 4, +}; + +/// @brief Re-computes the min/max, stats and bbox information for an existing NanoVDB Grid +/// @param grid Grid whose stats to update +/// @param mode Mode of computation for the statistics. +template +void updateGridStats(NanoGrid* grid, StatsMode mode = StatsMode::Default); + +template::Rank> +class Extrema; + +/// @brief Determine the extrema of all the values in a grid that +/// intersects the specified bounding box. 
+/// @tparam BuildT Build type of the input grid +/// @param grid typed grid +/// @param bbox index bounding box in which min/max are computed +/// @return Extream of values insixe @c bbox +template +Extrema::ValueType> +getExtrema(const NanoGrid& grid, const CoordBBox &bbox); + +//================================================================================================ + +/// @brief Template specialization of Extrema on scalar value types, i.e. rank = 0 +template +class Extrema +{ +protected: + ValueT mMin, mMax; + +public: + using ValueType = ValueT; + __hostdev__ Extrema() +#if defined(__CUDACC__) + // note "::cuda" is needed since we also define a cuda namespace + : mMin(::cuda::std::numeric_limits::max()) + , mMax(::cuda::std::numeric_limits::lowest()) +#else + : mMin(std::numeric_limits::max()) + , mMax(std::numeric_limits::lowest()) +#endif + { + } + __hostdev__ Extrema(const ValueT& v) + : mMin(v) + , mMax(v) + { + } + __hostdev__ Extrema(const ValueT& a, const ValueT& b) + : mMin(a) + , mMax(b) + { + } + __hostdev__ Extrema& min(const ValueT& v) + { + if (v < mMin) mMin = v; + return *this; + } + __hostdev__ Extrema& max(const ValueT& v) + { + if (v > mMax) mMax = v; + return *this; + } + __hostdev__ Extrema& add(const ValueT& v) + { + this->min(v); + this->max(v); + return *this; + } + __hostdev__ Extrema& add(const ValueT& v, uint64_t) { return this->add(v); } + __hostdev__ Extrema& add(const Extrema& other) + { + this->min(other.mMin); + this->max(other.mMax); + return *this; + } + __hostdev__ const ValueT& min() const { return mMin; } + __hostdev__ const ValueT& max() const { return mMax; } + __hostdev__ operator bool() const { return mMin <= mMax; } + __hostdev__ static constexpr bool hasMinMax() { return !util::is_same::value; } + __hostdev__ static constexpr bool hasAverage() { return false; } + __hostdev__ static constexpr bool hasStdDeviation() { return false; } + __hostdev__ static constexpr bool hasStats() { return !util::is_same::value; } + __hostdev__ static constexpr size_t size() { return 0; } + + template + __hostdev__ void setStats(NodeT &node) const + { + node.setMin(this->min()); + node.setMax(this->max()); + } +}; // Extrema + +/// @brief Template specialization of Extrema on vector value types, i.e. 
rank = 1 +template +class Extrema +{ +protected: + using Real = typename VecT::ValueType; // this works with both nanovdb and openvdb vectors + struct Pair + { + Real scalar; + VecT vector; + + __hostdev__ Pair(Real s)// is only used by Extrema() default c-tor + : scalar(s) + , vector(s) + { + } + __hostdev__ Pair(const VecT& v) + : scalar(v.lengthSqr()) + , vector(v) + { + } + __hostdev__ bool operator<(const Pair& rhs) const { return scalar < rhs.scalar; } + } mMin, mMax; + __hostdev__ Extrema& add(const Pair& p) + { + if (p < mMin) mMin = p; + if (mMax < p) mMax = p; + return *this; + } + +public: + using ValueType = VecT; + __hostdev__ Extrema() +#if defined(__CUDACC__) + // note "::cuda" is needed since we also define a cuda namespace + : mMin(::cuda::std::numeric_limits::max()) + , mMax(::cuda::std::numeric_limits::lowest()) +#else + : mMin(std::numeric_limits::max()) + , mMax(std::numeric_limits::lowest()) +#endif + { + } + __hostdev__ Extrema(const VecT& v) + : mMin(v) + , mMax(v) + { + } + __hostdev__ Extrema(const VecT& a, const VecT& b) + : mMin(a) + , mMax(b) + { + } + __hostdev__ Extrema& min(const VecT& v) + { + Pair tmp(v); + if (tmp < mMin) mMin = tmp; + return *this; + } + __hostdev__ Extrema& max(const VecT& v) + { + Pair tmp(v); + if (mMax < tmp) mMax = tmp; + return *this; + } + __hostdev__ Extrema& add(const VecT& v) { return this->add(Pair(v)); } + __hostdev__ Extrema& add(const VecT& v, uint64_t) { return this->add(Pair(v)); } + __hostdev__ Extrema& add(const Extrema& other) + { + if (other.mMin < mMin) mMin = other.mMin; + if (mMax < other.mMax) mMax = other.mMax; + return *this; + } + __hostdev__ const VecT& min() const { return mMin.vector; } + __hostdev__ const VecT& max() const { return mMax.vector; } + __hostdev__ operator bool() const { return !(mMax < mMin); } + __hostdev__ static constexpr bool hasMinMax() { return !util::is_same::value; } + __hostdev__ static constexpr bool hasAverage() { return false; } + __hostdev__ static constexpr bool hasStdDeviation() { return false; } + __hostdev__ static constexpr bool hasStats() { return !util::is_same::value; } + __hostdev__ static constexpr size_t size() { return 0; } + + template + __hostdev__ void setStats(NodeT &node) const + { + node.setMin(this->min()); + node.setMax(this->max()); + } +}; // Extrema + +//================================================================================================ + +template::Rank> +class Stats; + +/// @brief This class computes statistics (minimum value, maximum +/// value, mean, variance and standard deviation) of a population +/// of floating-point values. +/// +/// @details variance = Mean[ (X-Mean[X])^2 ] = Mean[X^2] - Mean[X]^2, +/// standard deviation = sqrt(variance) +/// +/// @note This class employs incremental computation and double precision. 
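+///
+/// A small usage sketch (illustrative; the sample values are arbitrary):
+/// @code
+/// nanovdb::tools::Stats<float> stats;
+/// for (float v : {1.0f, 2.0f, 4.0f}) stats.add(v);
+/// const double mean  = stats.avg();    // incremental (Welford-style) mean
+/// const double sigma = stats.stdDev(); // sqrt of the (biased) population variance
+/// @endcode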
+template +class Stats : public Extrema +{ +protected: + using BaseT = Extrema; + using RealT = double; // for accuracy the internal precission must be 64 bit floats + size_t mSize; + double mAvg, mAux; + +public: + using ValueType = ValueT; + __hostdev__ Stats() + : BaseT() + , mSize(0) + , mAvg(0.0) + , mAux(0.0) + { + } + __hostdev__ Stats(const ValueT& val) + : BaseT(val) + , mSize(1) + , mAvg(RealT(val)) + , mAux(0.0) + { + } + /// @brief Add a single sample + __hostdev__ Stats& add(const ValueT& val) + { + BaseT::add(val); + mSize += 1; + const double delta = double(val) - mAvg; + mAvg += delta / double(mSize); + mAux += delta * (double(val) - mAvg); + return *this; + } + /// @brief Add @a n samples with constant value @a val. + __hostdev__ Stats& add(const ValueT& val, uint64_t n) + { + const double denom = 1.0 / double(mSize + n); + const double delta = double(val) - mAvg; + mAvg += denom * delta * double(n); + mAux += denom * delta * delta * double(mSize) * double(n); + BaseT::add(val); + mSize += n; + return *this; + } + + /// Add the samples from the other Stats instance. + __hostdev__ Stats& add(const Stats& other) + { + if (other.mSize > 0) { + const double denom = 1.0 / double(mSize + other.mSize); + const double delta = other.mAvg - mAvg; + mAvg += denom * delta * double(other.mSize); + mAux += other.mAux + denom * delta * delta * double(mSize) * double(other.mSize); + BaseT::add(other); + mSize += other.mSize; + } + return *this; + } + + __hostdev__ static constexpr bool hasMinMax() { return !util::is_same::value; } + __hostdev__ static constexpr bool hasAverage() { return !util::is_same::value; } + __hostdev__ static constexpr bool hasStdDeviation() { return !util::is_same::value; } + __hostdev__ static constexpr bool hasStats() { return !util::is_same::value; } + + __hostdev__ size_t size() const { return mSize; } + + //@{ + /// Return the arithmetic mean, i.e. average, value. + __hostdev__ double avg() const { return mAvg; } + __hostdev__ double mean() const { return mAvg; } + //@} + + //@{ + /// @brief Return the population variance. + /// + /// @note The unbiased sample variance = population variance * num/(num-1) + __hostdev__ double var() const { return mSize < 2 ? 0.0 : mAux / double(mSize); } + __hostdev__ double variance() const { return this->var(); } + //@} + + //@{ + /// @brief Return the standard deviation (=Sqrt(variance)) as + /// defined from the (biased) population variance. + __hostdev__ double std() const { return sqrt(this->var()); } + __hostdev__ double stdDev() const { return this->std(); } + //@} + + template + __hostdev__ void setStats(NodeT &node) const + { + node.setMin(this->min()); + node.setMax(this->max()); + node.setAvg(this->avg()); + node.setDev(this->std()); + } +}; // end Stats + +/// @brief This class computes statistics (minimum value, maximum +/// value, mean, variance and standard deviation) of a population +/// of floating-point values. +/// +/// @details variance = Mean[ (X-Mean[X])^2 ] = Mean[X^2] - Mean[X]^2, +/// standard deviation = sqrt(variance) +/// +/// @note This class employs incremental computation and double precision. 
+template +class Stats : public Extrema +{ +protected: + using BaseT = Extrema; + using RealT = double; // for accuracy the internal precision must be 64 bit floats + size_t mSize; + double mAvg, mAux; + +public: + using ValueType = ValueT; + __hostdev__ Stats() + : BaseT() + , mSize(0) + , mAvg(0.0) + , mAux(0.0) + { + } + /// @brief Add a single sample + __hostdev__ Stats& add(const ValueT& val) + { + typename BaseT::Pair tmp(val); + BaseT::add(tmp); + mSize += 1; + const double delta = tmp.scalar - mAvg; + mAvg += delta / double(mSize); + mAux += delta * (tmp.scalar - mAvg); + return *this; + } + /// @brief Add @a n samples with constant value @a val. + __hostdev__ Stats& add(const ValueT& val, uint64_t n) + { + typename BaseT::Pair tmp(val); + const double denom = 1.0 / double(mSize + n); + const double delta = tmp.scalar - mAvg; + mAvg += denom * delta * double(n); + mAux += denom * delta * delta * double(mSize) * double(n); + BaseT::add(tmp); + mSize += n; + return *this; + } + + /// Add the samples from the other Stats instance. + __hostdev__ Stats& add(const Stats& other) + { + if (other.mSize > 0) { + const double denom = 1.0 / double(mSize + other.mSize); + const double delta = other.mAvg - mAvg; + mAvg += denom * delta * double(other.mSize); + mAux += other.mAux + denom * delta * delta * double(mSize) * double(other.mSize); + BaseT::add(other); + mSize += other.mSize; + } + return *this; + } + + __hostdev__ static constexpr bool hasMinMax() { return !util::is_same::value; } + __hostdev__ static constexpr bool hasAverage() { return !util::is_same::value; } + __hostdev__ static constexpr bool hasStdDeviation() { return !util::is_same::value; } + __hostdev__ static constexpr bool hasStats() { return !util::is_same::value; } + + __hostdev__ size_t size() const { return mSize; } + + //@{ + /// Return the arithmetic mean, i.e. average, value. + __hostdev__ double avg() const { return mAvg; } + __hostdev__ double mean() const { return mAvg; } + //@} + + //@{ + /// @brief Return the population variance. + /// + /// @note The unbiased sample variance = population variance * num/(num-1) + __hostdev__ double var() const { return mSize < 2 ? 0.0 : mAux / double(mSize); } + __hostdev__ double variance() const { return this->var(); } + //@} + + //@{ + /// @brief Return the standard deviation (=Sqrt(variance)) as + /// defined from the (biased) population variance. 
+ __hostdev__ double std() const { return sqrt(this->var()); } + __hostdev__ double stdDev() const { return this->std(); } + //@} + + template + __hostdev__ void setStats(NodeT &node) const + { + node.setMin(this->min()); + node.setMax(this->max()); + node.setAvg(this->avg()); + node.setDev(this->std()); + } +}; // end Stats + +/// @brief No-op Stats class +template +struct NoopStats +{ + using ValueType = ValueT; + __hostdev__ NoopStats() {} + __hostdev__ NoopStats(const ValueT&) {} + __hostdev__ NoopStats& add(const ValueT&) { return *this; } + __hostdev__ NoopStats& add(const ValueT&, uint64_t) { return *this; } + __hostdev__ NoopStats& add(const NoopStats&) { return *this; } + __hostdev__ static constexpr size_t size() { return 0; } + __hostdev__ static constexpr bool hasMinMax() { return false; } + __hostdev__ static constexpr bool hasAverage() { return false; } + __hostdev__ static constexpr bool hasStdDeviation() { return false; } + __hostdev__ static constexpr bool hasStats() { return false; } + template + __hostdev__ void setStats(NodeT&) const{} +}; // end NoopStats + +//================================================================================================ + +/// @brief Allows for the construction of NanoVDB grids without any dependency +template> +class GridStats +{ + struct NodeStats; + using TreeT = typename GridT::TreeType; + using ValueT = typename TreeT::ValueType; + using BuildT = typename TreeT::BuildType; + using Node0 = typename TreeT::Node0; // leaf + using Node1 = typename TreeT::Node1; // lower + using Node2 = typename TreeT::Node2; // upper + using RootT = typename TreeT::Node3; // root + static_assert(util::is_same::value, "Mismatching type"); + + ValueT mDelta; // skip rendering of node if: node.max < -mDelta || node.min > mDelta + + void process( GridT& );// process grid and all tree nodes + void process( TreeT& );// process Tree, root node and child nodes + void process( RootT& );// process root node and child nodes + NodeStats process( Node0& );// process leaf node + + template + NodeStats process( NodeT& );// process internal node and child nodes + + template + void setStats(DataT*, const Extrema&); + template + void setStats(DataT*, const Stats&); + template + void setStats(DataT*, const NoopStats&) {} + + template + typename std::enable_if::value>::type + setFlag(const T&, const T&, FlagT& flag) const { flag &= ~FlagT(1); } // unset 1st bit to enable rendering + + template + typename std::enable_if::value>::type + setFlag(const T& min, const T& max, FlagT& flag) const; + +public: + GridStats() = default; + + void update(GridT& grid, ValueT delta = ValueT(0)); + +}; // GridStats + +template +struct GridStats::NodeStats +{ + StatsT stats; + CoordBBox bbox; + + NodeStats(): stats(), bbox() {}//activeCount(0), bbox() {}; + + NodeStats& add(const NodeStats &other) + { + stats.add( other.stats );// no-op for NoopStats?! 
+ bbox[0].minComponent(other.bbox[0]); + bbox[1].maxComponent(other.bbox[1]); + return *this; + } +};// GridStats::NodeStats + +//================================================================================================ + +template +void GridStats::update(GridT& grid, ValueT delta) +{ + mDelta = delta; // delta = voxel size for level sets, else 0 + this->process( grid ); +} + +//================================================================================================ + +template +template +inline void GridStats:: + setStats(DataT* data, const Extrema& e) +{ + data->setMin(e.min()); + data->setMax(e.max()); +} + +template +template +inline void GridStats:: + setStats(DataT* data, const Stats& s) +{ + data->setMin(s.min()); + data->setMax(s.max()); + data->setAvg(s.avg()); + data->setDev(s.std()); +} + +//================================================================================================ + +template +template +inline typename std::enable_if::value>::type +GridStats:: + setFlag(const T& min, const T& max, FlagT& flag) const +{ + if (mDelta > 0 && (min > mDelta || max < -mDelta)) {// LS: min > dx || max < -dx + flag |= FlagT(1u);// set 1st bit to disable rendering + } else { + flag &= ~FlagT(1u);// unset 1st bit to enable rendering + } +} + +//================================================================================================ + +template +void GridStats::process( GridT &grid ) +{ + this->process( grid.tree() );// this processes tree, root and all nodes + + // set world space AABB + auto& data = *grid.data(); + const auto& indexBBox = grid.tree().root().bbox(); + if (indexBBox.empty()) { + data.mWorldBBox = Vec3dBBox(); + data.setBBoxOn(false); + } else { + // Note that below max is offset by one since CoordBBox.max is inclusive + // while bbox.max is exclusive. However, min is inclusive in both + // CoordBBox and Vec3dBBox. This also guarantees that a grid with a single + // active voxel, does not have an empty world bbox! E.g. if a grid with a + // unit index-to-world transformation only contains the active voxel (0,0,0) + // then indeBBox = (0,0,0) -> (0,0,0) and then worldBBox = (0.0, 0.0, 0.0) + // -> (1.0, 1.0, 1.0). This is a consequence of the different definitions + // of index and world bounding boxes inherited from OpenVDB! 
+ grid.mWorldBBox = CoordBBox(indexBBox[0], indexBBox[1].offsetBy(1)).transform(grid.map()); + grid.setBBoxOn(true); + } + + // set bit flags + data.setMinMaxOn(StatsT::hasMinMax()); + data.setAverageOn(StatsT::hasAverage()); + data.setStdDeviationOn(StatsT::hasStdDeviation()); +} // GridStats::process( Grid ) + +//================================================================================================ + +template +inline void GridStats::process( typename GridT::TreeType &tree ) +{ + this->process( tree.root() ); +} + +//================================================================================================ + +template +void GridStats::process(RootT &root) +{ + using ChildT = Node2; + auto &data = *root.data(); + if (data.mTableSize == 0) { // empty root node + data.mMinimum = data.mMaximum = data.mBackground; + data.mAverage = data.mStdDevi = 0; + data.mBBox = CoordBBox(); + } else { + NodeStats total; + for (uint32_t i = 0; i < data.mTableSize; ++i) { + auto* tile = data.tile(i); + if (tile->isChild()) { // process child node + total.add( this->process( *data.getChild(tile) ) ); + } else if (tile->state) { // active tile + const Coord ijk = tile->origin(); + total.bbox[0].minComponent(ijk); + total.bbox[1].maxComponent(ijk + Coord(ChildT::DIM - 1)); + if (StatsT::hasStats()) { // resolved at compile time + total.stats.add(tile->value, ChildT::NUM_VALUES); + } + } + } + this->setStats(&data, total.stats); + if (total.bbox.empty()) { + std::cerr << "\nWarning in GridStats: input tree only contained inactive root tiles!" + << "\nWhile not strictly an error it's rather suspicious!\n"; + } + data.mBBox = total.bbox; + } +} // GridStats::process( RootNode ) + +//================================================================================================ + +template +template +typename GridStats::NodeStats +GridStats::process(NodeT &node) +{ + static_assert(util::is_same::value || util::is_same::value, "Incorrect node type"); + using ChildT = typename NodeT::ChildNodeType; + + NodeStats total; + auto* data = node.data(); + + // Serial processing of active tiles + if (const auto tileCount = data->mValueMask.countOn()) { + //total.activeCount = tileCount * ChildT::NUM_VALUES; // active tiles + for (auto it = data->mValueMask.beginOn(); it; ++it) { + if (StatsT::hasStats()) { // resolved at compile time + total.stats.add( data->mTable[*it].value, ChildT::NUM_VALUES ); + } + const Coord ijk = node.offsetToGlobalCoord(*it); + total.bbox[0].minComponent(ijk); + total.bbox[1].maxComponent(ijk + Coord(int32_t(ChildT::DIM) - 1)); + } + } + + // Serial or parallel processing of child nodes + if (const size_t childCount = data->mChildMask.countOn()) { +#ifndef NANOVDB_USE_TBB + for (auto it = data->mChildMask.beginOn(); it; ++it) { + total.add( this->process( *data->getChild(*it) ) ); + } +#else + std::unique_ptr childNodes(new ChildT*[childCount]); + ChildT **ptr = childNodes.get(); + for (auto it = data->mChildMask.beginOn(); it; ++it) { + *ptr++ = data->getChild( *it ); + } + using RangeT = tbb::blocked_range; + total.add( tbb::parallel_reduce(RangeT(0, childCount), NodeStats(), + [&](const RangeT &r, NodeStats local)->NodeStats { + for(size_t i=r.begin(); i!=r.end(); ++i){ + local.add( this->process( *childNodes[i] ) ); + } + return local;}, + [](NodeStats a, const NodeStats &b)->NodeStats { return a.add( b ); } + )); +#endif + } + + data->mBBox = total.bbox; + if (total.bbox.empty()) { + data->mFlags |= uint32_t(1); // set 1st bit on to disable rendering of node + data->mFlags &= 
~uint32_t(2); // set 2nd bit off since node does not contain active values + } else { + data->mFlags |= uint32_t(2); // set 2nd bit on since node contains active values + if (StatsT::hasStats()) { // resolved at compile time + this->setStats(data, total.stats); + this->setFlag(data->mMinimum, data->mMaximum, data->mFlags); + } + } + return total; +} // GridStats::process( InternalNode ) + +//================================================================================================ + +template +typename GridStats::NodeStats +GridStats::process(Node0 &leaf) +{ + NodeStats local; + if (leaf.updateBBox()) {// optionally update active bounding box (updates data->mFlags) + local.bbox[0] = local.bbox[1] = leaf.mBBoxMin; + local.bbox[1] += Coord(leaf.mBBoxDif[0], leaf.mBBoxDif[1], leaf.mBBoxDif[2]); + if (StatsT::hasStats()) {// resolved at compile time + for (auto it = leaf.cbeginValueOn(); it; ++it) local.stats.add(*it); + this->setStats(&leaf, local.stats); + this->setFlag(leaf.getMin(), leaf.getMax(), leaf.mFlags); + } + } + return local; +} // GridStats::process( LeafNode ) + +//================================================================================================ + +template +void updateGridStats(NanoGrid* grid, StatsMode mode) +{ + NANOVDB_ASSERT(grid); + using GridT = NanoGrid; + using ValueT = typename GridT::ValueType; + if (mode == StatsMode::Disable) { + return; + } else if (mode == StatsMode::BBox || util::is_same::value) { + GridStats > stats; + stats.update(*grid); + } else if (mode == StatsMode::MinMax) { + GridStats > stats; + stats.update(*grid); + } else if (mode == StatsMode::All) { + GridStats > stats; + stats.update(*grid); + } else { + throw std::runtime_error("gridStats: Unsupported statistics mode."); + } +}// updateGridStats + +template +[[deprecated("Use nanovdb::tools::updateGridStats(NanoGrid*, StatsMode) instead")]] +void gridStats(NanoGrid& grid, StatsMode mode = StatsMode::Default) +{ + updateGridStats(&grid, mode); +} + +//================================================================================================ + +namespace { + +// returns a bitmask (of size 32^3 or 16^3) that marks all the entries +// in a node table that intersects with the specified bounding box. +template +Mask getBBoxMask(const CoordBBox &bbox, const NodeT* node) +{ + Mask mask;// typically 32^3 or 16^3 bit mask + auto b = CoordBBox::createCube(node->origin(), node->dim()); + assert( bbox.hasOverlap(b) ); + if ( bbox.isInside(b) ) { + mask.setOn();//node is completely inside the bbox so early out + } else { + b.intersect(bbox);// trim bounding box + // transform bounding box from global to local coordinates + b.min() &= NodeT::DIM-1u; + b.min() >>= NodeT::ChildNodeType::TOTAL; + b.max() &= NodeT::DIM-1u; + b.max() >>= NodeT::ChildNodeType::TOTAL; + assert( !b.empty() ); + auto it = b.begin();// iterates over all the child nodes or tiles that intersects bbox + for (const Coord& ijk = *it; it; ++it) { + mask.setOn(ijk[2] + (ijk[1] << NodeT::LOG2DIM) + (ijk[0] << 2*NodeT::LOG2DIM)); + } + } + return mask; +}// getBBoxMask + +}// end of unnamed namespace + +/// @brief return the extrema of all the values in a grid that +/// intersects the specified bounding box. 
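+///
+/// Usage sketch (illustrative; the grid reference and bounding box are assumptions):
+/// @code
+/// void printValueRange(const nanovdb::NanoGrid<float> &grid)
+/// {
+///     const nanovdb::CoordBBox bbox(nanovdb::Coord(0), nanovdb::Coord(100));
+///     const auto ext = nanovdb::tools::getExtrema(grid, bbox);
+///     printf("min=%f max=%f\n", ext.min(), ext.max());
+/// }
+/// @endcode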
+template +Extrema::ValueType> +getExtrema(const NanoGrid& grid, const CoordBBox &bbox) +{ + using GridT = NanoGrid; + using ValueT = typename GridT::ValueType; + using TreeT = typename GridTree::type; + using RootT = typename NodeTrait::type;// root node + using Node2 = typename NodeTrait::type;// upper internal node + using Node1 = typename NodeTrait::type;// lower internal node + using Node0 = typename NodeTrait::type;// leaf node + + Extrema extrema; + const RootT &root = grid.tree().root(); + const auto &bbox3 = root.bbox(); + if (bbox.isInside(bbox3)) {// bbox3 is contained inside bbox + extrema.min(root.minimum()); + extrema.max(root.maximum()); + extrema.add(root.background()); + } else if (bbox.hasOverlap(bbox3)) { + const auto *data3 = root.data(); + for (uint32_t i=0; imTableSize; ++i) { + const auto *tile = data3->tile(i); + CoordBBox bbox2 = CoordBBox::createCube(tile->origin(), Node2::dim()); + if (!bbox.hasOverlap(bbox2)) continue; + if (tile->isChild()) { + const Node2 *node2 = data3->getChild(tile); + if (bbox.isInside(bbox2)) { + extrema.min(node2->minimum()); + extrema.max(node2->maximum()); + } else {// partial intersections at level 2 + auto *data2 = node2->data(); + const auto bboxMask2 = getBBoxMask(bbox, node2); + for (auto it2 = bboxMask2.beginOn(); it2; ++it2) { + if (data2->mChildMask.isOn(*it2)) { + const Node1* node1 = data2->getChild(*it2); + CoordBBox bbox1 = CoordBBox::createCube(node1->origin(), Node1::dim()); + if (bbox.isInside(bbox1)) { + extrema.min(node1->minimum()); + extrema.max(node1->maximum()); + } else {// partial intersection at level 1 + auto *data1 = node1->data(); + const auto bboxMask1 = getBBoxMask(bbox, node1); + for (auto it1 = bboxMask1.beginOn(); it1; ++it1) { + if (data1->mChildMask.isOn(*it1)) { + const Node0* node0 = data1->getChild(*it1); + CoordBBox bbox0 = CoordBBox::createCube(node0->origin(), Node0::dim()); + if (bbox.isInside(bbox0)) { + extrema.min(node0->minimum()); + extrema.max(node0->maximum()); + } else {// partial intersection at level 0 + auto *data0 = node0->data(); + const auto bboxMask0 = getBBoxMask(bbox, node0); + for (auto it0 = bboxMask0.beginOn(); it0; ++it0) { + extrema.add(data0->getValue(*it0)); + } + }// end partial intersection at level 0 + } else {// tile at level 1 + extrema.add(data1->mTable[*it1].value); + } + } + }// end of partial intersection at level 1 + } else {// tile at level 2 + extrema.add(data2->mTable[*it2].value); + } + }// loop over tiles and nodes at level 2 + }// end of partial intersection at level 1 + } else {// tile at root level + extrema.add(tile->value); + } + }// loop over root table + } else {// bbox does not overlap the grid + extrema.add(root.background()); + } + return extrema; +}// getExtrema + +}// namespace tools + +} // namespace nanovdb + +#endif // NANOVDB_TOOLS_GRIDSTATS_H_HAS_BEEN_INCLUDED diff --git a/external/nanovdb/tools/GridValidator.h b/external/nanovdb/tools/GridValidator.h new file mode 100644 index 00000000..6a8565cb --- /dev/null +++ b/external/nanovdb/tools/GridValidator.h @@ -0,0 +1,244 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +/*! + \file nanovdb/tools/GridValidator.h + + \author Ken Museth + + \date August 30, 2020 + + \brief Checks the validity of an existing NanoVDB grid. 
+ + \note before v32.6.0: checksum[0] = Grid+Tree+Root, checksum[1] = nodes + after v32.6.0: checksum[0] = Grid+Tree, checksum[1] = nodes + blind data in 4K blocks + + When serialized: + [Grid,Tree][Root][ROOT TILES...][Node<5>...][Node<4>...][Leaf<3>...][BlindMeta...][BlindData...] +*/ + +#ifndef NANOVDB_TOOLS_GRID_VALIDATOR_H_HAS_BEEN_INCLUDED +#define NANOVDB_TOOLS_GRID_VALIDATOR_H_HAS_BEEN_INCLUDED + +#include // for std::cerr + +#include +#include + +namespace nanovdb { + +namespace tools { + +/// @brief Performs several validation tests on a grid pointer. +/// @tparam ValueT Build type of the input grid +/// @param grid const point to the grid that needs validation +/// @param mode Mode of the validation check (defined in GridChecksum.h) +/// @param verbose If true information about the first failed test is printed to std::cerr +/// @return Return true if the specified grid passes several validation tests. +template +bool isValid(const NanoGrid *grid, CheckMode mode, bool verbose = false); + +/// @brief Return true if the specified grid passes several validation tests. +/// @tparam ValueT Build type of the input grid +/// @param grid Grid to validate +/// @param detailed If true the validation test is detailed and relatively slow. +/// @param verbose If true information about the first failed test is printed to std::cerr +/// @note This method has been deprecated by the one defined above +template +[[deprecated("Use isValue(const NanoGrid*, CheckMode, bool) instead.")]] +bool isValid(const NanoGrid &grid, bool detailed = true, bool verbose = false) +{ + return isValid(&grid, detailed ? CheckMode::Full : CheckMode::Half, verbose); +} + +//================================================================================================ + +/// @brief validate grid +template +__hostdev__ char* checkGrid(const NanoGrid *grid, char *error, CheckMode mode = CheckMode::Full) +{ + *error = '\0';// reset error string + char str[32];// temporary buffer for toStr + + // check Grid + if (grid == nullptr) { + return util::sprint(error, "Invalid pointer: Grid is NULL"); + } else if (!isAligned(grid)) { + return util::sprint(error, "Invalid pointer: Grid is misaligned"); + } else if (grid->mMagic != NANOVDB_MAGIC_NUMB && grid->mMagic != NANOVDB_MAGIC_GRID) { + return util::sprint(error, "Invalid magic number: ", toStr(str, toMagic(grid->mMagic))); + } else if (!grid->mVersion.isCompatible()) { + return util::sprint(error, "Incompatible version number: ", toStr(str, grid->mVersion)); + } else if (grid->mGridCount == 0) { + return util::sprint(error, "Zero grid count"); + } else if (grid->mGridIndex >= grid->mGridCount) { + return util::sprint(error, "grid index(", int(grid->mGridIndex), ") >= grid count(", int(grid->mGridCount), ")"); + } else if (grid->mGridClass >= GridClass::End) { + return util::sprint(error, "Invalid GridClass(", toStr(str, grid->mGridClass), ")"); + } else if (grid->mGridType >= GridType::End) { + return util::sprint(error, "Invalid GridType(", toStr(str, grid->mGridType), ")"); + } else if (grid->mGridType != toGridType()) { + return util::sprint(error, "Invalid combination of BuildType(", toStr(str, toGridType()), ") and GridType(", toStr(str+16, grid->mGridType), ")"); + } else if (!isValid(grid->mGridType, grid->mGridClass)) { + return util::sprint(error, "Invalid combination of GridType(", toStr(str, grid->mGridType), ") and GridClass(", toStr(str+16,grid->mGridClass), ")"); + } + + // check Tree + auto &tree = grid->tree(); + if (auto *p = tree.getRoot()) { + if 
(!isAligned(p)) return util::strcpy(error, "Invalid pointer: Root is misaligned"); + } else { + return util::strcpy(error, "Invalid pointer: Root is NULL"); + } + + // check Root + auto &root = tree.root(); + auto *rootData = root.data(); + if (rootData == nullptr) { + return util::strcpy(error, "Invalid pointer: Root is NULL"); + } else if (!isAligned((const void*)rootData)) { + return util::strcpy(error, "Invalid pointer: Root is misaligned"); + } else if ( (const uint8_t*)(rootData) < (const uint8_t*)(&tree+1)) { + return util::strcpy(error, "Invalid root pointer (should be located after the Grid and Tree)"); + } else if ( (const void*)(rootData) > util::PtrAdd(rootData, root.memUsage())) { + return util::strcpy(error, "Invalid root pointer (appears to be located after the end of the buffer)"); + } else {// check root tiles + const void *bounds[2] = {rootData + 1, util::PtrAdd(rootData, root.memUsage())}; + for (uint32_t i = 0; imTableSize; ++i) { + const void *tile = rootData->tile(i); + if ( tile < bounds[0] ) { + return util::strcpy(error, "Invalid root tile pointer (below lower bound"); + } else if (tile >= bounds[1]) { + return util::strcpy(error, "Invalid root tile pointer (above higher bound"); + } + } + } + if (mode == CheckMode::Half) return error; + + // check nodes + const bool test = grid->isBreadthFirst(); + auto *n0 = tree.template getFirstNode<0>(); + auto *n1 = tree.template getFirstNode<1>(); + auto *n2 = tree.template getFirstNode<2>(); + const void *bounds[3][2] = {{n0, util::PtrAdd(n0, grid->gridSize())}, {n1, n0}, {n2, n1}}; + + auto check = [&](const void *ptr, int level) -> bool { + if (ptr==nullptr) { + util::strcpy(error, "Invalid node pointer: node is NULL"); + } else if (!isAligned(ptr)) { + util::strcpy(error, "Invalid node pointer: node is misaligned"); + } else if (test && level == 0 && (const void*)(n0++) != ptr) { + util::strcpy(error, "Leaf node is not stored breadth-first"); + } else if (test && level == 1 && (const void*)(n1++) != ptr) { + util::strcpy(error, "Lower node is not stored breadth-first"); + } else if (test && level == 2 && (const void*)(n2++) != ptr) { + util::strcpy(error, "Upper node is not stored breadth-first"); + } else if ( ptr < bounds[level][0] ) { + util::strcpy(error, "Invalid node pointer: below lower bound"); + } else if ( ptr >= bounds[level][1] ) { + util::strcpy(error, "Invalid node pointer: above higher bound"); + } + return !util::empty(error); + }; + + for (auto it2 = root.cbeginChild(); it2; ++it2) { + if (check(&*it2, 2)) return error; + for (auto it1 = it2->cbeginChild(); it1; ++it1) { + if (check(&*it1, 1)) return error; + for (auto it0 = it1->cbeginChild(); it0; ++it0) if (check(&*it0, 0)) return error; + }// loop over child nodes of the upper internal node + }// loop over child nodes of the root node + + return error; +} // checkGrid + +//================================================================================================ + +template +bool isValid(const NanoGrid *grid, CheckMode mode, bool verbose) +{ + std::unique_ptr strUP(new char[100]); + char *str = strUP.get(); + + tools::checkGrid(grid, str, mode); + + if (util::empty(str) && !validateChecksum(grid, mode)) util::strcpy(str, "Mis-matching checksum"); + if (verbose && !util::empty(str)) std::cerr << "Validation failed: " << str << std::endl; + + return util::empty(str); +}// isValid + +//================================================================================================ + +struct IsNanoGridValid { + template + static bool known(const 
GridData *gridData, CheckMode mode, bool verbose) + { + return tools::isValid((const NanoGrid*)gridData, mode, verbose); + } + static bool unknown(const GridData *gridData, CheckMode, bool verbose) + { + if (verbose) { + char str[16]; + std::cerr << "Unsupported GridType: \"" << toStr(str, gridData->mGridType) << "\"\n" << std::endl; + } + return false; + } +};// IsNanoGridValid + +/// @brief Validate a specific grid in a GridHandle +/// @tparam GridHandleT Type of GridHandle +/// @param handle GridHandle containing host grids +/// @param gridID linear index of the grid to be validated +/// @param mode node of validation tests +/// @param verbose if true information is printed if the grid fails a validation test +/// @return true if grid @c gridID passes all the validation tests +template +bool validateGrid(const GridHandleT &handle, uint32_t gridID, CheckMode mode, bool verbose) +{ + if (mode == CheckMode::Disable) { + return true; + } else if (gridID >= handle.gridCount()) { + if (verbose) std::cerr << "grid index " << gridID << " exceeds available grid count " << handle.gridCount() << std::endl; + return false; + } + return callNanoGrid(handle.gridData(gridID), mode, verbose); +}// validateGrid + +//================================================================================================ + +/// @brief Validate all the grids in a GridHandle +/// @tparam GridHandleT Type of GridHandle +/// @param handle GridHandle containing host grids (0,1...,N) +/// @param mode node of validation tests +/// @param verbose if true information is printed if a grid fails a validation test +/// @return true if all grids pass alle the validation tests +template +bool validateGrids(const GridHandleT &handle, CheckMode mode, bool verbose) +{ + if (mode == CheckMode::Disable) return true; + for (uint32_t gridID=0; gridID +[[deprecated("Use nanovdb:tools::checkGrid instead.")]] +__hostdev__ char* checkGrid(const NanoGrid *grid, char *error, CheckMode mode = CheckMode::Full) +{ + return tools::checkGrid(grid, error, mode); +} + +template +[[deprecated("Use nanovdb:tools::isValid instead.")]] +bool isValid(const NanoGrid *grid, CheckMode mode, bool verbose = false) +{ + return tools::isValid(grid, mode, verbose); +} + +}// namespace nanovdb + +#endif // NANOVDB_TOOLS_GRID_VALIDATOR_H_HAS_BEEN_INCLUDED diff --git a/external/nanovdb/tools/NanoToOpenVDB.h b/external/nanovdb/tools/NanoToOpenVDB.h new file mode 100644 index 00000000..5966ece9 --- /dev/null +++ b/external/nanovdb/tools/NanoToOpenVDB.h @@ -0,0 +1,366 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +/*! + \file nanovdb/tools/NanoToOpenVDB.h + + \author Ken Museth + + \date May 6, 2020 + + \brief This class will deserialize an NanoVDB grid into an OpenVDB grid. + + \todo Add support for PointIndexGrid and PointDataGrid +*/ + +#include // manages and streams the raw memory buffer of a NanoVDB grid. 
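As a usage reference for the host-side validator entry points declared in GridValidator.h above, a minimal sketch might look as follows; the handle, its buffer type, and how it was loaded are assumptions for illustration only:

#include <nanovdb/GridHandle.h>
#include <nanovdb/HostBuffer.h>
#include <nanovdb/tools/GridValidator.h>

// Run the full (slow) validation on every grid stored in a host-side handle,
// printing the first failed test per grid to std::cerr.
bool checkAll(const nanovdb::GridHandle<nanovdb::HostBuffer>& handle)
{
    return nanovdb::tools::validateGrids(handle, nanovdb::CheckMode::Full, /*verbose=*/true);
}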
+#include +#include + +#include + +#ifndef NANOVDB_TOOLS_NANOTOOPENVDB_H_HAS_BEEN_INCLUDED +#define NANOVDB_TOOLS_NANOTOOPENVDB_H_HAS_BEEN_INCLUDED + +template +struct ConvertTrait {using Type = T;}; + +template +struct ConvertTrait> {using Type = openvdb::math::Vec3;}; + +template +struct ConvertTrait> {using Type = openvdb::math::Vec4;}; + +template<> +struct ConvertTrait {using Type = float;}; + +template<> +struct ConvertTrait {using Type = float;}; + +template<> +struct ConvertTrait {using Type = float;}; + +template<> +struct ConvertTrait {using Type = float;}; + +template<> +struct ConvertTrait {using Type = openvdb::ValueMask;}; + +namespace nanovdb { + +namespace tools { + +/// @brief Forward declaration of free-standing function that de-serializes a typed NanoVDB grid into an OpenVDB Grid +template +typename openvdb::Grid::Type>::Type>::Ptr +nanoToOpenVDB(const NanoGrid& grid, int verbose = 0); + +/// @brief Forward declaration of free-standing function that de-serializes a NanoVDB GridHandle into an OpenVDB GridBase +template +openvdb::GridBase::Ptr +nanoToOpenVDB(const GridHandle& handle, int verbose = 0, uint32_t n = 0); + +/// @brief This class will serialize an OpenVDB grid into a NanoVDB grid managed by a GridHandle. +template +class NanoToOpenVDB +{ + using NanoNode0 = nanovdb::LeafNode; // note that it's using openvdb coord nd mask types! + using NanoNode1 = nanovdb::InternalNode; + using NanoNode2 = nanovdb::InternalNode; + using NanoRootT = nanovdb::RootNode; + using NanoTreeT = nanovdb::Tree; + using NanoGridT = nanovdb::Grid; + using NanoValueT = typename NanoGridT::ValueType; + + using OpenBuildT = typename ConvertTrait::Type; // e.g. float -> float but nanovdb::math::Vec3 -> openvdb::Vec3 + using OpenNode0 = openvdb::tree::LeafNode; // leaf + using OpenNode1 = openvdb::tree::InternalNode; // lower + using OpenNode2 = openvdb::tree::InternalNode; // upper + using OpenRootT = openvdb::tree::RootNode; + using OpenTreeT = openvdb::tree::Tree; + using OpenGridT = openvdb::Grid; + using OpenValueT = typename OpenGridT::ValueType; + +public: + /// @brief Construction from an existing const OpenVDB Grid. + NanoToOpenVDB(){}; + + /// @brief Return a shared pointer to a NanoVDB grid constructed from the specified OpenVDB grid + typename OpenGridT::Ptr operator()(const NanoGrid& grid, int verbose = 0); + +private: + + template + OpenNodeT* processNode(const NanoNodeT*); + + OpenNode2* process(const NanoNode2* node) {return this->template processNode(node);} + OpenNode1* process(const NanoNode1* node) {return this->template processNode(node);} + + template + typename std::enable_if::value && + !std::is_same::value && + !std::is_same::value && + !std::is_same::value && + !std::is_same::value && + !std::is_same::value, + OpenNode0*>::type + process(const NanoLeafT* node); + + template + typename std::enable_if::value || + std::is_same::value || + std::is_same::value || + std::is_same::value, + OpenNode0*>::type + process(const NanoLeafT* node); + + template + typename std::enable_if::value, + OpenNode0*>::type + process(const NanoLeafT* node); + + template + typename std::enable_if::value, + OpenNode0*>::type + process(const NanoLeafT* node); + + /// converts nanovdb value types to openvdb value types, e.g. 
nanovdb::Vec3f& -> openvdb::Vec3f& + static const OpenValueT& Convert(const NanoValueT &v) {return reinterpret_cast(v);} + static const OpenValueT* Convert(const NanoValueT *v) {return reinterpret_cast(v);} + +}; // NanoToOpenVDB class + +template +typename NanoToOpenVDB::OpenGridT::Ptr +NanoToOpenVDB::operator()(const NanoGrid& grid, int /*verbose*/) +{ + // since the input nanovdb grid might use nanovdb types (Coord, Mask, Vec3) we cast to use openvdb types + const NanoGridT *srcGrid = reinterpret_cast(&grid); + + auto dstGrid = openvdb::createGrid(Convert(srcGrid->tree().background())); + dstGrid->setName(srcGrid->gridName()); // set grid name + switch (srcGrid->gridClass()) { // set grid class + case nanovdb::GridClass::LevelSet: + dstGrid->setGridClass(openvdb::GRID_LEVEL_SET); + break; + case nanovdb::GridClass::FogVolume: + dstGrid->setGridClass(openvdb::GRID_FOG_VOLUME); + break; + case nanovdb::GridClass::Staggered: + dstGrid->setGridClass(openvdb::GRID_STAGGERED); + break; + case nanovdb::GridClass::PointIndex: + throw std::runtime_error("NanoToOpenVDB does not yet support PointIndexGrids"); + case nanovdb::GridClass::PointData: + throw std::runtime_error("NanoToOpenVDB does not yet support PointDataGrids"); + default: + dstGrid->setGridClass(openvdb::GRID_UNKNOWN); + } + // set transform + const nanovdb::Map& nanoMap = reinterpret_cast(srcGrid)->mMap; + auto mat = openvdb::math::Mat4::identity(); + mat.setMat3(openvdb::math::Mat3(nanoMap.mMatD)); + mat.transpose(); // the 3x3 in nanovdb is transposed relative to openvdb's 3x3 + mat.setTranslation(openvdb::math::Vec3(nanoMap.mVecD)); + dstGrid->setTransform(openvdb::math::Transform::createLinearTransform(mat)); // calls simplify! + + // process root node + auto &root = dstGrid->tree().root(); + auto *data = srcGrid->tree().root().data(); + for (uint32_t i=0; imTableSize; ++i) { + auto *tile = data->tile(i); + if (tile->isChild()) { + root.addChild( this->process( data->getChild(tile)) ); + } else { + root.addTile(tile->origin(), Convert(tile->value), tile->state); + } + } + + return dstGrid; +} + +template +template +DstNodeT* +NanoToOpenVDB::processNode(const SrcNodeT *srcNode) +{ + DstNodeT *dstNode = new DstNodeT(); // un-initialized for fast construction + dstNode->setOrigin(srcNode->origin()); + const auto& childMask = srcNode->childMask(); + const_cast(dstNode->getValueMask()) = srcNode->valueMask(); + const_cast(dstNode->getChildMask()) = childMask; + auto* dstTable = const_cast(dstNode->getTable()); + auto* srcData = srcNode->data(); + std::vector> childNodes; + const auto childCount = childMask.countOn(); + childNodes.reserve(childCount); + for (uint32_t n = 0; n < DstNodeT::NUM_VALUES; ++n) { + if (childMask.isOn(n)) { + childNodes.emplace_back(n, srcData->getChild(n)); + } else { + dstTable[n].setValue(Convert(srcData->mTable[n].value)); + } + } + auto kernel = [&](const auto& r) { + for (auto i = r.begin(); i != r.end(); ++i) { + auto &p = childNodes[i]; + dstTable[p.first].setChild( this->process(p.second) ); + } + }; + +#if 0 + kernel(Range1D(0, childCount)); +#else + util::forEach(0, childCount, 1, kernel); +#endif + return dstNode; +} // processNode + +template +template +inline typename std::enable_if::value && + !std::is_same::value && + !std::is_same::value && + !std::is_same::value && + !std::is_same::value && + !std::is_same::value, + typename NanoToOpenVDB::OpenNode0*>::type +NanoToOpenVDB::process(const NanoLeafT *srcNode) +{ + static_assert(std::is_same::value, "NanoToOpenVDB::process assert failed"); + 
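    // Note: the raw value copy below is manually unrolled by four; for a standard
    // 8^3 leaf OpenNode0::SIZE is 512, so the loop performs 128 iterations. This
    // overload only handles plain (uncompressed) value types, as selected by the
    // enable_if above.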
OpenNode0* dstNode = new OpenNode0(); // un-initialized for fast construction + dstNode->setOrigin(srcNode->origin()); + dstNode->setValueMask(srcNode->valueMask()); + + const auto* src = Convert(srcNode->data()->mValues);// doesn't work for compressed data, bool or ValueMask + for (auto *dst = dstNode->buffer().data(), *end = dst + OpenNode0::SIZE; dst != end; dst += 4, src += 4) { + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; + dst[3] = src[3]; + } + + return dstNode; +} // process(NanoNode0) + +template +template +inline typename std::enable_if::value || + std::is_same::value || + std::is_same::value || + std::is_same::value, + typename NanoToOpenVDB::OpenNode0*>::type +NanoToOpenVDB::process(const NanoLeafT *srcNode) +{ + static_assert(std::is_same::value, "NanoToOpenVDB::process assert failed"); + OpenNode0* dstNode = new OpenNode0(); // un-initialized for fast construction + dstNode->setOrigin(srcNode->origin()); + dstNode->setValueMask(srcNode->valueMask()); + float *dst = dstNode->buffer().data(); + for (int i=0; i!=512; i+=4) { + *dst++ = srcNode->getValue(i); + *dst++ = srcNode->getValue(i+1); + *dst++ = srcNode->getValue(i+2); + *dst++ = srcNode->getValue(i+3); + } + + return dstNode; +} // process(NanoNode0) + +template +template +inline typename std::enable_if::value, + typename NanoToOpenVDB::OpenNode0*>::type +NanoToOpenVDB::process(const NanoLeafT *srcNode) +{ + static_assert(std::is_same::value, "NanoToOpenVDB::process assert failed"); + OpenNode0* dstNode = new OpenNode0(); // un-initialized for fast construction + dstNode->setOrigin(srcNode->origin()); + dstNode->setValueMask(srcNode->valueMask()); + + return dstNode; +} // process(NanoNode0) + +template +template +inline typename std::enable_if::value, + typename NanoToOpenVDB::OpenNode0*>::type +NanoToOpenVDB::process(const NanoLeafT *srcNode) +{ + static_assert(std::is_same::value, "NanoToOpenVDB::process assert failed"); + OpenNode0* dstNode = new OpenNode0(); // un-initialized for fast construction + dstNode->setOrigin(srcNode->origin()); + dstNode->setValueMask(srcNode->valueMask()); + reinterpret_cast&>(dstNode->buffer()) = srcNode->data()->mValues; + + return dstNode; +} // process(NanoNode0) + +template +inline typename openvdb::Grid::Type>::Type>::Ptr +nanoToOpenVDB(const NanoGrid& grid, int verbose) +{ + NanoToOpenVDB tmp; + return tmp(grid, verbose); +} + +template +openvdb::GridBase::Ptr +nanoToOpenVDB(const GridHandle& handle, int verbose, uint32_t n) +{ + if (auto grid = handle.template grid(n)) { + return tools::nanoToOpenVDB(*grid, verbose); + } else if (auto grid = handle.template grid(n)) { + return tools::nanoToOpenVDB(*grid, verbose); + } else if (auto grid = handle.template grid(n)) { + return tools::nanoToOpenVDB(*grid, verbose); + } else if (auto grid = handle.template grid(n)) { + return tools::nanoToOpenVDB(*grid, verbose); + } else if (auto grid = handle.template grid(n)) { + return tools::nanoToOpenVDB(*grid, verbose); + } else if (auto grid = handle.template grid(n)) { + return tools::nanoToOpenVDB(*grid, verbose); + } else if (auto grid = handle.template grid(n)) { + return tools::nanoToOpenVDB(*grid, verbose); + } else if (auto grid = handle.template grid(n)) { + return tools::nanoToOpenVDB(*grid, verbose); + } else if (auto grid = handle.template grid(n)) { + return tools::nanoToOpenVDB(*grid, verbose); + } else if (auto grid = handle.template grid(n)) { + return tools::nanoToOpenVDB(*grid, verbose); + } else if (auto grid = handle.template grid(n)) { + return 
tools::nanoToOpenVDB(*grid, verbose); + } else if (auto grid = handle.template grid(n)) { + return tools::nanoToOpenVDB(*grid, verbose); + } else if (auto grid = handle.template grid(n)) { + return tools::nanoToOpenVDB(*grid, verbose); + } else if (auto grid = handle.template grid(n)) { + return tools::nanoToOpenVDB(*grid, verbose); + } else { + OPENVDB_THROW(openvdb::RuntimeError, "Unsupported NanoVDB grid type!"); + } +}// tools::nanoToOpenVDB + +}// namespace tools + +/// @brief Forward declaration of free-standing function that de-serializes a typed NanoVDB grid into an OpenVDB Grid +template +[[deprecated("Use nanovdb::tools::nanoToOpenVDB instead.")]] +typename openvdb::Grid::Type>::Type>::Ptr +nanoToOpenVDB(const NanoGrid& grid, int verbose = 0) +{ + return tools::nanoToOpenVDB(grid, verbose); +} + +/// @brief Forward declaration of free-standing function that de-serializes a NanoVDB GridHandle into an OpenVDB GridBase +template +[[deprecated("Use nanovdb::tools::nanoToOpenVDB instead.")]] +openvdb::GridBase::Ptr +nanoToOpenVDB(const GridHandle& handle, int verbose = 0, uint32_t n = 0) +{ + return tools::nanoToOpenVDB(handle, verbose, n); +} + +} // namespace nanovdb + +#endif // NANOVDB_TOOLS_NANOTOOPENVDB_H_HAS_BEEN_INCLUDED diff --git a/external/nanovdb/tools/cuda/AddBlindData.cuh b/external/nanovdb/tools/cuda/AddBlindData.cuh new file mode 100644 index 00000000..aab5796f --- /dev/null +++ b/external/nanovdb/tools/cuda/AddBlindData.cuh @@ -0,0 +1,146 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +/*! + \file nanovdb/tools/cuda/AddBlindData.cuh + + \author Ken Museth + + \date August 3, 2023 + + \brief Defines function that appends blind device data to and existing device NanoGrid + + \warning The header file contains cuda device code so be sure + to only include it in .cu files (or other .cuh files) +*/ + +#ifndef NVIDIA_TOOLS_CUDA_ADDBLINDDATA_CUH_HAS_BEEN_INCLUDED +#define NVIDIA_TOOLS_CUDA_ADDBLINDDATA_CUH_HAS_BEEN_INCLUDED + +#include +#include +#include +#include +#include +#include + +#include // for std::strcpy + +namespace nanovdb {// ================================================ + +namespace tools::cuda {// ============================================ + +/// @brief This function appends blind data to and existing NanoGrid +/// @tparam BuildT Build type of the grid +/// @tparam BlindDataT Type of the blind data +/// @tparam BufferT Type of the buffer used for allocation +/// @param d_grid Pointer to device grid +/// @param d_blindData Pointer to device blind data +/// @param valueCount number of values in the blind data +/// @param blindClass class of the blind data +/// @param semantics semantics of the blind data +/// @param name optional name of the blind data +/// @param pool optional pool used for allocation +/// @param stream optional CUDA stream (defaults to CUDA stream 0) +/// @return GridHandle with blind data appended +template +GridHandle +addBlindData(const NanoGrid *d_grid, + const BlindDataT *d_blindData, + uint64_t valueCount, + GridBlindDataClass blindClass = GridBlindDataClass::Unknown, + GridBlindDataSemantic semantics = GridBlindDataSemantic::Unknown, + const char *name = "", + const BufferT &pool = BufferT(), + cudaStream_t stream = 0) +{ + // In: |-----------|--------- |-----------| + // old grid old meta old data + // Out: |-----------|----------|----------|-----------|------------| + // old grid old meta new meta old data new data + + static_assert(BufferTraits::hasDeviceDual, "Expected BufferT to 
support device allocation"); + + // extract byte sizes of the grid, blind meta data and blind data + enum {GRID=0, META=1, DATA=2, CHECKSUM=3}; + uint64_t tmp[4], *d_tmp; + cudaCheck(util::cuda::mallocAsync((void**)&d_tmp, 4*sizeof(uint64_t), stream)); + util::cuda::lambdaKernel<<<1, 1, 0, stream>>>(1, [=] __device__(size_t) { + if (auto count = d_grid->blindDataCount()) { + d_tmp[GRID] = util::PtrDiff(&d_grid->blindMetaData(0), d_grid); + d_tmp[META] = count*sizeof(GridBlindMetaData); + d_tmp[DATA] = d_grid->gridSize() - d_tmp[GRID] - d_tmp[META]; + } else { + d_tmp[GRID] = d_grid->gridSize(); + d_tmp[META] = d_tmp[DATA] = 0u; + } + d_tmp[CHECKSUM] = d_grid->checksum().full(); + }); cudaCheckError(); + cudaCheck(cudaMemcpyAsync(&tmp, d_tmp, 4*sizeof(uint64_t), cudaMemcpyDeviceToHost, stream)); + + GridBlindMetaData metaData{int64_t(sizeof(GridBlindMetaData) + tmp[DATA]), valueCount, + sizeof(BlindDataT), semantics, blindClass, toGridType()}; + if (!metaData.isValid()) throw std::runtime_error("cudaAddBlindData: invalid combination of blind meta data"); + std::strcpy(metaData.mName, name); + auto buffer = BufferT::create(tmp[GRID] + tmp[META] + sizeof(GridBlindMetaData) + tmp[DATA] + metaData.blindDataSize(), &pool, false); + void *d_data = buffer.deviceData(); + + // 1: |-----------|----------| + // old grid old meta + cudaCheck(cudaMemcpyAsync(d_data, d_grid, tmp[GRID] + tmp[META], cudaMemcpyDeviceToDevice, stream)); + + // 2: |-----------|----------|----------| + // old grid old meta new meta + cudaCheck(cudaMemcpyAsync((char*)d_data + tmp[GRID] + tmp[META], &metaData, sizeof(GridBlindMetaData), cudaMemcpyHostToDevice, stream)); + + // 3: |-----------|----------|----------|-----------| + // old grid old meta new meta old data + cudaCheck(cudaMemcpyAsync((char*)d_data + tmp[GRID] + tmp[META] + sizeof(GridBlindMetaData), + (const char*)d_grid + tmp[GRID] + tmp[META], tmp[DATA], cudaMemcpyDeviceToDevice, stream)); + + // 4: |-----------|----------|----------|-----------|------------| + // old grid old meta new meta old data new data + const size_t dataSize = valueCount*sizeof(BlindDataT);// no padding + cudaCheck(cudaMemcpyAsync((char*)d_data + tmp[GRID] + tmp[META] + sizeof(GridBlindMetaData) + tmp[DATA], + d_blindData, dataSize, cudaMemcpyDeviceToDevice, stream)); + if (auto padding = metaData.blindDataSize() - dataSize) {// zero out possible padding + cudaCheck(cudaMemsetAsync((char*)d_data + tmp[GRID] + tmp[META] + sizeof(GridBlindMetaData) + tmp[DATA] + dataSize, 0, padding, stream)); + } + + // increment grid size and blind data counter in output grid + util::cuda::lambdaKernel<<<1, 1, 0, stream>>>(1, [=] __device__(size_t) { + auto &grid = *reinterpret_cast*>(d_data); + grid.mBlindMetadataCount += 1; + grid.mBlindMetadataOffset = d_tmp[GRID]; + auto *meta = util::PtrAdd(d_data, grid.mBlindMetadataOffset);// points to first blind meta data + for (uint32_t i=0, n=grid.mBlindMetadataCount-1; imDataOffset += sizeof(GridBlindMetaData); + grid.mGridSize += sizeof(GridBlindMetaData) + meta->blindDataSize();// expansion with 32 byte alignment + }); cudaCheckError(); + cudaCheck(util::cuda::freeAsync(d_tmp, stream)); + + Checksum cs(tmp[CHECKSUM]); + cuda::updateChecksum(reinterpret_cast(d_data), cs.mode(), stream); + + return GridHandle(std::move(buffer)); +}// cudaAddBlindData + +}// namespace tools::cuda + +template +[[deprecated("Use nanovdb::cuda::addBlindData instead")]] +GridHandle +cudaAddBlindData(const NanoGrid *d_grid, + const BlindDataT *d_blindData, + uint64_t valueCount, + 
GridBlindDataClass blindClass = GridBlindDataClass::Unknown, + GridBlindDataSemantic semantics = GridBlindDataSemantic::Unknown, + const char *name = "", + const BufferT &pool = BufferT(), + cudaStream_t stream = 0) +{ + return tools::cuda::addBlindData(d_grid, d_blindData, valueCount, blindClass, semantics, name, pool, stream); +} + +}// namespace nanovdb + +#endif // NVIDIA_TOOLS_CUDA_ADDBLINDDATA_CUH_HAS_BEEN_INCLUDED \ No newline at end of file diff --git a/external/nanovdb/tools/cuda/GridChecksum.cuh b/external/nanovdb/tools/cuda/GridChecksum.cuh new file mode 100644 index 00000000..5cc964e5 --- /dev/null +++ b/external/nanovdb/tools/cuda/GridChecksum.cuh @@ -0,0 +1,441 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +/*! + \file nanovdb/tools/cuda/GridChecksum.cuh + + \author Ken Museth + + \date September 28, 2023 + + \brief Compute CRC32 checksum of NanoVDB grids + + \note before v32.6.0: checksum[0] = Grid+Tree+Root, checksum[1] = nodes + after v32.6.0: checksum[0] = Grid+Tree, checksum[1] = nodes + blind data in 4K blocks + + When serialized: + [Grid,Tree][Root][ROOT TILES...][Node<5>...][Node<4>...][Leaf<3>...][BlindMeta...][BlindData...] + checksum[2] before v32.6.0: <------------- [0] ------------><-------------- [1] ---------------> + checksum[]2 after v32.6.0: <---[0]---><----------------------------------------[1]----------------------------------------> +*/ + +#ifndef NANOVDB_TOOLS_CUDA_GRIDCHECKSUM_CUH_HAS_BEEN_INCLUDED +#define NANOVDB_TOOLS_CUDA_GRIDCHECKSUM_CUH_HAS_BEEN_INCLUDED + +#include +#include // required for instantiation of move c-tor of GridHandle +#include +#include +#include + +namespace nanovdb {// ======================================================================= + +namespace tools::cuda {// =================================================================== + +/// @brief Compute the (2 x CRC32) checksum of the specified @c d_gridData on the device +/// @param d_gridData Device base pointer to the grid from which the checksum is computed. +/// @param mode Defines the mode of computation for the checksum. +/// @param stream optional cuda stream (defaults to zero) +/// @return Return the (2 x CRC32) checksum of the specified @c d_gridData +Checksum evalChecksum(const GridData *d_gridData, CheckMode mode = CheckMode::Default, cudaStream_t stream = 0); + +/// @brief Extract the checksum of a device grid +/// @param d_gridData Device basepointer to grid with a checksum +/// @param stream optional cuda stream (defaults to zero) +/// @return Checksum encoded in the specified grid +Checksum getChecksum(const GridData *d_gridData, cudaStream_t stream = 0); + +/// @brief Return true if the checksum of @c d_gridData matches the expected +/// value already encoded into the grid's meta data. +/// @tparam BuildT Template parameter used to build NanoVDB grid. +/// @param d_gridData Grid whose checksum is validated. +/// @param mode Defines the mode of computation for the checksum. +/// @param stream optional cuda stream (defaults to zero) +bool validateChecksum(const GridData *d_gridData, CheckMode mode = CheckMode::Default, cudaStream_t stream = 0); + +/// @brief Update the checksum of a device grid +/// @param d_gridData device pointer to GridData +/// @param mode Mode of computation for the checksum. 
+/// @param stream optional cuda stream (defaults to zero) +void updateChecksum(GridData *d_gridData, CheckMode mode, cudaStream_t stream = 0); + +/// @brief Updates the checksum of a device grid by preserving its mode +/// @param d_gridData Device base pointer to grid +/// @param stream optional cuda stream (defaults to zero) +inline void updateChecksum(GridData *d_gridData, cudaStream_t stream = 0) +{ + updateChecksum(d_gridData, getChecksum(d_gridData, stream).mode(), stream); +} + +}// namespace tools::cuda + +namespace util::cuda { + +/// @brief Cuda kernel that computes CRC32 checksums of blocks of data using a look-up-table +/// @param d_data device pointer to raw data from wich to compute the CRC32 checksums +/// @param d_blockCRC device pointer to array of @c blockCount checksums for each block +/// @param blockCount number of blocks and checksums +/// @param blockSize size of each block in bytes +/// @param d_lut device pointer to CRC32 Lookup Table +template +__global__ void crc32Kernel(const T *d_data, uint32_t* d_blockCRC, uint32_t blockCount, uint32_t blockSize, const uint32_t *d_lut) +{ + const uint32_t tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid < blockCount) d_blockCRC[tid] = crc32((const uint8_t*)d_data + tid * blockSize, blockSize, d_lut); +} + +/// @brief Cuda kernel that computes CRC32 checksums of blocks of data (without using a look-up-table) +/// @param d_data device pointer to raw data from wich to compute the CRC32 checksums +/// @param d_blockCRC device pointer to array of @c blockCount checksums for each block +/// @param blockCount number of blocks and checksums +/// @param blockSize size of each block in bytes +template +__global__ void crc32Kernel(const T *d_data, uint32_t* d_blockCRC, uint32_t blockCount, uint32_t blockSize) +{ + const uint32_t tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid < blockCount) d_blockCRC[tid] = crc32((const uint8_t*)d_data + tid * blockSize, blockSize); +} + +/// @brief Host function to allocate and initiate a Look-Up-Table of size 256 for subsequent CRC32 computation on the device +/// @param extra number of extra elements in the LUT +/// @param stream optional cuda stream (defaults to zero) +/// @return returns a nanovdb::util::cuda::unique_ptr point to a lookup-table for CRC32 computation +inline unique_ptr createCrc32Lut(size_t extra = 0, cudaStream_t stream = 0) +{ + unique_ptr lut(256 + extra, stream); + uint32_t *d_lut = lut.get(); + lambdaKernel<<<1, 256, 0, stream>>>(256, [=] __device__(size_t tid) {initCrc32Lut(d_lut, tid);}); + cudaCheckError(); + return lut; +} + +/// @brief Compute CRC32 checksum of 4K block +/// @param d_data device pointer to start of data +/// @param size number of bytes +/// @param d_lut Look-Up-Table for CRC32 computation +/// @param stream optional cuda stream (defaults to zero) +inline void blockedCRC32(const void *d_data, size_t size, const uint32_t *d_lut, uint32_t *d_crc, cudaStream_t stream) +{ + NANOVDB_ASSERT(d_data && d_lut && d_crc); + static constexpr unsigned int threadsPerBlock = 128;// seems faster than the old value of 256! 
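    // Strategy: split the byte range into 4 KB blocks (1 << NANOVDB_CRC32_LOG2_BLOCK_SIZE),
    // compute one CRC32 per block in parallel (the last block also absorbs the
    // remainder), and finally reduce by taking a CRC32 over the array of
    // per-block checksums.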
+ const uint64_t checksumCount = size >> NANOVDB_CRC32_LOG2_BLOCK_SIZE;// 4 KB (4096 byte) + unique_ptr buffer(checksumCount, stream);// for checksums of 4 KB blocks + uint32_t *d_checksums = buffer.get(); + lambdaKernel<<>>(checksumCount, [=] __device__(size_t tid) { + uint32_t blockSize = 1 << NANOVDB_CRC32_LOG2_BLOCK_SIZE; + if (tid+1 == checksumCount) blockSize += size - (checksumCount<>>(1, [=] __device__(size_t) {// Compute CRC32 of all the 4K blocks + *d_crc = crc32((const uint8_t*)d_checksums, checksumCount*sizeof(uint32_t), d_lut); + }); cudaCheckError(); +}// void cudaBlockedCRC32(const void *d_data, size_t size, const uint32_t *d_lut, uint32_t *d_crc, cudaStream_t stream) + +/// @brief Compute CRC32 checksum of 4K block +/// @param d_begin device pointer to start of data (inclusive) +/// @param d_end device pointer to end of data (exclusive) +/// @param d_lut pointer to Look-Up-Table for accelerated CRC32 computation +/// @param stream optional cuda stream (defaults to zero) +inline void blockedCRC32(const void *d_begin, const void *d_end, const uint32_t *d_lut, uint32_t *d_crc, cudaStream_t stream) +{ + blockedCRC32(d_begin, PtrDiff(d_end, d_begin), d_lut, d_crc, stream); +} + +}// namespace util::cuda + +namespace tools::cuda { + +/// @brief +/// @param d_gridData +/// @param d_lut pointer to Look-Up-Table for accelerated CRC32 computation +/// @param d_crc +/// @param stream optional cuda stream (defaults to zero) +inline void crc32Head(const GridData *d_gridData, const uint32_t *d_lut, uint32_t *d_crc, cudaStream_t stream) +{ + NANOVDB_ASSERT(d_gridData && d_lut && d_crc); + util::cuda::lambdaKernel<<<1, 1, 0, stream>>>(1, [=] __device__(size_t){*d_crc = tools::crc32Head(d_gridData, d_lut);}); +}// void cudaCrc32Head(const GridData *d_gridData, const uint32_t *d_lut, uint32_t *d_crc, cudaStream_t stream) + +/// @brief +/// @param d_gridData +/// @param gridData +/// @param d_lut pointer to Look-Up-Table for accelerated CRC32 computation +/// @param stream optional cuda stream (defaults to zero) +inline void crc32Tail(const GridData *d_gridData, const GridData *gridData, const uint32_t *d_lut, uint32_t *d_crc, cudaStream_t stream) +{ + NANOVDB_ASSERT(d_gridData && gridData && d_lut && d_crc); + NANOVDB_ASSERT(gridData->mVersion > Version(32,6,0)); + const uint8_t *d_begin = (const uint8_t*)d_gridData; + util::cuda::blockedCRC32(d_begin + sizeof(GridData) + sizeof(TreeData), d_begin + gridData->mGridSize, d_lut, d_crc, stream); +} + +/// @brief +/// @tparam ValueT +/// @param d_grid +/// @param gridData +/// @param d_lut pointer to Look-Up-Table for accelerated CRC32 computation +/// @param d_crc +/// @param stream +template +void crc32TailOld(const NanoGrid *d_grid, const GridData *gridData, const uint32_t *d_lut, uint32_t *d_crc, cudaStream_t stream) +{ + static constexpr unsigned int threadsPerBlock = 128;// seems faster than the old value of 256! 
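    // Pre-v32.6.0 layout: the tail checksum is defined over the tree nodes rather
    // than over 4 KB blocks, so a NodeManager is built on the device to enumerate
    // upper, lower and leaf nodes; one CRC32 is computed per node and a final CRC32
    // is taken over those per-node checksums below.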
+ auto nodeMgrHandle = nanovdb::cuda::createNodeManager(d_grid, nanovdb::cuda::DeviceBuffer(), stream); + auto *d_nodeMgr = nodeMgrHandle.template deviceMgr(); + NANOVDB_ASSERT(isAligned(d_nodeMgr)); + const uint32_t nodeCount[3]={gridData->template nodeCount<0>(), gridData->template nodeCount<1>(), gridData->template nodeCount<2>()}; + util::cuda::unique_ptr d_checksumsUP(nodeCount[0]+nodeCount[1]+nodeCount[2]); + uint32_t *d_checksums = d_checksumsUP.get(), *d_ptr = d_checksums; + + util::cuda::lambdaKernel<<>>(nodeCount[2], [=] __device__(size_t tid) { + auto &node = d_nodeMgr->upper(uint32_t(tid)); + d_ptr[tid] = util::crc32((const uint8_t*)&node, node.memUsage(), d_lut); + }); cudaCheckError(); + + d_ptr += nodeCount[2]; + util::cuda::lambdaKernel<<>>(nodeCount[1], [=] __device__(size_t tid) { + auto &node = d_nodeMgr->lower(uint32_t(tid)); + d_ptr[tid] = util::crc32((const uint8_t*)&node, node.memUsage(), d_lut); + }); cudaCheckError(); + + d_ptr += nodeCount[1]; + util::cuda::lambdaKernel<<>>(nodeCount[0], [=] __device__(size_t tid) { + auto &node = d_nodeMgr->leaf(uint32_t(tid)); + d_ptr[tid] = util::crc32((const uint8_t*)&node, node.memUsage(), d_lut); + }); cudaCheckError(); + + util::cuda::lambdaKernel<<<1, 1, 0, stream>>>(1, [=] __device__(size_t) { + *d_crc = util::crc32(d_checksums, d_nodeMgr->tree().totalNodeCount()*sizeof(uint32_t), d_lut); + }); cudaCheckError(); +}// void cudaCrc32TailOld(const NanoGrid *d_grid, const GridData *gridData, uint32_t *d_lut, cudaStream_t stream) + +struct Crc32TailOld { + template + static void known(const GridData *d_gridData, const GridData *gridData, const uint32_t *d_lut, uint32_t *d_crc, cudaStream_t stream) + { + crc32TailOld((const NanoGrid*)d_gridData, gridData, d_lut, d_crc, stream); + } + static void unknown(const GridData*, const GridData*, const uint32_t*, uint32_t*, cudaStream_t) + { + throw std::runtime_error("Cannot call cudaCrc32TailOld with grid of unknown type"); + } +};// Crc32TailOld + +/// @brief +/// @param d_gridData +/// @param mode +/// @param stream +/// @return +inline Checksum evalChecksum(const GridData *d_gridData, CheckMode mode, cudaStream_t stream) +{ + static const int headSize = sizeof(GridData) + sizeof(TreeData); + NANOVDB_ASSERT(d_gridData); + Checksum cs; + if (mode != CheckMode::Empty) { + auto d_lut = util::cuda::createCrc32Lut(1, stream); + crc32Head(d_gridData, d_lut.get(), d_lut.get() + 256, stream); + cudaCheck(cudaMemcpyAsync(&(cs.head()), d_lut.get() + 256, headSize, cudaMemcpyDeviceToHost, stream)); + if (mode == CheckMode::Full) { + std::unique_ptr buffer(new char[headSize]); + auto *gridData = (GridData*)(buffer.get()); + cudaCheck(cudaMemcpyAsync(gridData, d_gridData, headSize, cudaMemcpyDeviceToHost, stream)); + if (gridData->mVersion > Version(32,6,0)) { + crc32Tail(d_gridData, gridData, d_lut.get(), d_lut.get() + 256, stream); + } else { + callNanoGrid(d_gridData, gridData, d_lut.get(), d_lut.get() + 256, stream); + } + cudaCheck(cudaMemcpyAsync(&(cs.tail()), d_lut.get() + 256, headSize, cudaMemcpyDeviceToHost, stream)); + } + } + return cs; +} + +/// @brief +/// @tparam BuildT +/// @param d_grid +/// @param mode +/// @param stream +/// @return +template +Checksum evalChecksum(const NanoGrid *d_grid, CheckMode mode, cudaStream_t stream = 0) +{ + static const int headSize = sizeof(GridData) + sizeof(TreeData); + NANOVDB_ASSERT(d_grid); + Checksum cs; + if (mode != CheckMode::Empty) { + auto d_lut = util::cuda::createCrc32Lut(1, stream); + crc32Head(d_grid, d_lut.get(), d_lut.get() + 256, 
stream); + cudaCheck(cudaMemcpyAsync(&(cs.head()), d_lut.get() + 256, headSize, cudaMemcpyDeviceToHost, stream)); + if (mode == CheckMode::Full) { + std::unique_ptr buffer(new char[headSize]); + auto *gridData = (GridData*)(buffer.get()); + cudaCheck(cudaMemcpyAsync(gridData, d_grid, headSize, cudaMemcpyDeviceToHost, stream)); + if (gridData->mVersion > Version(32,6,0)) { + crc32Tail(d_grid, gridData, d_lut.get(), d_lut.get() + 256, stream); + } else { + crc32TailOld(d_grid, gridData, d_lut.get(), d_lut.get() + 256, stream); + } + cudaCheck(cudaMemcpyAsync(&(cs.tail()), d_lut.get() + 256, headSize, cudaMemcpyDeviceToHost, stream)); + } + } + return cs; +} + +/// @brief +/// @param d_gridData +/// @param mode +/// @param stream +/// @return +inline bool validateChecksum(const GridData *d_gridData, CheckMode mode, cudaStream_t stream) +{ + static const int headSize = sizeof(GridData) + sizeof(TreeData); + NANOVDB_ASSERT(d_gridData); + if (mode == CheckMode::Empty) return true; + + // Copy just the GridData from the device to the host + std::unique_ptr buffer(new char[headSize]); + auto *gridData = (GridData*)(buffer.get()); + cudaCheck(cudaMemcpyAsync(gridData, d_gridData, headSize, cudaMemcpyDeviceToHost, stream)); + if (gridData->mChecksum.isEmpty()) return true;// checksum is empty so nothing to check + + // Allocate device LUT for CRC32 computation + auto d_lut = util::cuda::createCrc32Lut(1, stream);// unique pointer + uint32_t crc = 0, *d_crc = d_lut.get() + 256; + + // Check head checksum + crc32Head(d_gridData, d_lut.get(), d_crc, stream); + cudaCheck(cudaMemcpyAsync(&crc, d_crc, sizeof(uint32_t), cudaMemcpyDeviceToHost, stream)); + const bool checkHead = (crc == gridData->mChecksum.head()); + if (gridData->mChecksum.isHalf() || mode == CheckMode::Half || !checkHead) return checkHead; + + // Check tail checksum + if (gridData->mVersion > Version(32,6,0)) { + crc32Tail(d_gridData, gridData, d_lut.get(), d_crc, stream); + } else { + callNanoGrid(d_gridData, gridData, d_lut.get(), d_crc, stream); + } + cudaCheck(cudaMemcpyAsync(&crc, d_crc, sizeof(uint32_t), cudaMemcpyDeviceToHost, stream)); + return crc == gridData->mChecksum.tail(); +}// bool cudaValidateChecksum(const GridData *d_gridData, CheckMode mode, cudaStream_t stream = 0) + +/// @brief +/// @tparam BuildT +/// @param d_grid +/// @param mode +/// @param stream +/// @return +template +bool validateChecksum(const NanoGrid *d_grid, CheckMode mode, cudaStream_t stream = 0) +{ + static const int headSize = sizeof(GridData) + sizeof(TreeData); + NANOVDB_ASSERT(d_grid); + if (mode == CheckMode::Empty) return true; + + // Copy just the GridData from the device to the host + std::unique_ptr buffer(new char[headSize]); + auto *gridData = (GridData*)(buffer.get()); + cudaCheck(cudaMemcpyAsync(gridData, d_grid, headSize, cudaMemcpyDeviceToHost, stream)); + if (gridData->mChecksum.isEmpty()) return true;// checksum is empty so nothing to check + + // Allocate device LUT for CRC32 computation + auto d_lut = util::cuda::createCrc32Lut(1, stream);// unique pointer + uint32_t crc = 0, *d_crc = d_lut.get() + 256; + + // Check head checksum + crc32Head(d_grid, d_lut.get(), d_crc, stream); + cudaCheck(cudaMemcpyAsync(&crc, d_crc, sizeof(uint32_t), cudaMemcpyDeviceToHost, stream)); + const bool checkHead = (crc == gridData->mChecksum.head()); + if (gridData->mChecksum.isHalf() || mode == CheckMode::Half || !checkHead) return checkHead; + + // Check tail checksum + if (gridData->mVersion > Version(32,6,0)) { + crc32Tail(d_grid, gridData, 
d_lut.get(), d_crc, stream); + } else { + crc32TailOld(d_grid, gridData, d_lut.get(), d_crc, stream); + } + cudaCheck(cudaMemcpyAsync(&crc, d_crc, sizeof(uint32_t), cudaMemcpyDeviceToHost, stream)); + return crc == gridData->mChecksum.tail(); +}// bool cudaValidateChecksum(const GridData *d_gridData, CheckMode mode, cudaStream_t stream = 0) + +/// @brief Extract the checksum of a device grid +/// @param d_gridData Device pointer to grid with a checksum +/// @param stream optional cuda stream (defaults to zero) +inline Checksum getChecksum(const GridData *d_gridData, cudaStream_t stream) +{ + NANOVDB_ASSERT(d_gridData); + Checksum cs; + cudaCheck(cudaMemcpyAsync(&cs, (const uint8_t*)d_gridData + 8, sizeof(cs), cudaMemcpyDeviceToHost, stream)); + return cs; +} + +/// @brief Update the checksum of a device grid +/// @param d_gridData device pointer to GridData +/// @param mode Mode of computation for the checksum. +/// @param stream optional cuda stream (defaults to zero) +/// @return The actual mode used for checksum computation. Eg. if @c d_gridData is NULL (or @c mode = CheckMode::Empty) +/// then CheckMode::Empty is always returned. Else if the grid has no nodes or blind data CheckMode::Partial +/// is always returnd (even if @c mode = CheckMode::Full). +inline void updateChecksum(GridData *d_gridData, CheckMode mode, cudaStream_t stream) +{ + NANOVDB_ASSERT(d_gridData); + if (mode == CheckMode::Empty) return; + + // Allocate device LUT for CRC32 computation + auto d_lut = util::cuda::createCrc32Lut(0, stream);// unique pointers + + // Update head checksum + crc32Head(d_gridData, d_lut.get(), (uint32_t*)d_gridData + 2, stream); + + if (mode == CheckMode::Half) return; + + // Copy just the GridData from the device to the host + std::unique_ptr buffer(new char[sizeof(GridData) + sizeof(TreeData)]); + auto *gridData = (GridData*)(buffer.get()); + cudaCheck(cudaMemcpyAsync(gridData, d_gridData, sizeof(GridData) + sizeof(TreeData), cudaMemcpyDeviceToHost, stream)); + + // Update tail checksum + uint32_t *d_tail = (uint32_t*)d_gridData + 3; + if (gridData->mVersion > Version(32,6,0)) { + crc32Tail(d_gridData, gridData, d_lut.get(), d_tail, stream); + } else { + callNanoGrid(d_gridData, gridData, d_lut.get(), d_tail, stream); + } +}// cudaUpdateChecksum + +/// @brief +/// @tparam ValueT +/// @param d_grid +/// @param mode +/// @param stream +template +void updateChecksum(NanoGrid *d_grid, CheckMode mode, cudaStream_t stream = 0) +{ + NANOVDB_ASSERT(d_grid); + if (mode == CheckMode::Empty) return; + + // Allocate device LUT for CRC32 computation + auto d_lut = util::cuda::createCrc32Lut(0, stream);// unique pointers + + // Update head checksum + cuda::crc32Head(d_grid, d_lut.get(), (uint32_t*)d_grid + 2, stream); + if (mode == CheckMode::Half) return; + + // Copy just the GridData from the device to the host + std::unique_ptr buffer(new char[sizeof(GridData) + sizeof(TreeData)]); + auto *gridData = (GridData*)(buffer.get()); + cudaCheck(cudaMemcpyAsync(gridData, d_grid, sizeof(GridData) + sizeof(TreeData), cudaMemcpyDeviceToHost, stream)); + + // Update tail checksum + uint32_t *d_tail = (uint32_t*)d_grid + 3; + if (gridData->mVersion > Version(32,6,0)) { + crc32Tail(d_grid->data(), gridData, d_lut.get(), d_tail, stream); + } else { + crc32TailOld(d_grid, gridData, d_lut.get(), d_tail, stream); + } +} + +}// namespace tools::cuda // ================================================ + +}// namespace nanovdb // ==================================================== + +#endif // 
NANOVDB_TOOLS_CUDA_GRIDCHECKSUM_CUH_HAS_BEEN_INCLUDED diff --git a/external/nanovdb/tools/cuda/GridStats.cuh b/external/nanovdb/tools/cuda/GridStats.cuh new file mode 100644 index 00000000..0ba570ac --- /dev/null +++ b/external/nanovdb/tools/cuda/GridStats.cuh @@ -0,0 +1,249 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +/*! + \file nanovdb/tools/cuda/GridStats.cuh + + \author Ken Museth + + \date October 9, 2023 + + \brief Re-computes min/max/avg/var/bbox information for each node in a + pre-existing NanoVDB grid on the device. +*/ + +#ifndef NANOVDB_TOOLS_CUDA_GRIDSTATS_CUH_HAS_BEEN_INCLUDED +#define NANOVDB_TOOLS_CUDA_GRIDSTATS_CUH_HAS_BEEN_INCLUDED + +#include +#include + +namespace nanovdb { + +namespace tools::cuda { + +/// @brief Update, i.e. re-compute, grid statistics like min/max, stats and bbox +/// information for an existing NanoVDB Grid. +/// @param grid Grid whose stats to update +/// @param mode Mode of computation for the statistics. +/// @param stream Optional cuda stream (defaults to zero) +template +void updateGridStats(NanoGrid *d_grid, StatsMode mode = StatsMode::Default, cudaStream_t stream = 0); + +//================================================================================================ + +/// @brief Allows for the construction of NanoVDB grids without any dependecy +template::ValueType>> +class GridStats +{ + using GridT = NanoGrid; + using TreeT = typename GridT::TreeType; + using ValueT = typename TreeT::ValueType; + using Node0 = typename TreeT::Node0; // leaf + using Node1 = typename TreeT::Node1; // lower + using Node2 = typename TreeT::Node2; // upper + using RootT = typename TreeT::Node3; // root + static_assert(util::is_same::value, "Mismatching type"); + + ValueT mDelta; // skip rendering of node if: node.max < -mDelta || node.min > mDelta + +public: + GridStats(ValueT delta = ValueT(0)) : mDelta(delta) {} + + void update(GridT *d_grid, cudaStream_t stream = 0); + +}; // cuda::GridStats + +//================================================================================================ + +namespace {// define cuda kernels in an unnamed namespace + +template +__global__ void processLeaf(NodeManager *d_nodeMgr, StatsT *d_stats) +{ + const uint32_t tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid >= d_nodeMgr->leafCount()) return; + auto &d_leaf = d_nodeMgr->leaf(tid); + + if (d_leaf.updateBBox()) {// updates active bounding box (also updates data->mFlags) and return true if non-empty + if constexpr(StatsT::hasStats()) { + StatsT stats; + for (auto it = d_leaf.cbeginValueOn(); it; ++it) stats.add(*it); + if constexpr(StatsT::hasAverage()) { + d_stats[tid] = stats; + *reinterpret_cast(&d_leaf.mMinimum) = tid; + } else { + stats.setStats(d_leaf); + } + } + } + d_leaf.mFlags &= ~uint8_t(1u);// enable rendering +}// processLeaf + +template +__global__ void processInternal(NodeManager *d_nodeMgr, StatsT *d_stats) +{ + using ChildT = typename NanoNode::type; + const uint32_t tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid >= d_nodeMgr->nodeCount(LEVEL)) return; + auto &d_node = d_nodeMgr->template node(tid); + auto &bbox = d_node.mBBox; + bbox = CoordBBox();// empty bbox + StatsT stats; + uint32_t childID = 0u; + + for (auto it = d_node.beginChild(); it; ++it) { + auto &child = *it; + bbox.expand( child.bbox() ); + if constexpr(StatsT::hasAverage()) { + childID = *reinterpret_cast(&child.mMinimum); + StatsT &s = d_stats[childID]; + s.setStats(child); + stats.add(s); + } else if 
constexpr(StatsT::hasMinMax()) { + stats.add(child.minimum()); + stats.add(child.maximum()); + } + } + for (auto it = d_node.cbeginValueOn(); it; ++it) { + const Coord ijk = it.getCoord(); + bbox[0].minComponent(ijk); + bbox[1].maxComponent(ijk + Coord(ChildT::DIM - 1)); + if constexpr(StatsT::hasStats()) stats.add(*it, ChildT::NUM_VALUES); + } + if constexpr(StatsT::hasAverage()) { + d_stats[childID] = stats; + *reinterpret_cast(&d_node.mMinimum) = childID; + } else if constexpr(StatsT::hasMinMax()) { + stats.setStats(d_node); + } + d_node.mFlags &= ~uint64_t(1u);// enable rendering +}// processInternal + +template +__global__ void processRootAndGrid(NodeManager *d_nodeMgr, StatsT *d_stats) +{ + using ChildT = NanoUpper; + using ValueT = typename ChildT::ValueType; + + // process root + auto &root = d_nodeMgr->root(); + root.mBBox = CoordBBox(); + if (root.isEmpty()) { + root.mMinimum = root.mMaximum = root.mBackground; + root.mAverage = root.mStdDevi = 0; + } else { + ValueT v; + StatsT s; + for (auto it = root.beginDense(); it; ++it) { + if (auto *child = it.probeChild(v)) { + root.mBBox.expand( child->bbox() ); + if constexpr(StatsT::hasAverage()) { + StatsT &stats = d_stats[*reinterpret_cast(&child->mMinimum)]; + stats.setStats(*child); + s.add(stats); + } else if constexpr(StatsT::hasMinMax()){ + s.add(child->minimum()); + s.add(child->maximum()); + } + } else if (it.isValueOn()) { + const Coord ijk = it.getCoord(); + root.mBBox[0].minComponent(ijk); + root.mBBox[1].maxComponent(ijk + Coord(ChildT::DIM - 1)); + if constexpr(StatsT::hasStats()) s.add(v, ChildT::NUM_VALUES); + } + } + s.setStats(root); + } + + // process Grid + auto& grid = d_nodeMgr->grid(); + const auto& indexBBox = root.bbox(); + if (indexBBox.empty()) { + grid.mWorldBBox = Vec3dBBox(); + grid.setBBoxOn(false); + } else { + // Note that below max is offset by one since CoordBBox.max is inclusive + // while bbox.max is exclusive. However, min is inclusive in both + // CoordBBox and Vec3dBBox. This also guarantees that a grid with a single + // active voxel, does not have an empty world bbox! E.g. if a grid with a + // unit index-to-world transformation only contains the active voxel (0,0,0) + // then indeBBox = (0,0,0) -> (0,0,0) and then worldBBox = (0.0, 0.0, 0.0) + // -> (1.0, 1.0, 1.0). This is a consequence of the different definitions + // of index and world bounding boxes inherited from OpenVDB! 
+ grid.mWorldBBox = CoordBBox(indexBBox[0], indexBBox[1].offsetBy(1)).transform(grid.map()); + grid.setBBoxOn(true); + } + + // set bit flags + grid.setMinMaxOn(StatsT::hasMinMax()); + grid.setAverageOn(StatsT::hasAverage()); + grid.setStdDeviationOn(StatsT::hasStdDeviation()); +}// processRootAndGrid + +}// cuda kernels are defined in an unnamed namespace + +//================================================================================================ + +template +void GridStats::update(NanoGrid *d_grid, cudaStream_t stream) +{ + static const uint32_t threadsPerBlock = 128; + auto blocksPerGrid = [&](uint32_t count)->uint32_t{return (count + (threadsPerBlock - 1)) / threadsPerBlock;}; + + auto nodeMgrHandle = nanovdb::cuda::createNodeManager(d_grid, CudaDeviceBuffer(), stream); + auto *d_nodeMgr = nodeMgrHandle.template deviceMgr(); + + uint32_t nodeCount[3];// {leaf, lower, upper} + cudaCheck(cudaMemcpyAsync(nodeCount, (char*)d_grid + sizeof(GridData) + 4*sizeof(uint64_t), 3*sizeof(uint32_t), cudaMemcpyDeviceToHost, stream)); + //cudaStreamSynchronize(stream);// finish all device tasks in stream + + StatsT *d_stats = nullptr; + + if constexpr(StatsT::hasAverage()) cudaCheck(util::cuda::mallocAsync((void**)&d_stats, nodeCount[0]*sizeof(StatsT), stream)); + + processLeaf<<>>(d_nodeMgr, d_stats); + + processInternal<<>>(d_nodeMgr, d_stats); + + processInternal<<>>(d_nodeMgr, d_stats); + + processRootAndGrid<<<1, 1, 0, stream>>>(d_nodeMgr, d_stats); + + if constexpr(StatsT::hasAverage()) cudaCheck(util::cuda::freeAsync(d_stats, stream)); + +} // cuda::GridStats::update( Grid ) + +//================================================================================================ + +template +void updateGridStats(NanoGrid *d_grid, StatsMode mode, cudaStream_t stream) +{ + if (d_grid == nullptr && mode == StatsMode::Disable) { + return; + } else if (mode == StatsMode::BBox || util::is_same::value) { + GridStats > stats; + stats.update(d_grid, stream); + } else if (mode == StatsMode::MinMax) { + GridStats > stats; + stats.update(d_grid, stream); + } else if (mode == StatsMode::All) { + GridStats > stats; + stats.update(d_grid, stream); + } else { + throw std::runtime_error("GridStats: Unsupported statistics mode."); + } +}// cuda::updateGridStats + +}// namespace tools::cuda + +template +[[deprecated("Use nanovdb::cuda::updateGridStats instead")]] +void cudaGridStats(NanoGrid *d_grid, tools::StatsMode mode = tools::StatsMode::Default, cudaStream_t stream = 0) +{ + tools::cuda::updateGridStats(d_grid, mode, stream); +} + +} // namespace nanovdb + +#endif // NANOVDB_TOOLS_CUDA_GRIDSTATS_CUH_HAS_BEEN_INCLUDED diff --git a/external/nanovdb/tools/cuda/GridValidator.cuh b/external/nanovdb/tools/cuda/GridValidator.cuh new file mode 100644 index 00000000..aaa28412 --- /dev/null +++ b/external/nanovdb/tools/cuda/GridValidator.cuh @@ -0,0 +1,59 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +/*! + \file nanovdb/tools/cuda/GridValidator.cuh + + \author Ken Museth + + \date November 3, 2023 + + \brief Checks the validity of an existing NanoVDB device grid. +*/ + +#ifndef NANOVDB_TOOLS_CUDA_GRIDVALIDATOR_CUH_HAS_BEEN_INCLUDED +#define NANOVDB_TOOLS_CUDA_GRIDVALIDATOR_CUH_HAS_BEEN_INCLUDED + +#include +#include +#include +#include + +namespace nanovdb { + +namespace tools::cuda { + +/// @brief Return true if the specified grid passes several validation tests. 
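/// @details A device-side sketch; the device grid pointer and CUDA stream (obtained
/// elsewhere, e.g. via GridHandle::deviceGrid and cudaStreamCreate) are assumed:
/// @code
/// bool ok = nanovdb::tools::cuda::isValid(d_grid, nanovdb::CheckMode::Full,
///                                         /*verbose=*/true, stream);
/// @endcode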
+/// +/// @param grid Grid to validate +/// @param detailed If true the validation test is detailed and relatively slow. +/// @param verbose If true information about the first failed test is printed to std::cerr +template +bool isValid(const NanoGrid *d_grid, CheckMode mode, bool verbose = false, cudaStream_t stream = 0) +{ + static const int size = 100; + std::unique_ptr strUP(new char[size]); + util::cuda::unique_ptr d_strUP(size); + char *str = strUP.get(), *d_str = d_strUP.get(); + + util::cuda::lambdaKernel<<<1, 1, 0, stream>>>(1, [=] __device__(size_t) {nanovdb::tools::checkGrid(d_grid, d_str, mode);}); + cudaMemcpyAsync(str, d_str, size, cudaMemcpyDeviceToHost, stream); + + if (util::empty(str) && !cuda::validateChecksum(d_grid, mode)) util::strcpy(str, "Mis-matching checksum"); + if (verbose && !util::empty(str)) std::cerr << "Validation failed: " << str << std::endl; + + return util::empty(str); +}// tools::cuda::isValid + +}// namespace tools::cuda + +template +[[deprecated("Use cuda::isValid() instead.")]] +bool cudaIsValid(const NanoGrid *d_grid, CheckMode mode, bool verbose = false, cudaStream_t stream = 0) +{ + return tools::cuda::isValid(d_grid, mode, verbose, stream); +} + +} // namespace nanovdb + +#endif // NANOVDB_TOOLS_CUDA_GRIDVALIDATOR_CUH_HAS_BEEN_INCLUDED diff --git a/external/nanovdb/tools/cuda/IndexToGrid.cuh b/external/nanovdb/tools/cuda/IndexToGrid.cuh new file mode 100644 index 00000000..d26b09a2 --- /dev/null +++ b/external/nanovdb/tools/cuda/IndexToGrid.cuh @@ -0,0 +1,407 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +/*! + \file nanovdb/tools/cuda/IndexToGrid.cuh + + \author Ken Museth + + \date April 17, 2023 + + \brief Combines an IndexGrid and values into a regular Grid on the device + + \warning The header file contains cuda device code so be sure + to only include it in .cu files (or other .cuh files) +*/ + +#ifndef NVIDIA_TOOLS_CUDA_INDEXTOGRID_CUH_HAS_BEEN_INCLUDED +#define NVIDIA_TOOLS_CUDA_INDEXTOGRID_CUH_HAS_BEEN_INCLUDED + +#include +#include +#include +#include +#include + +namespace nanovdb {// ================================================================ + +namespace tools::cuda {// ============================================================ + +/// @brief Freestanding function that combines an IndexGrid and values into a regular Grid +/// @tparam DstBuildT Build time of the destination/output Grid +/// @tparam SrcBuildT Build type of the source/input IndexGrid +/// @tparam BufferT Type of the buffer used for allocation of the destination Grid +/// @param d_srcGrid Device pointer to source/input IndexGrid, i.e. SrcBuildT={ValueIndex,ValueOnIndex,ValueIndexMask,ValueOnIndexMask} +/// @param d_srcValues Device pointer to an array of values +/// @param pool Memory pool used to create a buffer for the destination/output Grid +/// @param stream optional CUDA stream (defaults to CUDA stream 0 +/// @note If d_srcGrid has stats (min,max,avg,std-div), the d_srcValues is also assumed +/// to have the same information, all of which are then copied to the destination/output grid. +/// An exception to this rule is if the type of d_srcValues is different from the stats type +/// NanoRoot::FloatType, e.g. if DstBuildT=Vec3f then NanoRoot::FloatType=float, +/// in which case average and standard-deviation is undefined in the output grid. 
+/// @return returns handle to grid that combined IndexGrid and values +template +typename util::enable_if::is_index, GridHandle>::type +indexToGrid(const NanoGrid *d_srcGrid, const typename BuildToValueMap::type *d_srcValues, const BufferT &pool = BufferT(), cudaStream_t stream = 0); + + +template +typename util::enable_if::is_index, GridHandle>::type +createNanoGrid(const NanoGrid *d_srcGrid, const typename BuildToValueMap::type *d_srcValues, const BufferT &pool = BufferT(), cudaStream_t stream = 0) +{ + return indexToGrid(d_srcGrid, d_srcValues, pool, stream); +} + +namespace {// anonymous namespace + +template +class IndexToGrid +{ + using SrcGridT = NanoGrid; +public: + struct NodeAccessor; + + /// @brief Constructor from a source IndeGrid + /// @param srcGrid Device pointer to IndexGrid used as the source + IndexToGrid(const SrcGridT *d_srcGrid, cudaStream_t stream = 0); + + ~IndexToGrid() {cudaCheck(util::cuda::freeAsync(mDevNodeAcc, mStream));} + + /// @brief Toggle on and off verbose mode + /// @param on if true verbose is turned on + void setVerbose(bool on = true) {mVerbose = on; } + + /// @brief Set the name of the destination/output grid + /// @param name Name used for the destination grid + void setGridName(const std::string &name) {mGridName = name;} + + /// @brief Combines the IndexGrid with values to produce a regular Grid + /// @tparam DstBuildT Template parameter of the destination grid and value type + /// @tparam BufferT Template parameter of the memory allocator + /// @param srcValues pointer to values that will be inserted into the output grid + /// @param buffer optional buffer used for memory allocation + /// @return A new GridHandle with the grid of type @c DstBuildT + template + GridHandle getHandle(const typename BuildToValueMap::type *srcValues, const BufferT &buffer = BufferT()); + +private: + cudaStream_t mStream{0}; + util::cuda::Timer mTimer; + std::string mGridName; + bool mVerbose{false}; + NodeAccessor mNodeAcc, *mDevNodeAcc; + + template + BufferT getBuffer(const BufferT &pool); +};// IndexToGrid + +//================================================================================================ + +template +struct IndexToGrid::NodeAccessor +{ + uint64_t grid, tree, root, node[3], meta, blind, size;// byte offsets, node: 0=leaf,1=lower, 2=upper + const SrcGridT *d_srcGrid;// device point to source IndexGrid + void *d_dstPtr;// device pointer to buffer with destination Grid + char *d_gridName; + uint32_t nodeCount[4];// 0=leaf, 1=lower, 2=upper, 3=root tiles + + __device__ const NanoGrid& srcGrid() const {return *d_srcGrid;} + __device__ const NanoTree& srcTree() const {return d_srcGrid->tree();} + __device__ const NanoRoot& srcRoot() const {return d_srcGrid->tree().root();} + template + __device__ const typename NanoNode::type& srcNode(int i) const { + return *(this->srcTree().template getFirstNode() + i); + } + + template + __device__ NanoGrid& dstGrid() const {return *util::PtrAdd>(d_dstPtr, grid);} + template + __device__ NanoTree& dstTree() const {return *util::PtrAdd>(d_dstPtr, tree);} + template + __device__ NanoRoot& dstRoot() const {return *util::PtrAdd>(d_dstPtr, root);} + template + __device__ typename NanoNode::type& dstNode(int i) const { + return *(util::PtrAdd::type>(d_dstPtr, node[LEVEL])+i); + } +};// IndexToGrid::NodeAccessor + +//================================================================================================ + +template +__global__ void processGridTreeRootKernel(typename IndexToGrid::NodeAccessor *nodeAcc, + const 
typename BuildToValueMap::type *srcValues) +{ + using SrcValueT = typename BuildToValueMap::type; + using DstStatsT = typename NanoRoot::FloatType; + + auto &srcGrid = nodeAcc->srcGrid(); + auto &dstGrid = nodeAcc->template dstGrid(); + auto &srcTree = srcGrid.tree(); + auto &dstTree = nodeAcc->template dstTree(); + auto &srcRoot = srcTree.root(); + auto &dstRoot = nodeAcc->template dstRoot(); + + // process Grid + *dstGrid.data() = *srcGrid.data(); + dstGrid.mGridType = toGridType(); + dstGrid.mData1 = 0u; + // we will recompute GridData::mChecksum later + + // process Tree + *dstTree.data() = *srcTree.data(); + dstTree.setRoot(&dstRoot); + dstTree.setFirstNode(&nodeAcc->template dstNode(0)); + dstTree.setFirstNode(&nodeAcc->template dstNode(0)); + dstTree.setFirstNode(&nodeAcc->template dstNode(0)); + + // process Root + dstRoot.mBBox = srcRoot.mBBox; + dstRoot.mTableSize = srcRoot.mTableSize; + dstRoot.mBackground = srcValues[srcRoot.mBackground]; + if (srcGrid.hasMinMax()) { + dstRoot.mMinimum = srcValues[srcRoot.mMinimum]; + dstRoot.mMaximum = srcValues[srcRoot.mMaximum]; + } + if constexpr(util::is_same::value) {// e.g. {float,float} or {Vec3f,float} + if (srcGrid.hasAverage()) dstRoot.mAverage = srcValues[srcRoot.mAverage]; + if (srcGrid.hasStdDeviation()) dstRoot.mStdDevi = srcValues[srcRoot.mStdDevi]; + } +}// processGridTreeRootKernel + +//================================================================================================ + +template +__global__ void processRootTilesKernel(typename IndexToGrid::NodeAccessor *nodeAcc, + const typename BuildToValueMap::type *srcValues) +{ + const auto tid = blockIdx.x; + + // Process children and tiles + const auto &srcTile = *nodeAcc->srcRoot().tile(tid); + auto &dstTile = *nodeAcc->template dstRoot().tile(tid); + dstTile.key = srcTile.key; + if (srcTile.child) { + dstTile.child = sizeof(NanoRoot) + sizeof(NanoRoot::Tile)*((srcTile.child - sizeof(NanoRoot))/sizeof(NanoRoot::Tile)); + dstTile.value = srcValues[0];// set to background + dstTile.state = false; + } else { + dstTile.child = 0;// i.e. no child node + dstTile.value = srcValues[srcTile.value]; + dstTile.state = srcTile.state; + } +}// processRootTilesKernel + +//================================================================================================ + +template +__global__ void processNodesKernel(typename IndexToGrid::NodeAccessor *nodeAcc, + const typename BuildToValueMap::type *srcValues) +{ + using SrcNodeT = typename NanoNode::type; + using DstNodeT = typename NanoNode::type; + using SrcChildT = typename SrcNodeT::ChildNodeType; + using DstChildT = typename DstNodeT::ChildNodeType; + using SrcValueT = typename BuildToValueMap::type; + using DstStatsT = typename NanoRoot::FloatType; + + auto &srcNode = nodeAcc->template srcNode(blockIdx.x); + auto &dstNode = nodeAcc->template dstNode(blockIdx.x); + + if (threadIdx.x == 0 && threadIdx.y == 0) { + dstNode.mBBox = srcNode.mBBox; + dstNode.mFlags = srcNode.mFlags; + dstNode.mValueMask = srcNode.mValueMask; + dstNode.mChildMask = srcNode.mChildMask; + auto &srcGrid = nodeAcc->srcGrid(); + if (srcGrid.hasMinMax()) { + dstNode.mMinimum = srcValues[srcNode.mMinimum]; + dstNode.mMaximum = srcValues[srcNode.mMaximum]; + } + if constexpr(util::is_same::value) {// e.g. 
{float,float} or {Vec3f,float} + if (srcGrid.hasAverage()) dstNode.mAverage = srcValues[srcNode.mAverage]; + if (srcGrid.hasStdDeviation()) dstNode.mStdDevi = srcValues[srcNode.mStdDevi]; + } + } + const uint64_t nodeSkip = nodeAcc->nodeCount[LEVEL] - blockIdx.x, srcOff = sizeof(SrcNodeT)*nodeSkip, dstOff = sizeof(DstNodeT)*nodeSkip;// offset to first node of child type + const int off = blockDim.x*blockDim.y*threadIdx.x + blockDim.x*threadIdx.y; + for (int threadIdx_z=0; threadIdx_z +__global__ void processLeafsKernel(typename IndexToGrid::NodeAccessor *nodeAcc, + const typename BuildToValueMap::type *srcValues) +{ + using SrcValueT = typename BuildToValueMap::type; + using DstStatsT = typename NanoRoot::FloatType; + static_assert(!BuildTraits::is_special, "Invalid destination type!"); + auto &srcLeaf = nodeAcc->template srcNode<0>(blockIdx.x); + auto &dstLeaf = nodeAcc->template dstNode(blockIdx.x); + if (threadIdx.x == 0 && threadIdx.y == 0) { + dstLeaf.mBBoxMin = srcLeaf.mBBoxMin; + for (int i=0; i<3; ++i) dstLeaf.mBBoxDif[i] = srcLeaf.mBBoxDif[i]; + dstLeaf.mFlags = srcLeaf.mFlags; + dstLeaf.mValueMask = srcLeaf.mValueMask; + /// + auto &srcGrid = nodeAcc->srcGrid(); + if (srcGrid.hasMinMax()) { + dstLeaf.mMinimum = srcValues[srcLeaf.getMin()]; + dstLeaf.mMaximum = srcValues[srcLeaf.getMax()]; + } + if constexpr(util::is_same::value) {// e.g. {float,float} or {Vec3f,float} + if (srcGrid.hasAverage()) dstLeaf.mAverage = srcValues[srcLeaf.getAvg()]; + if (srcGrid.hasStdDeviation()) dstLeaf.mStdDevi = srcValues[srcLeaf.getDev()]; + } + } + const int off = blockDim.x*blockDim.y*threadIdx.x + blockDim.x*threadIdx.y; + auto *dst = dstLeaf.mValues + off; + for (int threadIdx_z=0; threadIdx_z +__global__ void cpyNodeCountKernel(const NanoGrid *srcGrid, + typename IndexToGrid::NodeAccessor *nodeAcc) +{ + assert(srcGrid->isSequential()); + nodeAcc->d_srcGrid = srcGrid; + for (int i=0; i<3; ++i) nodeAcc->nodeCount[i] = srcGrid->tree().nodeCount(i); + nodeAcc->nodeCount[3] = srcGrid->tree().root().tileCount(); +} + +}// anonymous namespace + +//================================================================================================ + +template +IndexToGrid::IndexToGrid(const SrcGridT *d_srcGrid, cudaStream_t stream) + : mStream(stream), mTimer(stream) +{ + NANOVDB_ASSERT(d_srcGrid); + cudaCheck(util::cuda::mallocAsync((void**)&mDevNodeAcc, sizeof(NodeAccessor), mStream)); + cpyNodeCountKernel<<<1, 1, 0, mStream>>>(d_srcGrid, mDevNodeAcc); + cudaCheckError(); + cudaCheck(cudaMemcpyAsync(&mNodeAcc, mDevNodeAcc, sizeof(NodeAccessor), cudaMemcpyDeviceToHost, mStream));// mNodeAcc = *mDevNodeAcc +} + +//================================================================================================ + +template +template +GridHandle IndexToGrid::getHandle(const typename BuildToValueMap::type *srcValues, + const BufferT &pool) +{ + if (mVerbose) mTimer.start("Initiate buffer"); + auto buffer = this->template getBuffer(pool); + + if (mVerbose) mTimer.restart("Process grid,tree,root"); + processGridTreeRootKernel<<<1, 1, 0, mStream>>>(mDevNodeAcc, srcValues); + cudaCheckError(); + + if (mVerbose) mTimer.restart("Process root children and tiles"); + processRootTilesKernel<<>>(mDevNodeAcc, srcValues); + cudaCheckError(); + + cudaCheck(util::cuda::freeAsync(mNodeAcc.d_gridName, mStream)); + + if (mVerbose) mTimer.restart("Process upper internal nodes"); + processNodesKernel<<>>(mDevNodeAcc, srcValues); + cudaCheckError(); + + if (mVerbose) mTimer.restart("Process lower internal nodes"); + 
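+ // same conversion kernel as above, now instantiated for LEVEL=1 (the lower internal
+ // nodes); processNodesKernel indexes nodes by blockIdx.x, i.e. one CUDA block per node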
processNodesKernel<<>>(mDevNodeAcc, srcValues); + cudaCheckError(); + + if (mVerbose) mTimer.restart("Process leaf nodes"); + processLeafsKernel<<>>(mDevNodeAcc, srcValues); + if (mVerbose) mTimer.stop(); + cudaCheckError(); + + if (mVerbose) mTimer.restart("Compute checksums"); + updateChecksum((GridData*)mNodeAcc.d_dstPtr, mStream); + if (mVerbose) mTimer.stop(); + + //cudaStreamSynchronize(mStream);// finish all device tasks in mStream + return GridHandle(std::move(buffer)); +}// IndexToGrid::getHandle + +//================================================================================================ + +template +template +inline BufferT IndexToGrid::getBuffer(const BufferT &pool) +{ + mNodeAcc.grid = 0;// grid is always stored at the start of the buffer! + mNodeAcc.tree = NanoGrid::memUsage(); // grid ends and tree begins + mNodeAcc.root = mNodeAcc.tree + NanoTree::memUsage(); // tree ends and root node begins + mNodeAcc.node[2] = mNodeAcc.root + NanoRoot::memUsage(mNodeAcc.nodeCount[3]); // root node ends and upper internal nodes begin + mNodeAcc.node[1] = mNodeAcc.node[2] + NanoUpper::memUsage()*mNodeAcc.nodeCount[2]; // upper internal nodes ends and lower internal nodes begin + mNodeAcc.node[0] = mNodeAcc.node[1] + NanoLower::memUsage()*mNodeAcc.nodeCount[1]; // lower internal nodes ends and leaf nodes begin + mNodeAcc.meta = mNodeAcc.node[0] + NanoLeaf::DataType::memUsage()*mNodeAcc.nodeCount[0];// leaf nodes end and blind meta data begins + mNodeAcc.blind = mNodeAcc.meta + 0*sizeof(GridBlindMetaData); // meta data ends and blind data begins + mNodeAcc.size = mNodeAcc.blind;// end of buffer + auto buffer = BufferT::create(mNodeAcc.size, &pool, false, mStream); + mNodeAcc.d_dstPtr = buffer.deviceData(); + if (mNodeAcc.d_dstPtr == nullptr) throw std::runtime_error("Failed memory allocation on the device"); + + if (size_t size = mGridName.size()) { + cudaCheck(util::cuda::mallocAsync((void**)&mNodeAcc.d_gridName, size, mStream)); + cudaCheck(cudaMemcpyAsync(mNodeAcc.d_gridName, mGridName.data(), size, cudaMemcpyHostToDevice, mStream)); + } else { + mNodeAcc.d_gridName = nullptr; + } + cudaCheck(cudaMemcpyAsync(mDevNodeAcc, &mNodeAcc, sizeof(NodeAccessor), cudaMemcpyHostToDevice, mStream));// copy NodeAccessor CPU -> GPU + return buffer; +} + +//================================================================================================ + +template +typename util::enable_if::is_index, GridHandle>::type +indexToGrid(const NanoGrid *d_srcGrid, const typename BuildToValueMap::type *d_srcValues, const BufferT &pool, cudaStream_t stream) +{ + IndexToGrid converter(d_srcGrid, stream); + return converter.template getHandle(d_srcValues, pool); +} + +}// namespace tools::cuda ============================================================= + +template +[[deprecated("Use nanovdb::cuda::indexToGrid instead")]] +typename util::enable_if::is_index, GridHandle>::type +cudaIndexToGrid(const NanoGrid *d_srcGrid, const typename BuildToValueMap::type *d_srcValues, const BufferT &pool = BufferT(), cudaStream_t stream = 0) +{ + return tools::cuda::indexToGrid(d_srcGrid, d_srcValues, pool, stream); +} + + +template +[[deprecated("Use nanovdb::cuda::indexToGrid instead")]] +typename util::enable_if::is_index, GridHandle>::type +cudaCreateNanoGrid(const NanoGrid *d_srcGrid, const typename BuildToValueMap::type *d_srcValues, const BufferT &pool = BufferT(), cudaStream_t stream = 0) +{ + return tools::cuda::indexToGrid(d_srcGrid, d_srcValues, pool, stream); +} + +}// nanovdb namespace 
=================================================================== + +#endif // NVIDIA_TOOLS_CUDA_INDEXTOGRID_CUH_HAS_BEEN_INCLUDED diff --git a/external/nanovdb/tools/cuda/PointsToGrid.cuh b/external/nanovdb/tools/cuda/PointsToGrid.cuh new file mode 100644 index 00000000..bcf335ef --- /dev/null +++ b/external/nanovdb/tools/cuda/PointsToGrid.cuh @@ -0,0 +1,1293 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +/*! + \file nanovdb/tools/cuda/PointsToGrid.cuh + + \authors Greg Klar (initial version) and Ken Museth (final version) + + \brief Generates NanoVDB grids from a list of voxels or points on the device + + \warning The header file contains cuda device code so be sure + to only include it in .cu files (or other .cuh files) +*/ + +#ifndef NVIDIA_TOOLS_CUDA_POINTSTOGRID_CUH_HAS_BEEN_INCLUDED +#define NVIDIA_TOOLS_CUDA_POINTSTOGRID_CUH_HAS_BEEN_INCLUDED + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +/* + Note: 4.29 billion (=2^32) coordinates of type Vec3f have a memory footprint of 48 GB! +*/ + +namespace nanovdb {// ================================================================================ + +namespace tools::cuda {// ============================================================================ + +/// @brief Generates a NanoGrid from a list of point coordinates on the device. This method is +/// mainly used as a means to build a BVH acceleration structure for points, e.g. for efficient rendering. +/// @tparam PtrT Template type to a raw or fancy-pointer of point coordinates in world space. Dereferencing should return Vec3f or Vec3d. +/// @tparam BufferT Template type of buffer used for memory allocation on the device +/// @tparam AllocT Template type of optional device allocator for internal temporary memory +/// @param dWorldPoints Raw or fancy pointer to list of point coordinates in world space on the device +/// @param pointCount number of point in the list @c d_world +/// @param voxelSize Size of a voxel in world units used for the output grid +/// @param type Defined the way point information is represented in the output grid (see PointType enum NanoVDB.h) +/// Should not be PointType::Disable! +/// @param buffer Instance of the device buffer used for memory allocation +/// @param stream optional CUDA stream (defaults to CUDA stream 0) +/// @return Returns a handle with a grid of type NanoGrid where point information, e.g. coordinates, +/// are represented as blind data defined by @c type. +template +GridHandle +pointsToGrid(const PtrT dWorldPoints, + int pointCount, + double voxelSize, + PointType type = PointType::Default, + const BufferT &buffer = BufferT(), + cudaStream_t stream = 0); + +//----------------------------------------------------------------------------------------------------- + +/// @brief Generates a NanoGrid from a list of point coordinates on the device. This method is +/// mainly used as a means to build a BVH acceleration structure for points, e.g. for efficient rendering. +/// @tparam PtrT Template type to a raw or fancy-pointer of point coordinates in world space. Dereferencing should return Vec3f or Vec3d. 
+/// @tparam BufferT Template type of buffer used for memory allocation on the device +/// @tparam AllocT Template type of optional device allocator for internal temporary memory +/// @param dWorldPoints Raw or fancy pointer to list of point coordinates in world space on the device +/// @param pointCount total number of point in the list @c d_world +/// @param maxPointsPerVoxel Max density of points per voxel, i.e. maximum number of points in any voxel +/// @param tolerance allow for point density to vary by the specified tolerance (defaults to 1). That is, the voxel size +/// is selected such that the max density is +/- the tolerance. +/// @param maxIterations Maximum number of iterations used to seach for a voxel size that produces a point density +/// with specified tolerance takes. +/// @param type Defined the way point information is represented in the output grid (see PointType enum in NanoVDB.h) +/// Should not be PointType::Disable! +/// @param buffer Instance of the device buffer used for memory allocation +/// @param stream optional CUDA stream (defaults to CUDA stream 0) +/// @return Returns a handle with a grid of type NanoGrid where point information, e.g. coordinates, +/// are represented as blind data defined by @c type. +template +GridHandle +pointsToGrid(const PtrT dWorldPoints, + int pointCount, + int maxPointPerVoxel, + int tolerance = 1, + int maxIterations = 10, + PointType type = PointType::Default, + const BufferT &buffer = BufferT(), + cudaStream_t stream = 0); + +//----------------------------------------------------------------------------------------------------- + +template +GridHandle +pointsToGrid(std::vector> pointSet, + const BufferT &buffer = BufferT(), + cudaStream_t stream = 0); + +//----------------------------------------------------------------------------------------------------- + +/// @brief Generates a NanoGrid of any type from a list of voxel coordinates on the device. Unlike @c cudaPointsToGrid +/// this method only builds the grid but does not encode the coordinates as blind data. It is mainly useful as a +/// means to generate a grid that is know to contain the voxels given in the list. +/// @tparam BuildT Template type of the return grid +/// @tparam PtrT Template type to a raw or fancy-pointer of point coordinates in world space. Dereferencing should return Vec3f or Vec3d. 
+/// @tparam BufferT Template type of buffer used for memory allocation on the device +/// @tparam AllocT Template type of optional device allocator for internal temporary memory +/// @param dGridVoxels Raw or fancy pointer to list of voxel coordinates in grid (or index) space on the device +/// @param pointCount number of voxel in the list @c dGridVoxels +/// @param voxelSize Size of a voxel in world units used for the output grid +/// @param buffer Instance of the device buffer used for memory allocation +/// @return Returns a handle with the grid of type NanoGrid +template +GridHandle +voxelsToGrid(const PtrT dGridVoxels, + size_t voxelCount, + double voxelSize = 1.0, + const BufferT &buffer = BufferT(), + cudaStream_t stream = 0); + +//------------------------------------------------------------------------------------------------------- + +template +GridHandle +voxelsToGrid(std::vector> pointSet, + const BufferT &buffer = BufferT(), + cudaStream_t stream = 0); + +}// namespace tools::cuda ======================================================================== + +/// @brief Example class of a fancy pointer that can optionally be used as a template for writing +/// a custom fancy pointer that allows for particle coordinates to be arrange non-linearly +/// in memory. For instance with coordinates are interlaced with other dats, i.e. an array +/// of structs, a custom implementation of fancy_ptr::operator[](size_t i) can account for +/// strides that skip other interlaces data. +/// @tparam T Template type that specifies the type use for the coordinates of the points +template +class fancy_ptr +{ + const T* mPtr; +public: + /// @brief Default constructor. + /// @note This method is atcually not required by cuda::PointsToGrid + /// @param ptr Pointer to array of elements + __hostdev__ explicit fancy_ptr(const T* ptr = nullptr) : mPtr(ptr) {} + /// @brief Index acces into the array pointed to by the stored pointer. + /// @note This method is required by cuda::PointsToGrid! + /// @param i Unsigned index of the element to be returned + /// @return Const refernce to the element at the i'th poisiton + __hostdev__ inline const T& operator[](size_t i) const {return mPtr[i];} + /// @brief Dummy implementation required by pointer_traits. + /// @note Note that only the return type matters! + /// @details Unlike operator[] it is safe to assume that all pointer types have operator*, + /// which is why pointer_traits makes use of it to determine the element_type that + /// a pointer class is pointing to. E.g. operator[] is not always defined for std::shared_ptr! 
+ __hostdev__ inline const T& operator*() const {return *mPtr;} +};// fancy_ptr + +/// @brief Simple stand-alone function that can be used to conveniently construct a fancy_ptr +/// @tparam T Template type that specifies the type use for the coordinates of the points +/// @param ptr Raw pointer to data +/// @return a new instance of a fancy_ptr +template +fancy_ptr make_fancy(const T* ptr = nullptr) {return fancy_ptr(ptr);} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +/// @brief Trait of points, like type of pointer and size of the pointer type +template +struct pointer_traits; + +template +struct pointer_traits { + using element_type = T; + static constexpr size_t element_size = sizeof(T); +}; + +template +struct pointer_traits { + using element_type = typename util::remove_reference())>::type;// assumes T::operator*() exists! + static constexpr size_t element_size = sizeof(element_type); +}; + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +/// @brief computes the relative 8-bit voxel offsets from a world coordinate +/// @tparam Vec3T Type of the world coordinate +/// @param voxel 8-bit output coordinates that are relative to a voxel +/// @param world input world coordinates +/// @param indexToWorld Transform from index to world space +template +__hostdev__ inline static void worldToVoxel(Vec3u8 &voxel, const Vec3T &world, const Map &indexToWorld) +{ + const Vec3d ijk = indexToWorld.applyInverseMap(world);// world -> index + static constexpr double encode = double((1<<8) - 1); + voxel[0] = uint8_t( encode*(ijk[0] - math::Floor(ijk[0] + 0.5) + 0.5) ); + voxel[1] = uint8_t( encode*(ijk[1] - math::Floor(ijk[1] + 0.5) + 0.5) ); + voxel[2] = uint8_t( encode*(ijk[2] - math::Floor(ijk[2] + 0.5) + 0.5) ); +} + +/// @brief computes the relative 16-bit voxel offsets from a world coordinate +/// @tparam Vec3T Type of the world coordinate +/// @param voxel 16-bit output coordinates that are relative to a voxel +/// @param world input world coordinates +/// @param indexToWorld Transform from index to world space +template +__hostdev__ inline static void worldToVoxel(Vec3u16 &voxel, const Vec3T &world, const Map &indexToWorld) +{ + const Vec3d ijk = indexToWorld.applyInverseMap(world);// world -> index + static constexpr double encode = double((1<<16) - 1); + voxel[0] = uint16_t( encode*(ijk[0] - math::Floor(ijk[0] + 0.5) + 0.5) ); + voxel[1] = uint16_t( encode*(ijk[1] - math::Floor(ijk[1] + 0.5) + 0.5) ); + voxel[2] = uint16_t( encode*(ijk[2] - math::Floor(ijk[2] + 0.5) + 0.5) ); +} + +/// @brief computes the relative float voxel offsets from a world coordinate +/// @tparam Vec3T Type of the world coordinate +/// @param voxel float output coordinates that are relative to a voxel +/// @param world input world coordinates +/// @param indexToWorld Transform from index to world space +template +__hostdev__ inline static void worldToVoxel(Vec3f &voxel, const Vec3T &world, const Map &indexToWorld) +{ + const Vec3d ijk = indexToWorld.applyInverseMap(world);// world -> index + voxel[0] = float( ijk[0] - math::Floor(ijk[0] + 0.5) ); + voxel[1] = float( ijk[1] - math::Floor(ijk[1] + 0.5) ); + voxel[2] = float( ijk[2] - math::Floor(ijk[2] + 0.5) ); +} + 
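+// The three worldToVoxel overloads above quantize a point's offset from its voxel's integer
+// (index-space) coordinate: the signed fraction ijk - Floor(ijk + 0.5) lies in [-0.5, 0.5);
+// the 8- and 16-bit variants shift it to [0, 1) and scale by 255 or 65535, while the float
+// variant stores the fraction directly. The voxelToWorld overloads below invert the encoding.
+// Illustrative round trip (a sketch only, with hypothetical names and a unit-scale Map assumed):
+//   Map map(1.0);
+//   Vec3f p(1.3f, -2.7f, 0.25f);                  // point in world space
+//   Coord ijk = map.applyInverseMapF(p).round();  // voxel containing p
+//   Vec3u8 q;  worldToVoxel(q, p, map);           // 8-bit offset of p within that voxel
+//   Vec3f r = voxelToWorld<Vec3f>(q, ijk, map);   // reconstructs p to within ~1/255 of a voxel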
+//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +__hostdev__ inline static Vec3T voxelToWorld(const Vec3u8 &voxel, const Coord &ijk, const Map &map) +{ + static constexpr double decode = 1.0/double((1<<8) - 1); + if constexpr(util::is_same::value) { + return map.applyMap( Vec3d(ijk[0] + decode*voxel[0] - 0.5, ijk[1] + decode*voxel[1] - 0.5, ijk[2] + decode*voxel[2] - 0.5)); + } else { + return map.applyMapF(Vec3f(ijk[0] + decode*voxel[0] - 0.5f, ijk[1] + decode*voxel[1] - 0.5f, ijk[2] + decode*voxel[2] - 0.5f)); + } +} + +template +__hostdev__ inline static Vec3T voxelToWorld(const Vec3u16 &voxel, const Coord &ijk, const Map &map) +{ + static constexpr double decode = 1.0/double((1<<16) - 1); + if constexpr(util::is_same::value) { + return map.applyMap( Vec3d(ijk[0] + decode*voxel[0] - 0.5, ijk[1] + decode*voxel[1] - 0.5, ijk[2] + decode*voxel[2] - 0.5)); + } else { + return map.applyMapF(Vec3f(ijk[0] + decode*voxel[0] - 0.5f, ijk[1] + decode*voxel[1] - 0.5f, ijk[2] + decode*voxel[2] - 0.5f)); + } +} + +template +__hostdev__ inline static Vec3T voxelToWorld(const Vec3f &voxel, const Coord &ijk, const Map &map) +{ + if constexpr(util::is_same::value) { + return map.applyMap( Vec3d(ijk[0] + voxel[0], ijk[1] + voxel[1], ijk[2] + voxel[2])); + } else { + return map.applyMapF(Vec3f(ijk[0] + voxel[0], ijk[1] + voxel[1], ijk[2] + voxel[2])); + } +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +namespace tools::cuda { + +template +class PointsToGrid +{ +public: + + struct Data { + Map map; + void *d_bufferPtr; + uint64_t *d_keys, *d_tile_keys, *d_lower_keys, *d_leaf_keys;// device pointer to 64 bit keys + uint64_t grid, tree, root, upper, lower, leaf, meta, blind, size;// byte offsets to nodes in buffer + uint32_t *d_indx;// device pointer to point indices (or IDs) + uint32_t nodeCount[3], *pointsPerLeafPrefix, *pointsPerLeaf;// 0=leaf,1=lower, 2=upper + uint32_t voxelCount, *pointsPerVoxelPrefix, *pointsPerVoxel; + BitFlags<16> flags; + __hostdev__ NanoGrid& getGrid() const {return *util::PtrAdd>(d_bufferPtr, grid);} + __hostdev__ NanoTree& getTree() const {return *util::PtrAdd>(d_bufferPtr, tree);} + __hostdev__ NanoRoot& getRoot() const {return *util::PtrAdd>(d_bufferPtr, root);} + __hostdev__ NanoUpper& getUpper(int i) const {return *(util::PtrAdd>(d_bufferPtr, upper)+i);} + __hostdev__ NanoLower& getLower(int i) const {return *(util::PtrAdd>(d_bufferPtr, lower)+i);} + __hostdev__ NanoLeaf& getLeaf(int i) const {return *(util::PtrAdd>(d_bufferPtr, leaf)+i);} + __hostdev__ GridBlindMetaData& getMeta() const { return *util::PtrAdd(d_bufferPtr, meta);}; + template + __hostdev__ Vec3T& getPoint(int i) const {return *(util::PtrAdd(d_bufferPtr, blind)+i);} + };// Data + + /// @brief Map constructor, which other constructors might call + /// @param map Map to be used for the output device grid + /// @param stream optional CUDA stream (defaults to CUDA stream 0) + PointsToGrid(const Map &map, cudaStream_t stream = 0) + : mStream(stream) + , mPointType(util::is_same::value ? 
PointType::Default : PointType::Disable) + { + mData.map = map; + mData.flags.initMask({GridFlags::HasBBox, GridFlags::IsBreadthFirst}); + mDeviceData = mMemPool.template alloc(mStream); + } + + /// @brief Default constructor that calls the Map constructor defined above + /// @param scale Voxel size in world units + /// @param trans Translation of origin in world units + /// @param stream optional CUDA stream (defaults to CUDA stream 0) + PointsToGrid(const double scale = 1.0, const Vec3d &trans = Vec3d(0.0), cudaStream_t stream = 0) + : PointsToGrid(Map(scale, trans), stream){} + + /// @brief Constructor from a target maximum number of particles per voxel. Calls the Map constructor defined above + /// @param maxPointsPerVoxel Maximum number of points oer voxel + /// @param stream optional CUDA stream (defaults to CUDA stream 0) + PointsToGrid(int maxPointsPerVoxel, int tolerance = 1, int maxIterations = 10, cudaStream_t stream = 0) + : PointsToGrid(Map(1.0), stream) + { + mMaxPointsPerVoxel = maxPointsPerVoxel; + mTolerance = tolerance; + mMaxIterations = maxIterations; + } + + /// @brief Toggle on and off verbose mode + /// @param level Verbose level: 0=quiet, 1=timing, 2=benchmarking + void setVerbose(int level = 1) {mVerbose = level; mData.flags.setBit(7u, level); } + + /// @brief Set the mode for checksum computation, which is disabled by default + /// @param mode Mode of checksum computation + void setChecksum(CheckMode mode = CheckMode::Disable){mChecksum = mode;} + + /// @brief Toggle on and off the computation of a bounding-box + /// @param on If true bbox will be computed + void includeBBox(bool on = true) { mData.flags.setMask(GridFlags::HasBBox, on); } + + /// @brief Set the name of the output grid + /// @param name name of the output grid + void setGridName(const std::string &name) {mGridName = name;} + + // only available when BuildT == Point + template typename util::enable_if::value>::type + setPointType(PointType type) { mPointType = type; } + + /// @brief Creates a handle to a grid with the specified build type from a list of points in index or world space + /// @tparam BuildT Build type of the output grid, i.e NanoGrid + /// @tparam PtrT Template type to a raw or fancy-pointer of point coordinates in world or index space. + /// @tparam BufferT Buffer type used for allocation of the grid handle + /// @param points device point to an array of points in world space + /// @param pointCount number of input points or voxels + /// @param buffer optional buffer (currently ignored) + /// @return returns a handle with a grid of type NanoGrid + template + GridHandle getHandle(const PtrT points, + size_t pointCount, + const BufferT &buffer = BufferT()); + + template + void countNodes(const PtrT points, size_t pointCount); + + template + void processGridTreeRoot(const PtrT points, size_t pointCount); + + void processUpperNodes(); + + void processLowerNodes(); + + template + void processLeafNodes(const PtrT points); + + template + void processPoints(const PtrT points, size_t pointCount); + + void processBBox(); + + // the following methods are only defined when BuildT == Point + template typename util::enable_if::value, uint32_t>::type + maxPointsPerVoxel() const {return mMaxPointsPerVoxel;} + template typename util::enable_if::value, uint32_t>::type + maxPointsPerLeaf() const {return mMaxPointsPerLeaf;} + +private: + static constexpr unsigned int mNumThreads = 128;// seems faster than the old value of 256! 
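+ // numBlocks(n) below returns ceil(n / mNumThreads), i.e. enough blocks of mNumThreads
+ // threads to cover n work items when launching the kernels in this class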
+ static unsigned int numBlocks(unsigned int n) {return (n + mNumThreads - 1) / mNumThreads;} + + cudaStream_t mStream{0}; + util::cuda::Timer mTimer; + PointType mPointType; + std::string mGridName; + int mVerbose{0}; + Data mData, *mDeviceData; + uint32_t mMaxPointsPerVoxel{0u}, mMaxPointsPerLeaf{0u}; + int mTolerance{1}, mMaxIterations{1}; + CheckMode mChecksum{CheckMode::Disable}; + + // wrapper of AllocT, defaulting to cub::CachingDeviceAllocator, which offers a shared scratch space + struct Allocator { + AllocT mAllocator; + void* d_scratch; + size_t scratchSize, actualScratchSize; + Allocator() : d_scratch(nullptr), scratchSize(0), actualScratchSize(0) {} + ~Allocator() { + if (scratchSize > 0) this->free(d_scratch);// a bug in cub makes this necessary + mAllocator.FreeAllCached(); + } + template + T* alloc(size_t count, cudaStream_t stream) { + T* d_ptr = nullptr; + cudaCheck(mAllocator.DeviceAllocate((void**)&d_ptr, sizeof(T)*count, stream)); + return d_ptr; + } + template + T* alloc(cudaStream_t stream) {return this->template alloc(1, stream);} + void free(void *d_ptr) {if (d_ptr) cudaCheck(mAllocator.DeviceFree(d_ptr));} + template + void free(void *d_ptr, T... other) { + if (d_ptr) cudaCheck(mAllocator.DeviceFree(d_ptr)); + this->free(other...); + } + void adjustScratch(cudaStream_t stream){ + if (scratchSize > actualScratchSize) { + if (actualScratchSize>0) cudaCheck(mAllocator.DeviceFree(d_scratch)); + cudaCheck(mAllocator.DeviceAllocate((void**)&d_scratch, scratchSize, stream)); + actualScratchSize = scratchSize; + } + } + } mMemPool; + + template + BufferT getBuffer(const PtrT points, size_t pointCount, const BufferT &buffer); +};// tools::cuda::PointsToGrid + +namespace kernels { +/// @details Used by cuda::PointsToGrid::processLeafNodes before the computation +/// of prefix-sum for index grid. +/// Moving this away from an implementation using the lambdaKernel wrapper +/// to fix the following on Windows platform: +/// error : For this host platform/dialect, an extended lambda cannot be defined inside the 'if' +/// or 'else' block of a constexpr if statement. +/// function in a lambda through lambdaKernel wrapper defined in CudaUtils.h. +template +__global__ void fillValueIndexKernel(const size_t numItems, uint64_t* devValueIndex, typename PointsToGrid::Data* d_data) { + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid >= numItems) return; + devValueIndex[tid] = static_cast(d_data->getLeaf(tid).mValueMask.countOn()); +} + +/// @details Used by PointsToGrid::processLeafNodes for the computation +/// of prefix-sum for index grid. +/// Moving this away from an implementation using the lambdaKernel wrapper +/// to fix the following on Windows platform: +/// error : For this host platform/dialect, an extended lambda cannot be defined inside the 'if' +/// or 'else' block of a constexpr if statement. 
+template +__global__ void leafPrefixSumKernel(const size_t numItems, uint64_t* devValueIndexPrefix, typename PointsToGrid::Data* d_data) { + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid >= numItems) return; + + auto &leaf = d_data->getLeaf(tid); + leaf.mOffset = 1u;// will be re-set below + const uint64_t *w = leaf.mValueMask.words(); + uint64_t &prefixSum = leaf.mPrefixSum, sum = util::countOn(*w++); + prefixSum = sum; + for (int n = 9; n < 55; n += 9) {// n=i*9 where i=1,2,..6 + sum += util::countOn(*w++); + prefixSum |= sum << n;// each pre-fixed sum is encoded in 9 bits + } + if (tid==0) { + d_data->getGrid().mData1 = 1u + devValueIndexPrefix[d_data->nodeCount[0]-1];// set total count + d_data->getTree().mVoxelCount = devValueIndexPrefix[d_data->nodeCount[0]-1]; + } else { + leaf.mOffset = 1u + devValueIndexPrefix[tid-1];// background is index 0 + } +} + +/// @details Used by PointsToGrid::processLeafNodes to make sure leaf.mMask - leaf.mValueMask. +/// Moving this away from an implementation using the lambdaKernel wrapper +/// to fix the following on Windows platform: +/// error : For this host platform/dialect, an extended lambda cannot be defined inside the 'if' +/// or 'else' block of a constexpr if statement. +template +__global__ void setMaskEqValMaskKernel(const size_t numItems, typename PointsToGrid::Data* d_data) { + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid >= numItems) return; + auto &leaf = d_data->getLeaf(tid); + leaf.mMask = leaf.mValueMask; +} +} // namespace kernels + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +// Define utility macro used to call cub functions that use dynamic temporary storage +#ifndef CALL_CUBS +#ifdef _WIN32 +#define CALL_CUBS(func, ...) \ + cudaCheck(cub::func(nullptr, mMemPool.scratchSize, __VA_ARGS__, mStream)); \ + mMemPool.adjustScratch(mStream); \ + cudaCheck(cub::func(mMemPool.d_scratch, mMemPool.scratchSize, __VA_ARGS__, mStream)); +#else// fdef _WIN32 +#define CALL_CUBS(func, args...) 
\ + cudaCheck(cub::func(nullptr, mMemPool.scratchSize, args, mStream)); \ + mMemPool.adjustScratch(mStream); \ + cudaCheck(cub::func(mMemPool.d_scratch, mMemPool.scratchSize, args, mStream)); +#endif// ifdef _WIN32 +#endif// ifndef CALL_CUBS + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +template +inline GridHandle +PointsToGrid::getHandle(const PtrT points, + size_t pointCount, + const BufferT &pool) +{ + if (mVerbose==1) mTimer.start("\nCounting nodes"); + this->countNodes(points, pointCount); + + if (mVerbose==1) mTimer.restart("Initiate buffer"); + auto buffer = this->getBuffer(points, pointCount, pool); + + if (mVerbose==1) mTimer.restart("Process grid,tree,root"); + this->processGridTreeRoot(points, pointCount); + + if (mVerbose==1) mTimer.restart("Process upper nodes"); + this->processUpperNodes(); + + if (mVerbose==1) mTimer.restart("Process lower nodes"); + this->processLowerNodes(); + + if (mVerbose==1) mTimer.restart("Process leaf nodes"); + this->processLeafNodes(points); + + if (mVerbose==1) mTimer.restart("Process points"); + this->processPoints(points, pointCount); + + if (mVerbose==1) mTimer.restart("Process bbox"); + this->processBBox(); + if (mVerbose==1) mTimer.stop(); + + if (mVerbose==1) mTimer.restart("Computation of checksum"); + tools::cuda::updateChecksum((GridData*)buffer.deviceData(), mChecksum); + if (mVerbose==1) mTimer.stop(); + + return GridHandle(std::move(buffer)); +}// PointsToGrid::getHandle + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +// --- CUB helpers --- +template +struct ShiftRight +{ + __hostdev__ inline OutT operator()(const InT& v) const {return static_cast(v >> BitCount);} +}; + +template +struct ShiftRightIterator : public cub::TransformInputIterator, InT*> +{ + using BASE = cub::TransformInputIterator, InT*>; + __hostdev__ inline ShiftRightIterator(uint64_t* input_itr) : BASE(input_itr, ShiftRight()) {} +}; + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +template +void PointsToGrid::countNodes(const PtrT points, size_t pointCount) +{ + using Vec3T = typename util::remove_const::element_type>::type; + if constexpr(util::is_same::value) { + static_assert(util::is_same::value, "Point (vs voxels) coordinates should be represented as Vec3f or Vec3d"); + } else { + static_assert(util::is_same::value, "Voxel coordinates should be represented as Coord, Vec3f or Vec3d"); + } + + mMaxPointsPerVoxel = math::Min(mMaxPointsPerVoxel, pointCount); + int iterCounter = 0; + struct Foo {// pairs current voxel size, dx, with the corresponding particle density, i.e. maximum number of points per voxel + double dx; + uint32_t density; + bool operator<(const Foo &rhs) const {return density < rhs.density || (density == rhs.density && dx < rhs.dx);} + } min{0.0, 1}, max{0.0, 0};// min: as dx -> 0 density -> 1 point per voxel, max: density is 0 i.e. 
undefined + +jump:// this marks the beginning of the actual algorithm + + mData.d_keys = mMemPool.template alloc(pointCount, mStream); + mData.d_indx = mMemPool.template alloc(pointCount, mStream);// uint32_t can index 4.29 billion Coords, corresponding to 48 GB + cudaCheck(cudaMemcpyAsync(mDeviceData, &mData, sizeof(Data), cudaMemcpyHostToDevice, mStream));// copy mData from CPU -> GPU + + if (mVerbose==2) mTimer.start("\nAllocating arrays for keys and indices"); + auto *d_keys = mMemPool.template alloc(pointCount, mStream); + auto *d_indx = mMemPool.template alloc(pointCount, mStream); + + if (mVerbose==2) mTimer.restart("Generate tile keys"); + util::cuda::lambdaKernel<<>>(pointCount, [=] __device__(size_t tid, const Data *d_data, const PtrT points) { + auto coordToKey = [](const Coord &ijk)->uint64_t{ + // Note: int32_t has a range of -2^31 to 2^31 - 1 whereas uint32_t has a range of 0 to 2^32 - 1 + static constexpr int64_t offset = 1 << 31; + return (uint64_t(uint32_t(int64_t(ijk[2]) + offset) >> 12) ) | // z is the lower 21 bits + (uint64_t(uint32_t(int64_t(ijk[1]) + offset) >> 12) << 21) | // y is the middle 21 bits + (uint64_t(uint32_t(int64_t(ijk[0]) + offset) >> 12) << 42); // x is the upper 21 bits + };// coordToKey lambda functor + d_indx[tid] = uint32_t(tid); + uint64_t &key = d_keys[tid]; + if constexpr(util::is_same::value) {// points are in world space + if constexpr(util::is_same::value) { + key = coordToKey(d_data->map.applyInverseMapF(points[tid]).round()); + } else {// points are Vec3d + key = coordToKey(d_data->map.applyInverseMap(points[tid]).round()); + } + } else if constexpr(util::is_same::value) {// points Coord are in index space + key = coordToKey(points[tid]); + } else {// points are Vec3f or Vec3d in index space + key = coordToKey(points[tid].round()); + } + }, mDeviceData, points); + cudaCheckError(); + if (mVerbose==2) mTimer.restart("DeviceRadixSort of "+std::to_string(pointCount)+" tile keys"); + CALL_CUBS(DeviceRadixSort::SortPairs, d_keys, mData.d_keys, d_indx, mData.d_indx, pointCount, 0, 62);// 21 bits per coord + std::swap(d_indx, mData.d_indx);// sorted indices are now in d_indx + + if (mVerbose==2) mTimer.restart("Allocate runs"); + auto *d_points_per_tile = mMemPool.template alloc(pointCount, mStream); + uint32_t *d_node_count = mMemPool.template alloc(3, mStream); + + if (mVerbose==2) mTimer.restart("DeviceRunLengthEncode tile keys"); + CALL_CUBS(DeviceRunLengthEncode::Encode, mData.d_keys, d_keys, d_points_per_tile, d_node_count+2, pointCount); + cudaCheck(cudaMemcpyAsync(mData.nodeCount+2, d_node_count+2, sizeof(uint32_t), cudaMemcpyDeviceToHost, mStream)); + mData.d_tile_keys = mMemPool.template alloc(mData.nodeCount[2], mStream); + cudaCheck(cudaMemcpyAsync(mData.d_tile_keys, d_keys, mData.nodeCount[2]*sizeof(uint64_t), cudaMemcpyDeviceToDevice, mStream)); + + if (mVerbose) mTimer.restart("DeviceRadixSort of " + std::to_string(pointCount) + " voxel keys in " + std::to_string(mData.nodeCount[2]) + " tiles"); + uint32_t *points_per_tile = new uint32_t[mData.nodeCount[2]]; + cudaCheck(cudaMemcpyAsync(points_per_tile, d_points_per_tile, mData.nodeCount[2]*sizeof(uint32_t), cudaMemcpyDeviceToHost, mStream)); + mMemPool.free(d_points_per_tile); + + for (uint32_t id = 0, offset = 0; id < mData.nodeCount[2]; ++id) { + const uint32_t count = points_per_tile[id]; + util::cuda::lambdaKernel<<>>(count, [=] __device__(size_t tid, const Data *d_data) { + auto voxelKey = [] __device__ (uint64_t tileID, const Coord &ijk){ + return tileID << 36 | // upper 
offset: 64-15-12-9=28, i.e. last 28 bits + uint64_t(NanoUpper::CoordToOffset(ijk)) << 21 | // lower offset: 32^3 = 2^15, i.e. next 15 bits + uint64_t(NanoLower::CoordToOffset(ijk)) << 9 | // leaf offset: 16^3 = 2^12, i.e. next 12 bits + uint64_t(NanoLeaf< BuildT>::CoordToOffset(ijk)); // voxel offset: 8^3 = 2^9, i.e. first 9 bits + };// voxelKey lambda functor + tid += offset; + Vec3T p = points[d_indx[tid]]; + if constexpr(util::is_same::value) p = util::is_same::value ? d_data->map.applyInverseMapF(p) : d_data->map.applyInverseMap(p); + d_keys[tid] = voxelKey(id, p.round()); + }, mDeviceData); cudaCheckError(); + CALL_CUBS(DeviceRadixSort::SortPairs, d_keys + offset, mData.d_keys + offset, d_indx + offset, mData.d_indx + offset, count, 0, 36);// 9+12+15=36 + offset += count; + } + mMemPool.free(d_indx); + delete [] points_per_tile; + + if (mVerbose==2) mTimer.restart("Count points per voxel"); + + mData.pointsPerVoxel = mMemPool.template alloc(pointCount, mStream); + uint32_t *d_voxel_count = mMemPool.template alloc(mStream); + CALL_CUBS(DeviceRunLengthEncode::Encode, mData.d_keys, d_keys, mData.pointsPerVoxel, d_voxel_count, pointCount); + cudaCheck(cudaMemcpyAsync(&mData.voxelCount, d_voxel_count, sizeof(uint32_t), cudaMemcpyDeviceToHost, mStream)); + mMemPool.free(d_voxel_count); + + if (util::is_same::value) { + if (mVerbose==2) mTimer.restart("Count max points per voxel"); + uint32_t *d_maxPointsPerVoxel = mMemPool.template alloc(mStream), maxPointsPerVoxel; + CALL_CUBS(DeviceReduce::Max, mData.pointsPerVoxel, d_maxPointsPerVoxel, mData.voxelCount); + cudaCheck(cudaMemcpyAsync(&maxPointsPerVoxel, d_maxPointsPerVoxel, sizeof(uint32_t), cudaMemcpyDeviceToHost, mStream)); + mMemPool.free(d_maxPointsPerVoxel); + double dx = mData.map.getVoxelSize()[0]; + if (++iterCounter >= mMaxIterations || pointCount == 1u || math::Abs((int)maxPointsPerVoxel - (int)mMaxPointsPerVoxel) <= mTolerance) { + mMaxPointsPerVoxel = maxPointsPerVoxel; + } else { + const Foo tmp{dx, maxPointsPerVoxel}; + if (maxPointsPerVoxel < mMaxPointsPerVoxel) { + if (min < tmp) min = tmp; + } else if (max.density == 0 || tmp < max) { + max = tmp; + } + if (max.density) { + dx = (min.dx*(max.density - mMaxPointsPerVoxel) + max.dx*(mMaxPointsPerVoxel-min.density))/double(max.density-min.density); + } else if (maxPointsPerVoxel > 1u) { + dx *= (mMaxPointsPerVoxel-1.0)/(maxPointsPerVoxel-1.0); + } else {// maxPointsPerVoxel = 1 so increase dx significantly + dx *= 10.0; + } + if (mVerbose==2) printf("\ntarget density = %u, current density = %u current dx = %f, next dx = %f\n", mMaxPointsPerVoxel, maxPointsPerVoxel, tmp.dx, dx); + mData.map = Map(dx); + mMemPool.free(mData.d_keys, mData.d_indx, d_keys, mData.d_tile_keys, d_node_count, mData.pointsPerVoxel); + goto jump; + } + } + if (iterCounter>1 && mVerbose) std::cerr << "Used " << iterCounter << " attempts to determine dx that produces a target dpoint denisty\n\n"; + + if (mVerbose==2) mTimer.restart("Compute prefix sum of points per voxel"); + mData.pointsPerVoxelPrefix = mMemPool.template alloc(mData.voxelCount, mStream); + CALL_CUBS(DeviceScan::ExclusiveSum, mData.pointsPerVoxel, mData.pointsPerVoxelPrefix, mData.voxelCount); + + mData.pointsPerLeaf = mMemPool.template alloc(pointCount, mStream); + CALL_CUBS(DeviceRunLengthEncode::Encode, ShiftRightIterator<9>(mData.d_keys), d_keys, mData.pointsPerLeaf, d_node_count, pointCount); + cudaCheck(cudaMemcpyAsync(mData.nodeCount, d_node_count, sizeof(uint32_t), cudaMemcpyDeviceToHost, mStream)); + + if 
constexpr(util::is_same::value) { + uint32_t *d_maxPointsPerLeaf = mMemPool.template alloc(mStream); + CALL_CUBS(DeviceReduce::Max, mData.pointsPerLeaf, d_maxPointsPerLeaf, mData.nodeCount[0]); + cudaCheck(cudaMemcpyAsync(&mMaxPointsPerLeaf, d_maxPointsPerLeaf, sizeof(uint32_t), cudaMemcpyDeviceToHost, mStream)); + //printf("\n Leaf count = %u, max points per leaf = %u\n", mData.nodeCount[0], mMaxPointsPerLeaf); + if (mMaxPointsPerLeaf > std::numeric_limits::max()) { + throw std::runtime_error("Too many points per leaf: "+std::to_string(mMaxPointsPerLeaf)); + } + mMemPool.free(d_maxPointsPerLeaf); + } + + mData.pointsPerLeafPrefix = mMemPool.template alloc(mData.nodeCount[0], mStream); + CALL_CUBS(DeviceScan::ExclusiveSum, mData.pointsPerLeaf, mData.pointsPerLeafPrefix, mData.nodeCount[0]); + + mData.d_leaf_keys = mMemPool.template alloc(mData.nodeCount[0], mStream); + cudaCheck(cudaMemcpyAsync(mData.d_leaf_keys, d_keys, mData.nodeCount[0]*sizeof(uint64_t), cudaMemcpyDeviceToDevice, mStream)); + + CALL_CUBS(DeviceSelect::Unique, ShiftRightIterator<12>(mData.d_leaf_keys), d_keys, d_node_count+1, mData.nodeCount[0]);// count lower nodes + cudaCheck(cudaMemcpyAsync(mData.nodeCount+1, d_node_count+1, sizeof(uint32_t), cudaMemcpyDeviceToHost, mStream)); + mData.d_lower_keys = mMemPool.template alloc(mData.nodeCount[1], mStream); + cudaCheck(cudaMemcpyAsync(mData.d_lower_keys, d_keys, mData.nodeCount[1]*sizeof(uint64_t), cudaMemcpyDeviceToDevice, mStream)); + + mMemPool.free(d_keys, d_node_count); + if (mVerbose==2) mTimer.stop(); + + //printf("Leaf count = %u, lower count = %u, upper count = %u\n", mData.nodeCount[0], mData.nodeCount[1], mData.nodeCount[2]); +}// PointsToGrid::countNodes + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +template +inline BufferT PointsToGrid::getBuffer(const PtrT, size_t pointCount, const BufferT &pool) +{ + auto sizeofPoint = [&]()->size_t{ + switch (mPointType){ + case PointType::PointID: return sizeof(uint32_t); + case PointType::World64: return sizeof(Vec3d); + case PointType::World32: return sizeof(Vec3f); + case PointType::Grid64: return sizeof(Vec3d); + case PointType::Grid32: return sizeof(Vec3f); + case PointType::Voxel32: return sizeof(Vec3f); + case PointType::Voxel16: return sizeof(Vec3u16); + case PointType::Voxel8: return sizeof(Vec3u8); + case PointType::Default: return pointer_traits::element_size; + default: return size_t(0);// PointType::Disable + } + }; + + mData.grid = 0;// grid is always stored at the start of the buffer! 
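+ // the remaining byte offsets define the breadth-first layout of the destination buffer:
+ // Grid | Tree | Root | upper nodes | lower nodes | leaf nodes | blind meta data | blind (point) data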
+ mData.tree = NanoGrid::memUsage(); // grid ends and tree begins + mData.root = mData.tree + NanoTree::memUsage(); // tree ends and root node begins + mData.upper = mData.root + NanoRoot::memUsage(mData.nodeCount[2]); // root node ends and upper internal nodes begin + mData.lower = mData.upper + NanoUpper::memUsage()*mData.nodeCount[2]; // upper internal nodes ends and lower internal nodes begin + mData.leaf = mData.lower + NanoLower::memUsage()*mData.nodeCount[1]; // lower internal nodes ends and leaf nodes begin + mData.meta = mData.leaf + NanoLeaf::DataType::memUsage()*mData.nodeCount[0];// leaf nodes end and blind meta data begins + mData.blind = mData.meta + sizeof(GridBlindMetaData)*int( mPointType!=PointType::Disable ); // meta data ends and blind data begins + mData.size = mData.blind + pointCount*sizeofPoint();// end of buffer + + auto buffer = BufferT::create(mData.size, &pool, false);// only allocate buffer on the device + mData.d_bufferPtr = buffer.deviceData(); + if (mData.d_bufferPtr == nullptr) throw std::runtime_error("Failed to allocate grid buffer on the device"); + cudaCheck(cudaMemcpyAsync(mDeviceData, &mData, sizeof(Data), cudaMemcpyHostToDevice, mStream));// copy Data CPU -> GPU + return buffer; +}// PointsToGrid::getBuffer + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +template +inline void PointsToGrid::processGridTreeRoot(const PtrT points, size_t pointCount) +{ + using Vec3T = typename util::remove_const::element_type>::type; + util::cuda::lambdaKernel<<<1, 1, 0, mStream>>>(1, [=] __device__(size_t, Data *d_data, PointType pointType) { + // process Root + auto &root = d_data->getRoot(); + root.mBBox = CoordBBox(); // init to empty + root.mTableSize = d_data->nodeCount[2]; + root.mBackground = NanoRoot::ValueType(0);// background_value + root.mMinimum = root.mMaximum = NanoRoot::ValueType(0); + root.mAverage = root.mStdDevi = NanoRoot::FloatType(0); + + // process Tree + auto &tree = d_data->getTree(); + tree.setRoot(&root); + tree.setFirstNode(&d_data->getUpper(0)); + tree.setFirstNode(&d_data->getLower(0)); + tree.setFirstNode(&d_data->getLeaf(0)); + tree.mNodeCount[2] = tree.mTileCount[2] = d_data->nodeCount[2]; + tree.mNodeCount[1] = tree.mTileCount[1] = d_data->nodeCount[1]; + tree.mNodeCount[0] = tree.mTileCount[0] = d_data->nodeCount[0]; + tree.mVoxelCount = d_data->voxelCount; + + // process Grid + auto &grid = d_data->getGrid(); + grid.init({GridFlags::HasBBox, GridFlags::IsBreadthFirst}, d_data->size, d_data->map, toGridType()); + grid.mChecksum = ~uint64_t(0);// set all bits on which means it's disabled + grid.mBlindMetadataCount = util::is_same::value;// ? 
1u : 0u; + grid.mBlindMetadataOffset = d_data->meta; + if (pointType != PointType::Disable) { + const auto lastLeaf = tree.mNodeCount[0] - 1; + grid.mData1 = d_data->pointsPerLeafPrefix[lastLeaf] + d_data->pointsPerLeaf[lastLeaf]; + auto &meta = d_data->getMeta(); + meta.mDataOffset = sizeof(GridBlindMetaData);// blind data is placed right after this meta data + meta.mValueCount = pointCount; + // Blind meta data + switch (pointType){ + case PointType::PointID: + grid.mGridClass = GridClass::PointIndex; + meta.mSemantic = GridBlindDataSemantic::PointId; + meta.mDataClass = GridBlindDataClass::IndexArray; + meta.mDataType = toGridType(); + meta.mValueSize = sizeof(uint32_t); + util::strcpy(meta.mName, "PointID: uint32_t indices to points"); + break; + case PointType::World64: + grid.mGridClass = GridClass::PointData; + meta.mSemantic = GridBlindDataSemantic::WorldCoords; + meta.mDataClass = GridBlindDataClass::AttributeArray; + meta.mDataType = toGridType(); + meta.mValueSize = sizeof(Vec3d); + util::strcpy(meta.mName, "World64: Vec3 point coordinates in world space"); + break; + case PointType::World32: + grid.mGridClass = GridClass::PointData; + meta.mSemantic = GridBlindDataSemantic::WorldCoords; + meta.mDataClass = GridBlindDataClass::AttributeArray; + meta.mDataType = toGridType(); + meta.mValueSize = sizeof(Vec3f); + util::strcpy(meta.mName, "World32: Vec3 point coordinates in world space"); + break; + case PointType::Grid64: + grid.mGridClass = GridClass::PointData; + meta.mSemantic = GridBlindDataSemantic::GridCoords; + meta.mDataClass = GridBlindDataClass::AttributeArray; + meta.mDataType = toGridType(); + meta.mValueSize = sizeof(Vec3d); + util::strcpy(meta.mName, "Grid64: Vec3 point coordinates in grid space"); + break; + case PointType::Grid32: + grid.mGridClass = GridClass::PointData; + meta.mSemantic = GridBlindDataSemantic::GridCoords; + meta.mDataClass = GridBlindDataClass::AttributeArray; + meta.mDataType = toGridType(); + meta.mValueSize = sizeof(Vec3f); + util::strcpy(meta.mName, "Grid32: Vec3 point coordinates in grid space"); + break; + case PointType::Voxel32: + grid.mGridClass = GridClass::PointData; + meta.mSemantic = GridBlindDataSemantic::VoxelCoords; + meta.mDataClass = GridBlindDataClass::AttributeArray; + meta.mDataType = toGridType(); + meta.mValueSize = sizeof(Vec3f); + util::strcpy(meta.mName, "Voxel32: Vec3 point coordinates in voxel space"); + break; + case PointType::Voxel16: + grid.mGridClass = GridClass::PointData; + meta.mSemantic = GridBlindDataSemantic::VoxelCoords; + meta.mDataClass = GridBlindDataClass::AttributeArray; + meta.mDataType = toGridType(); + meta.mValueSize = sizeof(Vec3u16); + util::strcpy(meta.mName, "Voxel16: Vec3 point coordinates in voxel space"); + break; + case PointType::Voxel8: + grid.mGridClass = GridClass::PointData; + meta.mSemantic = GridBlindDataSemantic::VoxelCoords; + meta.mDataClass = GridBlindDataClass::AttributeArray; + meta.mDataType = toGridType(); + meta.mValueSize = sizeof(Vec3u8); + util::strcpy(meta.mName, "Voxel8: Vec3 point coordinates in voxel space"); + break; + case PointType::Default: + grid.mGridClass = GridClass::PointData; + meta.mSemantic = GridBlindDataSemantic::WorldCoords; + meta.mDataClass = GridBlindDataClass::AttributeArray; + meta.mDataType = toGridType(); + meta.mValueSize = sizeof(Vec3T); + if constexpr(util::is_same::value) { + util::strcpy(meta.mName, "World32: Vec3 point coordinates in world space"); + } else if constexpr(util::is_same::value){ + util::strcpy(meta.mName, "World64: Vec3 point 
coordinates in world space"); + } else { + printf("Error in PointsToGrid::processGridTreeRoot: expected Vec3T = Vec3f or Vec3d\n"); + } + break; + default: + printf("Error in PointsToGrid::processGridTreeRoot: invalid pointType\n"); + } + } else if constexpr(BuildTraits::is_offindex) { + grid.mData1 = 1u + 512u*d_data->nodeCount[0]; + grid.mGridClass = GridClass::IndexGrid; + } + }, mDeviceData, mPointType);// lambdaKernel + cudaCheckError(); + + char *dst = mData.getGrid().mGridName; + if (const char *src = mGridName.data()) { + cudaCheck(cudaMemcpyAsync(dst, src, GridData::MaxNameSize, cudaMemcpyHostToDevice, mStream)); + } else { + cudaCheck(cudaMemsetAsync(dst, 0, GridData::MaxNameSize, mStream)); + } +}// PointsToGrid::processGridTreeRoot + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +inline void PointsToGrid::processUpperNodes() +{ + util::cuda::lambdaKernel<<>>(mData.nodeCount[2], [=] __device__(size_t tid, Data *d_data) { + auto &root = d_data->getRoot(); + auto &upper = d_data->getUpper(tid); +#if 1 + auto keyToCoord = [](uint64_t key)->nanovdb::Coord{ + static constexpr int64_t offset = 1 << 31;// max values of uint32_t is 2^31 - 1 + static constexpr uint64_t MASK = (1u << 21) - 1; // used to mask out 21 lower bits + return nanovdb::Coord(int(int64_t(((key >> 42) & MASK) << 12) - offset), // x are the upper 21 bits + int(int64_t(((key >> 21) & MASK) << 12) - offset), // y are the middle 21 bits + int(int64_t(( key & MASK) << 12) - offset)); // z are the lower 21 bits + }; + const Coord ijk = keyToCoord(d_data->d_tile_keys[tid]); +#else + const Coord ijk = NanoRoot::KeyToCoord(d_data->d_tile_keys[tid]); +#endif + root.tile(tid)->setChild(ijk, &upper, &root); + upper.mBBox[0] = ijk; + upper.mFlags = 0; + upper.mValueMask.setOff(); + upper.mChildMask.setOff(); + upper.mMinimum = upper.mMaximum = NanoLower::ValueType(0); + upper.mAverage = upper.mStdDevi = NanoLower::FloatType(0); + }, mDeviceData); + cudaCheckError(); + + mMemPool.free(mData.d_tile_keys); + + const uint64_t valueCount = mData.nodeCount[2] << 15; + util::cuda::lambdaKernel<<>>(valueCount, [=] __device__(size_t tid, Data *d_data) { + auto &upper = d_data->getUpper(tid >> 15); + upper.mTable[tid & 32767u].value = NanoUpper::ValueType(0);// background + }, mDeviceData); + cudaCheckError(); +}// PointsToGrid::processUpperNodes + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +inline void PointsToGrid::processLowerNodes() +{ + util::cuda::lambdaKernel<<>>(mData.nodeCount[1], [=] __device__(size_t tid, Data *d_data) { + auto &root = d_data->getRoot(); + const uint64_t lowerKey = d_data->d_lower_keys[tid]; + auto &upper = d_data->getUpper(lowerKey >> 15); + const uint32_t upperOffset = lowerKey & 32767u;// (1 << 15) - 1 = 32767 + upper.mChildMask.setOnAtomic(upperOffset); + auto &lower = d_data->getLower(tid); + upper.setChild(upperOffset, &lower); + lower.mBBox[0] = upper.offsetToGlobalCoord(upperOffset); + lower.mFlags = 0; + lower.mValueMask.setOff(); + lower.mChildMask.setOff(); + lower.mMinimum = lower.mMaximum = NanoLower::ValueType(0);// background; + lower.mAverage = lower.mStdDevi = NanoLower::FloatType(0); + }, mDeviceData); + cudaCheckError(); + + const uint64_t valueCount = mData.nodeCount[1] << 12; + 
util::cuda::lambdaKernel<<>>(valueCount, [=] __device__(size_t tid, Data *d_data) { + auto &lower = d_data->getLower(tid >> 12); + lower.mTable[tid & 4095u].value = NanoLower::ValueType(0);// background + }, mDeviceData); + cudaCheckError(); +}// PointsToGrid::processLowerNodes + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +template +inline void PointsToGrid::processLeafNodes(const PtrT points) +{ + const uint8_t flags = static_cast(mData.flags.data());// mIncludeStats ? 16u : 0u;// 4th bit indicates stats + + if (mVerbose==2) mTimer.start("process leaf meta data"); + // loop over leaf nodes and add it to its parent node + util::cuda::lambdaKernel<<>>(mData.nodeCount[0], [=] __device__(size_t tid, Data *d_data) { + const uint64_t leafKey = d_data->d_leaf_keys[tid], tile_id = leafKey >> 27; + auto &upper = d_data->getUpper(tile_id); + const uint32_t lowerOffset = leafKey & 4095u, upperOffset = (leafKey >> 12) & 32767u; + auto &lower = *upper.getChild(upperOffset); + lower.mChildMask.setOnAtomic(lowerOffset); + auto &leaf = d_data->getLeaf(tid); + lower.setChild(lowerOffset, &leaf); + leaf.mBBoxMin = lower.offsetToGlobalCoord(lowerOffset); + leaf.mFlags = flags; + auto &valueMask = leaf.mValueMask; + valueMask.setOff();// initiate all bits to off + + if constexpr(util::is_same::value) { + leaf.mOffset = d_data->pointsPerLeafPrefix[tid]; + leaf.mPointCount = d_data->pointsPerLeaf[tid]; + } else if constexpr(BuildTraits::is_offindex) { + leaf.mOffset = tid*512u + 1u;// background is index 0 + leaf.mPrefixSum = 0u; + } else if constexpr(!BuildTraits::is_special) { + leaf.mAverage = leaf.mStdDevi = NanoLeaf::FloatType(0); + leaf.mMinimum = leaf.mMaximum = NanoLeaf::ValueType(0); + } + }, mDeviceData); cudaCheckError(); + + if (mVerbose==2) mTimer.restart("set active voxel state and values"); + // loop over all active voxels and set LeafNode::mValueMask and LeafNode::mValues + util::cuda::lambdaKernel<<>>(mData.voxelCount, [=] __device__(size_t tid, Data *d_data) { + const uint32_t pointID = d_data->pointsPerVoxelPrefix[tid]; + const uint64_t voxelKey = d_data->d_keys[pointID]; + auto &upper = d_data->getUpper(voxelKey >> 36); + auto &lower = *upper.getChild((voxelKey >> 21) & 32767u); + auto &leaf = *lower.getChild((voxelKey >> 9) & 4095u); + const uint32_t n = voxelKey & 511u; + leaf.mValueMask.setOnAtomic(n);// <--- slow! + if constexpr(util::is_same::value) { + leaf.mValues[n] = uint16_t(pointID + d_data->pointsPerVoxel[tid] - leaf.offset()); + } else if constexpr(!BuildTraits::is_special) { + leaf.mValues[n] = NanoLeaf::ValueType(1);// set value of active voxels that are not points (or index) + } + }, mDeviceData); cudaCheckError(); + + mMemPool.free(mData.d_keys, mData.pointsPerVoxel, mData.pointsPerVoxelPrefix, mData.pointsPerLeafPrefix, mData.pointsPerLeaf); + + if (mVerbose==2) mTimer.restart("set inactive voxel values"); + const uint64_t denseVoxelCount = mData.nodeCount[0] << 9; + util::cuda::lambdaKernel<<>>(denseVoxelCount, [=] __device__(size_t tid, Data *d_data) { + auto &leaf = d_data->getLeaf(tid >> 9u); + const uint32_t n = tid & 511u; + if (leaf.mValueMask.isOn(n)) return; + if constexpr(util::is_same::value) { + const uint32_t m = leaf.mValueMask.findPrev(n - 1); + leaf.mValues[n] = m < 512u ? 
leaf.mValues[m] : 0u; + } else if constexpr(!BuildTraits::is_special) { + leaf.mValues[n] = NanoLeaf::ValueType(0);// value of inactive voxels + } + }, mDeviceData); cudaCheckError(); + + if constexpr(BuildTraits::is_onindex) { + if (mVerbose==2) mTimer.restart("prefix-sum for index grid"); + uint64_t *devValueIndex = mMemPool.template alloc(mData.nodeCount[0], mStream); + auto devValueIndexPrefix = mMemPool.template alloc(mData.nodeCount[0], mStream); + kernels::fillValueIndexKernel<<>>(mData.nodeCount[0], devValueIndex, mDeviceData); + cudaCheckError(); + CALL_CUBS(DeviceScan::InclusiveSum, devValueIndex, devValueIndexPrefix, mData.nodeCount[0]); + mMemPool.free(devValueIndex); + kernels::leafPrefixSumKernel<<>>(mData.nodeCount[0], devValueIndexPrefix, mDeviceData); + cudaCheckError(); + mMemPool.free(devValueIndexPrefix); + } + + if constexpr(BuildTraits::is_indexmask) { + if (mVerbose==2) mTimer.restart("leaf.mMask = leaf.mValueMask"); + kernels::setMaskEqValMaskKernel<<>>(mData.nodeCount[0], mDeviceData); + cudaCheckError(); + } + if (mVerbose==2) mTimer.stop(); +}// PointsToGrid::processLeafNodes + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +template +inline void PointsToGrid::processPoints(const PtrT, size_t) +{ + mMemPool.free(mData.d_indx); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +// Template specialization with BuildT = Point +template <> +template +inline void PointsToGrid::processPoints(const PtrT points, size_t pointCount) +{ + switch (mPointType){ + case PointType::Disable: + throw std::runtime_error("PointsToGrid::processPoints: mPointType == PointType::Disable\n"); + case PointType::PointID: + util::cuda::lambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { + d_data->template getPoint(tid) = d_data->d_indx[tid]; + }, mDeviceData); cudaCheckError(); + break; + case PointType::World64: + util::cuda::lambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { + d_data->template getPoint(tid) = points[d_data->d_indx[tid]]; + }, mDeviceData); cudaCheckError(); + break; + case PointType::World32: + util::cuda::lambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { + d_data->template getPoint(tid) = points[d_data->d_indx[tid]]; + }, mDeviceData); cudaCheckError(); + break; + case PointType::Grid64: + util::cuda::lambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { + d_data->template getPoint(tid) = d_data->map.applyInverseMap(points[d_data->d_indx[tid]]); + }, mDeviceData); cudaCheckError(); + break; + case PointType::Grid32: + util::cuda::lambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { + d_data->template getPoint(tid) = d_data->map.applyInverseMapF(points[d_data->d_indx[tid]]); + }, mDeviceData); cudaCheckError(); + break; + case PointType::Voxel32: + util::cuda::lambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { + worldToVoxel(d_data->template getPoint(tid), points[d_data->d_indx[tid]], d_data->map); + }, mDeviceData); cudaCheckError(); + break; + case PointType::Voxel16: + util::cuda::lambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { + worldToVoxel(d_data->template getPoint(tid), points[d_data->d_indx[tid]], d_data->map); + }, mDeviceData); 
cudaCheckError(); + break; + case PointType::Voxel8: + util::cuda::lambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { + worldToVoxel(d_data->template getPoint(tid), points[d_data->d_indx[tid]], d_data->map); + }, mDeviceData); cudaCheckError(); + break; + case PointType::Default: + util::cuda::lambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { + d_data->template getPoint::element_type>(tid) = points[d_data->d_indx[tid]]; + }, mDeviceData); cudaCheckError(); + break; + default: + printf("Internal error in PointsToGrid::processPoints\n"); + } + mMemPool.free(mData.d_indx); +}// PointsToGrid::processPoints + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +inline void PointsToGrid::processBBox() +{ + if (mData.flags.isMaskOff(GridFlags::HasBBox)) { + mMemPool.free(mData.d_leaf_keys, mData.d_lower_keys); + return; + } + + // reset bbox in lower nodes + util::cuda::lambdaKernel<<>>(mData.nodeCount[1], [=] __device__(size_t tid, Data *d_data) { + d_data->getLower(tid).mBBox = CoordBBox(); + }, mDeviceData); + cudaCheckError(); + + // update and propagate bbox from leaf -> lower/parent nodes + util::cuda::lambdaKernel<<>>(mData.nodeCount[0], [=] __device__(size_t tid, Data *d_data) { + const uint64_t leafKey = d_data->d_leaf_keys[tid]; + auto &upper = d_data->getUpper(leafKey >> 27); + auto &lower = *upper.getChild((leafKey >> 12) & 32767u); + auto &leaf = d_data->getLeaf(tid); + leaf.updateBBox(); + lower.mBBox.expandAtomic(leaf.bbox()); + }, mDeviceData); + mMemPool.free(mData.d_leaf_keys); + cudaCheckError(); + + // reset bbox in upper nodes + util::cuda::lambdaKernel<<>>(mData.nodeCount[2], [=] __device__(size_t tid, Data *d_data) { + d_data->getUpper(tid).mBBox = CoordBBox(); + }, mDeviceData); + cudaCheckError(); + + // propagate bbox from lower -> upper/parent node + util::cuda::lambdaKernel<<>>(mData.nodeCount[1], [=] __device__(size_t tid, Data *d_data) { + const uint64_t lowerKey = d_data->d_lower_keys[tid]; + auto &upper = d_data->getUpper(lowerKey >> 15); + auto &lower = d_data->getLower(tid); + upper.mBBox.expandAtomic(lower.bbox()); + }, mDeviceData); + mMemPool.free(mData.d_lower_keys); + cudaCheckError() + + // propagate bbox from upper -> root/parent node + util::cuda::lambdaKernel<<>>(mData.nodeCount[2], [=] __device__(size_t tid, Data *d_data) { + d_data->getRoot().mBBox.expandAtomic(d_data->getUpper(tid).bbox()); + }, mDeviceData); + cudaCheckError(); + + // update the world-bbox in the root node + util::cuda::lambdaKernel<<<1, 1, 0, mStream>>>(1, [=] __device__(size_t, Data *d_data) { + d_data->getGrid().mWorldBBox = d_data->getRoot().mBBox.transform(d_data->map); + }, mDeviceData); + cudaCheckError(); +}// PointsToGrid::processBBox + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +GridHandle// Grid +voxelsToGrid(const PtrT d_ijk, size_t voxelCount, double voxelSize, const BufferT &buffer, cudaStream_t stream) +{ + PointsToGrid converter(voxelSize, Vec3d(0.0), stream); + return converter.getHandle(d_ijk, voxelCount, buffer); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +GridHandle// Grid 
with PointType coordinates as blind data +pointsToGrid(const PtrT d_xyz, int pointCount, int maxPointsPerVoxel, int tolerance, int maxIterations, PointType type, const BufferT &buffer, cudaStream_t stream) +{ + PointsToGrid converter(maxPointsPerVoxel, tolerance, maxIterations, Vec3d(0.0), stream); + converter.setPointType(type); + return converter.getHandle(d_xyz, pointCount, buffer); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +GridHandle +pointsToGrid(std::vector> vec, const BufferT &buffer, cudaStream_t stream) +{ + std::vector> handles; + for (auto &p : vec) handles.push_back(pointsToGrid(std::get<0>(p), std::get<1>(p), std::get<2>(p), std::get<3>(p), buffer, stream)); + return mergeDeviceGrids(handles, stream); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +GridHandle +voxelsToGrid(std::vector> vec, const BufferT &buffer, cudaStream_t stream) +{ + std::vector> handles; + for (auto &p : vec) handles.push_back(voxelsToGrid(std::get<0>(p), std::get<1>(p), std::get<2>(p), buffer, stream)); + return mergeDeviceGrids(handles, stream); +} + +}// namespace tools::cuda ====================================================================================================================================== + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +[[deprecated("Use cuda::pointsToGrid instead")]] +GridHandle +cudaPointsToGrid(const PtrT dWorldPoints, + int pointCount, + double voxelSize = 1.0, + PointType type = PointType::Default, + const BufferT &buffer = BufferT(), + cudaStream_t stream = 0) +{ + return tools::cuda::pointsToGrid(dWorldPoints, pointCount, voxelSize, type, buffer, stream); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +[[deprecated("Use cuda::pointsToGrid instead")]] +GridHandle +cudaPointsToGrid(std::vector> pointSet, + const BufferT &buffer = BufferT(), + cudaStream_t stream = 0) +{ + return tools::cuda::pointsToGrid(pointSet, buffer, stream); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +[[deprecated("Use cuda::voxelsToGrid instead")]] +GridHandle +cudaVoxelsToGrid(const PtrT dGridVoxels, + size_t voxelCount, + double voxelSize = 1.0, + const BufferT &buffer = BufferT(), + cudaStream_t stream = 0) +{ + return tools::cuda::voxelsToGrid(dGridVoxels, voxelCount, voxelSize, buffer, stream); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +[[deprecated("Use cuda::voxelsToGrid instead")]] +GridHandle +cudaVoxelsToGrid(std::vector> pointSet, + const BufferT &buffer = BufferT(), + cudaStream_t stream = 0) +{ + return tools::cuda::voxelsToGrid(pointSet, buffer, stream); +} + +}// namespace nanovdb + +#endif // NVIDIA_TOOLS_CUDA_POINTSTOGRID_CUH_HAS_BEEN_INCLUDED 
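Note: a minimal usage sketch of the device-side factory functions defined above, combined with the signed flood-fill tool added in the next file. Illustrative only: the explicit <float> build-type argument, the nanovdb::cuda::DeviceBuffer buffer type (and its header path), and GridHandle::deviceGrid<float>() are assumptions on my part, since the template parameter lists are not fully legible in this patch.

    #include <nanovdb/NanoVDB.h>
    #include <nanovdb/cuda/DeviceBuffer.h>               // assumed default device buffer type
    #include <nanovdb/tools/cuda/PointsToGrid.cuh>
    #include <nanovdb/tools/cuda/SignedFloodFill.cuh>

    // Build a float grid from a device array of active-voxel coordinates,
    // then run the signed flood fill over its inactive values.
    // d_ijk must already reside in device memory.
    void buildAndFloodFill(const nanovdb::Coord* d_ijk, size_t voxelCount, cudaStream_t stream)
    {
        using BufferT = nanovdb::cuda::DeviceBuffer;
        auto handle = nanovdb::tools::cuda::voxelsToGrid<float>(
            d_ijk, voxelCount, /*voxelSize=*/1.0, BufferT(), stream);
        nanovdb::NanoGrid<float>* d_grid = handle.deviceGrid<float>(); // grid lives on the device
        nanovdb::tools::cuda::signedFloodFill(d_grid, /*verbose=*/false, stream);
    }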
diff --git a/external/nanovdb/tools/cuda/SignedFloodFill.cuh b/external/nanovdb/tools/cuda/SignedFloodFill.cuh new file mode 100644 index 00000000..f214247a --- /dev/null +++ b/external/nanovdb/tools/cuda/SignedFloodFill.cuh @@ -0,0 +1,213 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +/*! + \file nanovdb/tools/cuda/SignedFloodFill.cuh + + \author Ken Museth + + \date May 3, 2023 + + \brief Performs signed flood-fill operation on the hierarchical tree structure on the device + + \todo This tools needs to handle the (extremely) rare case when root node + needs to be modified during the signed flood fill operation. This happens + when the root-table needs to be expanded with tile values (of size 4096^3) + that are completely inside the implicit surface. + + \warning The header file contains cuda device code so be sure + to only include it in .cu files (or other .cuh files) +*/ + +#ifndef NANOVDB_TOOLS_CUDA_SIGNEDFLOODFILL_CUH_HAS_BEEN_INCLUDED +#define NANOVDB_TOOLS_CUDA_SIGNEDFLOODFILL_CUH_HAS_BEEN_INCLUDED + +#include +#include +#include +#include +#include + +namespace nanovdb { + +namespace tools::cuda { + +/// @brief Performs signed flood-fill operation on the hierarchical tree structure on the device +/// @tparam BuildT Build type of the grid to be flood-filled +/// @param d_grid Non-const device pointer to the grid that will be flood-filled +/// @param verbose If true timing information will be printed to the terminal +/// @param stream optional cuda stream +template +typename util::enable_if::is_float, void>::type +signedFloodFill(NanoGrid *d_grid, bool verbose = false, cudaStream_t stream = 0); + +namespace {// anonymous namespace + +template +class SignedFloodFill +{ +public: + SignedFloodFill(bool verbose = false, cudaStream_t stream = 0) + : mStream(stream), mVerbose(verbose) {} + + /// @brief Toggle on and off verbose mode + /// @param on if true verbose is turned on + void setVerbose(bool on = true) {mVerbose = on;} + + void operator()(NanoGrid *d_grid); + +private: + cudaStream_t mStream{0}; + util::cuda::Timer mTimer; + bool mVerbose{false}; + +};// SignedFloodFill + +//================================================================================================ + +template +__global__ void processRootKernel(NanoTree *tree) +{ + // auto &root = tree->root(); + /* + using ChildT = typename RootT::ChildNodeType; + // Insert the child nodes into a map sorted according to their origin + std::map nodeKeys; + typename RootT::ChildOnIter it = root.beginChildOn(); + for (; it; ++it) nodeKeys.insert(std::pair(it.getCoord(), &(*it))); + static const Index DIM = RootT::ChildNodeType::DIM; + + // We employ a simple z-scanline algorithm that inserts inactive tiles with + // the inside value if they are sandwiched between inside child nodes only! 
+ typename std::map::const_iterator b = nodeKeys.begin(), e = nodeKeys.end(); + if ( b == e ) return; + for (typename std::map::const_iterator a = b++; b != e; ++a, ++b) { + Coord d = b->first - a->first; // delta of neighboring coordinates + if (d[0]!=0 || d[1]!=0 || d[2]==Int32(DIM)) continue;// not same z-scanline or neighbors + const ValueT fill[] = { a->second->getLastValue(), b->second->getFirstValue() }; + if (!(fill[0] < 0) || !(fill[1] < 0)) continue; // scanline isn't inside + Coord c = a->first + Coord(0u, 0u, DIM); + for (; c[2] != b->first[2]; c[2] += DIM) root.addTile(c, mInside, false); + } + */ + //root.setBackground(mOutside, /*updateChildNodes=*/false); +}// processRootKernel + +//================================================================================================ + +template +__global__ void processNodeKernel(NanoTree *tree, size_t count) +{ + using NodeT = typename NanoNode::type; + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid >= count) return; + const uint32_t nValue = tid & (NodeT::SIZE - 1u); + auto &node = *(tree->template getFirstNode() + (tid >> (3*NodeT::LOG2DIM))); + const auto &mask = node.childMask(); + if (mask.isOn(nValue)) return;// ignore if child + auto value = tree->background();// initiate to outside value + auto n = mask.template findNext(nValue); + if (n < NodeT::SIZE) { + if (node.getChild(n)->getFirstValue() < 0) value = -value; + } else if ((n = mask.template findPrev(nValue)) < NodeT::SIZE) { + if (node.getChild(n)->getLastValue() < 0) value = -value; + } else if (node.getValue(0)<0) { + value = -value; + } + node.setValue(nValue, value); +}// processNodeKernel + +//================================================================================================ + +template +__global__ void processLeafKernel(NanoTree *tree, size_t count) +{ + using LeafT = NanoLeaf; + const size_t tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid >= count) return; + const uint32_t nVoxel = tid & (LeafT::SIZE - 1u); + auto *leaf = tree->getFirstLeaf() + (tid >> (3*LeafT::LOG2DIM)); + const auto &mask = leaf->valueMask(); + if (mask.isOn(nVoxel)) return; + auto *buffer = leaf->mValues; + auto n = mask.template findNext(nVoxel); + if (n == LeafT::SIZE && (n = mask.template findPrev(nVoxel)) == LeafT::SIZE) n = 0u; + buffer[nVoxel] = buffer[n]<0 ? 
-tree->background() : tree->background(); +}// processLeafKernel + +//================================================================================================ + +template +__global__ void cpyNodeCountKernel(NanoGrid *d_grid, uint64_t *d_count) +{ + NANOVDB_ASSERT(d_grid->isSequential()); + for (int i=0; i<3; ++i) *d_count++ = d_grid->tree().nodeCount(i); + *d_count = d_grid->tree().root().tileCount(); +} + +}// anonymous namespace + +//================================================================================================ + +template +void SignedFloodFill::operator()(NanoGrid *d_grid) +{ + static_assert(BuildTraits::is_float, "cuda::SignedFloodFill only works on float grids"); + NANOVDB_ASSERT(d_grid); + uint64_t count[4], *d_count = nullptr; + cudaCheck(util::cuda::mallocAsync((void**)&d_count, 4*sizeof(uint64_t), mStream)); + cpyNodeCountKernel<<<1, 1, 0, mStream>>>(d_grid, d_count); + cudaCheckError(); + cudaCheck(cudaMemcpyAsync(&count, d_count, 4*sizeof(uint64_t), cudaMemcpyDeviceToHost, mStream)); + cudaCheck(util::cuda::freeAsync(d_count, mStream)); + + static const int threadsPerBlock = 128; + auto blocksPerGrid = [&](size_t count)->uint32_t{return (count + (threadsPerBlock - 1)) / threadsPerBlock;}; + auto *tree = reinterpret_cast*>(d_grid + 1); + + if (mVerbose) mTimer.start("\nProcess leaf nodes"); + processLeafKernel<<>>(tree, count[0]<<9); + cudaCheckError(); + + if (mVerbose) mTimer.restart("Process lower internal nodes"); + processNodeKernel<<>>(tree, count[1]<<12); + cudaCheckError(); + + if (mVerbose) mTimer.restart("Process upper internal nodes"); + processNodeKernel<<>>(tree, count[2]<<15); + cudaCheckError(); + + //if (mVerbose) mTimer.restart("Process root node"); + //processRootKernel<<<1, 1, 0, mStream>>>(tree); + if (mVerbose) mTimer.stop(); + cudaCheckError(); +}// SignedFloodFill::operator() + +//================================================================================================ + +template +typename util::enable_if::is_float, void>::type +signedFloodFill(NanoGrid *d_grid, bool verbose, cudaStream_t stream) +{ + SignedFloodFill sff(verbose, stream); + sff(d_grid); + auto *d_gridData = d_grid->data(); + Checksum cs = getChecksum(d_gridData, stream); + if (cs.isFull()) {// CheckMode::Partial checksum is unaffected + updateChecksum(d_gridData, CheckMode::Full, stream); + } +} + +}// namespace tools::cuda + +template +[[deprecated("Use nanovdb::tools::cuda::signedFloodFill instead.")]] +typename util::enable_if::is_float, void>::type +cudaSignedFloodFill(NanoGrid *d_grid, bool verbose = false, cudaStream_t stream = 0) +{ + return tools::cuda::signedFloodFill(d_grid, verbose, stream); +} + +}// namespace nanovdb + +#endif // NANOVDB_TOOLS_CUDA_SIGNEDFLOODFILL_CUH_HAS_BEEN_INCLUDED diff --git a/external/nanovdb/util/CpuTimer.h b/external/nanovdb/util/CpuTimer.h new file mode 100644 index 00000000..4c22f01d --- /dev/null +++ b/external/nanovdb/util/CpuTimer.h @@ -0,0 +1,6 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +#include // for NANOVDB_DEPRECATED_HEADER +#include +NANOVDB_DEPRECATED_HEADER("Include nanovdb/util/Timer.h instead.") diff --git a/external/nanovdb/util/CreateNanoGrid.h b/external/nanovdb/util/CreateNanoGrid.h new file mode 100644 index 00000000..60fa3fd5 --- /dev/null +++ b/external/nanovdb/util/CreateNanoGrid.h @@ -0,0 +1,6 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +#include // for NANOVDB_DEPRECATED_HEADER +#include 
+NANOVDB_DEPRECATED_HEADER("Include nanovdb/tools/CreateNanoGrid.h instead.") diff --git a/external/nanovdb/util/DitherLUT.h b/external/nanovdb/util/DitherLUT.h new file mode 100644 index 00000000..4d6ff166 --- /dev/null +++ b/external/nanovdb/util/DitherLUT.h @@ -0,0 +1,6 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +#include // for NANOVDB_DEPRECATED_HEADER +#include +NANOVDB_DEPRECATED_HEADER("Include nanovdb/math/DitherLUT.h instead.") diff --git a/external/nanovdb/util/ForEach.h b/external/nanovdb/util/ForEach.h new file mode 100644 index 00000000..d71769c5 --- /dev/null +++ b/external/nanovdb/util/ForEach.h @@ -0,0 +1,116 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +/*! + \file nanovdb/util/ForEach.h + + \author Ken Museth + + \date August 24, 2020 + + \brief A unified wrapper for tbb::parallel_for and a naive std::thread fallback +*/ + +#ifndef NANOVDB_UTIL_FOREACH_H_HAS_BEEN_INCLUDED +#define NANOVDB_UTIL_FOREACH_H_HAS_BEEN_INCLUDED + +#include // for Range1D + +#ifdef NANOVDB_USE_TBB +#include +#else +#include +#include +#include +#endif + +namespace nanovdb { + +namespace util { + +/// @brief simple wrapper for tbb::parallel_for with a naive std fallback +/// +/// @param range Range, CoordBBox, tbb::blocked_range, blocked_range2D, or blocked_range3D. +/// @param func functor with the signature [](const RangeT&){...}, +/// +/// @code +/// std::vector array(100); +/// auto func = [&array](auto &r){for (auto i=r.begin(); i!=r.end(); ++i) array[i]=i;}; +/// forEach(array, func); +/// @endcode +template +inline void forEach(RangeT range, const FuncT &func) +{ + if (range.empty()) return; +#ifdef NANOVDB_USE_TBB + tbb::parallel_for(range, func); +#else// naive and likely slow alternative based on std::thread + if (const size_t threadCount = std::thread::hardware_concurrency()>>1) { + std::vector rangePool{ range }; + while(rangePool.size() < threadCount) { + const size_t oldSize = rangePool.size(); + for (size_t i = 0; i < oldSize && rangePool.size() < threadCount; ++i) { + auto &r = rangePool[i]; + if (r.is_divisible()) rangePool.push_back(RangeT(r, Split())); + } + if (rangePool.size() == oldSize) break;// none of the ranges were divided so stop + } + std::vector threadPool; + for (auto &r : rangePool) threadPool.emplace_back(func, r);// launch threads + for (auto &t : threadPool) t.join();// synchronize threads + } else {//serial + func(range); + } +#endif +} + +/// @brief Simple wrapper for the function defined above +template +inline void forEach(size_t begin, size_t end, size_t grainSize, const FuncT& func) +{ + forEach(Range1D(begin, end, grainSize), func); +} + +/// @brief Simple wrapper for the function defined above, which works with std::containers +template class ContainerT, typename... T, typename FuncT> +inline void forEach(const ContainerT &c, const FuncT& func) +{ + forEach(Range1D(0, c.size(), 1), func); +} + +/// @brief Simple wrapper for the function defined above, which works with std::containers +template class ContainerT, typename... 
T, typename FuncT> +inline void forEach(const ContainerT &c, size_t grainSize, const FuncT& func) +{ + forEach(Range1D(0, c.size(), grainSize), func); +} + +}// namespace util + +/// @brief Simple wrapper for the function defined above +template +[[deprecated("Use nanovdb::util::forEach instead")]] +inline void forEach(size_t begin, size_t end, size_t grainSize, const FuncT& func) +{ + util::forEach(util::Range1D(begin, end, grainSize), func); +} + +/// @brief Simple wrapper for the function defined above, which works with std::containers +template class ContainerT, typename... T, typename FuncT> +[[deprecated("Use nanovdb::util::forEach instead")]] +inline void forEach(const ContainerT &c, const FuncT& func) +{ + util::forEach(util::Range1D(0, c.size(), 1), func); +} + +/// @brief Simple wrapper for the function defined above, which works with std::containers +template class ContainerT, typename... T, typename FuncT> +[[deprecated("Use nanovdb::util::forEach instead")]] +inline void forEach(const ContainerT &c, size_t grainSize, const FuncT& func) +{ + util::forEach(util::Range1D(0, c.size(), grainSize), func); +} + +}// namespace nanovdb + +#endif // NANOVDB_UTIL_FOREACH_H_HAS_BEEN_INCLUDED diff --git a/external/nanovdb/util/GridBuilder.h b/external/nanovdb/util/GridBuilder.h new file mode 100644 index 00000000..681da5ff --- /dev/null +++ b/external/nanovdb/util/GridBuilder.h @@ -0,0 +1,6 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +#include // for NANOVDB_DEPRECATED_HEADER +#include +NANOVDB_DEPRECATED_HEADER("Include nanovdb/tools/GridBuilder.h instead.") diff --git a/external/nanovdb/util/GridChecksum.h b/external/nanovdb/util/GridChecksum.h new file mode 100644 index 00000000..1c70c7b3 --- /dev/null +++ b/external/nanovdb/util/GridChecksum.h @@ -0,0 +1,6 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +#include // for NANOVDB_DEPRECATED_HEADER +#include +NANOVDB_DEPRECATED_HEADER("Include nanovdb/tools/GridChecksum.h instead.") diff --git a/external/nanovdb/util/GridStats.h b/external/nanovdb/util/GridStats.h new file mode 100644 index 00000000..61de3b0d --- /dev/null +++ b/external/nanovdb/util/GridStats.h @@ -0,0 +1,6 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +#include // for NANOVDB_DEPRECATED_HEADER +#include +NANOVDB_DEPRECATED_HEADER("Include nanovdb/tools/GridStats.h instead.") diff --git a/external/nanovdb/util/GridValidator.h b/external/nanovdb/util/GridValidator.h new file mode 100644 index 00000000..8dc1465c --- /dev/null +++ b/external/nanovdb/util/GridValidator.h @@ -0,0 +1,6 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +#include // for NANOVDB_DEPRECATED_HEADER +#include +NANOVDB_DEPRECATED_HEADER("Include nanovdb/tools/GridValidator.h instead.") diff --git a/external/nanovdb/util/HDDA.h b/external/nanovdb/util/HDDA.h new file mode 100644 index 00000000..9944833b --- /dev/null +++ b/external/nanovdb/util/HDDA.h @@ -0,0 +1,6 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +#include // for NANOVDB_DEPRECATED_HEADER +#include +NANOVDB_DEPRECATED_HEADER("Include nanovdb/math/HDDA.h instead.") diff --git a/external/nanovdb/util/HostBuffer.h b/external/nanovdb/util/HostBuffer.h new file mode 100644 index 00000000..a893d494 --- /dev/null +++ b/external/nanovdb/util/HostBuffer.h @@ -0,0 +1,6 @@ +// Copyright Contributors to the 
OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +#include // for NANOVDB_DEPRECATED_HEADER +#include +NANOVDB_DEPRECATED_HEADER("Include nanovdb/HostBuffer.h instead.") diff --git a/external/nanovdb/util/IO.h b/external/nanovdb/util/IO.h new file mode 100644 index 00000000..385d4251 --- /dev/null +++ b/external/nanovdb/util/IO.h @@ -0,0 +1,6 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +#include // for NANOVDB_DEPRECATED_HEADER +#include +NANOVDB_DEPRECATED_HEADER("Include nanovdb/io/IO.h instead.") diff --git a/external/nanovdb/util/Invoke.h b/external/nanovdb/util/Invoke.h new file mode 100644 index 00000000..677e033c --- /dev/null +++ b/external/nanovdb/util/Invoke.h @@ -0,0 +1,97 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +/*! + \file nanovdb/util/Invoke.h + + \author Ken Museth + + \date August 24, 2020 + + \brief A unified wrapper for tbb::parallel_invoke and a naive std::thread analog + + @code + template + int invoke(const Func0& f0, const Func1& f1, ..., const FuncN& fN); + @endcode +*/ + +#ifndef NANOVDB_UTIL_INVOKE_H_HAS_BEEN_INCLUDED +#define NANOVDB_UTIL_INVOKE_H_HAS_BEEN_INCLUDED + +#include // for nanovdb::CoordBBox + +#ifdef NANOVDB_USE_TBB +#include +#endif + +#include +#include +#include + +namespace nanovdb { + +namespace util { + +namespace { +#ifndef NANOVDB_USE_TBB +// Base case +template +void parallel_invoke(std::vector &threadPool, const Func &taskFunc) { + threadPool.emplace_back(taskFunc); +} + +// Iterative call +template +void parallel_invoke(std::vector &threadPool, const Func &taskFunc1, Rest... taskFuncN) { + threadPool.emplace_back(taskFunc1); + parallel_invoke(threadPool, taskFuncN...); +} + +// Base case +template +void serial_invoke(const Func &taskFunc) {taskFunc();} + +// Iterative call +template +void serial_invoke(const Func &taskFunc1, Rest... taskFuncN) { + taskFunc1(); + serial_invoke(taskFuncN...); +} +#endif +}// unnamed namespace + +/// @return 1 for serial, 2 for tbb multi-threading, and 3 for std multi-threading +template +int invoke(const Func &taskFunc1, Rest... taskFuncN) { +#ifdef NANOVDB_USE_TBB + tbb::parallel_invoke(taskFunc1, taskFuncN...); + return 2; +#else + const auto threadCount = std::thread::hardware_concurrency()>>1; + if (1 + sizeof...(Rest) <= threadCount) { + std::vector threadPool; + threadPool.emplace_back(taskFunc1); + parallel_invoke(threadPool, taskFuncN...); + for (auto &t : threadPool) t.join(); + return 3;// std multi-threading + } else { + taskFunc1(); + serial_invoke(taskFuncN...); + return 1;// serial + } +#endif + return -1;// should never happen +} + +}// namespace util + +template +[[deprecated("Use nanovdb::util::invoke instead")]] +int invoke(const Func &taskFunc1, Rest... 
taskFuncN) { + return util::invoke(taskFunc1, taskFuncN...); +} + +}// namespace nanovdb + +#endif // NANOVDB_UTIL_INVOKE_H_HAS_BEEN_INCLUDED diff --git a/external/nanovdb/util/NanoToOpenVDB.h b/external/nanovdb/util/NanoToOpenVDB.h new file mode 100644 index 00000000..a6c21682 --- /dev/null +++ b/external/nanovdb/util/NanoToOpenVDB.h @@ -0,0 +1,6 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +#include // for NANOVDB_DEPRECATED_HEADER +#include +NANOVDB_DEPRECATED_HEADER("Include nanovdb/tools/NanoToOpenVDB.h instead.") diff --git a/external/nanovdb/util/NodeManager.h b/external/nanovdb/util/NodeManager.h new file mode 100644 index 00000000..076a18eb --- /dev/null +++ b/external/nanovdb/util/NodeManager.h @@ -0,0 +1,6 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +#include // for NANOVDB_DEPRECATED_HEADER +#include +NANOVDB_DEPRECATED_HEADER("Include nanovdb/NodeManager.h instead.") diff --git a/external/nanovdb/util/OpenToNanoVDB.h b/external/nanovdb/util/OpenToNanoVDB.h new file mode 100644 index 00000000..c7dcce33 --- /dev/null +++ b/external/nanovdb/util/OpenToNanoVDB.h @@ -0,0 +1,6 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +#include // for NANOVDB_DEPRECATED_HEADER +#include +NANOVDB_DEPRECATED_HEADER("Use nanovdb/tools/CreateNanoGrid.h instead.") \ No newline at end of file diff --git a/external/nanovdb/util/PrefixSum.h b/external/nanovdb/util/PrefixSum.h new file mode 100644 index 00000000..11001087 --- /dev/null +++ b/external/nanovdb/util/PrefixSum.h @@ -0,0 +1,90 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +/*! + \file nanovdb/util/PrefixSum.h + + \author Ken Museth + + \date March 12, 2023 + + \brief Multi-threaded implementations of inclusive prefix sum + + \note An exclusive prefix sum is simply an array starting with zero + followed by the elements in the inclusive prefix sum, minus its + last entry which is the sum of all the input elements. 
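+
+  For example, for the input {1, 2, 3, 4} the inclusive prefix sum is
+  {1, 3, 6, 10} and the corresponding exclusive prefix sum is {0, 1, 3, 6}.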
+*/ + +#ifndef NANOVDB_UTIL_PREFIX_SUM_H_HAS_BEEN_INCLUDED +#define NANOVDB_UTIL_PREFIX_SUM_H_HAS_BEEN_INCLUDED + +#include // for Range1D +#include +#include // for std::plus + +#ifdef NANOVDB_USE_TBB +#include +#endif + +namespace nanovdb { + +namespace util { + +/// @brief Computes inclusive prefix sum of a vector +/// @tparam T Type of the elements in the input/out vector +/// @tparam OpT Type of operation performed on each element (defaults to sum) +/// @param vec input and output vector +/// @param threaded if true multi-threading is used +/// @note Inclusive prefix sum: for (i=1; i> +T prefixSum(std::vector &vec, bool threaded = true, OpT op = OpT()); + +/// @brief An inclusive scan includes in[i] when computing out[i] +/// @note Inclusive prefix operation: for (i=1; i +void inclusiveScan(T *array, size_t size, const T &identity, bool threaded, Op op) +{ +#ifndef NANOVDB_USE_TBB + threaded = false; + (void)identity;// avoids compiler warning +#endif + + if (threaded) { +#ifdef NANOVDB_USE_TBB + using RangeT = tbb::blocked_range; + tbb::parallel_scan(RangeT(0, size), identity, + [&](const RangeT &r, T sum, bool is_final_scan)->T { + T tmp = sum; + for (size_t i = r.begin(); i < r.end(); ++i) { + tmp = op(tmp, array[i]); + if (is_final_scan) array[i] = tmp; + } + return tmp; + },[&](const T &a, const T &b) {return op(a, b);} + ); +#endif + } else { // serial inclusive prefix operation + for (size_t i=1; i +T prefixSum(std::vector &vec, bool threaded, OpT op) +{ + inclusiveScan(vec.data(), vec.size(), T(0), threaded, op); + return vec.back();// sum of all input elements +}// prefixSum + +}// namespace util + +template> +[[deprecated("Use nanovdb::util::prefixSum instead")]] +T prefixSum(std::vector &vec, bool threaded = true, OpT op = OpT()) +{ + return util::prefixSum(vec, threaded, op); +}// prefixSum + +}// namespace nanovdb + +#endif // NANOVDB_UTIL_PREFIX_SUM_H_HAS_BEEN_INCLUDED diff --git a/external/nanovdb/util/Primitives.h b/external/nanovdb/util/Primitives.h new file mode 100644 index 00000000..79cfe615 --- /dev/null +++ b/external/nanovdb/util/Primitives.h @@ -0,0 +1,6 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +#include // for NANOVDB_DEPRECATED_HEADER +#include +NANOVDB_DEPRECATED_HEADER("Include nanovdb/tools/CreatePrimitives.h instead.") diff --git a/external/nanovdb/util/Range.h b/external/nanovdb/util/Range.h new file mode 100644 index 00000000..e9ff766e --- /dev/null +++ b/external/nanovdb/util/Range.h @@ -0,0 +1,158 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +/*! 
+ \file nanovdb/util/Range.h + + \author Ken Museth + + \date August 28, 2020 + + \brief Custom Range class that is compatible with the tbb::blocked_range classes +*/ + +#ifndef NANOVDB_UTIL_RANGE_H_HAS_BEEN_INCLUDED +#define NANOVDB_UTIL_RANGE_H_HAS_BEEN_INCLUDED + +#include +#include // for size_t + +#ifdef NANOVDB_USE_TBB +#include // for tbb::split +#endif + +namespace nanovdb { + +namespace util { + +class Split {};// Dummy class used by split constructors + +template +class Range; + +using Range1D = Range<1, size_t>; +using Range2D = Range<2, size_t>; +using Range3D = Range<3, size_t>; + +// template specialization for Rank = 1 +template +class Range<1, T> +{ + T mBegin, mEnd; + size_t mGrainsize; + template + friend class Range; +public: + using const_iterator = T; + using size_type = size_t; + Range(const Range&) = default; + Range(T begin, T end, size_type grainsize = size_type(1)) + : mBegin(begin), mEnd(end), mGrainsize(grainsize) + { + assert(grainsize > size_type(0)); + } + /// @brief Split constructor: r[a,b[ -> r[a,b/2[ & this[b/2,b[ + Range(Range &r, Split) : mBegin(r.mBegin), mEnd(r.mEnd), mGrainsize(r.mGrainsize) { + assert(r.is_divisible()); + r.mEnd = mBegin = this->middle(); + } +#ifdef NANOVDB_USE_TBB + Range(Range &r, tbb::split) : Range(r, Split()) {} +#endif + bool operator==(const Range& rhs) const { return mBegin == rhs.mBegin && mEnd == rhs.mEnd && mGrainsize == rhs.mGrainsize; } + T middle() const {return mBegin + (mEnd - mBegin) / T(2);} + size_type size() const { assert(!this->empty()); return size_type(mEnd - mBegin); } + bool empty() const { return !(mBegin < mEnd); } + size_type grainsize() const {return mGrainsize;} + bool is_divisible() const {return mGrainsize < this->size();} + const_iterator begin() const { return mBegin; } + const_iterator end() const { return mEnd; } +};// Range<1, T> + +// template specialization for Rank = 2 +template +class Range<2, T> +{ + Range<1, T> mRange[2]; +public: + using size_type = typename Range<1, T>::size_type; + Range(const Range<1, T> &rangeRow, const Range<1, T> &rangeCol) : mRange{ rangeRow, rangeCol } {} + Range(T beginRow, T endRow, size_type grainsizeRow, T beginCol, T endCol, size_type grainsizeCol) + : Range( Range<1,T>(beginRow, endRow, grainsizeRow), Range<1,T>(beginCol, endCol, grainsizeCol) ) + { + } + Range(T beginRow, T endRow, T beginCol, T endCol) : Range(Range<1,T>(beginRow, endRow), Range<1,T>(beginCol, endCol) ) + { + } + Range(Range &r, Split) : Range(r.mRange[0], r.mRange[1]) { + assert( r.is_divisible() );// at least one of the two dimensions must be divisible! 
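+        // bisect the dimension with the larger size-to-grainsize ratio:
+        // the column range if size0/grain0 < size1/grain1, otherwise the row range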
+ if( mRange[0].size()*double(mRange[1].grainsize()) < mRange[1].size()*double(mRange[0].grainsize()) ) { + r.mRange[1].mEnd = mRange[1].mBegin = mRange[1].middle(); + } else { + r.mRange[0].mEnd = mRange[0].mBegin = mRange[0].middle(); + } + } +#ifdef NANOVDB_USE_TBB + Range(Range &r, tbb::split) : Range(r, Split()) {} +#endif + bool operator==(const Range& rhs) const {return mRange[0] == rhs[0] && mRange[1] == rhs[1]; } + bool empty() const { return mRange[0].empty() || mRange[1].empty(); } + bool is_divisible() const {return mRange[0].is_divisible() || mRange[1].is_divisible();} + const Range<1, T>& operator[](int i) const { assert(i==0 || i==1); return mRange[i]; } +};// Range<2, T> + +// template specialization for Rank = 3 +template +class Range<3, T> +{ + Range<1, T> mRange[3]; +public: + using size_type = typename Range<1, T>::size_type; + Range(const Range<1, T> &rangeX, const Range<1, T> &rangeY, const Range<1, T> &rangeZ) : mRange{ rangeX, rangeY, rangeZ } {} + Range(T beginX, T endX, size_type grainsizeX, + T beginY, T endY, size_type grainsizeY, + T beginZ, T endZ, size_type grainsizeZ) + : Range( Range<1,T>(beginX, endX, grainsizeX), + Range<1,T>(beginY, endY, grainsizeY), + Range<1,T>(beginZ, endZ, grainsizeZ) ) + { + } + Range(T beginX, T endX, T beginY, T endY, T beginZ, T endZ) + : Range( Range<1,T>(beginX, endX), Range<1,T>(beginY, endY), Range<1,T>(beginZ, endZ) ) + { + } + Range(Range &r, Split) : Range(r.mRange[0], r.mRange[1], r.mRange[2]) + { + assert( r.is_divisible() );// at least one of the three dimensions must be divisible! + if ( mRange[2].size()*double(mRange[0].grainsize()) < mRange[0].size()*double(mRange[2].grainsize()) ) { + if ( mRange[0].size()*double(mRange[1].grainsize()) < mRange[1].size()*double(mRange[0].grainsize()) ) { + r.mRange[1].mEnd = mRange[1].mBegin = mRange[1].middle(); + } else { + r.mRange[0].mEnd = mRange[0].mBegin = mRange[0].middle(); + } + } else { + if ( mRange[2].size()*double(mRange[1].grainsize()) < mRange[1].size()*double(mRange[2].grainsize()) ) { + r.mRange[1].mEnd = mRange[1].mBegin = mRange[1].middle(); + } else { + r.mRange[2].mEnd = mRange[2].mBegin = mRange[2].middle(); + } + } + } +#ifdef NANOVDB_USE_TBB + Range(Range &r, tbb::split) : Range(r, Split()) {} +#endif + bool operator==(const Range& rhs) const {return mRange[0] == rhs[0] && mRange[1] == rhs[1] && mRange[2] == rhs[2]; } + bool empty() const { return mRange[0].empty() || mRange[1].empty() || mRange[2].empty(); } + bool is_divisible() const {return mRange[0].is_divisible() || mRange[1].is_divisible() || mRange[2].is_divisible();} + const Range<1, T>& operator[](int i) const { assert(i==0 || i==1 || i==2); return mRange[i]; } +};// Range<3, T> + +}// namespace util + +using Range1D [[deprecated("Use nanovdb::util::Range1D instead")]] = util::Range<1, size_t>; +using Range2D [[deprecated("Use nanovdb::util::Range2D instead")]] = util::Range<2, size_t>; +using Range3D [[deprecated("Use nanovdb::util::Range3D instead")]] = util::Range<3, size_t>; + +}// namespace nanovdb + +#endif // NANOVDB_UTIL_RANGE_H_HAS_BEEN_INCLUDED diff --git a/external/nanovdb/util/Ray.h b/external/nanovdb/util/Ray.h new file mode 100644 index 00000000..90384909 --- /dev/null +++ b/external/nanovdb/util/Ray.h @@ -0,0 +1,6 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +#include // for NANOVDB_DEPRECATED_HEADER +#include +NANOVDB_DEPRECATED_HEADER("Include nanovdb/math/Ray.h instead.") diff --git a/external/nanovdb/util/Reduce.h 
b/external/nanovdb/util/Reduce.h new file mode 100644 index 00000000..f171b252 --- /dev/null +++ b/external/nanovdb/util/Reduce.h @@ -0,0 +1,133 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +/*! + \file nanovdb/util/Reduce.h + + \author Ken Museth + + \date March 4, 2021 + + \brief A unified wrapper for tbb::parallel_reduce and a naive std::future analog +*/ + +#ifndef NANOVDB_UTIL_REDUCE_H_HAS_BEEN_INCLUDED +#define NANOVDB_UTIL_REDUCE_H_HAS_BEEN_INCLUDED + +#include // for util::Range1D + +#ifdef NANOVDB_USE_TBB +#include +#else +#include +#include +#include +#endif + +namespace nanovdb { + +namespace util { + +/// @return reduction +/// +/// @param range RangeT can be Range, CoordBBox, tbb::blocked_range, blocked_range2D, or blocked_range3D. +/// @param identity initial value +/// @param func functor with signature T FuncT::operator()(const RangeT& range, const T& a) const +/// @param join functor with the signature T JoinT::operator()(const T& a, const T& b) const +/// @code +/// std::vector array(100, 1); +/// auto func = [&array](auto &r, int a){for (auto i=r.begin(); i!=r.end(); ++i) a+=array[i]; return a;}; +/// int sum = reduce(array, 0, func, [](int a, int b){return a + b;}); +/// @endcode +template +inline T reduce(RangeT range, const T& identity, const FuncT &func, const JoinT &join) +{ + if (range.empty()) return identity; +#ifdef NANOVDB_USE_TBB + return tbb::parallel_reduce(range, identity, func, join); +#else// naive and likely slow alternative based on std::future + if (const size_t threadCount = std::thread::hardware_concurrency()>>1) { + std::vector rangePool{ range }; + while(rangePool.size() < threadCount) { + const size_t oldSize = rangePool.size(); + for (size_t i = 0; i < oldSize && rangePool.size() < threadCount; ++i) { + auto &r = rangePool[i]; + if (r.is_divisible()) rangePool.push_back(RangeT(r, Split())); + } + if (rangePool.size() == oldSize) break;// none of the ranges were divided so stop + } + std::vector< std::future > futurePool; + for (auto &r : rangePool) { + auto task = std::async(std::launch::async, [&](){return func(r, identity);}); + futurePool.push_back( std::move(task) );// launch tasks + } + T result = identity; + for (auto &f : futurePool) { + result = join(result, f.get());// join results + } + return result; + } else {// serial + return static_cast(func(range, identity)); + } +#endif + return identity;// should never happen +} + +/// @brief Simple wrapper to the function defined above +template +inline T reduce(size_t begin, size_t end, size_t grainSize, const T& identity, const FuncT& func, const JoinT& join) +{ + Range1D range(begin, end, grainSize); + return reduce( range, identity, func, join ); +} + +/// @brief Simple wrapper that works with std::containers +template class ContainerT, typename... ArgT, typename T, typename FuncT, typename JoinT > +inline T reduce(const ContainerT &c, const T& identity, const FuncT& func, const JoinT& join) +{ + Range1D range(0, c.size(), 1); + return reduce( range, identity, func, join ); + +} + +/// @brief Simple wrapper that works with std::containers +template class ContainerT, typename... 
ArgT, typename T, typename FuncT, typename JoinT > +inline T reduce(const ContainerT &c, size_t grainSize, const T& identity, const FuncT& func, const JoinT& join) +{ + Range1D range(0, c.size(), grainSize); + return reduce( range, identity, func, join ); +} + +}// namespace util + +/// @brief Simple wrapper to the function defined above +template +[[deprecated("Use nanovdb::util::reduce instead")]] +inline T reduce(size_t begin, size_t end, size_t grainSize, const T& identity, const FuncT& func, const JoinT& join) +{ + util::Range1D range(begin, end, grainSize); + return util::reduce( range, identity, func, join ); +} + +/// @brief Simple wrapper that works with std::containers +template class ContainerT, typename... ArgT, typename T, typename FuncT, typename JoinT > +[[deprecated("Use nanovdb::util::reduce instead")]] +inline T reduce(const ContainerT &c, const T& identity, const FuncT& func, const JoinT& join) +{ + util::Range1D range(0, c.size(), 1); + return util::reduce( range, identity, func, join ); + +} + +/// @brief Simple wrapper that works with std::containers +template class ContainerT, typename... ArgT, typename T, typename FuncT, typename JoinT > +[[deprecated("Use nanovdb::util::reduce instead")]] +T reduce(const ContainerT &c, size_t grainSize, const T& identity, const FuncT& func, const JoinT& join) +{ + util::Range1D range(0, c.size(), grainSize); + return util::reduce( range, identity, func, join ); +} + +}// namespace nanovdb + +#endif // NANOVDB_UTIL_REDUCE_H_HAS_BEEN_INCLUDED diff --git a/external/nanovdb/util/SampleFromVoxels.h b/external/nanovdb/util/SampleFromVoxels.h new file mode 100644 index 00000000..02802444 --- /dev/null +++ b/external/nanovdb/util/SampleFromVoxels.h @@ -0,0 +1,6 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +#include // for NANOVDB_DEPRECATED_HEADER +#include +NANOVDB_DEPRECATED_HEADER("Include nanovdb/math/SampleFromVoxels.h instead.") diff --git a/external/nanovdb/util/Stencils.h b/external/nanovdb/util/Stencils.h new file mode 100644 index 00000000..2de91c52 --- /dev/null +++ b/external/nanovdb/util/Stencils.h @@ -0,0 +1,6 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +#include // for NANOVDB_DEPRECATED_HEADER +#include +NANOVDB_DEPRECATED_HEADER("Include nanovdb/math/Stencils.h instead.") diff --git a/external/nanovdb/util/Timer.h b/external/nanovdb/util/Timer.h new file mode 100644 index 00000000..b2d8dc9e --- /dev/null +++ b/external/nanovdb/util/Timer.h @@ -0,0 +1,87 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +/// @file nanovdb/util/Timer.h +/// +/// @author Ken Museth +/// +/// @brief A simple timing class (in case openvdb::util::CpuTimer is unavailable) + +#ifndef NANOVDB_UTIL_TIMER_H_HAS_BEEN_INCLUDED +#define NANOVDB_UTIL_TIMER_H_HAS_BEEN_INCLUDED + +#include +#include + +namespace nanovdb { + +namespace util { + +class Timer +{ + std::chrono::high_resolution_clock::time_point mStart; +public: + /// @brief Default constructor + Timer() {} + + /// @brief Constructor that starts the timer + /// @param msg string message to be printed when timer is started + /// @param os output stream for the message above + Timer(const std::string &msg, std::ostream& os = std::cerr) {this->start(msg, os);} + + /// @brief Start the timer + /// @param msg string message to be printed when timer is started + /// @param os output stream for the message above + void start(const std::string &msg, 
std::ostream& os = std::cerr)
+    {
+        os << msg << " ... " << std::flush;
+        mStart = std::chrono::high_resolution_clock::now();
+    }
+
+    /// @brief elapsed time (since start) in milliseconds
+    template <typename AccuracyT = std::chrono::milliseconds>
+    auto elapsed()
+    {
+        auto end = std::chrono::high_resolution_clock::now();
+        return std::chrono::duration_cast<AccuracyT>(end - mStart).count();
+    }
+
+    /// @brief stop the timer
+    /// @tparam AccuracyT Template parameter defining the accuracy of the reported times
+    /// @param os output stream for the message above
+    template <typename AccuracyT = std::chrono::milliseconds>
+    void stop(std::ostream& os = std::cerr)
+    {
+        auto end = std::chrono::high_resolution_clock::now();
+        auto diff = std::chrono::duration_cast<AccuracyT>(end - mStart).count();
+        os << "completed in " << diff;
+        if (std::is_same<AccuracyT, std::chrono::microseconds>::value) {// resolved at compile-time
+            os << " microseconds" << std::endl;
+        } else if (std::is_same<AccuracyT, std::chrono::milliseconds>::value) {
+            os << " milliseconds" << std::endl;
+        } else if (std::is_same<AccuracyT, std::chrono::seconds>::value) {
+            os << " seconds" << std::endl;
+        } else {
+            os << " unknown time unit" << std::endl;
+        }
+    }
+
+    /// @brief stop and start the timer
+    /// @tparam AccuracyT Template parameter defining the accuracy of the reported times
+    /// @param msg string message to be printed when timer is started
+    /// @param os output stream for the message above
+    template <typename AccuracyT = std::chrono::milliseconds>
+    void restart(const std::string &msg, std::ostream& os = std::cerr)
+    {
+        this->stop();
+        this->start(msg, os);
+    }
+};// Timer
+
+}// namespace util
+
+using CpuTimer [[deprecated("Use nanovdb::util::Timer instead")]] = util::Timer;
+
+} // namespace nanovdb
+
+#endif // NANOVDB_UTIL_TIMER_H_HAS_BEEN_INCLUDED
diff --git a/external/nanovdb/util/Util.h b/external/nanovdb/util/Util.h
new file mode 100644
index 00000000..7dcdfbdb
--- /dev/null
+++ b/external/nanovdb/util/Util.h
@@ -0,0 +1,657 @@
+// Copyright Contributors to the OpenVDB Project
+// SPDX-License-Identifier: Apache-2.0
+
+/*!
+ \file nanovdb/util/Util.h + + \author Ken Museth + + \date January 8, 2020 + + \brief Utility functions +*/ + +#ifndef NANOVDB_UTIL_UTIL_H_HAS_BEEN_INCLUDED +#define NANOVDB_UTIL_UTIL_H_HAS_BEEN_INCLUDED + +#ifdef __CUDACC_RTC__ + +typedef signed char int8_t; +typedef short int16_t; +typedef int int32_t; +typedef long long int64_t; +typedef unsigned char uint8_t; +typedef unsigned int uint32_t; +typedef unsigned short uint16_t; +typedef unsigned long long uint64_t; + +#define NANOVDB_ASSERT(x) + +#ifndef UINT64_C +#define UINT64_C(x) (x ## ULL) +#endif + +#else // !__CUDACC_RTC__ + +#include // for abs in clang7 +#include // for types like int32_t etc +#include // for size_t type +#include // for assert +#include // for stderr and snprintf +#include // for sqrt and fma +#include // for numeric_limits +#include // for std::move +#ifdef NANOVDB_USE_IOSTREAMS +#include // for read/writeUncompressedGrids +#endif// ifdef NANOVDB_USE_IOSTREAMS + +// All asserts can be disabled here, even for debug builds +#if 1 +#define NANOVDB_ASSERT(x) assert(x) +#else +#define NANOVDB_ASSERT(x) +#endif + +#if defined(NANOVDB_USE_INTRINSICS) && defined(_MSC_VER) +#include +#pragma intrinsic(_BitScanReverse) +#pragma intrinsic(_BitScanForward) +#pragma intrinsic(_BitScanReverse64) +#pragma intrinsic(_BitScanForward64) +#endif + +#endif // __CUDACC_RTC__ + +#if defined(__CUDACC__) || defined(__HIP__) +// Only define __hostdev__ qualifier when using NVIDIA CUDA or HIP compilers +#ifndef __hostdev__ +#define __hostdev__ __host__ __device__ // Runs on the CPU and GPU, called from the CPU or the GPU +#endif +#else +// Dummy definitions of macros only defined by CUDA and HIP compilers +#ifndef __hostdev__ +#define __hostdev__ // Runs on the CPU and GPU, called from the CPU or the GPU +#endif +#ifndef __global__ +#define __global__ // Runs on the GPU, called from the CPU or the GPU +#endif +#ifndef __device__ +#define __device__ // Runs on the GPU, called from the GPU +#endif +#ifndef __host__ +#define __host__ // Runs on the CPU, called from the CPU +#endif + +#endif // if defined(__CUDACC__) || defined(__HIP__) + +// The following macro will suppress annoying warnings when nvcc +// compiles functions that call (host) intrinsics (which is perfectly valid) +#if defined(_MSC_VER) && defined(__CUDACC__) +#define NANOVDB_HOSTDEV_DISABLE_WARNING __pragma("hd_warning_disable") +#elif defined(__GNUC__) && defined(__CUDACC__) +#define NANOVDB_HOSTDEV_DISABLE_WARNING _Pragma("hd_warning_disable") +#else +#define NANOVDB_HOSTDEV_DISABLE_WARNING +#endif + +// Define compiler warnings that work with all compilers +//#if defined(_MSC_VER) +//#define NANO_WARNING(msg) _pragma("message" #msg) +//#else +//#define NANO_WARNING(msg) _Pragma("message" #msg) +//#endif + +//============================================== +/// @brief Defines macros that issues warnings for deprecated header files +/// @details Example: +/// @code +/// #include // for NANOVDB_DEPRECATED_HEADER +/// #include +/// NANOVDB_DEPRECATED_HEADER("This header file is deprecated, please use instead") +/// @endcode +#ifdef __GNUC__ +#define NANOVDB_PRAGMA(X) _Pragma(#X) +#define NANOVDB_DEPRECATED_HEADER(MSG) NANOVDB_PRAGMA(GCC warning MSG) +#elif defined(_MSC_VER) +#define NANOVDB_STRINGIZE_(MSG) #MSG +#define NANOVDB_STRINGIZE(MSG) NANOVDB_STRINGIZE_(MSG) +#define NANOVDB_DEPRECATED_HEADER(MSG) \ + __pragma(message(__FILE__ "(" NANOVDB_STRINGIZE(__LINE__) ") : Warning: " MSG)) +#endif + +// A portable implementation of offsetof - unfortunately it doesn't work with 
static_assert +#define NANOVDB_OFFSETOF(CLASS, MEMBER) ((int)(size_t)((char*)&((CLASS*)0)->MEMBER - (char*)0)) + +namespace nanovdb {// ================================================================= + +namespace util {// ==================================================================== + +/// @brief Minimal implementation of std::declval, which converts any type @c T to +//// a reference type, making it possible to use member functions in the operand +/// of the decltype specifier without the need to go through constructors. +/// @tparam T Template type to be converted to T&& +/// @return T&& +/// @warning Unlike std::declval, this version does not work when T = void! However, +/// NVRTC does not like std::declval, so we provide our own implementation. +template +T&& declval() noexcept; + +// --------------------------> string utility functions <------------------------------------ + +/// @brief tests if a c-string @c str is empty, that is its first value is '\0' +/// @param str c-string to be tested for null termination +/// @return true if str[0] = '\0' +__hostdev__ inline bool empty(const char* str) +{ + NANOVDB_ASSERT(str != nullptr); + return *str == '\0'; +}// util::empty + +/// @brief length of a c-sting, excluding '\0'. +/// @param str c-string +/// @return the number of characters that precede the terminating null character. +__hostdev__ inline size_t strlen(const char *str) +{ + NANOVDB_ASSERT(str != nullptr); + const char *s = str; + while(*s) ++s; ; + return (s - str); +}// util::strlen + +/// @brief Copy characters from @c src to @c dst. +/// @param dst pointer to the destination string. +/// @param src pointer to the null-terminated source string. +/// @return destination string @c dst. +/// @note Emulates the behaviour of std::strcpy, except this version also runs on the GPU. +__hostdev__ inline char* strcpy(char *dst, const char *src) +{ + NANOVDB_ASSERT(dst != nullptr && src != nullptr); + for (char *p = dst; (*p++ = *src) != '\0'; ++src); + return dst; +}// util::strcpy(char*, const char*) + +/// @brief Copies the first num characters of @c src to @c dst. +/// If the end of the source C string (which is signaled by a +/// null-character) is found before @c max characters have been +/// copied, @c dst is padded with zeros until a total of @c max +/// characters have been written to it. +/// @param dst destination string +/// @param src source string +/// @param max maximum number of character in destination string +/// @return destination string @c dst +/// @warning if strncpy(dst, src, max)[max-1]!='\0' then @c src has more +/// characters than @c max and the return string needs to be +/// manually null-terminated, i.e. strncpy(dst, src, max)[max-1]='\0' +__hostdev__ inline char* strncpy(char *dst, const char *src, size_t max) +{ + NANOVDB_ASSERT(dst != nullptr && src != nullptr); + size_t i = 0; + for (; i < max && src[i] != '\0'; ++i) dst[i] = src[i]; + for (; i < max; ++i) dst[i] = '\0'; + return dst; +}// util::strncpy(char *dst, const char *src, size_t max) + +/// @brief converts a number to a string using a specific base +/// @param dst destination string +/// @param num signed number to be concatenated after @c dst +/// @param bas base used when converting @c num to a string +/// @return destination string @c dst +/// @note Emulates the behaviour of itoa, except this verion also works on the GPU. 
+__hostdev__ inline char* strcpy(char* dst, int num, int bas = 10) +{ + NANOVDB_ASSERT(dst != nullptr && bas > 0); + int len = 0;// length of number once converted to a string + if (num == 0) dst[len++] = '0'; + for (int abs = num < 0 && bas == 10 ? -num : num; abs; abs /= bas) { + const int rem = abs % bas; + dst[len++] = rem > 9 ? rem - 10 + 'a' : rem + '0'; + } + if (num < 0) dst[len++] = '-';// append '-' if negative + for (char *a = dst, *b = a + len - 1; a < b; ++a, --b) {// reverse dst + dst[len] = *a;// use end of string as temp + *a = *b; + *b = dst[len]; + } + dst[len] = '\0';// explicitly terminate end of string + return dst; +}// util::strcpy(char*, int, int) + +/// @brief Appends a copy of the character string pointed to by @c src to +/// the end of the character string pointed to by @c dst on the device. +/// @param dst pointer to the null-terminated byte string to append to. +/// @param src pointer to the null-terminated byte string to copy from. +/// @return pointer to the character array being appended to. +/// @note Emulates the behaviour of std::strcat, except this version also runs on the GPU. +__hostdev__ inline char* strcat(char *dst, const char *src) +{ + NANOVDB_ASSERT(dst != nullptr && src != nullptr); + char *p = dst; + while (*p != '\0') ++p;// advance till end of dst + strcpy(p, src);// append src + return dst; +}// util::strcat(char*, const char*) + +/// @brief concatenates a number after a string using a specific base +/// @param dst null terminated destination string +/// @param num signed number to be concatenated after @c dst +/// @param bas base used when converting @c num to a string +/// @return destination string @c dst +__hostdev__ inline char* strcat(char* dst, int num, int bas = 10) +{ + NANOVDB_ASSERT(dst != nullptr); + char *p = dst; + while (*p != '\0') ++p; + strcpy(p, num, bas); + return dst; +}// util::strcat(char*, int, int) + +/// @brief Compares two null-terminated byte strings lexicographically. +/// @param lhs pointer to the null-terminated byte strings to compare +/// @param rhs pointer to the null-terminated byte strings to compare +/// @return Negative value if @c lhs appears before @c rhs in lexicographical order. +/// Zero if @c lhs and @c rhs compare equal. Positive value if @c lhs appears +/// after @c rhs in lexicographical order. +/// @note Emulates the behaviour of std::strcmp, except this version also runs on the GPU. +__hostdev__ inline int strcmp(const char *lhs, const char *rhs) +{ + while(*lhs != '\0' && (*lhs == *rhs)){ + lhs++; + rhs++; + } + return *(const unsigned char*)lhs - *(const unsigned char*)rhs;// zero if lhs == rhs +}// util::strcmp(const char*, const char*) + +/// @brief Test if two null-terminated byte strings are the same +/// @param lhs pointer to the null-terminated byte strings to compare +/// @param rhs pointer to the null-terminated byte strings to compare +/// @return true if the two c-strings are identical +__hostdev__ inline bool streq(const char *lhs, const char *rhs) +{ + return strcmp(lhs, rhs) == 0; +}// util::streq + +namespace impl {// ======================================================= +// Base-case implementation of Variadic Template function impl::sprint +__hostdev__ inline char* sprint(char *dst){return dst;} +// Variadic Template function impl::sprint +template +__hostdev__ inline char* sprint(char *dst, T var1, Types... 
var2) +{ + return impl::sprint(strcat(dst, var1), var2...); +} +}// namespace impl ========================================================= + +/// @brief prints a variable number of string and/or numbers to a destination string +template +__hostdev__ inline char* sprint(char *dst, T var1, Types... var2) +{ + return impl::sprint(strcpy(dst, var1), var2...); +}// util::sprint + +// --------------------------> memzero <------------------------------------ + +/// @brief Zero initialization of memory +/// @param dst pointer to destination +/// @param byteCount number of bytes to be initialized to zero +/// @return destination pointer @c dst +__hostdev__ inline static void* memzero(void *dst, size_t byteCount) +{ + NANOVDB_ASSERT(dst); + const size_t wordCount = byteCount >> 3; + if (wordCount << 3 == byteCount) { + for (auto *d = (uint64_t*)dst, *e = d + wordCount; d != e; ++d) *d = 0ULL; + } else { + for (auto *d = (char*)dst, *e = d + byteCount; d != e; ++d) *d = '\0'; + } + return dst; +}// util::memzero + +// --------------------------> util::is_same <------------------------------------ + +/// @brief C++11 implementation of std::is_same +/// @note When more than two arguments are provided value = T0==T1 || T0==T2 || ... +template +struct is_same +{ + static constexpr bool value = is_same::value || is_same::value; +}; + +template +struct is_same {static constexpr bool value = false;}; + +template +struct is_same {static constexpr bool value = true;}; + +// --------------------------> util::is_floating_point <------------------------------------ + +/// @brief C++11 implementation of std::is_floating_point +template +struct is_floating_point {static constexpr bool value = is_same::value;}; + +// --------------------------> util::enable_if <------------------------------------ + +/// @brief C++11 implementation of std::enable_if +template +struct enable_if {}; + +template +struct enable_if {using type = T;}; + +// --------------------------> util::disable_if <------------------------------------ + +template +struct disable_if {using type = T;}; + +template +struct disable_if {}; + +// --------------------------> util::is_const <------------------------------------ + +template +struct is_const {static constexpr bool value = false;}; + +template +struct is_const {static constexpr bool value = true;}; + +// --------------------------> util::is_pointer <------------------------------------ + +/// @brief Trait used to identify template parameter that are pointers +/// @tparam T Template parameter to be tested +template +struct is_pointer {static constexpr bool value = false;}; + +/// @brief Template specialization of pointers +/// @tparam T Template parameter to be tested +/// @note T can be both a non-const and const type +template +struct is_pointer {static constexpr bool value = true;}; + +// --------------------------> util::conditional <------------------------------------ + +/// @brief C++11 implementation of std::conditional +template +struct conditional { using type = TrueT; }; + +/// @brief Template specialization of conditional +/// @tparam FalseT Type used when boolean is false +/// @tparam TrueT Type used when boolean is true +template +struct conditional { using type = FalseT; }; + +// --------------------------> util::remove_const <------------------------------------ + +/// @brief Trait use to const from type. 
Default implementation is just a pass-through +/// @tparam T Type +/// @details remove_pointer::type = float +template +struct remove_const {using type = T;}; + +/// @brief Template specialization of trait class use to remove const qualifier type from a type +/// @tparam T Type of the const type +/// @details remove_pointer::type = float +template +struct remove_const {using type = T;}; + +// --------------------------> util::remove_reference <------------------------------------ + +/// @brief Trait use to remove reference, i.e. "&", qualifier from a type. Default implementation is just a pass-through +/// @tparam T Type +/// @details remove_pointer::type = float +template +struct remove_reference {using type = T;}; + +/// @brief Template specialization of trait class use to remove reference, i.e. "&", qualifier from a type +/// @tparam T Type of the reference +/// @details remove_pointer::type = float +template +struct remove_reference {using type = T;}; + +// --------------------------> util::remove_pointer <------------------------------------ + +/// @brief Trait use to remove pointer, i.e. "*", qualifier from a type. Default implementation is just a pass-through +/// @tparam T Type +/// @details remove_pointer::type = float +template +struct remove_pointer {using type = T;}; + +/// @brief Template specialization of trait class use to to remove pointer, i.e. "*", qualifier from a type +/// @tparam T Type of the pointer +/// @details remove_pointer::type = float +template +struct remove_pointer {using type = T;}; + +// --------------------------> util::match_const <------------------------------------ + +/// @brief Trait used to transfer the const-ness of a reference type to another type +/// @tparam T Type whose const-ness needs to match the reference type +/// @tparam ReferenceT Reference type that is not const +/// @details match_const::type = int +/// match_const::type = int +template +struct match_const {using type = typename remove_const::type;}; + +/// @brief Template specialization used to transfer the const-ness of a reference type to another type +/// @tparam T Type that will adopt the const-ness of the reference type +/// @tparam ReferenceT Reference type that is const +/// @details match_const::type = const int +/// match_const::type = const int +template +struct match_const {using type = const typename remove_const::type;}; + +// --------------------------> util::is_specialization <------------------------------------ + +/// @brief Metafunction used to determine if the first template +/// parameter is a specialization of the class template +/// given in the second template parameter. 
+/// +/// @details is_specialization, Vec3>::value == true; +/// is_specialization::value == true; +/// is_specialization, std::vector>::value == true; +template class TemplateType> +struct is_specialization {static const bool value = false;}; +template class TemplateType> +struct is_specialization, TemplateType> +{ + static const bool value = true; +};// util::is_specialization + +// --------------------------> util::PtrDiff <------------------------------------ + +/// @brief Compute the distance, in bytes, between two pointers, dist = p - q +/// @param p fist pointer, assumed to NOT be NULL +/// @param q second pointer, assumed to NOT be NULL +/// @return signed distance between pointer, p - q, addresses in units of bytes +__hostdev__ inline static int64_t PtrDiff(const void* p, const void* q) +{ + NANOVDB_ASSERT(p && q); + return reinterpret_cast(p) - reinterpret_cast(q); +}// util::PtrDiff + +// --------------------------> util::PtrAdd <------------------------------------ + +/// @brief Adds a byte offset to a non-const pointer to produce another non-const pointer +/// @tparam DstT Type of the return pointer (defaults to void) +/// @param p non-const input pointer, assumed to NOT be NULL +/// @param offset signed byte offset +/// @return a non-const pointer defined as the offset of an input pointer +template +__hostdev__ inline static DstT* PtrAdd(void* p, int64_t offset) +{ + NANOVDB_ASSERT(p); + return reinterpret_cast(reinterpret_cast(p) + offset); +}// util::PtrAdd + +/// @brief Adds a byte offset to a const pointer to produce another const pointer +/// @tparam DstT Type of the return pointer (defaults to void) +/// @param p const input pointer, assumed to NOT be NULL +/// @param offset signed byte offset +/// @return a const pointer defined as the offset of a const input pointer +template +__hostdev__ inline static const DstT* PtrAdd(const void* p, int64_t offset) +{ + NANOVDB_ASSERT(p); + return reinterpret_cast(reinterpret_cast(p) + offset); +}// util::PtrAdd + +// -------------------> findLowestOn <---------------------------- + +/// @brief Returns the index of the lowest, i.e. least significant, on bit in the specified 32 bit word +/// +/// @warning Assumes that at least one bit is set in the word, i.e. @a v != uint32_t(0)! +NANOVDB_HOSTDEV_DISABLE_WARNING +__hostdev__ inline uint32_t findLowestOn(uint32_t v) +{ + NANOVDB_ASSERT(v); +#if (defined(__CUDA_ARCH__) || defined(__HIP__)) && defined(NANOVDB_USE_INTRINSICS) + return __ffs(v) - 1; // one based indexing +#elif defined(_MSC_VER) && defined(NANOVDB_USE_INTRINSICS) + unsigned long index; + _BitScanForward(&index, v); + return static_cast(index); +#elif (defined(__GNUC__) || defined(__clang__)) && defined(NANOVDB_USE_INTRINSICS) + return static_cast(__builtin_ctzl(v)); +#else + //NANO_WARNING("Using software implementation for findLowestOn(uint32_t v)") + static const unsigned char DeBruijn[32] = { + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9}; +// disable unary minus on unsigned warning +#if defined(_MSC_VER) && !defined(__NVCC__) +#pragma warning(push) +#pragma warning(disable : 4146) +#endif + return DeBruijn[uint32_t((v & -v) * 0x077CB531U) >> 27]; +#if defined(_MSC_VER) && !defined(__NVCC__) +#pragma warning(pop) +#endif + +#endif +}// util::findLowestOn(uint32_t) + +/// @brief Returns the index of the lowest, i.e. least significant, on bit in the specified 64 bit word +/// +/// @warning Assumes that at least one bit is set in the word, i.e. 
@a v != uint32_t(0)! +NANOVDB_HOSTDEV_DISABLE_WARNING +__hostdev__ inline uint32_t findLowestOn(uint64_t v) +{ + NANOVDB_ASSERT(v); +#if (defined(__CUDA_ARCH__) || defined(__HIP__)) && defined(NANOVDB_USE_INTRINSICS) + return __ffsll(static_cast(v)) - 1; // one based indexing +#elif defined(_MSC_VER) && defined(NANOVDB_USE_INTRINSICS) + unsigned long index; + _BitScanForward64(&index, v); + return static_cast(index); +#elif (defined(__GNUC__) || defined(__clang__)) && defined(NANOVDB_USE_INTRINSICS) + return static_cast(__builtin_ctzll(v)); +#else + //NANO_WARNING("Using software implementation for util::findLowestOn(uint64_t)") + static const unsigned char DeBruijn[64] = { + 0, 1, 2, 53, 3, 7, 54, 27, 4, 38, 41, 8, 34, 55, 48, 28, + 62, 5, 39, 46, 44, 42, 22, 9, 24, 35, 59, 56, 49, 18, 29, 11, + 63, 52, 6, 26, 37, 40, 33, 47, 61, 45, 43, 21, 23, 58, 17, 10, + 51, 25, 36, 32, 60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12, + }; +// disable unary minus on unsigned warning +#if defined(_MSC_VER) && !defined(__NVCC__) +#pragma warning(push) +#pragma warning(disable : 4146) +#endif + return DeBruijn[uint64_t((v & -v) * UINT64_C(0x022FDD63CC95386D)) >> 58]; +#if defined(_MSC_VER) && !defined(__NVCC__) +#pragma warning(pop) +#endif + +#endif +}// util::findLowestOn(uint64_t) + +// -------------------> findHighestOn <---------------------------- + +/// @brief Returns the index of the highest, i.e. most significant, on bit in the specified 32 bit word +/// +/// @warning Assumes that at least one bit is set in the word, i.e. @a v != uint32_t(0)! +NANOVDB_HOSTDEV_DISABLE_WARNING +__hostdev__ inline uint32_t findHighestOn(uint32_t v) +{ + NANOVDB_ASSERT(v); +#if (defined(__CUDA_ARCH__) || defined(__HIP__)) && defined(NANOVDB_USE_INTRINSICS) + return sizeof(uint32_t) * 8 - 1 - __clz(v); // Return the number of consecutive high-order zero bits in a 32-bit integer. +#elif defined(_MSC_VER) && defined(NANOVDB_USE_INTRINSICS) + unsigned long index; + _BitScanReverse(&index, v); + return static_cast(index); +#elif (defined(__GNUC__) || defined(__clang__)) && defined(NANOVDB_USE_INTRINSICS) + return sizeof(unsigned long) * 8 - 1 - __builtin_clzl(v); +#else + //NANO_WARNING("Using software implementation for util::findHighestOn(uint32_t)") + static const unsigned char DeBruijn[32] = { + 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, + 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31}; + v |= v >> 1; // first round down to one less than a power of 2 + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + return DeBruijn[uint32_t(v * 0x07C4ACDDU) >> 27]; +#endif +}// util::findHighestOn + +/// @brief Returns the index of the highest, i.e. most significant, on bit in the specified 64 bit word +/// +/// @warning Assumes that at least one bit is set in the word, i.e. @a v != uint32_t(0)! +NANOVDB_HOSTDEV_DISABLE_WARNING +__hostdev__ inline uint32_t findHighestOn(uint64_t v) +{ + NANOVDB_ASSERT(v); +#if (defined(__CUDA_ARCH__) || defined(__HIP__)) && defined(NANOVDB_USE_INTRINSICS) + return sizeof(unsigned long) * 8 - 1 - __clzll(static_cast(v)); +#elif defined(_MSC_VER) && defined(NANOVDB_USE_INTRINSICS) + unsigned long index; + _BitScanReverse64(&index, v); + return static_cast(index); +#elif (defined(__GNUC__) || defined(__clang__)) && defined(NANOVDB_USE_INTRINSICS) + return sizeof(unsigned long) * 8 - 1 - __builtin_clzll(v); +#else + const uint32_t* p = reinterpret_cast(&v); + return p[1] ? 
32u + findHighestOn(p[1]) : findHighestOn(p[0]); +#endif +}// util::findHighestOn + +// ----------------------------> util::countOn <-------------------------------------- + +/// @return Number of bits that are on in the specified 64-bit word +NANOVDB_HOSTDEV_DISABLE_WARNING +__hostdev__ inline uint32_t countOn(uint64_t v) +{ +#if (defined(__CUDA_ARCH__) || defined(__HIP__)) && defined(NANOVDB_USE_INTRINSICS) + //#warning Using popcll for util::countOn + return __popcll(v); +// __popcnt64 intrinsic support was added in VS 2019 16.8 +#elif defined(_MSC_VER) && defined(_M_X64) && (_MSC_VER >= 1928) && defined(NANOVDB_USE_INTRINSICS) + //#warning Using popcnt64 for util::countOn + return uint32_t(__popcnt64(v)); +#elif (defined(__GNUC__) || defined(__clang__)) && defined(NANOVDB_USE_INTRINSICS) + //#warning Using builtin_popcountll for util::countOn + return __builtin_popcountll(v); +#else // use software implementation + //NANO_WARNING("Using software implementation for util::countOn") + v = v - ((v >> 1) & uint64_t(0x5555555555555555)); + v = (v & uint64_t(0x3333333333333333)) + ((v >> 2) & uint64_t(0x3333333333333333)); + return (((v + (v >> 4)) & uint64_t(0xF0F0F0F0F0F0F0F)) * uint64_t(0x101010101010101)) >> 56; +#endif +}// util::countOn(uint64_t) + +}// namespace util ================================================================== + +[[deprecated("Use nanovdb::util::findLowestOn instead")]] +__hostdev__ inline uint32_t FindLowestOn(uint32_t v){return util::findLowestOn(v);} +[[deprecated("Use nanovdb::util::findLowestOn instead")]] +__hostdev__ inline uint32_t FindLowestOn(uint64_t v){return util::findLowestOn(v);} +[[deprecated("Use nanovdb::util::findHighestOn instead")]] +__hostdev__ inline uint32_t FindHighestOn(uint32_t v){return util::findHighestOn(v);} +[[deprecated("Use nanovdb::util::findHighestOn instead")]] +__hostdev__ inline uint32_t FindHighestOn(uint64_t v){return util::findHighestOn(v);} +[[deprecated("Use nanovdb::util::countOn instead")]] +__hostdev__ inline uint32_t CountOn(uint64_t v){return util::countOn(v);} + +} // namespace nanovdb =================================================================== + +#endif // end of NANOVDB_UTIL_UTIL_H_HAS_BEEN_INCLUDED diff --git a/external/nanovdb/util/cuda/CudaAddBlindData.cuh b/external/nanovdb/util/cuda/CudaAddBlindData.cuh new file mode 100644 index 00000000..39ece43d --- /dev/null +++ b/external/nanovdb/util/cuda/CudaAddBlindData.cuh @@ -0,0 +1,6 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +#include // for NANOVDB_DEPRECATED_HEADER +#include +NANOVDB_DEPRECATED_HEADER("Include nanovdb/tools/cuda/AddBlindData.cuh instead.") diff --git a/external/nanovdb/util/cuda/CudaDeviceBuffer.h b/external/nanovdb/util/cuda/CudaDeviceBuffer.h new file mode 100644 index 00000000..65371c6c --- /dev/null +++ b/external/nanovdb/util/cuda/CudaDeviceBuffer.h @@ -0,0 +1,6 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +#include // for NANOVDB_DEPRECATED_HEADER +#include +NANOVDB_DEPRECATED_HEADER("Include nanovdb/cuda/DeviceBuffer.h instead.") \ No newline at end of file diff --git a/external/nanovdb/util/cuda/CudaGridChecksum.cuh b/external/nanovdb/util/cuda/CudaGridChecksum.cuh new file mode 100644 index 00000000..fe897d45 --- /dev/null +++ b/external/nanovdb/util/cuda/CudaGridChecksum.cuh @@ -0,0 +1,6 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: Apache-2.0 + +#include // for NANOVDB_DEPRECATED_HEADER 
+#include <nanovdb/tools/cuda/GridChecksum.cuh>
+NANOVDB_DEPRECATED_HEADER("Include nanovdb/tools/cuda/GridChecksum.cuh instead.")
diff --git a/external/nanovdb/util/cuda/CudaGridHandle.cuh b/external/nanovdb/util/cuda/CudaGridHandle.cuh
new file mode 100644
index 00000000..db68f238
--- /dev/null
+++ b/external/nanovdb/util/cuda/CudaGridHandle.cuh
@@ -0,0 +1,6 @@
+// Copyright Contributors to the OpenVDB Project
+// SPDX-License-Identifier: Apache-2.0
+
+#include <nanovdb/util/Util.h> // for NANOVDB_DEPRECATED_HEADER
+#include <nanovdb/cuda/GridHandle.cuh>
+NANOVDB_DEPRECATED_HEADER("Include nanovdb/cuda/GridHandle.cuh instead.")
\ No newline at end of file
diff --git a/external/nanovdb/util/cuda/CudaGridStats.cuh b/external/nanovdb/util/cuda/CudaGridStats.cuh
new file mode 100644
index 00000000..acc62af5
--- /dev/null
+++ b/external/nanovdb/util/cuda/CudaGridStats.cuh
@@ -0,0 +1,6 @@
+// Copyright Contributors to the OpenVDB Project
+// SPDX-License-Identifier: Apache-2.0
+
+#include <nanovdb/util/Util.h> // for NANOVDB_DEPRECATED_HEADER
+#include <nanovdb/tools/cuda/GridStats.cuh>
+NANOVDB_DEPRECATED_HEADER("Include nanovdb/tools/cuda/GridStats.cuh instead.")
diff --git a/external/nanovdb/util/cuda/CudaGridValidator.cuh b/external/nanovdb/util/cuda/CudaGridValidator.cuh
new file mode 100644
index 00000000..a89c8cae
--- /dev/null
+++ b/external/nanovdb/util/cuda/CudaGridValidator.cuh
@@ -0,0 +1,6 @@
+// Copyright Contributors to the OpenVDB Project
+// SPDX-License-Identifier: Apache-2.0
+
+#include <nanovdb/util/Util.h> // for NANOVDB_DEPRECATED_HEADER
+#include <nanovdb/tools/cuda/GridValidator.cuh>
+NANOVDB_DEPRECATED_HEADER("Include nanovdb/tools/cuda/GridValidator.cuh instead.")
diff --git a/external/nanovdb/util/cuda/CudaIndexToGrid.cuh b/external/nanovdb/util/cuda/CudaIndexToGrid.cuh
new file mode 100644
index 00000000..4a15b523
--- /dev/null
+++ b/external/nanovdb/util/cuda/CudaIndexToGrid.cuh
@@ -0,0 +1,6 @@
+// Copyright Contributors to the OpenVDB Project
+// SPDX-License-Identifier: Apache-2.0
+
+#include <nanovdb/util/Util.h> // for NANOVDB_DEPRECATED_HEADER
+#include <nanovdb/tools/cuda/IndexToGrid.cuh>
+NANOVDB_DEPRECATED_HEADER("Include nanovdb/tools/cuda/IndexToGrid.cuh instead.")
diff --git a/external/nanovdb/util/cuda/CudaNodeManager.cuh b/external/nanovdb/util/cuda/CudaNodeManager.cuh
new file mode 100644
index 00000000..ca287266
--- /dev/null
+++ b/external/nanovdb/util/cuda/CudaNodeManager.cuh
@@ -0,0 +1,6 @@
+// Copyright Contributors to the OpenVDB Project
+// SPDX-License-Identifier: Apache-2.0
+
+#include <nanovdb/util/Util.h> // for NANOVDB_DEPRECATED_HEADER
+#include <nanovdb/cuda/NodeManager.cuh>
+NANOVDB_DEPRECATED_HEADER("Include nanovdb/cuda/NodeManager.cuh instead.")
\ No newline at end of file
diff --git a/external/nanovdb/util/cuda/CudaPointsToGrid.cuh b/external/nanovdb/util/cuda/CudaPointsToGrid.cuh
new file mode 100644
index 00000000..7494b607
--- /dev/null
+++ b/external/nanovdb/util/cuda/CudaPointsToGrid.cuh
@@ -0,0 +1,6 @@
+// Copyright Contributors to the OpenVDB Project
+// SPDX-License-Identifier: Apache-2.0
+
+#include <nanovdb/util/Util.h> // for NANOVDB_DEPRECATED_HEADER
+#include <nanovdb/tools/cuda/PointsToGrid.cuh>
+NANOVDB_DEPRECATED_HEADER("Include nanovdb/tools/cuda/PointsToGrid.cuh instead.")
diff --git a/external/nanovdb/util/cuda/CudaSignedFloodFill.cuh b/external/nanovdb/util/cuda/CudaSignedFloodFill.cuh
new file mode 100644
index 00000000..7f0d9ce0
--- /dev/null
+++ b/external/nanovdb/util/cuda/CudaSignedFloodFill.cuh
@@ -0,0 +1,6 @@
+// Copyright Contributors to the OpenVDB Project
+// SPDX-License-Identifier: Apache-2.0
+
+#include <nanovdb/util/Util.h> // for NANOVDB_DEPRECATED_HEADER
+#include <nanovdb/tools/cuda/SignedFloodFill.cuh>
+NANOVDB_DEPRECATED_HEADER("Include nanovdb/tools/cuda/SignedFloodFill.cuh instead.")
diff --git a/external/nanovdb/util/cuda/CudaUtils.h b/external/nanovdb/util/cuda/CudaUtils.h
new file mode 100644
index 00000000..38f7c94a
--- /dev/null
+++ b/external/nanovdb/util/cuda/CudaUtils.h
@@ -0,0 +1,6 @@
+// Copyright Contributors to the OpenVDB Project
+// SPDX-License-Identifier: Apache-2.0
+
+#include <nanovdb/util/Util.h> // for NANOVDB_DEPRECATED_HEADER
+#include <nanovdb/util/cuda/Util.h>
+NANOVDB_DEPRECATED_HEADER("Include nanovdb/util/cuda/Util.h instead.")
\ No newline at end of file
diff --git a/external/nanovdb/util/cuda/GpuTimer.h b/external/nanovdb/util/cuda/GpuTimer.h
new file mode 100644
index 00000000..ee0b0c71
--- /dev/null
+++ b/external/nanovdb/util/cuda/GpuTimer.h
@@ -0,0 +1,6 @@
+// Copyright Contributors to the OpenVDB Project
+// SPDX-License-Identifier: Apache-2.0
+
+#include <nanovdb/util/Util.h> // for NANOVDB_DEPRECATED_HEADER
+#include <nanovdb/util/cuda/Timer.h>
+NANOVDB_DEPRECATED_HEADER("Include nanovdb/util/cuda/Timer.h instead.")
\ No newline at end of file
diff --git a/external/nanovdb/util/cuda/Timer.h b/external/nanovdb/util/cuda/Timer.h
new file mode 100644
index 00000000..07c9366a
--- /dev/null
+++ b/external/nanovdb/util/cuda/Timer.h
@@ -0,0 +1,116 @@
+// Copyright Contributors to the OpenVDB Project
+// SPDX-License-Identifier: Apache-2.0
+
+/// @file nanovdb/util/cuda/Timer.h
+///
+/// @author Ken Museth
+///
+/// @brief A simple GPU timing class
+
+#ifndef NANOVDB_UTIL_CUDA_TIMER_H_HAS_BEEN_INCLUDED
+#define NANOVDB_UTIL_CUDA_TIMER_H_HAS_BEEN_INCLUDED
+
+#include <iostream> // for std::cerr
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+
+namespace nanovdb {
+
+namespace util::cuda {
+
+class Timer
+{
+    cudaStream_t mStream{0};
+    cudaEvent_t  mStart, mStop;
+
+public:
+    /// @brief Default constructor
+    /// @param stream CUDA stream to be timed (defaults to stream 0)
+    /// @note Starts the timer
+    Timer(cudaStream_t stream = 0) : mStream(stream)
+    {
+        cudaEventCreate(&mStart);
+        cudaEventCreate(&mStop);
+        cudaEventRecord(mStart, mStream);
+    }
+
+    /// @brief Construct and start the timer
+    /// @param msg string message to be printed when timer is started
+    /// @param stream CUDA stream to be timed (defaults to stream 0)
+    /// @param os output stream for the message above
+    Timer(const std::string &msg, cudaStream_t stream = 0, std::ostream& os = std::cerr)
+        : mStream(stream)
+    {
+        os << msg << " ... " << std::flush;
+        cudaEventCreate(&mStart);
+        cudaEventCreate(&mStop);
+        cudaEventRecord(mStart, mStream);
+    }
+
+    /// @brief Destructor
+    ~Timer()
+    {
+        cudaEventDestroy(mStart);
+        cudaEventDestroy(mStop);
+    }
+
+    /// @brief Start the timer
+    /// @param stream CUDA stream to be timed (defaults to stream 0)
+    /// @param os output stream for the message above
+    void start() {cudaEventRecord(mStart, mStream);}
+
+    /// @brief Start the timer
+    /// @param msg string message to be printed when timer is started
+    /// @param os output stream for the message above
+    void start(const std::string &msg, std::ostream& os = std::cerr)
+    {
+        os << msg << " ... " << std::flush;
+        this->start();
+    }
+
+    /// @brief Start the timer
+    /// @param msg string message to be printed when timer is started
+    /// @param os output stream for the message above
+    void start(const char* msg, std::ostream& os = std::cerr)
+    {
+        os << msg << " ... " << std::flush;
+        this->start();
+    }
+
+    /// @brief elapsed time (since start) in milliseconds
+    /// @return elapsed time (since start) in milliseconds
+    float elapsed()
+    {
+        cudaEventRecord(mStop, mStream);
+        cudaEventSynchronize(mStop);
+        float diff = 0.0f;
+        cudaEventElapsedTime(&diff, mStart, mStop);
+        return diff;
+    }
+
+    /// @brief stop the timer
+    /// @param os output stream for the message above
+    void stop(std::ostream& os = std::cerr)
+    {
+        float diff = this->elapsed();
+        os << "completed in " << diff << " milliseconds" << std::endl;
+    }
+
+    /// @brief stop and start the timer
+    /// @param msg string message to be printed when timer is started
+    /// @warning Remember to call start before restart
+    void restart(const std::string &msg, std::ostream& os = std::cerr)
+    {
+        this->stop();
+        this->start(msg, os);
+    }
+};// Timer
+
+}// namespace util::cuda
+
+using GpuTimer [[deprecated("Use nanovdb::util::cuda::Timer instead")]] = util::cuda::Timer;
+
+} // namespace nanovdb
+
+#endif // NANOVDB_UTIL_CUDA_TIMER_H_HAS_BEEN_INCLUDED
diff --git a/external/nanovdb/util/cuda/Util.h b/external/nanovdb/util/cuda/Util.h
new file mode 100644
index 00000000..8d1711b3
--- /dev/null
+++ b/external/nanovdb/util/cuda/Util.h
@@ -0,0 +1,193 @@
+// Copyright Contributors to the OpenVDB Project
+// SPDX-License-Identifier: Apache-2.0
+
+/*!
+    \file nanovdb/util/cuda/Util.h
+
+    \author Ken Museth
+
+    \date December 20, 2023
+
+    \brief Cuda specific utility functions
+*/
+
+#ifndef NANOVDB_UTIL_CUDA_UTIL_H_HAS_BEEN_INCLUDED
+#define NANOVDB_UTIL_CUDA_UTIL_H_HAS_BEEN_INCLUDED
+
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+#include <nanovdb/util/Util.h> // for stderr and NANOVDB_ASSERT
+
+// change 1 -> 0 to only perform asserts during debug builds
+#if 1 || defined(DEBUG) || defined(_DEBUG)
+    static inline void gpuAssert(cudaError_t code, const char* file, int line, bool abort = true)
+    {
+        if (code != cudaSuccess) {
+            fprintf(stderr, "CUDA error %u: %s (%s:%d)\n", unsigned(code), cudaGetErrorString(code), file, line);
+            //fprintf(stderr, "CUDA Runtime Error: %s %s %d\n", cudaGetErrorString(code), file, line);
+            if (abort) exit(code);
+        }
+    }
+    static inline void ptrAssert(const void* ptr, const char* msg, const char* file, int line, bool abort = true)
+    {
+        if (ptr == nullptr) {
+            fprintf(stderr, "NULL pointer error: %s %s %d\n", msg, file, line);
+            if (abort) exit(1);
+        } else if (uint64_t(ptr) % 32) {
+            fprintf(stderr, "Pointer misalignment error: %s %s %d\n", msg, file, line);
+            if (abort) exit(1);
+        }
+    }
+#else
+    static inline void gpuAssert(cudaError_t, const char*, int, bool = true){}
+    static inline void ptrAssert(void*, const char*, const char*, int, bool = true){}
+#endif
+
+// Convenience function for checking CUDA runtime API results
+// can be wrapped around any runtime API call. No-op in release builds.
+#define cudaCheck(ans) \
+    { \
+        gpuAssert((ans), __FILE__, __LINE__); \
+    }
+
+#define checkPtr(ptr, msg) \
+    { \
+        ptrAssert((ptr), (msg), __FILE__, __LINE__); \
+    }
+
+#define cudaSync() \
+    { \
+        cudaCheck(cudaDeviceSynchronize()); \
+    }
+
+#define cudaCheckError() \
+    { \
+        cudaCheck(cudaGetLastError()); \
+    }
+
+namespace nanovdb {// =========================================================
+
+namespace util::cuda {// ======================================================
+
+//#define NANOVDB_USE_SYNC_CUDA_MALLOC
+// cudaMallocAsync and cudaFreeAsync were introduced in CUDA 11.2 so we introduce
+// custom implementations that map to cudaMalloc and cudaFree below. If NANOVDB_USE_SYNC_CUDA_MALLOC
+// is defined these implementations will also be defined, which is useful in virtualized environments
+// that slice up the GPU and share it between instances as vGPU's. GPU unified memory is usually disabled
+// out of security considerations. Asynchronous CUDA malloc/free depends on GPU unified memory, so it
+// is not possible to use cudaMallocAsync and cudaFreeAsync in such environments.
+
+#if (CUDART_VERSION < 11020) || defined(NANOVDB_USE_SYNC_CUDA_MALLOC) // 11.2 introduced cudaMallocAsync and cudaFreeAsync
+
+/// @brief Simple wrapper that calls cudaMalloc
+/// @param d_ptr Device pointer to allocated device memory
+/// @param size Number of bytes to allocate
+/// @param dummy The stream establishing the stream ordering contract and the memory pool to allocate from (ignored)
+/// @return Cuda error code
+inline cudaError_t mallocAsync(void** d_ptr, size_t size, cudaStream_t){return cudaMalloc(d_ptr, size);}
+
+/// @brief Simple wrapper that calls cudaFree
+/// @param d_ptr Device pointer that will be freed
+/// @param dummy The stream establishing the stream ordering promise (ignored)
+/// @return Cuda error code
+inline cudaError_t freeAsync(void* d_ptr, cudaStream_t){return cudaFree(d_ptr);}
+
+#else
+
+/// @brief Simple wrapper that calls cudaMallocAsync
+/// @param d_ptr Device pointer to allocated device memory
+/// @param size Number of bytes to allocate
+/// @param stream The stream establishing the stream ordering contract and the memory pool to allocate from
+/// @return Cuda error code
+inline cudaError_t mallocAsync(void** d_ptr, size_t size, cudaStream_t stream){return cudaMallocAsync(d_ptr, size, stream);}
+
+/// @brief Simple wrapper that calls cudaFreeAsync
+/// @param d_ptr Device pointer that will be freed
+/// @param stream The stream establishing the stream ordering promise
+/// @return Cuda error code
+inline cudaError_t freeAsync(void* d_ptr, cudaStream_t stream){return cudaFreeAsync(d_ptr, stream);}
+
+#endif
+
+/// @brief Simple (naive) implementation of a unique device pointer
+/// using stream ordered memory allocation and deallocation.
+/// @tparam T Type of the device pointer
+template <typename T>
+class unique_ptr
+{
+    T           *mPtr;// pointer to stream ordered memory allocation
+    cudaStream_t mStream;
+public:
+    unique_ptr(size_t count = 0, cudaStream_t stream = 0) : mPtr(nullptr), mStream(stream)
+    {
+        if (count>0) cudaCheck(mallocAsync((void**)&mPtr, count*sizeof(T), stream));
+    }
+    unique_ptr(const unique_ptr&) = delete;
+    unique_ptr(unique_ptr&& other) : mPtr(other.mPtr), mStream(other.mStream)
+    {
+        other.mPtr = nullptr;
+    }
+    ~unique_ptr()
+    {
+        if (mPtr) cudaCheck(freeAsync(mPtr, mStream));
+    }
+    unique_ptr& operator=(const unique_ptr&) = delete;
+    unique_ptr& operator=(unique_ptr&& rhs) noexcept
+    {
+        mPtr = rhs.mPtr;
+        mStream = rhs.mStream;
+        rhs.mPtr = nullptr;
+        return *this;
+    }
+    void reset() {
+        if (mPtr) {
+            cudaCheck(freeAsync(mPtr, mStream));
+            mPtr = nullptr;
+        }
+    }
+    T* get() const {return mPtr;}
+    explicit operator bool() const {return mPtr != nullptr;}
+};// util::cuda::unique_ptr
+
+/// @brief Computes the number of blocks per grid given the problem size and number of threads per block
+/// @param numItems Problem size
+/// @param threadsPerBlock Number of threads per block (second CUDA launch parameter)
+/// @return number of blocks per grid (first CUDA launch parameter)
+/// @note CUDA launch parameters: kernel<<< blocksPerGrid, threadsPerBlock, sharedMemSize, streamID>>>
+inline size_t blocksPerGrid(size_t numItems, size_t threadsPerBlock)
+{
+    NANOVDB_ASSERT(numItems > 0 && threadsPerBlock >= 32 && threadsPerBlock % 32 == 0);
+    return (numItems + threadsPerBlock - 1) / threadsPerBlock;
+}
+
+
+#if defined(__CUDACC__)// the following functions only run on the GPU!
+
+/// @brief Cuda kernel that launches device lambda functions
+/// @param numItems Problem size
+template <typename Func, typename... Args>
+__global__ void lambdaKernel(const size_t numItems, Func func, Args... args)
+{
+    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
+    if (tid >= numItems) return;
+    func(tid, args...);
+}// util::cuda::lambdaKernel
+
+#endif// __CUDACC__
+
+}// namespace util::cuda ============================================================
+
+}// namespace nanovdb ===============================================================
+
+#if defined(__CUDACC__)// the following functions only run on the GPU!
+template <typename Func, typename... Args>
+[[deprecated("Use nanovdb::cuda::lambdaKernel instead")]]
+__global__ void cudaLambdaKernel(const size_t numItems, Func func, Args... args)
+{
+    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
+    if (tid >= numItems) return;
+    func(tid, args...);
+}
#endif// __CUDACC__
+
+#endif// NANOVDB_UTIL_CUDA_UTIL_H_HAS_BEEN_INCLUDED
\ No newline at end of file
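
Note (not part of the diff above): a minimal usage sketch of the vendored helpers, assuming a CUDA translation unit compiled by nvcc with extended lambdas enabled. The function name scaleOnDevice, the thread count, and the kernel body are illustrative only; the sketch simply combines util::cuda::unique_ptr, blocksPerGrid, lambdaKernel, Timer, and the cudaCheck/cudaCheckError macros defined in nanovdb/util/cuda/Util.h and nanovdb/util/cuda/Timer.h.

    // Illustrative sketch only -- not part of the NanoVDB sources above.
    #include <nanovdb/util/cuda/Util.h>  // unique_ptr, blocksPerGrid, lambdaKernel, cudaCheck
    #include <nanovdb/util/cuda/Timer.h> // util::cuda::Timer

    // Scales n floats on the device and reports the kernel time to stderr.
    void scaleOnDevice(const float* h_in, float* h_out, size_t n, float s, cudaStream_t stream = 0)
    {
        using namespace nanovdb::util::cuda;

        unique_ptr<float> d_buf(n, stream); // stream-ordered device allocation, released via freeAsync
        cudaCheck(cudaMemcpyAsync(d_buf.get(), h_in, n * sizeof(float), cudaMemcpyHostToDevice, stream));

        Timer timer("scaling on device", stream);   // prints "scaling on device ... "
        constexpr size_t threads = 128;             // threadsPerBlock must be a positive multiple of 32
        float* buf = d_buf.get();
        lambdaKernel<<<blocksPerGrid(n, threads), threads, 0, stream>>>(
            n, [=] __device__ (size_t i) { buf[i] *= s; });
        cudaCheckError();                           // surface kernel-launch errors
        timer.stop();                               // prints "completed in X milliseconds"

        cudaCheck(cudaMemcpyAsync(h_out, d_buf.get(), n * sizeof(float), cudaMemcpyDeviceToHost, stream));
        cudaCheck(cudaStreamSynchronize(stream));
    }   // d_buf is freed here on the same stream

The pattern above (stream-ordered allocation, a blocksPerGrid/lambdaKernel launch, and cudaCheck around every runtime call) is what these headers are built around; the Timer is optional and writes to std::cerr by default. The device lambda requires nvcc's --extended-lambda flag.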