diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 35d030ae48..e26c8d0aaf 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -51,37 +51,6 @@ jobs: cmake --preset nightly-linux-DG2-JIT-MULTILEVEL -DCMAKE_BUILD_TYPE=Release -DEMBREE_TESTING_INTENSITY=3 cmake --build build --config Release --target test_package - nightly-linux-DG2-INTERNAL-L0RTAS-build: - secrets: inherit - uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/docker.yml@main - with: - image: embree/ubuntu:22.04 - runs-on: '[ "Linux", "docker", "build" ]' - project: embree - dpcpp-version: intel-llvm/nightly-2023-12-18-rk - artifact-out: nightly-linux-DG2-INTERNAL-L0RTAS-build - artifact-path: ./build/*.tar.gz - cmd: | - module load cmake/3.25.3 - cmake --preset nightly-linux-DG2-INTERNAL-L0RTAS -DCMAKE_BUILD_TYPE=Release -DEMBREE_TESTING_INTENSITY=3 - cmake --build build --config Release --target build - - linux-DG2-JIT-INTERNAL-L0RTAS-test: - secrets: inherit - uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/docker.yml@main - needs: ["nightly-linux-DG2-INTERNAL-L0RTAS-build"] - with: - image: embree/ubuntu:22.04 - options: --device=/dev/dri:/dev/dri - runs-on: '[ "Linux", "docker", "dg2" ]' - project: embree - env-from-files: ./.github/workflows/gfx-ubuntu22-public.env - artifact-in: nightly-linux-DG2-INTERNAL-L0RTAS-build - cmd: | - module load cmake/3.25.3 - cmake --preset nightly-linux-DG2-INTERNAL-L0RTAS -DCMAKE_BUILD_TYPE=Release -DEMBREE_TESTING_INTENSITY=3 - cmake --build build --config Release --target test_package - linux-DG2-JIT-PUBLIC-sycl-RC-build: secrets: inherit uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/docker.yml@main @@ -381,18 +350,6 @@ jobs: cmake --preset nightly-windows-DG2-JIT -DCMAKE_BUILD_TYPE=Release -DEMBREE_TESTING_INTENSITY=4 cmake --build build --config Release --target test_package - windows-DG2-JIT-INTERNAL-VALIDATION_API: - secrets: inherit - uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/windows.yml@main - with: - runs-on: '[ "Windows", "NAS", "dg2" ]' - project: embree - env-from-files: ./.github/workflows/dpcpp-sycl-nightly.env ./.github/workflows/gfx-windows-public.env - cmd: | - python scripts/test.py configure platform:x64 compiler:dpcpp isa:SSE2 build:Release EMBREE_SYCL_SUPPORT:ON sycl:none rt_validation_api:ON implicit_dispatch_globals:OFF L0RTAS:OFF intensity:3 tasking:TBB2020.3 - python scripts/test.py build - python scripts/test.py test - windows-DG2-JIT-sycl-RC-build: secrets: inherit uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/windows.yml@main diff --git a/CMakeLists.txt b/CMakeLists.txt index d9536b202f..ead510159e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -195,11 +195,6 @@ IF (EMBREE_SYCL_SUPPORT) ENDIF() ENDIF() -CMAKE_DEPENDENT_OPTION(EMBREE_SYCL_L0_RTAS_BUILDER "Enable Level Zero RTAS builder" ON "EMBREE_SYCL_SUPPORT" OFF) -IF (EMBREE_SYCL_L0_RTAS_BUILDER) - ADD_DEFINITIONS("-DEMBREE_SYCL_L0_RTAS_BUILDER") -ENDIF() - OPTION(EMBREE_RAY_MASK "Enables ray mask support." ON) OPTION(EMBREE_BACKFACE_CULLING "Enables backface culling.") OPTION(EMBREE_BACKFACE_CULLING_CURVES "Enables backface culling for curve primitives." OFF) diff --git a/CMakePresets.json b/CMakePresets.json index 624717c0e7..db547d9609 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -40,7 +40,6 @@ "cacheVariables": { "CMAKE_BUILD_TYPE": "Release", "EMBREE_SYCL_SUPPORT": "ON", - "EMBREE_SYCL_L0_RTAS_BUILDER" : "OFF", "EMBREE_SYCL_AOT_DEVICES": "none", "EMBREE_MAX_ISA": "AVX512", "EMBREE_USE_GOOGLE_BENCHMARK": "ON", @@ -83,7 +82,6 @@ "cacheVariables": { "CMAKE_BUILD_TYPE": "Release", "EMBREE_SYCL_SUPPORT": "ON", - "EMBREE_SYCL_L0_RTAS_BUILDER" : "OFF", "EMBREE_SYCL_AOT_DEVICES": "none", "EMBREE_MAX_ISA": "AVX512" } diff --git a/CMakeUserPresets.json b/CMakeUserPresets.json deleted file mode 100644 index c21be418c8..0000000000 --- a/CMakeUserPresets.json +++ /dev/null @@ -1,66 +0,0 @@ -{ - "version": 4, - "cmakeMinimumRequired": { - "major": 3, - "minor": 11, - "patch": 0 - }, - "configurePresets": [ - { - "name": "user-package", - "hidden": true, - "binaryDir": "${sourceDir}/build", - "cacheVariables": { - "BUILD_TESTING": "ON", - "EMBREE_TESTING_INSTALL_TESTS": "ON", - "EMBREE_ZIP_MODE": "ON", - "EMBREE_INSTALL_DEPENDENCIES": "ON", - "EMBREE_BUILD_GLFW_FROM_SOURCE": "ON", - "EMBREE_PATCH_GLFW_SOURCE": "ON", - "CMAKE_INSTALL_INCLUDEDIR": "include", - "CMAKE_INSTALL_LIBDIR": "lib", - "CMAKE_INSTALL_DOCDIR": "doc", - "CMAKE_INSTALL_BINDIR": "bin", - "CMAKE_INSTALL_TESTDIR": "testing" - } - }, - - { - "name": "user-package-linux", - "hidden": true, - "inherits": ["package", "linux"], - "cacheVariables": { - "CMAKE_SKIP_INSTALL_RPATH": "OFF" - } - }, - - - - - { - "name": "user", - "inherits": ["user-package-linux", "tbb2021_9_0"], - "binaryDir": "${sourceDir}/build", - "cacheVariables": { - "CMAKE_CXX_COMPILER": "g++", - "CMAKE_C_COMPILER": "gcc", - "CMAKE_BUILD_TYPE": "Release", - "EMBREE_TASKING_SYSTEM": "TBB", - "EMBREE_MAX_ISA": "SSE2" - } - }, - - { - "name": "user-sycl", - "inherits": ["package-windows", "env", "icx-windows", "ispc1_19_0", "tbb2021_9_0"], - "binaryDir": "${sourceDir}/build", - "cacheVariables": { - "CMAKE_BUILD_TYPE": "Release", - "EMBREE_SYCL_SUPPORT": "ON", - "EMBREE_SYCL_L0_RTAS_BUILDER" : "OFF", - "EMBREE_SYCL_AOT_DEVICES": "none", - "EMBREE_MAX_ISA": "SSE2" - } - } - ] -} diff --git a/kernels/CMakeLists.txt b/kernels/CMakeLists.txt index 5a528cc0ca..f45fe1bb3f 100644 --- a/kernels/CMakeLists.txt +++ b/kernels/CMakeLists.txt @@ -7,7 +7,6 @@ ENDIF() IF (EMBREE_SYCL_SUPPORT) ADD_SUBDIRECTORY(level_zero) - ADD_SUBDIRECTORY(rthwif) ENDIF() IF (EMBREE_CONFIG) diff --git a/kernels/common/device.cpp b/kernels/common/device.cpp index 60252466a2..32f92feafb 100644 --- a/kernels/common/device.cpp +++ b/kernels/common/device.cpp @@ -662,7 +662,6 @@ namespace embree if (result != ZE_RESULT_SUCCESS) throw_RTCError(RTC_ERROR_UNKNOWN, "zeDriverGetExtensionProperties failed"); -#if defined(EMBREE_SYCL_L0_RTAS_BUILDER) bool ze_rtas_builder = false; for (uint32_t i=0; i #include #include #include #include -ZeWrapper::RTAS_BUILD_MODE ZeWrapper::rtas_builder = ZeWrapper::AUTO; +bool ZeWrapper::rtas_builder_selected = false; static std::mutex zeWrapperMutex; static void* handle = nullptr; @@ -92,7 +90,8 @@ ZeWrapper::~ZeWrapper() { ze_result_t selectLevelZeroRTASBuilder(ze_driver_handle_t hDriver) { - if (ZeWrapper::rtas_builder == ZeWrapper::LEVEL_ZERO) + /* only select rtas builder once! */ + if (ZeWrapper::rtas_builder_selected) return ZE_RESULT_SUCCESS; auto zeRTASBuilderCreateExpTemp = find_symbol(handle,"zeRTASBuilderCreateExp"); @@ -121,33 +120,10 @@ ze_result_t selectLevelZeroRTASBuilder(ze_driver_handle_t hDriver) zeRTASParallelOperationGetPropertiesExpInternal = find_symbol(handle,"zeRTASParallelOperationGetPropertiesExp"); zeRTASParallelOperationJoinExpInternal = find_symbol(handle,"zeRTASParallelOperationJoinExp"); - ZeWrapper::rtas_builder = ZeWrapper::LEVEL_ZERO; + ZeWrapper::rtas_builder_selected = true; return ZE_RESULT_SUCCESS; } -void selectInternalRTASBuilder() -{ -#if defined(ZE_RAYTRACING_DISABLE_INTERNAL_BUILDER) - throw std::runtime_error("internal builder disabled at compile time"); -#else - if (ZeWrapper::rtas_builder == ZeWrapper::INTERNAL) - return; - - zeRTASBuilderCreateExpInternal = &zeRTASBuilderCreateExpImpl; - zeRTASBuilderDestroyExpInternal = &zeRTASBuilderDestroyExpImpl; - zeDriverRTASFormatCompatibilityCheckExpInternal = &zeDriverRTASFormatCompatibilityCheckExpImpl; - zeRTASBuilderGetBuildPropertiesExpInternal = &zeRTASBuilderGetBuildPropertiesExpImpl; - zeRTASBuilderBuildExpInternal = &zeRTASBuilderBuildExpImpl; - - zeRTASParallelOperationCreateExpInternal = &zeRTASParallelOperationCreateExpImpl; - zeRTASParallelOperationDestroyExpInternal = &zeRTASParallelOperationDestroyExpImpl; - zeRTASParallelOperationGetPropertiesExpInternal = &zeRTASParallelOperationGetPropertiesExpImpl; - zeRTASParallelOperationJoinExpInternal = &zeRTASParallelOperationJoinExpImpl; - - ZeWrapper::rtas_builder = ZeWrapper::INTERNAL; -#endif -} - ze_result_t ZeWrapper::init() { std::lock_guard lock(zeWrapperMutex); @@ -171,42 +147,12 @@ ze_result_t ZeWrapper::init() return ZE_RESULT_SUCCESS; } -ze_result_t ZeWrapper::initRTASBuilder(ze_driver_handle_t hDriver, RTAS_BUILD_MODE rtas_build_mode) +ze_result_t ZeWrapper::initRTASBuilder(ze_driver_handle_t hDriver) { std::lock_guard lock(zeWrapperMutex); - /* only select rtas builder once! */ - if (rtas_builder != RTAS_BUILD_MODE::AUTO) - { - if (rtas_build_mode == RTAS_BUILD_MODE::AUTO) - return ZE_RESULT_SUCCESS; - - if (rtas_builder == rtas_build_mode) - return ZE_RESULT_SUCCESS; - - return ZE_RESULT_ERROR_UNKNOWN; - } - try { - - if (rtas_build_mode == RTAS_BUILD_MODE::AUTO) - { - try { - if (selectLevelZeroRTASBuilder(hDriver) != ZE_RESULT_SUCCESS) - selectInternalRTASBuilder(); - } catch (std::exception& e) { - selectInternalRTASBuilder(); - } - } - - else if (rtas_build_mode == RTAS_BUILD_MODE::INTERNAL) - selectInternalRTASBuilder(); - - else if (rtas_build_mode == RTAS_BUILD_MODE::LEVEL_ZERO) - return selectLevelZeroRTASBuilder(hDriver); - - else - throw std::runtime_error("internal error"); + return selectLevelZeroRTASBuilder(hDriver); } catch (std::exception& e) { return ZE_RESULT_ERROR_UNKNOWN; @@ -290,7 +236,7 @@ ze_result_t zeDeviceGetRTASPropertiesExp( const ze_device_handle_t hDevice, ze_r /* fill properties */ pProperties->flags = 0; - pProperties->rtasFormat = (ze_rtas_format_exp_t) ZE_RTAS_DEVICE_FORMAT_EXP_INVALID; + pProperties->rtasFormat = (ze_rtas_format_exp_t) ZE_RTAS_FORMAT_EXP_INVALID; pProperties->rtasBufferAlignment = 128; /* check for supported device ID */ @@ -307,7 +253,7 @@ ze_result_t zeDeviceGetRTASPropertiesExp( const ze_device_handle_t hDevice, ze_r /* disabling of device check through env variable */ const char* disable_device_check = std::getenv("EMBREE_DISABLE_DEVICEID_CHECK"); if (disable_device_check && strcmp(disable_device_check,"1") == 0) { - pProperties->rtasFormat = (ze_rtas_format_exp_t) ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_1; + pProperties->rtasFormat = (ze_rtas_format_exp_t) ZE_RTAS_FORMAT_EXP_VERSION_1; return ZE_RESULT_SUCCESS; } @@ -320,7 +266,7 @@ ze_result_t zeDeviceGetRTASPropertiesExp( const ze_device_handle_t hDevice, ze_r (0x56C0 <= device_id && device_id <= 0x56C1); if (dg2) { - pProperties->rtasFormat = (ze_rtas_format_exp_t) ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_1; + pProperties->rtasFormat = (ze_rtas_format_exp_t) ZE_RTAS_FORMAT_EXP_VERSION_1; return ZE_RESULT_SUCCESS; } @@ -332,7 +278,7 @@ ze_result_t zeDeviceGetRTASPropertiesExp( const ze_device_handle_t hDevice, ze_r (device_id == 0x0BD4); if (pvc) { - pProperties->rtasFormat = (ze_rtas_format_exp_t) ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_1; + pProperties->rtasFormat = (ze_rtas_format_exp_t) ZE_RTAS_FORMAT_EXP_VERSION_1; return ZE_RESULT_SUCCESS; } @@ -345,7 +291,7 @@ ze_result_t zeDeviceGetRTASPropertiesExp( const ze_device_handle_t hDevice, ze_r (device_id == 0x7D60); if (mtl) { - pProperties->rtasFormat = (ze_rtas_format_exp_t) ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_1; + pProperties->rtasFormat = (ze_rtas_format_exp_t) ZE_RTAS_FORMAT_EXP_VERSION_1; return ZE_RESULT_SUCCESS; } @@ -356,21 +302,6 @@ ze_result_t ZeWrapper::zeDeviceGetProperties(ze_device_handle_t ze_handle, ze_de { if (!handle || !zeDeviceGetPropertiesInternal) throw std::runtime_error("ZeWrapper not initialized, call ZeWrapper::init() first."); - - if (ZeWrapper::rtas_builder == ZeWrapper::INTERNAL) - { - if (props->pNext && ((ze_base_properties_t*)props->pNext)->stype == ZE_STRUCTURE_TYPE_RTAS_DEVICE_EXP_PROPERTIES) - { - ze_result_t result = zeDeviceGetRTASPropertiesExp(ze_handle, (ze_rtas_device_exp_properties_t*)props->pNext); - if (result != ZE_RESULT_SUCCESS) return result; - - void* pNext = props->pNext; - props->pNext = ((ze_base_properties_t*)props->pNext)->pNext; - result = zeDeviceGetPropertiesInternal(ze_handle, props); - props->pNext = pNext; - return result; - } - } return zeDeviceGetPropertiesInternal(ze_handle, props); } diff --git a/kernels/level_zero/ze_wrapper.h b/kernels/level_zero/ze_wrapper.h index f4f409ed5a..f378eb3163 100644 --- a/kernels/level_zero/ze_wrapper.h +++ b/kernels/level_zero/ze_wrapper.h @@ -27,16 +27,10 @@ typedef struct _ze_rtas_builder_build_op_debug_exp_desc_t struct ZeWrapper { - enum RTAS_BUILD_MODE { - AUTO = 0, // try L0 implementation first and fallback to internal implementation - INTERNAL = 1, // use internal RTAS build implementation - LEVEL_ZERO = 2, // use Level Zero provided RTAS build implementation - }; - ~ZeWrapper(); static ze_result_t init(); - static ze_result_t initRTASBuilder(ze_driver_handle_t hDriver, RTAS_BUILD_MODE rtas_build_mode = RTAS_BUILD_MODE::AUTO); + static ze_result_t initRTASBuilder(ze_driver_handle_t hDriver); static ze_result_t zeMemFree(ze_context_handle_t, void*); static ze_result_t zeMemAllocHost(ze_context_handle_t, const ze_host_mem_alloc_desc_t*, size_t, size_t, void**); @@ -66,6 +60,6 @@ struct ZeWrapper static ze_result_t zeRTASParallelOperationGetPropertiesExp( ze_rtas_parallel_operation_exp_handle_t hParallelOperation, ze_rtas_parallel_operation_exp_properties_t* pProperties ); static ze_result_t zeRTASParallelOperationJoinExp( ze_rtas_parallel_operation_exp_handle_t hParallelOperation); - static RTAS_BUILD_MODE rtas_builder; + static bool rtas_builder_selected; }; diff --git a/kernels/rthwif/CMakeLists.txt b/kernels/rthwif/CMakeLists.txt deleted file mode 100644 index 8a1e359318..0000000000 --- a/kernels/rthwif/CMakeLists.txt +++ /dev/null @@ -1,179 +0,0 @@ -## Copyright 2009-2021 Intel Corporation -## SPDX-License-Identifier: Apache-2.0 - -cmake_minimum_required(VERSION 3.5) - -project(ze_raytracing) - -INCLUDE(CTest) - -SET(RTHWIF_VERSION_MAJOR 4) -SET(RTHWIF_VERSION_MINOR 1) -SET(RTHWIF_VERSION_PATCH 0) -SET(RTHWIF_VERSION ${RTHWIF_VERSION_MAJOR}.${RTHWIF_VERSION_MINOR}.${RTHWIF_VERSION_PATCH}) - -SET(CMAKE_CXX_STANDARD 17) - -IF (NOT DEFINED EMBREE_VERSION_MAJOR) - - SET(RTHWIF_STANDALONE ON) - SET(RTHWIF_NAME ze_raytracing) - ADD_DEFINITIONS("-DRTHWIF_STANDALONE") - - SET(EMBREE_CMAKEEXPORT_DIR "cmake") - - OPTION(EMBREE_SYCL_IMPLICIT_DISPATCH_GLOBALS "Using L0 allocated Dispatch Globals" ON) - - SET(EMBREE_RTHWIF_STATIC_LIB OFF) - SET(EMBREE_BUILDER_TBB_STATIC ON) - - SET(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}") - SET(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}") - SET(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}") - SET(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" ${CMAKE_MODULE_PATH}) - - CONFIGURE_FILE( - "${PROJECT_SOURCE_DIR}/../../kernels/config.h.in" - "${PROJECT_SOURCE_DIR}/../../kernels/config.h" - ) - - SET(EMBREE_MAX_INSTANCE_LEVEL_COUNT 1) - CONFIGURE_FILE( - "${PROJECT_SOURCE_DIR}/../../kernels/rtcore_config.h.in" - "${PROJECT_SOURCE_DIR}/../../include/embree4/rtcore_config.h" - ) - - IF (NOT WIN32) - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") # generate position independent code suitable for shared libraries - ENDIF() - - ADD_SUBDIRECTORY(../../common/sys sys) - ADD_SUBDIRECTORY(../../common/simd simd) - - GET_FILENAME_COMPONENT(SYCL_COMPILER_DIR ${CMAKE_CXX_COMPILER} PATH) - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -isystem \"${SYCL_COMPILER_DIR}/../include/sycl\" -isystem \"${SYCL_COMPILER_DIR}/../include/\"") # disable warning from SYCL header (FIXME: why required?) - - find_package(TBB 2020) - -ELSE() - SET(RTHWIF_NAME embree_rthwif) - OPTION(EMBREE_RTHWIF_STATIC_LIB "Build RTHWIF as a static library." ON) - option(EMBREE_BUILDER_TBB_STATIC "Use a staticaly compiled TBB version for the Embree builder for GPU." OFF) -ENDIF() - -IF (EMBREE_SYCL_RT_VALIDATION_API AND NOT EMBREE_SYCL_IMPLICIT_DISPATCH_GLOBALS) - ADD_DEFINITIONS("-DEMBREE_SYCL_ALLOC_DISPATCH_GLOBALS") -ENDIF() - -SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-bitwise-instead-of-logical") # disables "use of bitwise '&' with boolean operands" warning -SET(CMAKE_CXX_FLAGS_SYCL "${CMAKE_CXX_FLAGS_SYCL} -Wno-bitwise-instead-of-logical") # disables "use of bitwise '&' with boolean operands" warning - -if (EMBREE_RTHWIF_STATIC_LIB) - set(RTHWIF_LIB_TYPE STATIC) -else() - set(RTHWIF_LIB_TYPE SHARED) -endif() - -# by default link against the tasking target that has all TBB related -# information we need when TASKING_TBB is used -set(TBB_TARGET tasking) - -if (EMBREE_BUILDER_TBB_STATIC OR NOT TASKING_TBB) - - #################################################################### - # fetch TBB and build static version of it - set(TBB_TARGET tbb) - ADD_DEFINITIONS("-D_CRT_SECURE_NO_WARNINGS") - - option(TBB_STRICT "Treat compiler warnings as errors" OFF) - option(TBB_TEST "Enable testing" OFF) - option(TBBMALLOC_BUILD "Enable tbbmalloc build" OFF) - SET(TBB_DIR OFF) - SET(BUILD_SHARED_LIBS OFF) - - INCLUDE(FetchContent) - - SET(FETCHCONTENT_QUIET OFF) - - IF (NOT EMBREE_RTHWIF_TBB_GIT_REPOSITORY) # allow setting this externally - SET(EMBREE_RTHWIF_TBB_GIT_REPOSITORY "https://github.com/oneapi-src/oneTBB.git") - ENDIF() - - FetchContent_Declare( - tbb_static - GIT_REPOSITORY ${EMBREE_RTHWIF_TBB_GIT_REPOSITORY} - GIT_TAG v2021.6.0 - ) - - FetchContent_GetProperties(tbb_static) - if(NOT tbb_static_POPULATED) - FetchContent_Populate(tbb_static) - # We want to build tbb_static to link it into embree_rthwif, but don't want to - # install it as part of the Embree install targets. - add_subdirectory(${tbb_static_SOURCE_DIR} ${tbb_static_BINARY_DIR} EXCLUDE_FROM_ALL) - endif() - - MARK_AS_ADVANCED(FETCHCONTENT_BASE_DIR) - MARK_AS_ADVANCED(FETCHCONTENT_FULLY_DISCONNECTED) - MARK_AS_ADVANCED(FETCHCONTENT_QUIET) - MARK_AS_ADVANCED(FETCHCONTENT_SOURCE_DIR_TBB_STATIC) - MARK_AS_ADVANCED(FETCHCONTENT_UPDATES_DISCONNECTED) - MARK_AS_ADVANCED(FETCHCONTENT_UPDATES_DISCONNECTED_TBB_STATIC) - - MARK_AS_ADVANCED(TBB4PY_BUILD) - MARK_AS_ADVANCED(TBBMALLOC_BUILD) - MARK_AS_ADVANCED(TBB_BUILD) - MARK_AS_ADVANCED(TBB_CPF) - MARK_AS_ADVANCED(TBB_DISABLE_HWLOC_AUTOMATIC_SEARCH) - MARK_AS_ADVANCED(TBB_ENABLE_IPO) - MARK_AS_ADVANCED(TBB_EXAMPLES) - MARK_AS_ADVANCED(TBB_FIND_PACKAGE) - MARK_AS_ADVANCED(TBB_INSTALL_VARS) - MARK_AS_ADVANCED(TBB_NO_APPCONTAINER) - MARK_AS_ADVANCED(TBB_SANITIZE) - MARK_AS_ADVANCED(TBB_STRICT) - MARK_AS_ADVANCED(TBB_TEST) - MARK_AS_ADVANCED(TBB_TEST_SPEC) - MARK_AS_ADVANCED(TBB_VALGRIND_MEMCHECK) - MARK_AS_ADVANCED(TBB_WINDOWS_DRIVER) - - ADD_DEFINITIONS(-DTASKING_TBB) - #################################################################### -ENDIF() - -IF (RTHWIF_STANDALONE) - include(package_ze_raytracing) - INCLUDE(CPack) -ENDIF() - -IF (EMBREE_SYCL_RT_VALIDATION_API) - ADD_LIBRARY(embree_rthwif_sycl STATIC rttrace/rttrace_validation.cpp) - SET_PROPERTY(TARGET embree_rthwif_sycl APPEND PROPERTY COMPILE_FLAGS "-DEMBREE_SYCL_SUPPORT") - SET_TARGET_PROPERTIES(embree_rthwif_sycl PROPERTIES COMPILE_FLAGS ${CMAKE_CXX_FLAGS_SYCL}) - - INSTALL(TARGETS embree_rthwif_sycl EXPORT embree_rthwif_sycl-targets ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT lib) - INSTALL(EXPORT embree_rthwif_sycl-targets DESTINATION "${EMBREE_CMAKEEXPORT_DIR}" COMPONENT devel) - - SET(EMBREE_RTHWIF_SYCL embree_rthwif_sycl) -ENDIF() - -IF (NOT EMBREE_SYCL_L0_RTAS_BUILDER) -ADD_LIBRARY(embree_rthwif ${RTHWIF_LIB_TYPE} rtbuild/rtbuild.cpp rtbuild/qbvh6.cpp rtbuild/statistics.cpp) -TARGET_LINK_LIBRARIES(embree_rthwif PUBLIC ${EMBREE_RTHWIF_SYCL} PRIVATE ${TBB_TARGET} simd sys) -SET_TARGET_PROPERTIES(embree_rthwif PROPERTIES OUTPUT_NAME ${RTHWIF_NAME}) -IF (EMBREE_RTHWIF_STATIC_LIB) - TARGET_COMPILE_DEFINITIONS(embree_rthwif PUBLIC EMBREE_RTHWIF_STATIC_LIB) -ENDIF() - -TARGET_COMPILE_DEFINITIONS(embree_rthwif PUBLIC EMBREE_SYCL_SUPPORT) - -IF (EMBREE_STATIC_LIB OR NOT EMBREE_RTHWIF_STATIC_LIB) - INSTALL(TARGETS embree_rthwif EXPORT ${RTHWIF_NAME}-targets - LIBRARY NAMELINK_SKIP DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT lib - RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT examples - ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT devel) - INSTALL(EXPORT ${RTHWIF_NAME}-targets DESTINATION "${EMBREE_CMAKEEXPORT_DIR}" COMPONENT devel) -ENDIF() -ENDIF() - -ADD_SUBDIRECTORY(testing) diff --git a/kernels/rthwif/package_ze_raytracing.cmake b/kernels/rthwif/package_ze_raytracing.cmake deleted file mode 100644 index 08555c8702..0000000000 --- a/kernels/rthwif/package_ze_raytracing.cmake +++ /dev/null @@ -1,62 +0,0 @@ -## Copyright 2009-2021 Intel Corporation -## SPDX-License-Identifier: Apache-2.0 - -INCLUDE(GNUInstallDirs) - -############################################################## -# Install Documentation -############################################################## - -INSTALL(FILES "${PROJECT_SOURCE_DIR}/../../LICENSE.txt" DESTINATION doc COMPONENT lib) -INSTALL(FILES "${PROJECT_SOURCE_DIR}/../../CHANGELOG.md" DESTINATION doc COMPONENT lib) -INSTALL(FILES "${PROJECT_SOURCE_DIR}/../../third-party-programs.txt" DESTINATION doc COMPONENT lib) -INSTALL(FILES "${PROJECT_SOURCE_DIR}/../../third-party-programs-TBB.txt" DESTINATION doc COMPONENT lib) -INSTALL(FILES "${PROJECT_SOURCE_DIR}/../../third-party-programs-OIDN.txt" DESTINATION doc COMPONENT lib) -INSTALL(FILES "${PROJECT_SOURCE_DIR}/../../third-party-programs-DPCPP.txt" DESTINATION doc COMPONENT lib) -INSTALL(FILES "${PROJECT_SOURCE_DIR}/../../third-party-programs-oneAPI-DPCPP.txt" DESTINATION doc COMPONENT lib) - -############################################################## -# CPack specific stuff -############################################################## - -SET(CPACK_PACKAGE_NAME "L0 Ray Tracing Build API") -SET(CPACK_PACKAGE_FILE_NAME "ze_raytracing-${RTHWIF_VERSION}") -IF(NOT WIN32) - SET(CPACK_STRIP_FILES TRUE) -ENDIF() - -SET(CPACK_PACKAGE_VERSION_MAJOR ${EMBREE_VERSION_MAJOR}) -SET(CPACK_PACKAGE_VERSION_MINOR ${EMBREE_VERSION_MINOR}) -SET(CPACK_PACKAGE_VERSION_PATCH ${EMBREE_VERSION_PATCH}) -SET(CPACK_PACKAGE_VERSION ${EMBREE_VERSION}) -SET(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Implements acceleration structure build for L0 ray tracing extension.") -SET(CPACK_PACKAGE_VENDOR "Intel Corporation") -SET(CPACK_PACKAGE_CONTACT embree_support@intel.com) -SET(CPACK_MONOLITHIC_INSTALL 1) - -SET(CPACK_COMPONENT_LIB_DISPLAY_NAME "Library") -SET(CPACK_COMPONENT_LIB_DESCRIPTION "Library") - -SET(CPACK_COMPONENT_DEVEL_DISPLAY_NAME "Development") -SET(CPACK_COMPONENT_DEVEL_DESCRIPTION "Development") - -SET(CPACK_COMPONENT_EXAMPLES_DISPLAY_NAME "Examples") -SET(CPACK_COMPONENT_EXAMPLES_DESCRIPTION "Examples") - -# Windows specific settings -IF(WIN32) - SET(CPACK_GENERATOR ZIP) - SET(CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_FILE_NAME}.x64.windows") - -# MacOSX specific settings -ELSEIF(APPLE) - SET(CPACK_GENERATOR ZIP) - SET(CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_FILE_NAME}.x86_64.macosx") - -# Linux specific settings -ELSE() - - SET(CPACK_GENERATOR TGZ) - SET(CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_FILE_NAME}.x86_64.linux") - -ENDIF() diff --git a/kernels/rthwif/rtbuild/leaf.h b/kernels/rthwif/rtbuild/leaf.h deleted file mode 100644 index 0fced18031..0000000000 --- a/kernels/rthwif/rtbuild/leaf.h +++ /dev/null @@ -1,629 +0,0 @@ -// Copyright 2009-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#if defined(ZE_RAYTRACING) -#include "sys/sysinfo.h" -#include "sys/vector.h" -#include "math/vec2.h" -#include "math/vec3.h" -#include "math/bbox.h" -#include "math/affinespace.h" -#else -#include "../../../common/sys/sysinfo.h" -#include "../../../common/sys/vector.h" -#include "../../../common/math/vec2.h" -#include "../../../common/math/vec3.h" -#include "../../../common/math/bbox.h" -#include "../../../common/math/lbbox.h" -#include "../../../common/math/affinespace.h" -#endif - -#include "node_type.h" - -#include -#include - -namespace embree -{ - /* - - Internal representation for GeometryFlags. - - */ - -#undef OPAQUE // Windows defines OPAQUE in gdi.h - enum class GeometryFlags : uint32_t - { - NONE = 0x0, - OPAQUE = 0x1 - }; - - inline bool operator& (GeometryFlags a, GeometryFlags b) { - return (int(a) & int(b)) ? true : false; - } - - /* output operator for GeometryFlags */ - inline std::ostream& operator<<(std::ostream& cout, const GeometryFlags& gflags) - { -#if !defined(__SYCL_DEVICE_ONLY__) - if (gflags == GeometryFlags::NONE) return cout << "NONE"; - if (gflags & GeometryFlags::OPAQUE) cout << "OPAQUE "; -#endif - return cout; - } - - /* - - This structure is a header for each leaf type. Only the - InstanceLeaf has a slightly different header. - - All primitives inside a leaf are of the same geometry, thus have - the same geometry index (geomIndex), the same shader index - (shaderIndex), the same geometry mask (geomMask), and the same - geometry flags (geomFlags). - - The shaderIndex is used to calculate the shader record to - invoke. This is an extension to DXR where the geomIndex is used - for that purpose. For DXR we can always set the shaderIndex to be - equal to the geomIndex. - - */ - - struct PrimLeafDesc - { - static const uint32_t MAX_GEOM_INDEX = 0x3FFFFFFF; - static const uint32_t MAX_SHADER_INDEX = 0xFFFFFF; - - enum Type : uint32_t - { - TYPE_NONE = 0, - - /* For a node type of NODE_TYPE_PROCEDURAL we support enabling - * and disabling the opaque/non_opaque culling. */ - - TYPE_OPACITY_CULLING_ENABLED = 0, - TYPE_OPACITY_CULLING_DISABLED = 1 - }; - - PrimLeafDesc() {} - - PrimLeafDesc(uint32_t shaderIndex, uint32_t geomIndex, GeometryFlags gflags, uint32_t geomMask, Type type = TYPE_NONE) - : shaderIndex(shaderIndex), geomMask(geomMask), geomIndex(geomIndex), type(type), geomFlags((uint32_t)gflags) - { - if (shaderIndex > MAX_SHADER_INDEX) - throw std::runtime_error("too large shader ID"); - - if (geomIndex > MAX_GEOM_INDEX) - throw std::runtime_error("too large geometry ID"); - } - - /* compares two PrimLeafDesc's for equality */ - friend bool operator ==(const PrimLeafDesc& a, const PrimLeafDesc& b) - { - if (a.geomIndex != b.geomIndex) return false; - assert(a.shaderIndex == b.shaderIndex); - assert(a.geomMask == b.geomMask); - assert(a.type == b.type); - assert(a.geomFlags == b.geomFlags); - return true; - } - - friend bool operator !=(const PrimLeafDesc& a, const PrimLeafDesc& b) { - return !(a == b); - } - - void print(std::ostream& cout, uint32_t depth) const - { -#if !defined(__SYCL_DEVICE_ONLY__) - cout << tab(depth) << "PrimLeafDesc {" << std::endl; - cout << tab(depth) << " shaderIndex = " << shaderIndex << std::endl; - cout << tab(depth) << " geomMask = " << std::bitset<8>(geomMask) << std::endl; - cout << tab(depth) << " geomFlags = " << getGeomFlags() << std::endl; - cout << tab(depth) << " geomIndex = " << geomIndex << std::endl; - cout << tab(depth) << "}"; -#endif - } - - friend inline std::ostream& operator<<(std::ostream& cout, const PrimLeafDesc& desc) { - desc.print(cout,0); return cout; - } - - /* Checks if opaque culling is enabled. */ - bool opaqueCullingEnabled() const { - return type == TYPE_OPACITY_CULLING_ENABLED; - } - - /* procedural instances store some valid shader index */ - bool isProceduralInstance() const { - return shaderIndex != 0xFFFFFF; - } - - /* returns geometry flags */ - GeometryFlags getGeomFlags() const { - return (GeometryFlags) geomFlags; - } - - public: - uint32_t shaderIndex : 24; // shader index used for shader record calculations - uint32_t geomMask : 8; // geometry mask used for ray masking - - uint32_t geomIndex : 29; // the geometry index specifies the n'th geometry of the scene - /*Type*/ uint32_t type : 1; // enable/disable culling for procedurals and instances - /*GeometryFlags*/ uint32_t geomFlags : 2; // geometry flags of this geometry - }; - - /* - - The QuadLeaf structure stores a single quad. A quad is a triangle - pair with a shared edge. The first triangle has vertices v0,v1,v2, - while the second triangle has vertices v[j0],v[j1],v[j2], thus the - second triangle used local triangle indices. - - */ - - struct QuadLeaf - { - QuadLeaf() {} - - QuadLeaf (Vec3f v0, Vec3f v1, Vec3f v2, Vec3f v3, - uint8_t j0, uint8_t j1, uint8_t j2, - uint32_t shaderIndex, uint32_t geomIndex, uint32_t primIndex0, uint32_t primIndex1, - GeometryFlags gflags, uint32_t geomMask, bool last) - - : leafDesc(shaderIndex,geomIndex,gflags,geomMask), - primIndex0(primIndex0), - primIndex1Delta(primIndex1-primIndex0), pad1(0), - j0(j0),j1(j1),j2(j2),last(last),pad(0), - v0(v0), v1(v1), v2(v2), v3(v3) - { - /* There are some constraints on the primitive indices. The - * second primitive index always has to be the largest and the - * distance between them can be at most 0xFFFF as we use 16 bits - * to encode that difference. */ - assert(primIndex0 <= primIndex1 && primIndex1 - primIndex0 < 0xFFFF); - } - - /* returns the i'th vertex */ - __forceinline Vec3f vertex(size_t i) const { - assert(i < 4); return (&v0)[i]; - } - - /* Checks if the specified triange is the last inside a leaf - * list. */ - bool isLast(uint32_t i = 1) const - { - assert(i<2); - if (i == 0) return false; // the first triangle is never the last - else return last; // the last bit tags the second triangle to be last - } - - /* Checks if the second triangle exists. */ - bool valid2() const { - return !(j0 == 0 && j1 == 0 && j2 == 0); - } - - /* Calculates the number of stored triangles. */ - size_t size() const { - return 1 + valid2(); - } - - /* Calculates the effectively used bytes. If we store only one - * triangle we waste the storage of one vertex. */ - size_t usedBytes() const - { - if (valid2()) return sizeof(QuadLeaf); - else return sizeof(QuadLeaf)-sizeof(Vec3f); - } - - /* Calculates to delta to add to primIndex0 to get the primitive - * index of the i'th triangle. */ - uint32_t primIndexDelta(uint32_t i) const - { - assert(i<2); - return i*primIndex1Delta; - } - - /* Calculates the primitive index of the i'th triangle. */ - uint32_t primIndex(uint32_t i) const - { - assert(i<2); - return primIndex0 + primIndexDelta(i); - } - - /* Quad mode is a special mode where the uv's over the quad are - * defined over the entire range [0,1]x[0,1]. */ - bool quadMode() const { - return primIndex1Delta == 0; - } - - /* Calculates the bounding box of this leaf. */ - BBox3f bounds() const - { - BBox3f b = empty; - b.extend(v0); - b.extend(v1); - b.extend(v2); - if (valid2()) - b.extend(v3); - return b; - } - - /* output of quad leaf */ - void print(std::ostream& cout, uint32_t depth) const - { -#if !defined(__SYCL_DEVICE_ONLY__) - cout << tab(depth) << "QuadLeaf {" << std::endl; - cout << tab(depth) << " addr = " << this << std::endl; - cout << tab(depth) << " shaderIndex = " << leafDesc.shaderIndex << std::endl; - cout << tab(depth) << " geomMask = " << std::bitset<8>(leafDesc.geomMask) << std::endl; - cout << tab(depth) << " geomFlags = " << leafDesc.getGeomFlags() << std::endl; - cout << tab(depth) << " geomIndex = " << leafDesc.geomIndex << std::endl; - cout << tab(depth) << " triangle0 = { " << std::endl; - cout << tab(depth) << " primIndex = " << primIndex(0) << std::endl; - cout << tab(depth) << " v0 = " << v0 << std::endl; - cout << tab(depth) << " v1 = " << v1 << std::endl; - cout << tab(depth) << " v2 = " << v2 << std::endl; - cout << tab(depth) << " }" << std::endl; - if (valid2()) { - cout << tab(depth) << " triangle1 = { " << std::endl; - cout << tab(depth) << " primIndex = " << primIndex(1) << std::endl; - cout << tab(depth) << " v0 = " << vertex(j0) << std::endl; - cout << tab(depth) << " v1 = " << vertex(j1) << std::endl; - cout << tab(depth) << " v2 = " << vertex(j2) << std::endl; - cout << tab(depth) << " }" << std::endl; - } - cout << tab(depth) << "}"; -#endif - } - - /* output operator for QuadLeaf */ - friend inline std::ostream& operator<<(std::ostream& cout, const QuadLeaf& leaf) { - leaf.print(cout,0); return cout; - } - - public: - PrimLeafDesc leafDesc; // the leaf header - - uint32_t primIndex0; // primitive index of first triangle - struct { - uint32_t primIndex1Delta : 5; // delta encoded primitive index of second triangle - uint32_t pad1 : 11; // MBZ - uint32_t j0 : 2; // specifies first vertex of second triangle - uint32_t j1 : 2; // specified second vertex of second triangle - uint32_t j2 : 2; // specified third vertex of second triangle - uint32_t last : 1; // true if the second triangle is the last triangle in a leaf list - uint32_t pad : 9; // unused bits - }; - - Vec3f v0; // first vertex of first triangle - Vec3f v1; // second vertex of first triangle - Vec3f v2; // third vertex of first triangle - Vec3f v3; // forth vertex used for second triangle - }; - - static_assert(sizeof(QuadLeaf) == 64, "QuadLeaf must be 64 bytes large"); - - /* - - Internal instance flags definition. - - */ - - struct InstanceFlags - { - enum Flags : uint8_t - { - NONE = 0x0, - TRIANGLE_CULL_DISABLE = 0x1, // disables culling of front and back facing triangles through ray flags - TRIANGLE_FRONT_COUNTERCLOCKWISE = 0x2, // for mirroring transformations the instance can switch front and backface of triangles - FORCE_OPAQUE = 0x4, // forces all primitives inside this instance to be opaque - FORCE_NON_OPAQUE = 0x8 // forces all primitives inside this instane to be non-opaque - }; - - InstanceFlags() {} - - InstanceFlags(Flags rflags) - : flags(rflags) {} - - InstanceFlags(uint8_t rflags) - : flags((Flags)rflags) {} - - operator Flags () const { - return flags; - } - - /* output operator for InstanceFlags */ - friend inline std::ostream& operator<<(std::ostream& cout, const InstanceFlags& iflags) - { -#if !defined(__SYCL_DEVICE_ONLY__) - if (iflags == InstanceFlags::NONE) return cout << "NONE"; - if (iflags.triangle_cull_disable) cout << "TRIANGLE_CULL_DISABLE "; - if (iflags.triangle_front_counterclockwise) cout << "TRIANGLE_FRONT_COUNTERCLOCKWISE "; - if (iflags.force_opaque) cout << "FORCE_OPAQUE "; - if (iflags.force_non_opaque) cout << "FORCE_NON_OPAQUE "; -#endif - return cout; - } - - public: - union - { - Flags flags; - struct - { - bool triangle_cull_disable : 1; - bool triangle_front_counterclockwise : 1; - bool force_opaque : 1; - bool force_non_opaque : 1; - }; - }; - }; - - inline InstanceFlags::Flags operator| (InstanceFlags::Flags a,InstanceFlags::Flags b) { - return (InstanceFlags::Flags)(int(a) | int(b)); - } - - /* - - The instance leaf represent an instance. It essentially stores - transformation matrices (local to world as well as world to - local) of the instance as well as a pointer to the start node - of some BVH. - - The instance leaf consists of two parts, part0 (first 64 bytes) - and part1 (second 64 bytes). Part0 will only get accessed by - hardware and stores the world to local transformation as well as - the BVH node to start traversal. Part1 stores additional data - that is only read by the shader, e.g. it stores the local to - world transformation of the instance. - - The layout of the first part of the InstanceLeaf is compatible - with a ProceduralLeaf, thus we can use the same layout for - software instancing if we want. - - */ - - struct InstanceLeaf - { - InstanceLeaf() {} - - InstanceLeaf (AffineSpace3f obj2world, uint64_t startNodePtr, uint32_t instID, uint32_t instUserID, uint8_t instMask) - { - part0.shaderIndex = 0; //InstShaderRecordID; - part0.geomMask = instMask; - - part0.instanceContributionToHitGroupIndex = 0; //desc.InstanceContributionToHitGroupIndex; - part0.pad0 = 0; - part0.type = PrimLeafDesc::TYPE_OPACITY_CULLING_ENABLED; - part0.geomFlags = (uint32_t) GeometryFlags::NONE; - - part0.startNodePtr = startNodePtr; - assert((startNodePtr >> 48) == 0); - part0.instFlags = (InstanceFlags) 0; - part0.pad1 = 0; - - part1.instanceID = instUserID; - part1.instanceIndex = instID; - part1.bvhPtr = (uint64_t) 0; - part1.pad = 0; - - part1.obj2world_vx = obj2world.l.vx; - part1.obj2world_vy = obj2world.l.vy; - part1.obj2world_vz = obj2world.l.vz; - part0.obj2world_p = obj2world.p; - - const AffineSpace3f world2obj = rcp(obj2world); - part0.world2obj_vx = world2obj.l.vx; - part0.world2obj_vy = world2obj.l.vy; - part0.world2obj_vz = world2obj.l.vz; - part1.world2obj_p = world2obj.p; - } - - /* Returns the address of the start node pointer. We need this - * address to calculate relocation tables when dumping the BVH to - * disk. */ - const uint64_t startNodePtrAddr() const { - return (uint64_t)((char*)&part0 + 8); - } - - /* Returns the address of the BVH that contains the start node. */ - const uint64_t bvhPtrAddr() const { - return (uint64_t)&part1; - } - - /* returns the world to object space transformation matrix. */ - const AffineSpace3f World2Obj() const { - return AffineSpace3f(part0.world2obj_vx,part0.world2obj_vy,part0.world2obj_vz,part1.world2obj_p); - } - - /* returns the object to world space transformation matrix. */ - const AffineSpace3f Obj2World() const { - return AffineSpace3f(part1.obj2world_vx,part1.obj2world_vy,part1.obj2world_vz,part0.obj2world_p); - } - - /* output operator for instance leaf */ - void print (std::ostream& cout, uint32_t depth) const - { -#if !defined(__SYCL_DEVICE_ONLY__) - if (!part0.type) cout << tab(depth) << "InstanceLeaf {" << std::endl; - else cout << tab(depth) << "ProceduralInstanceLeaf {" << std::endl; - - cout << tab(depth) << " addr = " << this << std::endl; - cout << tab(depth) << " shaderIndex = " << part0.shaderIndex << std::endl; - cout << tab(depth) << " geomMask = " << std::bitset<8>(part0.geomMask) << std::endl; - cout << tab(depth) << " geomIndex = " << part1.instanceIndex << std::endl; - cout << tab(depth) << " instanceID = " << part1.instanceID << std::endl; - cout << tab(depth) << " instFlags = " << InstanceFlags(part0.instFlags) << std::endl; - cout << tab(depth) << " startNodePtr = " << (void*)(size_t)part0.startNodePtr << std::endl; - cout << tab(depth) << " obj2world.vx = " << part1.obj2world_vx << std::endl; - cout << tab(depth) << " obj2world.vy = " << part1.obj2world_vy << std::endl; - cout << tab(depth) << " obj2world.vz = " << part1.obj2world_vz << std::endl; - cout << tab(depth) << " obj2world.p = " << part0.obj2world_p << std::endl; - cout << tab(depth) << " world2obj.vx = " << part0.world2obj_vx << std::endl; - cout << tab(depth) << " world2obj.vy = " << part0.world2obj_vy << std::endl; - cout << tab(depth) << " world2obj.vz = " << part0.world2obj_vz << std::endl; - cout << tab(depth) << " world2obj.p = " << part1.world2obj_p << std::endl; - cout << tab(depth) << " instanceContributionToHitGroupIndex = " << part0.instanceContributionToHitGroupIndex << std::endl; - cout << tab(depth) << "}"; -#endif - } - - /* output operator for InstanceLeaf */ - friend inline std::ostream& operator<<(std::ostream& cout, const InstanceLeaf& leaf) { - leaf.print(cout,0); return cout; - } - - /* first 64 bytes accessed during traversal by hardware */ - struct Part0 - { - /* Checks if opaque culling is enabled. */ - bool opaqueCullingEnabled() const { - return type == PrimLeafDesc::TYPE_OPACITY_CULLING_ENABLED; - } - - public: - uint32_t shaderIndex : 24; // shader index used to calculate instancing shader in case of software instancing - uint32_t geomMask : 8; // geometry mask used for ray masking - - uint32_t instanceContributionToHitGroupIndex : 24; - uint32_t pad0 : 5; - - /* the following two entries are only used for procedural instances */ - /*PrimLeafDesc::Type*/ uint32_t type : 1; // enables/disables opaque culling - /*GeometryFlags*/ uint32_t geomFlags : 2; // unused for instances - - uint64_t startNodePtr : 48; // start node where to continue traversal of the instanced object - uint64_t instFlags : 8; // flags for the instance (see InstanceFlags) - uint64_t pad1 : 8; // unused bits - - Vec3f world2obj_vx; // 1st column of Worl2Obj transform - Vec3f world2obj_vy; // 2nd column of Worl2Obj transform - Vec3f world2obj_vz; // 3rd column of Worl2Obj transform - Vec3f obj2world_p; // translation of Obj2World transform (on purpose in first 64 bytes) - } part0; - - /* second 64 bytes accessed during shading */ - struct Part1 - { - uint64_t bvhPtr : 48; // pointer to BVH where start node belongs too - uint64_t pad : 16; // unused bits - - uint32_t instanceID; // user defined value per DXR spec - uint32_t instanceIndex; // geometry index of the instance (n'th geometry in scene) - - Vec3f obj2world_vx; // 1st column of Obj2World transform - Vec3f obj2world_vy; // 2nd column of Obj2World transform - Vec3f obj2world_vz; // 3rd column of Obj2World transform - Vec3f world2obj_p; // translation of World2Obj transform - } part1; - }; - - static_assert(sizeof(InstanceLeaf) == 128, "InstanceLeaf must be 128 bytes large"); - - - /* - Leaf type for procedural geometry. This leaf only contains the - leaf header (which identifices the geometry) and a list of - primitive indices. - - The BVH will typically reference only some of the primitives - stores inside this leaf. The range is specified by a start - primitive and the last primitive is tagged with a bit. - - */ - - struct ProceduralLeaf - { - static const uint32_t N = 13; - - /* Creates an empty procedural leaf. */ - ProceduralLeaf () - : leafDesc(PrimLeafDesc::MAX_SHADER_INDEX,PrimLeafDesc::MAX_GEOM_INDEX,GeometryFlags::NONE,0), numPrimitives(0), pad(0), last(0) - { - for (auto& id : _primIndex) id = 0xFFFFFFFF; - } - - /* Creates a procedural leaf with one primitive. More primitives - * of the same geometry can get added later using the add - * function. */ - - ProceduralLeaf (PrimLeafDesc leafDesc, uint32_t primIndex, bool last) - : leafDesc(leafDesc), numPrimitives(1), pad(0), last(last ? 0xFFFFFFFF : 0xFFFFFFFE) - { - for (auto& id : _primIndex) id = 0xFFFFFFFF; - _primIndex[0] = primIndex; - } - - /* returns the number of primitives stored inside this leaf */ - uint32_t size() const { - return numPrimitives; - } - - /* Calculates the effectively used bytes. */ - size_t usedBytes() const - { - /*if (leafDesc.isProceduralInstance()) - return sizeof(InstanceLeaf); - else*/ - return sizeof(PrimLeafDesc)+4+4*numPrimitives; - } - - /* if possible adds a new primitive to this leaf */ - bool add(PrimLeafDesc leafDesc_in, uint32_t primIndex_in, bool last_in) - { - assert(primIndex_in != 0xFFFFFFFF); - if (numPrimitives >= N) return false; - if (!numPrimitives) leafDesc = leafDesc_in; - if (leafDesc != leafDesc_in) return false; - _primIndex[numPrimitives] = primIndex_in; - if (last_in) last |= 1 << numPrimitives; - else last &= ~(1 << numPrimitives); - numPrimitives++; - return true; - } - - /* returns the primitive index of the i'th primitive */ - uint32_t primIndex(uint32_t i) const - { - assert(i < N); - return _primIndex[i]; - } - - /* checks if the i'th primitive is the last in a leaf list */ - bool isLast(uint32_t i) const { - if (i >= N) return true; // just to make some verify tests happy - else return (last >> i) & 1; - } - - /* output operator for procedural leaf */ - void print (std::ostream& cout, uint32_t i, uint32_t depth) const - { -#if !defined(__SYCL_DEVICE_ONLY__) - cout << tab(depth) << "ProceduralLeaf {" << std::endl; - cout << tab(depth) << " addr = " << this << std::endl; - cout << tab(depth) << " slot = " << i << std::endl; - if (i < N) { - cout << tab(depth) << " shaderIndex = " << leafDesc.shaderIndex << std::endl; - cout << tab(depth) << " geomMask = " << std::bitset<8>(leafDesc.geomMask) << std::endl; - cout << tab(depth) << " geomFlags = " << leafDesc.getGeomFlags() << std::endl; - cout << tab(depth) << " geomIndex = " << leafDesc.geomIndex << std::endl; - cout << tab(depth) << " primIndex = " << primIndex(i) << std::endl; - } else { - cout << tab(depth) << " INVALID" << std::endl; - } - cout << tab(depth) << "}"; -#endif - } - - public: - PrimLeafDesc leafDesc; // leaf header identifying the geometry - uint32_t numPrimitives : 4; // number of stored primitives - uint32_t pad : 32-4-N; - uint32_t last : N; // bit vector with a last bit per primitive - uint32_t _primIndex[N]; // primitive indices of all primitives stored inside the leaf - }; - - static_assert(sizeof(ProceduralLeaf) == 64, "ProceduralLeaf must be 64 bytes large"); -} diff --git a/kernels/rthwif/rtbuild/node_type.h b/kernels/rthwif/rtbuild/node_type.h deleted file mode 100644 index 3a18e7c702..0000000000 --- a/kernels/rthwif/rtbuild/node_type.h +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright 2009-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include -#include - -namespace embree -{ - /* The type of a node. */ - enum NodeType : uint8_t - { - NODE_TYPE_MIXED = 0x0, // identifies a mixed internal node where each child can have a different type - NODE_TYPE_INTERNAL = 0x0, // internal BVH node with 6 children - NODE_TYPE_INSTANCE = 0x1, // instance leaf - NODE_TYPE_PROCEDURAL = 0x3, // procedural leaf - NODE_TYPE_QUAD = 0x4, // quad leaf - NODE_TYPE_INVALID = 0x7 // indicates invalid node - }; - - /* output operator for NodeType */ - inline std::ostream& operator<<(std::ostream& _cout, const NodeType& _type) - { -#if !defined(__RTRT_GSIM) - switch (_type) - { - case NODE_TYPE_INTERNAL: _cout << "INTERNAL"; break; - case NODE_TYPE_INSTANCE: _cout << "INSTANCE"; break; - case NODE_TYPE_PROCEDURAL: _cout << "PROCEDURAL"; break; - case NODE_TYPE_QUAD: _cout << "QUAD"; break; - case NODE_TYPE_INVALID: _cout << "INVALID"; break; - default: _cout << "INVALID NODE TYPE"; break; - } -#endif - return _cout; - }; - - /* - Sub-type definition for each NodeType - */ - - enum SubType : uint8_t - { - SUB_TYPE_NONE = 0, - - /* sub-type for NODE_TYPE_INTERNAL */ - SUB_TYPE_INTERNAL6 = 0x00, // Xe+: internal node with 6 children - - /* Sub-type for NODE_TYPE_QUAD */ - SUB_TYPE_QUAD = 0, // Xe+: standard quad leaf (64 bytes) - - /* Sub-type for NODE_TYPE_PROCEDURAL */ - SUB_TYPE_PROCEDURAL = 0, // Xe+: standard procedural leaf - }; -} diff --git a/kernels/rthwif/rtbuild/qbvh6.cpp b/kernels/rthwif/rtbuild/qbvh6.cpp deleted file mode 100644 index 19c438dfb2..0000000000 --- a/kernels/rthwif/rtbuild/qbvh6.cpp +++ /dev/null @@ -1,265 +0,0 @@ -// Copyright 2009-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "qbvh6.h" - -namespace embree -{ - template - void computeInternalNodeStatistics(BVHStatistics& stats, QBVH6::Node node, const BBox1f time_range, const float node_bounds_area, const float root_bounds_area) - { - InternalNode* inner = node.innerNode(); - - size_t size = 0; - for (uint32_t i = 0; i < InternalNode::NUM_CHILDREN; i++) - { - if (inner->valid(i)) - { - size++; - computeStatistics(stats, inner->child(i), time_range, area(inner->bounds(i)), root_bounds_area, InternalNode::NUM_CHILDREN); - } - } - - /* update BVH statistics */ - stats.internalNode.numNodes++; - stats.internalNode.numChildrenUsed += size; - stats.internalNode.numChildrenTotal += InternalNode::NUM_CHILDREN; - stats.internalNode.nodeSAH += time_range.size() * node_bounds_area / root_bounds_area; - stats.internalNode.numBytes += sizeof(InternalNode); - } - - void computeStatistics(BVHStatistics& stats, QBVH6::Node node, const BBox1f time_range, const float node_bounds_area, const float root_bounds_area, uint32_t numChildren) - { - switch (node.type) - { - case NODE_TYPE_INSTANCE: - { - stats.instanceLeaf.numLeaves++; - stats.instanceLeaf.numPrimsUsed++; - stats.instanceLeaf.numPrimsTotal++; - stats.instanceLeaf.leafSAH += time_range.size() * node_bounds_area / root_bounds_area; - stats.instanceLeaf.numBytesUsed += sizeof(InstanceLeaf); - stats.instanceLeaf.numBytesTotal += sizeof(InstanceLeaf); - break; - } - case NODE_TYPE_QUAD: - { - bool last = false; - stats.quadLeaf.numLeaves++; - - do - { - QuadLeaf* quad = node.leafNodeQuad(); - node.node += sizeof(QuadLeaf); - last = quad->isLast(); - - stats.quadLeaf.numPrimsUsed += quad->size(); - stats.quadLeaf.numPrimsTotal += 2; - stats.quadLeaf.numBytesUsed += quad->usedBytes(); - stats.quadLeaf.numBytesTotal += sizeof(QuadLeaf); - stats.quadLeaf.leafSAH += quad->size() * time_range.size() * node_bounds_area / root_bounds_area; - - } while (!last); - - break; - } - case NODE_TYPE_PROCEDURAL: - { - /*if (node.leafNodeProcedural()->leafDesc.isProceduralInstance()) // FIXME: for some reason we always to into this case!? - { - stats.proceduralLeaf.numLeaves++; - stats.proceduralLeaf.numPrimsUsed += 1; - stats.proceduralLeaf.numPrimsTotal += 1; - stats.proceduralLeaf.leafSAH += time_range.size() * node_bounds_area / root_bounds_area; - stats.proceduralLeaf.numBytesUsed += sizeof(InstanceLeaf); - stats.proceduralLeaf.numBytesTotal += sizeof(InstanceLeaf); - } - else*/ - { - bool last = false; - uint32_t currPrim = node.cur_prim; - stats.proceduralLeaf.numLeaves++; - - do - { - ProceduralLeaf* leaf = node.leafNodeProcedural(); - last = leaf->isLast(currPrim); - - if (currPrim == 0) { - stats.proceduralLeaf.numBlocks++; - stats.proceduralLeaf.numBytesUsed += leaf->usedBytes(); - stats.proceduralLeaf.numBytesTotal += sizeof(ProceduralLeaf); - } - - uint32_t primsInBlock = leaf->size(); - - stats.proceduralLeaf.numPrimsUsed++; - stats.proceduralLeaf.numPrimsTotal++; - stats.proceduralLeaf.leafSAH += time_range.size() * node_bounds_area / root_bounds_area; - - if (++currPrim >= primsInBlock) { - currPrim = 0; - node.node += sizeof(ProceduralLeaf); - } - - } while (!last); - } - break; - } - case NODE_TYPE_INTERNAL: - { - computeInternalNodeStatistics(stats, node, time_range, node_bounds_area, root_bounds_area); - break; - } - default: - assert(false); - } - } - - BVHStatistics QBVH6::computeStatistics() const - { - BVHStatistics stats; - if (empty()) return stats; - embree::computeStatistics(stats,root(),BBox1f(0,1),area(bounds),area(bounds),6); - return stats; - } - - template - void QBVH6::printInternalNodeStatistics(std::ostream& cout, QBVH6::Node node, uint32_t depth, uint32_t numChildren) - { - QInternalNode* inner = node.innerNode(); - inner->print(cout, depth, false); - std::cout << std::endl; - - for (uint32_t i = 0; i < QInternalNode::NUM_CHILDREN; i++) - { - if (inner->valid(i)) - print(cout, inner->child(i), depth + 1, QInternalNode::NUM_CHILDREN); - } - - cout << tab(depth) << "}" << std::endl; - } - - void QBVH6::print( std::ostream& cout, QBVH6::Node node, uint32_t depth, uint32_t numChildren) - { - switch (node.type) - { - case NODE_TYPE_INSTANCE: { - node.leafNodeInstance()->print(cout,depth); - cout << std::endl; - break; - } - case NODE_TYPE_QUAD: - { - std::cout << tab(depth) << "List {" << std::endl; - - bool last = false; - - do - { - QuadLeaf* quad = node.leafNodeQuad(); - node.node += sizeof(QuadLeaf); - last = quad->isLast(); - - quad->print(cout,depth+1); - std::cout << std::endl; - - } while (!last); - - std::cout << tab(depth) << "}" << std::endl; - break; - } - case NODE_TYPE_PROCEDURAL: - { - /*if (!node.leafNodeProcedural()->leafDesc.opaqueCullingEnabled()) - { - InstanceLeaf* leaf = (InstanceLeaf*) node.node; - leaf->print(cout,depth+1); - std::cout << std::endl; - } - else*/ - { - std::cout << tab(depth) << "List {" << std::endl; - - bool last = false; - uint32_t currPrim = node.cur_prim; - - do - { - ProceduralLeaf* leaf = node.leafNodeProcedural(); - last = leaf->isLast(currPrim); - - uint32_t primsInBlock = leaf->size(); - - leaf->print(cout,currPrim,depth+1); - std::cout << std::endl; - - if (++currPrim >= primsInBlock) { - currPrim = 0; - node.node += sizeof(ProceduralLeaf); - } - - } while (!last); - - std::cout << tab(depth) << "}" << std::endl; - } - break; - } - case NODE_TYPE_INTERNAL: - { - printInternalNodeStatistics(cout, node, depth, numChildren); - break; - } - default: - std::cout << "{ INVALID_NODE }" << std::endl; - //assert(false); - } - } - - unsigned* getBackPointersData(const QBVH6* base) { // FIXME: should be member function - return (unsigned*)(((const char*)base) + 64 * base->backPointerDataStart); - } - - unsigned getNumBackpointers(const QBVH6* base) { // FIXME: should be member function - return ((base->backPointerDataEnd - base->backPointerDataStart) * 64) / sizeof(unsigned); - } - - uint64_t getBackpointerChildOffset(const QBVH6* base, unsigned idx) { // FIXME: should be member function - return 64 * uint64_t(base->nodeDataStart + idx); - } - - uint64_t getParentFromBackpointerOffset(const QBVH6* base, unsigned idx) { // FIXME: should be member function - return 64 * uint64_t(base->nodeDataStart + (getBackPointersData(base)[idx] >> 6)); - } - - void QBVH6::print ( std::ostream& cout ) const - { - - cout << "QBVH @ "<< this <<" header: {\n"; - cout << " rootNodeOffset = " << rootNodeOffset << std::endl; - cout << " bounds = " << bounds << std::endl; - cout << " nodeDataStart = " << nodeDataStart << std::endl; - cout << " nodeDataCur = " << nodeDataCur << std::endl; - cout << " leafDataStart = " << leafDataCur << std::endl; - cout << " leafDataCur = " << leafDataCur << std::endl; - cout << " proceduralDataStart = " << proceduralDataStart << std::endl; - cout << " proceduralDataCur = " << proceduralDataCur << std::endl; - cout << " backPointerDataStart = " << backPointerDataStart << std::endl; - cout << " backPointerDataEnd = " << backPointerDataEnd << std::endl; - cout << " numPrims = " << numPrims << std::endl; - cout << "}" << std::endl; - - if (empty()) return; - - print(cout,root(),0,6); - - if (hasBackPointers()) - { - cout << "backpointers: {\n"; - for (unsigned bp = 0; bp < getNumBackpointers(this); ++bp) { - cout << " node @ offset " << (void*)getBackpointerChildOffset(this, bp) << " parent = " << (void*)getParentFromBackpointerOffset(this, bp) << ", num children = " << ((getBackPointersData(this)[bp] >> 3) & 0x7) << "\n"; - } - cout << "}\n"; - } - } -} diff --git a/kernels/rthwif/rtbuild/qbvh6.h b/kernels/rthwif/rtbuild/qbvh6.h deleted file mode 100644 index 603da55c45..0000000000 --- a/kernels/rthwif/rtbuild/qbvh6.h +++ /dev/null @@ -1,230 +0,0 @@ -// Copyright 2009-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "qnode.h" -#include "statistics.h" -#include "rtbuild.h" - -namespace embree -{ - /* - - The QBVH6 structure defines the bounding volume hierarchy (BVH) - that is used by the hardware. It is a BVH with 6-wide branching - factor, and quantized bounding boxes. At the leaf level quads - (QuadLeaf type), procedural geometries (ProceduralLeaf - type), and instances (InstanceLeaf type) can get referenced. - - */ - - inline constexpr size_t roundOffsetTo128(size_t offset) { - return 2 * ((offset + 127) / 128); - } - - struct QBVH6 - { - typedef NodeRef Node; - typedef InternalNode InternalNode6; - - static constexpr uint64_t rootNodeOffset = 128; - - static_assert(sizeof(InternalNode6) == 64, "InternalNode6 must be 64 bytes large"); - - /* structure used to initialize the memory allocator inside the BVH */ - struct SizeEstimate - { - SizeEstimate () - : nodeBytes(0), leafBytes(0), proceduralBytes(0) {} - - SizeEstimate (size_t nodeBytes, size_t leafBytes, size_t proceduralBytes) - : nodeBytes(nodeBytes), leafBytes(leafBytes), proceduralBytes(proceduralBytes) {} - - size_t bytes() const { - return sizeof(QBVH6) + nodeBytes + leafBytes + proceduralBytes; - } - - friend bool operator<= (SizeEstimate a, SizeEstimate b) - { - if (a.nodeBytes > b.nodeBytes) return false; - if (a.leafBytes > b.leafBytes) return false; - if (a.proceduralBytes > b.proceduralBytes) return false; - return true; - } - - friend SizeEstimate operator+ (const SizeEstimate& a, const SizeEstimate& b) - { - return SizeEstimate(a.nodeBytes + b.nodeBytes, - a.leafBytes + b.leafBytes, - a.proceduralBytes + b.proceduralBytes); - } - - /* output operator */ - friend inline std::ostream& operator<<(std::ostream& cout, const SizeEstimate& estimate) - { - cout << "SizeEstimate {" << std::endl; - cout << " nodeBytes = " << estimate.nodeBytes << ", " << std::endl; - cout << " leafBytes = " << estimate.leafBytes << ", " << std::endl; - cout << " proceduralBytes = " << estimate.proceduralBytes << ", " << std::endl; - return cout << "}"; - } - - public: - size_t nodeBytes; // bytes required to store internal nodes - size_t leafBytes; // bytes required to store leaf nodes - size_t proceduralBytes; // bytes required to store procedural leaf nodes - }; - - /* Initializes a QBVH6 node with its provided size. The memory for - * the QBVH6 structure is overallocated and the allocation size is - * provided to the constructor, such that the allocator of the BVH - * can get initialized properly. */ - - QBVH6(SizeEstimate size) - : nodeDataStart((uint32_t)roundOffsetTo128(sizeof(QBVH6))), nodeDataCur(nodeDataStart), - leafDataStart(nodeDataCur + (uint32_t)(size.nodeBytes / 64)), leafDataCur(leafDataStart), - proceduralDataStart(leafDataCur + (uint32_t)(size.leafBytes / 64)), proceduralDataCur(proceduralDataStart), - backPointerDataStart(proceduralDataCur + (uint32_t)(size.proceduralBytes/64)), backPointerDataEnd(backPointerDataStart) - { - assert(size.nodeBytes % 64 == 0); - assert(size.leafBytes % 64 == 0); - assert(size.proceduralBytes % 64 == 0); - assert(size.bytes() <= (64LL << 32)); - - bounds = embree::empty; - } - - /* Returns the root node of the BVH */ - Node root() const { - return Node(rootNodeOffset,(uint64_t)this); - } - - /* sets root not offset to point to this specified node */ - void setRootNodeOffset(Node node) { - assert(node.cur_prim == 0); - uint64_t MAYBE_UNUSED rootNodeOffset1 = (uint64_t)node - (uint64_t)this; - assert(rootNodeOffset == rootNodeOffset1); - } - - /* check if BVH is empty */ - bool empty() const { - return root().type == NODE_TYPE_INVALID; - } - - /* pretty printing */ - template - static void printInternalNodeStatistics(std::ostream& cout, QBVH6::Node node, uint32_t depth, uint32_t numChildren = 6); - static void print(std::ostream& cout, QBVH6::Node node, uint32_t depth, uint32_t numChildren=6); - void print(std::ostream& cout = std::cout) const; - - /* output operator */ - friend inline std::ostream& operator<<(std::ostream& cout, const QBVH6& qbvh) { - qbvh.print(cout); return cout; - } - - /* calculates BVH statistics */ - BVHStatistics computeStatistics() const; - - /* - This section implements a simple allocator for BVH data. The - BVH data is separated into two section, a section where nodes - and leaves in mixed mode are allocated, and a section where - only leaves are allocate in fat-leaf mode. - - */ - public: - - /* allocate data in the node memory section */ - char* allocNode(size_t bytes) - { - assert(bytes % 64 == 0); - uint32_t blocks = (uint32_t)bytes / 64; - assert(nodeDataCur + blocks <= leafDataStart); - char* ptr = (char*)this + 64 * (size_t)nodeDataCur; - nodeDataCur += blocks; - return ptr; - } - - /* allocate memory in the leaf memory section */ - char* allocLeaf(size_t bytes) - { - assert(bytes % 64 == 0); - uint32_t blocks = (uint32_t)bytes / 64; - assert(leafDataCur + blocks <= proceduralDataStart); - char* ptr = (char*)this + 64 * (size_t)leafDataCur; - leafDataCur += blocks; - return ptr; - } - - /* allocate memory in procedural leaf memory section */ - char* allocProceduralLeaf(size_t bytes) - { - assert(bytes % 64 == 0); - uint32_t blocks = (uint32_t)bytes / 64; - assert(proceduralDataCur + blocks <= backPointerDataStart); - char* ptr = (char*)this + 64 * (size_t)proceduralDataCur; - proceduralDataCur += blocks; - return ptr; - } - - /* returns pointer to node address */ - char* nodePtr(size_t ofs) { - return (char*)this + 64 * size_t(nodeDataStart) + ofs; - } - /* returns pointer to address for next leaf allocation */ - char* leafPtr() { - return (char*)this + 64 * (size_t)leafDataCur; - } - - /* returns the total number of bytes of the BVH */ - size_t getTotalBytes() const { - return 64 * (size_t)backPointerDataEnd; - } - - /* returns number of bytes available for node allocations */ - size_t getFreeNodeBytes() const { - return 64 * (size_t)(leafDataStart - nodeDataCur); - } - - /* returns number of bytes available for leaf allocations */ - size_t getFreeLeafBytes() const { - return 64 * (size_t)(proceduralDataStart - leafDataCur); - } - - /* returns number of bytes available for procedural leaf allocations */ - size_t getFreeProceduralLeafBytes() const { - return 64 * (size_t)(backPointerDataStart - proceduralDataCur); - } - - /* returns the bytes used by allocations */ - size_t getUsedBytes() const { - return getTotalBytes() - getFreeNodeBytes() - getFreeLeafBytes() - getFreeProceduralLeafBytes(); - } - - bool hasBackPointers() const { - return backPointerDataStart < backPointerDataEnd; - } - - public: - ze_raytracing_accel_format_internal_t rtas_format = ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_1; - uint32_t reserved1; - BBox3f bounds; // bounding box of the BVH - - uint32_t nodeDataStart; // first 64 byte block of node data - uint32_t nodeDataCur; // next free 64 byte block for node allocations - uint32_t leafDataStart; // first 64 byte block of leaf data - uint32_t leafDataCur; // next free 64 byte block for leaf allocations - uint32_t proceduralDataStart; // first 64 byte block for procedural leaf data - uint32_t proceduralDataCur; // next free 64 byte block for procedural leaf allocations - uint32_t backPointerDataStart; // first 64 byte block for back pointers - uint32_t backPointerDataEnd; // end of back pointer array - uint32_t numTimeSegments = 1; - uint32_t numPrims = 0; // number of primitives in this BVH - uint32_t reserved[12]; - uint64_t dispatchGlobalsPtr; - }; - - static_assert(sizeof(QBVH6) == 128, "QBVH6 must be 128 bytes large"); -} - diff --git a/kernels/rthwif/rtbuild/qbvh6_builder_sah.h b/kernels/rthwif/rtbuild/qbvh6_builder_sah.h deleted file mode 100644 index f981ecf6a8..0000000000 --- a/kernels/rthwif/rtbuild/qbvh6_builder_sah.h +++ /dev/null @@ -1,1340 +0,0 @@ -// Copyright 2009-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "qbvh6.h" -#include "statistics.h" -#include "quadifier.h" -#include "rtbuild.h" -#include - -#if defined(ZE_RAYTRACING) -#include "builders/priminfo.h" -#include "builders/primrefgen_presplit.h" -#include "builders/heuristic_binning_array_aligned.h" -#include "algorithms/parallel_for_for_prefix_sum.h" -#else -#include "../../builders/priminfo.h" -#include "../../builders/primrefgen_presplit.h" -#include "../../builders/heuristic_binning_array_aligned.h" -#include "../../../common/algorithms/parallel_for_for_prefix_sum.h" -#endif - -namespace embree -{ - namespace isa - { - struct QBVH6BuilderSAH - { - static const size_t BVH_WIDTH = QBVH6::InternalNode6::NUM_CHILDREN; - static const size_t MIN_LARGE_LEAF_LEVELS = 8; //!< create balanced tree of we are that many levels before the maximum tree depth - - /* the type of primitive that is referenced */ - enum Type { TRIANGLE=0, QUAD=1, PROCEDURAL=2, INSTANCE=3, UNKNOWN=4, NUM_TYPES=5 }; - - /* check when we use spatial splits */ - static bool useSpatialSplits(ze_rtas_builder_build_quality_hint_exp_t build_quality, ze_rtas_builder_build_op_exp_flags_t build_flags) { - return build_quality == ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXP_HIGH && !(build_flags & ZE_RTAS_BUILDER_BUILD_OP_EXP_FLAG_NO_DUPLICATE_ANYHIT_INVOCATION); - } - - /* BVH allocator */ - struct Allocator - { - Allocator() {} - - void init(char* data_in, size_t bytes_in) { - ptr = data_in; - end = bytes_in; - cur.store(0); - } - - size_t bytesAllocated() const { - return cur.load(); - } - - __forceinline void* malloc(size_t bytes, size_t align = 16) - { - assert(align <= 128); //ZE_RAYTRACING_ACCELERATION_STRUCTURE_ALIGNMENT_EXT - if (unlikely(cur.load() >= end)) return nullptr; - const size_t extra = (align - cur) & (align-1); - const size_t bytes_align = bytes + extra; - const size_t cur_old = cur.fetch_add(bytes_align); - const size_t cur_new = cur_old + bytes_align; - if (unlikely(cur_new >= end)) return nullptr; - return &ptr[cur_old + extra]; - } - - private: - char* ptr; // data buffer pointer - size_t end; // size of data buffer in bytes - __aligned(64) std::atomic cur; // current pointer to allocate next data block from - }; - - /* triangle data for leaf creation */ - struct Triangle - { - Triangle () - : gmask(0) {} - - Triangle (uint32_t i0, uint32_t i1, uint32_t i2, - Vec3f p0, Vec3f p1, Vec3f p2, - GeometryFlags gflags, - uint8_t gmask) - : i0(i0), i1(i1), i2(i2), p0(p0), p1(p1), p2(p2), gflags(gflags), gmask(gmask) {} - - __forceinline bool valid() const { - return gmask != 0; - } - - uint32_t i0,i1,i2; - Vec3f p0,p1,p2; - GeometryFlags gflags; - uint8_t gmask; - }; - - /* quad data for leaf creation */ - struct Quad - { - Quad (Vec3f p0, Vec3f p1, Vec3f p2, Vec3f p3, GeometryFlags gflags, uint8_t gmask) - : p0(p0), p1(p1), p2(p2), p3(p3), gflags(gflags), gmask(gmask) {} - - Vec3f p0,p1,p2,p3; - GeometryFlags gflags; - uint8_t gmask; - }; - - /* procedural data for leaf creation */ - struct Procedural - { - Procedural (uint8_t gmask) - : gmask(gmask) {} - - PrimLeafDesc desc(uint32_t geomID) const { - return PrimLeafDesc(0,geomID,GeometryFlags::NONE,gmask,PrimLeafDesc::TYPE_OPACITY_CULLING_ENABLED); - } - - uint8_t gmask; - }; - - /* instance data for leaf creation */ - struct Instance - { - Instance (AffineSpace3f local2world, void* accel, uint8_t imask, uint32_t instanceUserID) - : local2world(local2world), accel(accel), imask(imask), instanceUserID(instanceUserID) {} - - AffineSpace3f local2world; - void* accel; - uint8_t imask; - uint32_t instanceUserID; - }; - - struct Stats - { - size_t numTriangles = 0; - size_t numQuads = 0; - size_t numProcedurals = 0; - size_t numInstances = 0; - - /* assume some reasonable quadification rate */ - void estimate_quadification() - { - numQuads += (numTriangles+1)/2 + numTriangles/8; - numTriangles = 0; - } - - void estimate_presplits( double factor ) - { - numTriangles = max(numTriangles, size_t(numTriangles*factor)); - numQuads = max(numQuads , size_t(numQuads*factor)); - numInstances = max(numInstances, size_t(numInstances*factor)); - } - - size_t size() { - return numTriangles+numQuads+numProcedurals+numInstances; - } - - size_t expected_bvh_bytes() - { - const size_t blocks = (size()+5)/6; - const size_t expected_bytes = 128 + 64*size_t(1+1.5*blocks) + numTriangles*64 + numQuads*64 + numProcedurals*8 + numInstances*128; - const size_t bytes = 2*4096 + size_t(1.1*expected_bytes); // FIXME: FastAllocator wastes memory and always allocates 4kB per thread - return (bytes+127)&-128; - } - - size_t worst_case_bvh_bytes() - { - const size_t numPrimitives = size(); - const size_t blocks = (numPrimitives+5)/6; - const size_t worst_case_bytes = 128 + 64*(1+blocks + numPrimitives) + numTriangles*64 + numQuads*64 + numProcedurals*64 + numInstances*128; - const size_t bytes = 2*4096 + size_t(1.1*worst_case_bytes); // FIXME: FastAllocator wastes memory and always allocates 4kB per thread - return (bytes+127)&-128; - } - - size_t scratch_space_bytes() { - return size()*sizeof(PrimRef)+64; // 64 to align to 64 bytes - } - }; - - /*! settings for SAH builder */ - struct Settings - { - public: - size_t maxDepth = 27; //!< maximum depth of BVH to build - size_t sahBlockSize = 6; //!< blocksize for SAH heuristic - size_t leafSize[NUM_TYPES] = { 9,9,6,6,6 }; //!< target size of a leaf - size_t typeSplitSize = 128; //!< number of primitives when performing type splitting - }; - - /*! recursive state of builder */ - struct BuildRecord - { - public: - __forceinline BuildRecord () {} - - __forceinline BuildRecord (size_t depth, const PrimInfoRange& prims, Type type) - : depth(depth), prims(prims), type(type) {} - - __forceinline BBox3fa bounds() const { return prims.geomBounds; } - - __forceinline friend bool operator< (const BuildRecord& a, const BuildRecord& b) { return a.prims.size() < b.prims.size(); } - __forceinline friend bool operator> (const BuildRecord& a, const BuildRecord& b) { return a.prims.size() > b.prims.size(); } - - __forceinline size_t begin() const { return prims.begin(); } - __forceinline size_t end () const { return prims.end(); } - __forceinline size_t size () const { return prims.size(); } - __forceinline bool equalType() const { return type != UNKNOWN; } - - friend inline std::ostream& operator<<(std::ostream& cout, const BuildRecord& r) { - return cout << "BuildRecord { depth = " << r.depth << ", pinfo = " << r.prims << ", type = " << r.type << " }"; - } - - public: - size_t depth; //!< Depth of the root of this subtree. - PrimInfoRange prims; //!< The list of primitives. - Type type; //!< shared type when type of primitives are equal otherwise UNKNOWN - }; - - struct PrimRange - { - PrimRange () : block_delta(0), cur_prim(0) {} - - PrimRange (uint8_t block_delta, uint8_t start_prim = 0) - : block_delta(block_delta), cur_prim(start_prim) - { - assert(block_delta < 4); - assert(start_prim < 16); - } - - friend std::ostream& operator<<(std::ostream& cout,const PrimRange& range) { - return cout << "PrimRange { " << (int)range.block_delta << ", " << (int)range.cur_prim << " }"; - } - - public: - uint8_t block_delta; - uint8_t cur_prim; - }; - - struct ReductionTy - { - ReductionTy() : node(nullptr) {} - ReductionTy (void* node, NodeType type, uint8_t nodeMask, PrimRange primRange) - : node((char*)node), type(type), nodeMask(nodeMask), primRange(primRange) {} - - inline bool valid() { return node != nullptr; } - - public: - char* node; - NodeType type; - uint8_t nodeMask; - PrimRange primRange; - }; - - class ProceduralLeafBuilder - { - public: - - ProceduralLeafBuilder (char* data, size_t numBlocks) - : data(data), numBlocks(numBlocks), prevBlockID(0), currBlockID(0), currProcedural(nullptr) {} - - ProceduralLeaf* getCurProcedural() - { - if (!currProcedural) - { - assert(numBlocks); - currProcedural = new (data) ProceduralLeaf(); - data += sizeof(ProceduralLeaf); numBlocks--; - } - return currProcedural; - } - - PrimRange addProcedural(uint32_t geomID, uint32_t primID, const Procedural* procedural, bool last) - { - assert(currProcedural); - - if (!currProcedural->add(procedural->desc(geomID),primID,last)) - { - assert(numBlocks); - currProcedural = (ProceduralLeaf*) data; - data += sizeof(ProceduralLeaf); numBlocks--; - - new (currProcedural) ProceduralLeaf(procedural->desc(geomID),primID,last); - currBlockID+=1; - } - - uint32_t blockDelta = currBlockID - prevBlockID; - uint32_t currPrim = (uint32_t)currProcedural->size() - 1; - prevBlockID = currBlockID; - - return PrimRange(blockDelta,currPrim); - } - - protected: - char* data; - size_t numBlocks; - uint32_t prevBlockID; - uint32_t currBlockID; - ProceduralLeaf* currProcedural; - }; - - template - class BuilderT - { - public: - static const size_t BINS = 32; - typedef HeuristicArrayBinningSAH CentroidBinner; - - BuilderT (Device* device, - const getSizeFunc& getSize, - const getTypeFunc& getType, - const createPrimRefArrayFunc& createPrimRefArray, - const getTriangleFunc& getTriangle, - const getTriangleIndicesFunc& getTriangleIndices, - const getQuadFunc& getQuad, - const getProceduralFunc& getProcedural, - const getInstanceFunc& getInstance, - void* scratch_ptr, size_t scratch_bytes, - ze_rtas_format_exp_t rtas_format, - ze_rtas_builder_build_quality_hint_exp_t build_quality, - ze_rtas_builder_build_op_exp_flags_t build_flags, - bool verbose) - : getSize(getSize), - getType(getType), - createPrimRefArray(createPrimRefArray), - getTriangle(getTriangle), - getTriangleIndices(getTriangleIndices), - getQuad(getQuad), - getProcedural(getProcedural), - getInstance(getInstance), - prims(scratch_ptr,scratch_bytes), - rtas_format((ze_raytracing_accel_format_internal_t)rtas_format), - build_quality(build_quality), - build_flags(build_flags), - verbose(verbose) {} - - ReductionTy setInternalNode(char* curAddr, size_t curBytes, NodeType nodeTy, char* childAddr, - BuildRecord children[BVH_WIDTH], ReductionTy values[BVH_WIDTH], size_t numChildren) - { - assert(curBytes >= sizeof(QBVH6::InternalNode6)); - assert(numChildren <= QBVH6::InternalNode6::NUM_CHILDREN); - - BBox3f bounds = empty; - for (size_t i=0; isetChildOffset(childAddr); - - uint8_t nodeMask = 0; - for (uint32_t i = 0; i < numChildren; i++) - { - qnode->setChild(i,children[i].bounds(),values[i].type,values[i].primRange.block_delta); - nodeMask |= values[i].nodeMask; - } - qnode->nodeMask = nodeMask; - - return ReductionTy(curAddr, NODE_TYPE_INTERNAL, nodeMask, PrimRange(curBytes/64)); - } - - ReductionTy setNode(char* curAddr, size_t curBytes, NodeType nodeTy, char* childAddr, - BuildRecord children[BVH_WIDTH], ReductionTy values[BVH_WIDTH], size_t numChildren) - { - return setInternalNode(curAddr,curBytes,nodeTy,childAddr,children,values,numChildren); - } - - QuadLeaf getTriangleInternal(unsigned int geomID, unsigned int primID) - { - QBVH6BuilderSAH::Triangle tri = getTriangle(geomID,primID); - const Vec3f p0 = tri.p0; - const Vec3f p1 = tri.p1; - const Vec3f p2 = tri.p2; - Vec3f p3 = p2; - - uint8_t lb0 = 0,lb1 = 0,lb2 = 0; - uint16_t second = quadification[geomID][primID]; - - /* handle paired triangle */ - if (second) - { - QBVH6BuilderSAH::Triangle tri1 = getTriangle(geomID,primID+second); - assert(tri.gflags == tri1.gflags); - assert(tri.gmask == tri1.gmask ); - - bool pair MAYBE_UNUSED = pair_triangles(Vec3(tri.i0,tri.i1,tri.i2),Vec3(tri1.i0,tri1.i1,tri1.i2),lb0,lb1,lb2); - assert(pair); - - if (lb0 == 3) p3 = tri1.p0; - if (lb1 == 3) p3 = tri1.p1; - if (lb2 == 3) p3 = tri1.p2; - } - - return QuadLeaf( p0,p1,p2,p3, lb0,lb1,lb2, 0, geomID, primID, primID+second, tri.gflags, tri.gmask, false ); - }; - - QuadLeaf createQuadLeaf(Type ty, const PrimRef& prim) - { - const unsigned int geomID = prim.geomID(); - const unsigned int primID = prim.primID(); - - if (ty == TRIANGLE) - return getTriangleInternal(geomID, primID); - else - { - assert(ty == QUAD); - const Quad quad = getQuad(geomID,primID); - return QuadLeaf(quad.p0,quad.p1,quad.p3,quad.p2, 3,2,1, 0, geomID, primID, primID, quad.gflags, quad.gmask, false ); - } - } - - const ReductionTy createQuads(Type ty, const BuildRecord& curRecord, char* curAddr_) - { - QuadLeaf* curAddr = (QuadLeaf*) curAddr_; - uint8_t nodeMask = 0; - for (size_t i = curRecord.begin(); i < curRecord.end(); i++, curAddr++) - { - *curAddr = createQuadLeaf(ty,prims[i]); - curAddr->last = (i+1) == curRecord.end(); - nodeMask |= curAddr->leafDesc.geomMask; - } - return ReductionTy(curAddr, NODE_TYPE_QUAD, nodeMask, PrimRange(curRecord.size()*sizeof(QuadLeaf)/64)); - } - - const ReductionTy createFatQuadLeaf(Type ty, const BuildRecord& curRecord, char* curAddr, size_t curBytes, - BuildRecord children[BVH_WIDTH], size_t numChildren) - { - /*! allocate data for all children */ - char* childData = (char*) allocator.malloc(curRecord.prims.size()*sizeof(QuadLeaf), 64); - - if (!childData) - return ReductionTy(); - - /* create each child */ - ReductionTy values[BVH_WIDTH]; - for (size_t i=0, j=0; isetChildOffset(first_procedural + ranges[0].block_delta); - qnode->nodeMask = nodeMask; - ranges[0].block_delta = 0; - - for (size_t i = curRecord.begin(), j=0; i < curRecord.end(); i++, j++) - qnode->setChild(j,prims[i].bounds(),NODE_TYPE_PROCEDURAL,ranges[j+1].block_delta,ranges[j].cur_prim); - - return ReductionTy(curAddr, NODE_TYPE_INTERNAL, nodeMask, PrimRange(curBytes/64)); - } - - template - const ReductionTy createInstances(const BuildRecord& curRecord, char* curAddr, size_t curBytes) - { - uint32_t numPrimitives = curRecord.size(); - assert(numPrimitives <= QBVH6::InternalNode6::NUM_CHILDREN); - - /* allocate data for all children */ - InstanceLeaf* childData = (InstanceLeaf*) allocator.malloc(numPrimitives*sizeof(InstanceLeaf), 64); - - if (!childData) - return ReductionTy(); - - QBVH6::InternalNode6* qnode = new (curAddr) QBVH6::InternalNode6(curRecord.bounds(),NODE_TYPE_INSTANCE); - qnode->setChildOffset(childData); - - uint8_t nodeMask = 0; - for (size_t i=curRecord.begin(), c=0; i(instance.accel)->root(); - root += 64*rootOfs; // goto sub-BVH - new (&childData[c]) InstanceLeaf(instance.local2world,root,geomID,instance.instanceUserID,instance.imask); - - qnode->setChild(c,prims[i].bounds(),NODE_TYPE_INSTANCE,sizeof(InstanceLeaf)/64,0); - nodeMask |= instance.imask; - } - qnode->nodeMask = nodeMask; - - return ReductionTy(curAddr, NODE_TYPE_INTERNAL, nodeMask, PrimRange(curBytes/64)); - } - - /* finds the index of the child with largest surface area */ - int findChildWithLargestArea(BuildRecord children[BVH_WIDTH], size_t numChildren, size_t leafThreshold) - { - /*! find best child to split */ - float bestArea = neg_inf; - int bestChild = -1; - for (uint32_t i=0; i<(uint32_t)numChildren; i++) - { - /* ignore leaves as they cannot get split */ - if (children[i].prims.size() <= leafThreshold) continue; - - /* find child with largest surface area */ - const float area = halfArea(children[i].prims.geomBounds); - if (area > bestArea) - { - bestArea = area; - bestChild = i; - } - } - return bestChild; - } - - /* finds the index of the child with most primitives */ - int findChildWithMostPrimitives(BuildRecord children[BVH_WIDTH], size_t numChildren, size_t leafThreshold) - { - /* find best child with largest size */ - size_t bestSize = 0; - int bestChild = -1; - for (uint32_t i=0; i<(uint32_t)numChildren; i++) - { - /* ignore leaves as they cannot get split */ - if (children[i].prims.size() <= leafThreshold) continue; - - /* remember child with largest size */ - if (children[i].prims.size() > bestSize) - { - bestSize = children[i].size(); - bestChild = i; - } - } - return bestChild; - } - - /* finds the index of the child with most primitives */ - int findChildWithNonEqualTypes(BuildRecord children[BVH_WIDTH], size_t numChildren) - { - for (uint32_t i=0; i<(uint32_t)numChildren; i++) - if (!children[i].equalType()) - return i; - - return -1; - } - - void SAHSplit(size_t depth, size_t sahBlockSize, int bestChild, BuildRecord children[BVH_WIDTH], size_t& numChildren) - { - PrimInfoRange linfo, rinfo; - BuildRecord brecord = children[bestChild]; - - /* first perform centroid binning */ - CentroidBinner centroid_binner(prims.data()); - CentroidBinner::Split bestSplit = centroid_binner.find_block_size(brecord.prims,sahBlockSize); - - /* now split the primitive list */ - if (bestSplit.valid()) - centroid_binner.split(bestSplit,brecord.prims,linfo,rinfo); - - /* the above techniques may fail, and we fall back to some brute force split in the middle */ - else - centroid_binner.splitFallback(brecord.prims,linfo,rinfo); - - children[bestChild ] = BuildRecord(depth+1, linfo, brecord.type); - children[numChildren] = BuildRecord(depth+1, rinfo, brecord.type); - numChildren++; - } - - void TypeSplit(size_t depth, int bestChild, BuildRecord children[BVH_WIDTH], size_t& numChildren) - { - BuildRecord brecord = children[bestChild]; - - PrimInfoRange linfo, rinfo; - auto type = getType(prims[brecord.prims.begin()].geomID()); - performTypeSplit(getType,type,prims.data(),brecord.prims.get_range(),linfo,rinfo); - - for (size_t i=linfo.begin(); i cfg.maxDepth) - throw std::runtime_error("BVH too deep"); - - /* there should be at least one primitive and not too many */ - assert(curRecord.size() > 0); - assert(curRecord.size() <= cfg.leafSize[curRecord.type]); - - /* all primitives have to have the same type */ - Type ty = getType(prims[curRecord.begin()].geomID()); - for (size_t i=curRecord.begin(); i 3) - { - children[0] = curRecord; - numChildren = 1; - - /*! perform fallback splits until node is full */ - while (numChildren < BVH_WIDTH) - { - const int bestChild = findChildWithMostPrimitives(children,numChildren,1); - if (bestChild == -1) break; - FallbackSplit(curRecord.depth,bestChild,children,numChildren); - } - } - } - - /* sort build records for faster shadow ray traversal */ - std::sort(children,children+numChildren, [](const BuildRecord& a,const BuildRecord& b) { - return area(a.prims.geomBounds) > area(b.prims.geomBounds); - }); - - /* create leaf of proper type */ - if (ty == TRIANGLE || ty == QUAD) - return createFatQuadLeaf(ty, curRecord, curAddr, curBytes, children, numChildren); - else if (ty == PROCEDURAL) - return createProcedurals(curRecord,curAddr,curBytes); - else if (ty == INSTANCE) { - if (rtas_format == ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_1) - return createInstances(curRecord,curAddr,curBytes); - } - - assert(false); - return ReductionTy(); - } - - const ReductionTy createLargeLeaf(const BuildRecord& curRecord, char* curAddr, size_t curBytes) - { - /* this should never occur but is a fatal error */ - if (curRecord.depth > cfg.maxDepth) - throw std::runtime_error("BVH too deep"); - - /* all primitives have to have the same type */ - Type ty MAYBE_UNUSED = getType(prims[curRecord.begin()].geomID()); - for (size_t i=curRecord.begin(); i= cfg.maxDepth; - - bool performTypeSplit = !curRecord.equalType() && (createLeaf || curRecord.size() <= cfg.typeSplitSize); - - /* check if types are really not equal when we attempt to split by type */ - if (performTypeSplit) - { - /* check if types are already equal */ - bool equalTy = true; - Type type = getType(prims[curRecord.begin()].geomID()); - for (size_t i=curRecord.begin()+1; i()); - - /*! allocate data for all children */ - size_t childrenBytes = numChildren*sizeof(QBVH6::InternalNode6); - char* childBase = (char*) allocator.malloc(childrenBytes, 64); - - if (!childBase) - return ReductionTy(); - - /* spawn tasks */ - if (curRecord.size() > 1024) // cfg.singleThreadThreshold - { - std::atomic success = true; - parallel_for(size_t(0), numChildren, [&] (const range& r) { - if (!success) return; - for (size_t i=r.begin(); i& r, size_t k, unsigned int geomID) - { - PrimInfo pinfo(empty); - for (size_t j=r.begin(); j(prim.bounds(),dim,pos,v,left,right); - - if (pair != QUADIFIER_TRIANGLE) - { - const Triangle tri1 = getTriangle(geomID,primID+pair); - const Vec3fa v[4] = { tri1.p0, tri1.p1, tri1.p2, tri1.p0 }; - - BBox3fa left1, right1; - splitPolygon<3>(prim.bounds(),dim,pos,v,left1,right1); - - left.extend(left1); - right.extend(right1); - } - - left_o = PrimRef(left , geomID, primID); - right_o = PrimRef(right, geomID, primID); - } - - void splitQuad(const PrimRef& prim, const size_t dim, const float pos, PrimRef& left_o, PrimRef& right_o) const - { - const uint32_t geomID = prim.geomID(); - const uint32_t primID = prim.primID(); - const Quad quad = getQuad(geomID,primID); - const Vec3fa v[5] = { quad.p0, quad.p1, quad.p2, quad.p3, quad.p0 }; - splitPolygon<4>(prim,dim,pos,v,left_o,right_o); - } - - void splitTriangleOrQuad(const PrimRef& prim, const size_t dim, const float pos, PrimRef& left_o, PrimRef& right_o) const - { - switch (getType(prim.geomID())) { - case TRIANGLE: splitTrianglePair(prim,dim,pos,left_o,right_o); break; - case QUAD : splitQuad (prim,dim,pos,left_o,right_o); break; - default: assert(false); break; - } - } - - void openInstance(const PrimRef& prim, - const unsigned int splitprims, - PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE], - unsigned int& numSubPrims) - { - struct Item - { - QBVH6::InternalNode6* node; - float priority; - - Item () {} - - Item (QBVH6::InternalNode6* node) - : node(node), priority(halfArea(node->bounds())) - { - /* fat leaves cannot get opened */ - if (node->isFatLeaf()) - priority = 0.0f; - } - - inline bool operator< ( const Item& other) const { - return priority < other.priority; - } - }; - - const uint32_t targetSubPrims = splitprims; - const uint32_t geomID = prim.geomID(); - const uint32_t primID MAYBE_UNUSED = prim.primID(); - assert(primID == 0); // has to be zero as we encode root offset here - - const Instance instance = getInstance(geomID,0); - QBVH6::InternalNode6* root = static_cast(instance.accel)->root().innerNode(); - - darray_t heap; - heap.push_back(root); - - while (heap.size() + (QBVH6::InternalNode6::NUM_CHILDREN-1) <= MAX_PRESPLITS_PER_PRIMITIVE) - { - /* terminate when budget exceeded */ - if (heap.size() >= targetSubPrims) - break; - - /* get top heap element */ - std::pop_heap(heap.begin(), heap.end()); - auto top = heap.back(); - - /* if that happens there are only leaf nodes left that cannot get opened */ - if (top.priority == 0.0f) break; - heap.pop_back(); - - /* add all children to the heap */ - for (uint32_t i=0; ivalid(i)) continue; - heap.push_back(top.node->child(i).template innerNode()); - std::push_heap(heap.begin(), heap.end()); - } - } - - /* create primrefs */ - for (size_t i=0; ibounds()); - int64_t ofs = ((int64_t)node-(int64_t)root)/64; - assert(ofs >= INT_MIN && ofs <= INT_MAX); - subPrims[numSubPrims++] = PrimRef(bounds,geomID,(int32_t)ofs); - } - } - - float primitiveAreaTrianglePair(const PrimRef& prim) - { - const uint32_t geomID = prim.geomID(); - const uint32_t primID = prim.primID(); - - const uint16_t pair = quadification[geomID][primID]; - assert(pair != QUADIFIER_PAIRED); - - const Triangle tri0 = getTriangle(geomID,primID); - float A = areaProjectedTriangle(tri0.p0,tri0.p1,tri0.p2); - if (pair == QUADIFIER_TRIANGLE) - return A; - - const Triangle tri1 = getTriangle(geomID,primID+pair); - A += areaProjectedTriangle(tri1.p0,tri1.p1,tri1.p2); - return A; - } - - float primitiveAreaQuad(const PrimRef& prim) - { - const uint32_t geomID = prim.geomID(); - const uint32_t primID = prim.primID(); - const Quad quad = getQuad(geomID,primID); - const float A0 = areaProjectedTriangle(quad.p0,quad.p1,quad.p3); - const float A1 = areaProjectedTriangle(quad.p2,quad.p3,quad.p1); - return A0+A1; - } - - float primitiveAreaInstance(const PrimRef& prim) { - return halfArea(prim.bounds()); - } - - float primitiveArea(const PrimRef& prim) - { - switch (getType(prim.geomID())) { - case TRIANGLE: return primitiveAreaTrianglePair(prim); - case QUAD : return primitiveAreaQuad(prim); - case INSTANCE: return primitiveAreaInstance(prim); - default : return 0.0f; - } - } - - ReductionTy build(uint32_t numGeometries, PrimInfo& pinfo_o, char* root) - { - double t1 = verbose ? getSeconds() : 0.0; - - /* quadify all triangles */ - ParallelForForPrefixSumState pstate; - pstate.init(numGeometries,getSize,size_t(1024)); - PrimInfo pinfo = parallel_for_for_prefix_sum0_( pstate, size_t(1), getSize, PrimInfo(empty), [&](size_t geomID, const range& r, size_t k) -> PrimInfo { - if (getType(geomID) == QBVH6BuilderSAH::TRIANGLE) - return PrimInfo(pair_triangles(geomID,(QuadifierType*) quadification[geomID].data(), r.begin(), r.end(), getTriangleIndices)); - else - return PrimInfo(r.size()); - }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); }); - - double t2 = verbose ? getSeconds() : 0.0; - if (verbose) std::cout << "quadification: " << std::setw(10) << (t2-t1)*1000.0 << "ms, " << std::endl; //<< std::setw(10) << 1E-6*double(numTriangles)/(t2-t1) << " Mtris/s" << std::endl; - - size_t numPrimitives = pinfo.size(); - - /* first try */ - //pstate.init(numGeometries,getSize,size_t(1024)); - pinfo = parallel_for_for_prefix_sum1_( pstate, size_t(1), getSize, PrimInfo(empty), [&](size_t geomID, const range& r, size_t k, const PrimInfo& base) -> PrimInfo { - if (getType(geomID) == QBVH6BuilderSAH::TRIANGLE) - return createTrianglePairPrimRefArray(prims.data(),r,base.size(),(unsigned)geomID); - else - return createPrimRefArray(prims,BBox1f(0,1),r,base.size(),(unsigned)geomID); - }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); }); - - double t3 = verbose ? getSeconds() : 0.0; - if (verbose) std::cout << "primrefgen : " << std::setw(10) << (t3-t2)*1000.0 << "ms, " << std::setw(10) << 1E-6*double(numPrimitives)/(t3-t2) << " Mprims/s" << std::endl; - - /* if we need to filter out geometry, run again */ - if (pinfo.size() != numPrimitives) - { - numPrimitives = pinfo.size(); - - pinfo = parallel_for_for_prefix_sum1_( pstate, size_t(1), getSize, PrimInfo(empty), [&](size_t geomID, const range& r, size_t k, const PrimInfo& base) -> PrimInfo { - if (getType(geomID) == QBVH6BuilderSAH::TRIANGLE) { - return createTrianglePairPrimRefArray(prims.data(),r,base.size(),(unsigned)geomID); - } - else - return createPrimRefArray(prims,BBox1f(0,1),r,base.size(),(unsigned)geomID); - }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); }); - } - assert(pinfo.size() == numPrimitives); - - double t4 = verbose ? getSeconds() : 0.0; - if (verbose) std::cout << "primrefgen2 : " << std::setw(10) << (t4-t3)*1000.0 << "ms, " << std::setw(10) << 1E-6*double(numPrimitives)/(t4-t3) << " Mprims/s" << std::endl; - - /* perform pre-splitting */ - if (useSpatialSplits(build_quality,build_flags) && numPrimitives) - { - auto splitter = [this] (const PrimRef& prim, const size_t dim, const float pos, PrimRef& left_o, PrimRef& right_o) { - splitTriangleOrQuad(prim,dim,pos,left_o,right_o); - }; - - auto splitter1 = [&] (const PrimRef& prim, - const unsigned int splitprims, - const SplittingGrid& grid, - PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE], - unsigned int& numSubPrims) - { - if (getType(prim.geomID()) == QBVH6BuilderSAH::INSTANCE) { - openInstance(prim,splitprims,subPrims,numSubPrims); - } else { - splitPrimitive(splitter,prim,splitprims,grid,subPrims,numSubPrims); - } - }; - - auto primitiveArea1 = [this] (const PrimRef& prim) -> float { - return primitiveArea(prim); - }; - - pinfo = createPrimRefArray_presplit(numPrimitives, prims, pinfo, splitter1, primitiveArea1); - } - - /* exit early if scene is empty */ - if (pinfo.size() == 0) { - pinfo_o = pinfo; - return createEmptyNode(root); - } - - /* build hierarchy */ - BuildRecord record(1,pinfo,UNKNOWN); - ReductionTy r = createInternalNode(record,root,sizeof(QBVH6::InternalNode6)); - - double t5 = verbose ? getSeconds() : 0.0; - if (verbose) std::cout << "bvh_build : " << std::setw(10) << (t5-t4)*1000.0 << "ms, " << std::setw(10) << 1E-6*double(numPrimitives)/(t5-t4) << " Mprims/s" << std::endl; - - pinfo_o = pinfo; - return r; - } - - bool build(size_t numGeometries, char* accel, size_t bytes, BBox3f* boundsOut, size_t* accelBufferBytesOut, void* dispatchGlobalsPtr) - { - double t0 = verbose ? getSeconds() : 0.0; - - Stats stats; - size_t numPrimitives = 0; - quadification.resize(numGeometries); - for (size_t geomID=0; geomIDrtas_format = rtas_format; - qbvh->numPrims = 0; //numPrimitives; - uint64_t rootNodeOffset = QBVH6::Node((char*)(r.node - (char*)qbvh), r.type, r.primRange.cur_prim); - assert(rootNodeOffset == QBVH6::rootNodeOffset); - _unused(rootNodeOffset); - qbvh->bounds = bounds; - qbvh->numTimeSegments = 1; - qbvh->dispatchGlobalsPtr = (uint64_t) dispatchGlobalsPtr; - -#if 0 - BVHStatistics stats = qbvh->computeStatistics(); - stats.print(std::cout); - stats.print_raw(std::cout); - qbvh->print(); - - /*std::cout << "#define bvh_bytes " << bytes << std::endl; - std::cout << "const unsigned char bvh_data[bvh_bytes] = {"; - for (size_t i=0; i prims; - Allocator allocator; - std::vector> quadification; - ze_raytracing_accel_format_internal_t rtas_format; - ze_rtas_builder_build_quality_hint_exp_t build_quality; - ze_rtas_builder_build_op_exp_flags_t build_flags; - bool verbose; - - }; - - template - - static void estimateSize(size_t numGeometries, - const getSizeFunc& getSize, - const getTypeFunc& getType, - ze_rtas_format_exp_t rtas_format, - ze_rtas_builder_build_quality_hint_exp_t build_quality, - ze_rtas_builder_build_op_exp_flags_t build_flags, - size_t& expectedBytes, - size_t& worstCaseBytes, - size_t& scratchBytes) - { - Stats stats; - for (size_t geomID=0; geomID - - static bool build(size_t numGeometries, - Device* device, - const getSizeFunc& getSize, - const getTypeFunc& getType, - const createPrimRefArrayFunc& createPrimRefArray, - const getTriangleFunc& getTriangle, - const getTriangleIndicesFunc& getTriangleIndices, - const getQuadFunc& getQuad, - const getProceduralFunc& getProcedural, - const getInstanceFunc& getInstance, - char* accel_ptr, size_t accel_bytes, - void* scratch_ptr, size_t scratch_bytes, - BBox3f* boundsOut, - size_t* accelBufferBytesOut, - ze_rtas_format_exp_t rtas_format, - ze_rtas_builder_build_quality_hint_exp_t build_quality, - ze_rtas_builder_build_op_exp_flags_t build_flags, - bool verbose, - void* dispatchGlobalsPtr) - { - /* align scratch buffer to 64 bytes */ - bool scratchAligned = std::align(64,0,scratch_ptr,scratch_bytes); - if (!scratchAligned) - throw std::runtime_error("scratch buffer cannot get aligned"); - - BuilderT builder - (device, getSize, getType, createPrimRefArray, getTriangle, getTriangleIndices, getQuad, getProcedural, getInstance, scratch_ptr, scratch_bytes, rtas_format, build_quality, build_flags, verbose); - - return builder.build(numGeometries, accel_ptr, accel_bytes, boundsOut, accelBufferBytesOut, dispatchGlobalsPtr); - } - }; - } -} diff --git a/kernels/rthwif/rtbuild/qnode.h b/kernels/rthwif/rtbuild/qnode.h deleted file mode 100644 index 6cd775e164..0000000000 --- a/kernels/rthwif/rtbuild/qnode.h +++ /dev/null @@ -1,508 +0,0 @@ -// Copyright 2009-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include -#include - -#include "leaf.h" - -#if defined(__INTEL_LLVM_COMPILER) && defined(WIN32) -inline float embree_frexp(float value, int* exp) -{ - // using the Intel(R) oneAPI DPC++/C++ Compiler with -no-intel-libs results - // in an unresolved external symbol "__imp_frexp" error and therefore we - // provide a the manual implemetation referenced here - // https://en.cppreference.com/w/c/numeric/math/frexp in this case - static_assert(FLT_RADIX == 2, "custom implementation of frexp only works for base 2 floating point representations"); - *exp = (value == 0) ? 0 : (int)(1 + logb(value)); - return scalbn(value, -(*exp)); -} -#endif - -namespace embree -{ - /* The NodeRef structure references a node of the BVH. It stores the - * pointer to that node as well as the node's type. If a leaf node - * is referenced the current primitive to intersect is also - * stored. */ - - struct NodeRef - { - NodeRef () - : node(nullptr), type(NODE_TYPE_INVALID), cur_prim(0) {} - - NodeRef (void* node, NodeType type, uint8_t cur_prim) - : node((char*)node), type(type), cur_prim(cur_prim) - { - assert(cur_prim < 16); - } - - /* decode from 64 bit encoding used in MemRay and Instances */ - NodeRef (uint64_t nodePtr, uint64_t offset = 0) - { - node = (char*) (nodePtr & ~(uint64_t)0xF) + offset; - //type = NODE_TYPE_INTERNAL; // we can only reference internal nodes inside ray and instances - type = (NodeType) (nodePtr & 0xF); - cur_prim = 0; - } - - /* 64 bit encoding used in MemRay and Instances */ - operator uint64_t() const - { - //assert(type == NODE_TYPE_INTERNAL); - assert(((uint64_t)node & 0xF) == 0); - assert(cur_prim == 0); - return (uint64_t)node + (uint64_t) type; - } - - /* returns the internal node that is referenced */ - template - InternalNode* innerNode() const { - assert(type == NODE_TYPE_INTERNAL); - return (InternalNode*)node; - } - - /* returns the instance leaf node that is referenced */ - InstanceLeaf* leafNodeInstance() const { - assert(type == NODE_TYPE_INSTANCE); - return (InstanceLeaf*)node; - } - - /* returns the quad leaf node that is referenced */ - QuadLeaf* leafNodeQuad() const { - assert(type == NODE_TYPE_QUAD); - return (QuadLeaf*)node; - } - - /* returns the procedural leaf node that is referenced */ - ProceduralLeaf* leafNodeProcedural() const { - assert(type == NODE_TYPE_PROCEDURAL); - return (ProceduralLeaf*)node; - } - - friend bool operator ==(const NodeRef& a, const NodeRef& b) { - return (a.node == b.node) && (a.type == b.type) && (a.cur_prim == b.cur_prim); - } - - friend bool operator !=(const NodeRef& a, const NodeRef& b) { - return !(a == b); - } - -#if !defined(__RTRT_GSIM) - friend inline std::ostream& operator<<(std::ostream& _cout, const NodeRef& node) { - return _cout << "NodeRef { " << (void*)node.node << ", " << node.type << ", " << (int)node.cur_prim << " }"; - } -#endif - - public: - char* node; // pointer to the referenced node - NodeType type; // type of the node referenced - uint8_t cur_prim : 4; // current primitive referenced in the leaf - }; - - /* - - The internal nodes of the BVH store references to 6 children and - quantized bounds for each of these children. - - All children are stored consecutively in memory at a location - refered to by the childOffset. To calculate the relative - location of the i'th child the size (as encoded in blockIncr) of - all the children with index smaller than i has to get added to - that childOffset. The calculated offset specifies the signed - number of 64 bytes blocks relative to the node address to reach - the child. - - If the nodeType is INTERNAL we are in mixed mode and the type of - each child is encoded inside the startPrim member. Otherwise we - are in fat leaf mode and each child has the same type 'nodeType' - and startPrim identifies the primitive where the leaf - starts. The leaf spans all primitives from this start primitive - to the end primitive which is marked as 'last'. - - The bounding boxes of the children are quantized into a regular - 3D grid. The world space position of the origin of that grid is - stored at full precision in the lower member, while the step - size is encoded in the exp_x, exp_y, and exp_z members as power - of 2. Thus grid coordinates together with their exponent - (xi,exp_x), (yi,exp_y), (zi,exp_z) correspond to the mantissa - and exponent of a floating point number representation without - leading zero. Thus the world space position of the bounding - planes can get calculated as follows: - - x = lower.x + pow(2,exp_x) * 0.xi - y = lower.y + pow(2,exp_y) * 0.yi - z = lower.z + pow(2,exp_z) * 0.zi - - As the stored grid coordinates for child bounds are only - unsigned 8-bit values, ray/box intersections can get performed - with reduced precision. - - The node also stores a mask used for ray filtering. Only rays - with (node.nodeMask & ray.rayMask) != 0 are traversed, all - others are culled. - - */ - - struct InternalNode6Data - { - static constexpr uint32_t NUM_CHILDREN = 6; - - Vec3f lower; // world space origin of quantization grid - int32_t childOffset; // offset to all children in 64B multiples - - NodeType nodeType; // the type of the node - uint8_t pad; // unused byte - - int8_t exp_x; // 2^exp_x is the size of the grid in x dimension - int8_t exp_y; // 2^exp_y is the size of the grid in y dimension - int8_t exp_z; // 2^exp_z is the size of the grid in z dimension - uint8_t nodeMask; // mask used for ray filtering - - struct ChildData - { - uint8_t blockIncr : 2; // size of child in 64 byte blocks - uint8_t startPrim : 4; // start primitive in fat leaf mode or child type in mixed mode - uint8_t pad : 2; // unused bits - } childData[NUM_CHILDREN]; - - uint8_t lower_x[NUM_CHILDREN]; // the quantized lower bounds in x-dimension - uint8_t upper_x[NUM_CHILDREN]; // the quantized upper bounds in x-dimension - uint8_t lower_y[NUM_CHILDREN]; // the quantized lower bounds in y-dimension - uint8_t upper_y[NUM_CHILDREN]; // the quantized upper bounds in y-dimension - uint8_t lower_z[NUM_CHILDREN]; // the quantized lower bounds in z-dimension - uint8_t upper_z[NUM_CHILDREN]; // the quantized upper bounds in z-dimension - }; - - static_assert(sizeof(InternalNode6Data) == 64, "InternalNode6Data must be 64 bytes large"); - - template - struct InternalNodeCommon : public InternalNodeData - { - using InternalNodeData::NUM_CHILDREN; - - InternalNodeCommon() { - } - - InternalNodeCommon(NodeType type) - { - this->nodeType = type; - this->childOffset = 0; - this->nodeMask = 0xFF; - - for (uint32_t i = 0; i < InternalNodeData::NUM_CHILDREN; i++) - this->childData[i] = { 0, 0, 0 }; - - this->lower = Vec3f(0.0f); - this->exp_x = 0; - this->exp_y = 0; - this->exp_z = 0; - - /* set all child bounds to invalid */ - for (uint32_t i = 0; i < InternalNodeData::NUM_CHILDREN; i++) { - this->lower_x[i] = this->lower_y[i] = this->lower_z[i] = 0x80; - this->upper_x[i] = this->upper_y[i] = this->upper_z[i] = 0x00; - } - } - - /* this function slightly enlarges bounds in order to make traversal watertight */ - static const BBox3f conservativeBox(const BBox3f box, float ulps = 1.0f) { - const float err = ulps*std::numeric_limits::epsilon() * std::max(reduce_max(abs(box.lower)), reduce_max(abs(box.upper))); - return enlarge(box, Vec3f(err)); - } - - /* this function quantizes the provided bounds */ - const BBox3f quantize_bounds(BBox3f fbounds, Vec3f base) const - { - const Vec3f lower = fbounds.lower-base; - const Vec3f upper = fbounds.upper-base; - float qlower_x = ldexpf(lower.x, -this->exp_x + 8); - float qlower_y = ldexpf(lower.y, -this->exp_y + 8); - float qlower_z = ldexpf(lower.z, -this->exp_z + 8); - float qupper_x = ldexpf(upper.x, -this->exp_x + 8); - float qupper_y = ldexpf(upper.y, -this->exp_y + 8); - float qupper_z = ldexpf(upper.z, -this->exp_z + 8); - assert(qlower_x >= 0.0f && qlower_x <= 255.0f); - assert(qlower_y >= 0.0f && qlower_y <= 255.0f); - assert(qlower_z >= 0.0f && qlower_z <= 255.0f); - assert(qupper_x >= 0.0f && qupper_x <= 255.0f); - assert(qupper_y >= 0.0f && qupper_y <= 255.0f); - assert(qupper_z >= 0.0f && qupper_z <= 255.0f); - qlower_x = min(max(floorf(qlower_x),0.0f),255.0f); - qlower_y = min(max(floorf(qlower_y),0.0f),255.0f); - qlower_z = min(max(floorf(qlower_z),0.0f),255.0f); - qupper_x = min(max(ceilf(qupper_x),0.0f),255.0f); - qupper_y = min(max(ceilf(qupper_y),0.0f),255.0f); - qupper_z = min(max(ceilf(qupper_z),0.0f),255.0f); - BBox3f qbounds(Vec3f(qlower_x, qlower_y, qlower_z), Vec3f(qupper_x, qupper_y, qupper_z)); - - /* verify that quantized bounds are conservative */ - BBox3f dbounds = dequantize_bounds(qbounds, base); - dbounds.lower.x -= 2.0f*float(ulp) * (fabs(base.x) + ldexpf(255.0f,this->exp_x-8)); - dbounds.lower.y -= 2.0f*float(ulp) * (fabs(base.y) + ldexpf(255.0f,this->exp_y-8)); - dbounds.lower.z -= 2.0f*float(ulp) * (fabs(base.z) + ldexpf(255.0f,this->exp_z-8)); - dbounds.upper.x += 2.0f*float(ulp) * (fabs(base.x) + ldexpf(255.0f,this->exp_x-8)); - dbounds.upper.y += 2.0f*float(ulp) * (fabs(base.y) + ldexpf(255.0f,this->exp_y-8)); - dbounds.upper.z += 2.0f*float(ulp) * (fabs(base.z) + ldexpf(255.0f,this->exp_z-8)); - assert(subset(fbounds, dbounds)); - - return qbounds; - } - - /* this function de-quantizes the provided bounds */ - const BBox3f dequantize_bounds(const BBox3f& qbounds, Vec3f base) const - { - const float dlower_x = base.x + ldexpf(qbounds.lower.x, this->exp_x - 8); - const float dlower_y = base.y + ldexpf(qbounds.lower.y, this->exp_y - 8); - const float dlower_z = base.z + ldexpf(qbounds.lower.z, this->exp_z - 8); - const float dupper_x = base.x + ldexpf(qbounds.upper.x, this->exp_x - 8); - const float dupper_y = base.y + ldexpf(qbounds.upper.y, this->exp_y - 8); - const float dupper_z = base.z + ldexpf(qbounds.upper.z, this->exp_z - 8); - return BBox3f(Vec3f(dlower_x, dlower_y, dlower_z), Vec3f(dupper_x, dupper_y, dupper_z)); - } - - /* Determines if a child is valid. We have only to look at the - * topmost bit of lower_x and upper_x to determine if child is - * valid */ - bool valid(int i) const { - return !(this->lower_x[i] & 0x80) || (this->upper_x[i] & 0x80); - } - - /* Determines if the node is in fat leaf mode. */ - bool isFatLeaf() const { - return this->nodeType != NODE_TYPE_MIXED; - } - - /* Sets the offset to the child memory. */ - void setChildOffset(void* childDataPtr) - { - int64_t childDataOffset = childDataPtr ? (char*)childDataPtr - (char*)this : 0; - assert(childDataOffset % 64 == 0); - assert((int64_t)(int32_t)(childDataOffset / 64) == (childDataOffset / 64)); - this->childOffset = (int32_t)(childDataOffset / 64); - } - - /* Sets the type, size, and current primitive of a child */ - void setChildType(uint32_t child, NodeType childType, uint32_t block_delta, uint32_t cur_prim) - { - // there is no need to store block_delta for last child - if (child == NUM_CHILDREN-1) block_delta = 0; - - assert(block_delta < 4); - assert(cur_prim < 16); - - if (isFatLeaf()) - { - assert(this->nodeType == childType); - this->childData[child].startPrim = cur_prim; - this->childData[child].blockIncr = block_delta; - } - else - { - assert(cur_prim == 0); - this->childData[child].startPrim = childType; - this->childData[child].blockIncr = block_delta; - } - } - - void invalidateChild(uint32_t childID) - { - /* set child bounds to invalid */ - this->lower_x[childID] = this->lower_y[childID] = this->lower_z[childID] = 0x80; - this->upper_x[childID] = this->upper_y[childID] = this->upper_z[childID] = 0x00; - } - - /* Sets child bounds */ - void setChildBounds(uint32_t childID, const BBox3f& fbounds) - { - assert(fbounds.lower.x <= fbounds.upper.x); - assert(fbounds.lower.y <= fbounds.upper.y); - assert(fbounds.lower.z <= fbounds.upper.z); - const BBox3f qbounds = quantize_bounds(conservativeBox(fbounds), this->lower); - this->lower_x[childID] = (uint8_t)qbounds.lower.x; - this->lower_y[childID] = (uint8_t)qbounds.lower.y; - this->lower_z[childID] = (uint8_t)qbounds.lower.z; - this->upper_x[childID] = (uint8_t)qbounds.upper.x; - this->upper_y[childID] = (uint8_t)qbounds.upper.y; - this->upper_z[childID] = (uint8_t)qbounds.upper.z; - assert(valid(childID)); - } - - /* Sets an entire child, including bounds, type, size, and referenced primitive. */ - void setChild(uint32_t childID, const BBox3f& fbounds, NodeType type, uint32_t block_delta, uint32_t cur_prim = 0) - { - setChildType(childID, type, block_delta, cur_prim); - setChildBounds(childID, fbounds); - } - - /* Calculates the byte offset to the child. The offset is - * relative to the address this node. */ - int64_t getChildOffset(uint32_t childID) const - { - int64_t ofs = this->childOffset; - for (uint32_t j = 0; j < childID; j++) - ofs += this->childData[j].blockIncr; - return 64 * ofs; - } - - /* Returns the type of the child. In fat leaf mode the type is - * shared between all children, otherwise a per-child type is - * encoded inside the startPrim member for each child. */ - NodeType getChildType(uint32_t childID) const - { - if (isFatLeaf()) - return this->nodeType; - - else - return (NodeType)(this->childData[childID].startPrim); - } - - /* Returns the start primitive of a child. In case of children - * in fat-leaf mode, all children are leaves, and the start - * primitive specifies the primitive in a leaf block where the - * leaf start. */ - uint32_t getChildStartPrim(uint32_t childID) const - { - if (isFatLeaf()) - return this->childData[childID].startPrim; - - else - return 0; - } - - /* Returns a node reference for the given child. This reference - * includes the node pointer, type, and start primitive. */ - NodeRef child(void* This, int childID) const { - return NodeRef((char*)This + getChildOffset(childID), getChildType(childID), getChildStartPrim(childID)); - } - - NodeRef child(int i) const { - return child((void*)this, i); - } - }; - - template - struct InternalNode : public InternalNodeCommon - { - using InternalNodeCommon::valid; - using InternalNodeCommon::getChildType; - using InternalNodeCommon::getChildOffset; - using InternalNodeCommon::getChildStartPrim; - using InternalNodeCommon::conservativeBox; - using InternalNodeCommon::dequantize_bounds; - using InternalNodeCommon::NUM_CHILDREN; - - InternalNode() { - } - - InternalNode (NodeType type) - : InternalNodeCommon(type) {} - - /* Constructs an internal node. The quantization grid gets - * initialized from the provided parent bounds. */ - InternalNode (BBox3f box, NodeType type = NODE_TYPE_MIXED) - : InternalNode(type) - { - setNodeBounds(box); - } - - void setNodeBounds(BBox3f box) - { - /* initialize quantization grid */ - box = conservativeBox(box); - const float _ulp = std::numeric_limits::epsilon(); - const float up = 1.0f + float(_ulp); - Vec3f len = box.size() * up; - this->lower = box.lower; -#if defined(__INTEL_LLVM_COMPILER) && defined(WIN32) - int _exp_x; float mant_x = embree_frexp(len.x, &_exp_x); _exp_x += (mant_x > 255.0f / 256.0f); - int _exp_y; float mant_y = embree_frexp(len.y, &_exp_y); _exp_y += (mant_y > 255.0f / 256.0f); - int _exp_z; float mant_z = embree_frexp(len.z, &_exp_z); _exp_z += (mant_z > 255.0f / 256.0f); -#else - int _exp_x; float mant_x = frexp(len.x, &_exp_x); _exp_x += (mant_x > 255.0f / 256.0f); - int _exp_y; float mant_y = frexp(len.y, &_exp_y); _exp_y += (mant_y > 255.0f / 256.0f); - int _exp_z; float mant_z = frexp(len.z, &_exp_z); _exp_z += (mant_z > 255.0f / 256.0f); -#endif - _exp_x = max(-128,_exp_x); // enlarge too tight bounds - _exp_y = max(-128,_exp_y); - _exp_z = max(-128,_exp_z); - this->exp_x = _exp_x; assert(_exp_x >= -128 && _exp_x <= 127); - this->exp_y = _exp_y; assert(_exp_y >= -128 && _exp_y <= 127); - this->exp_z = _exp_z; assert(_exp_z >= -128 && _exp_z <= 127); - } - - /* dequantizes the bounds of the specified child */ - const BBox3f bounds(uint32_t childID) const - { - return dequantize_bounds(BBox3f(Vec3f(this->lower_x[childID], this->lower_y[childID], this->lower_z[childID]), - Vec3f(this->upper_x[childID], this->upper_y[childID], this->upper_z[childID])), - this->lower); - } - - const BBox3f bounds() const - { - BBox3f b = empty; - for (size_t i=0; isetChildOffset((char*)this + getChildOffset(0)); - } - -#if !defined(__RTRT_GSIM) - - /* output of internal node */ - void print(std::ostream& cout, uint32_t depth, bool close) const - { - cout << tab(depth) << "InternalNode" << NUM_CHILDREN << " {" << std::endl; - cout << tab(depth) << " addr = " << this << std::endl; - cout << tab(depth) << " childOffset = " << 64 * int64_t(this->childOffset) << std::endl; - cout << tab(depth) << " nodeType = " << NodeType(this->nodeType) << std::endl; - cout << tab(depth) << " nodeMask = " << std::bitset<8>(this->nodeMask) << std::endl; - - for (uint32_t i = 0; i < NUM_CHILDREN; i++) - { - cout << tab(depth) << " child" << i << " = { "; - if (valid(i)) - { - cout << "type = " << getChildType(i); - cout << ", offset = " << getChildOffset(i); - cout << ", prim = " << getChildStartPrim(i); - cout << ", bounds = " << bounds(i); - } - else { - cout << "INVALID"; - } - cout << " }" << std::endl; - } - - if (close) - cout << tab(depth) << "}"; - } - - /* output operator for internal node */ - friend inline std::ostream& operator<<(std::ostream& cout, const InternalNode& node) { - node.print(cout, 0, true); return cout; - } -#endif - }; - - inline size_t GetInternalNodeSize(uint32_t numChildren) - { - if (numChildren <= 6) - return sizeof(InternalNode6Data); - else - assert(false); - return 0; - } - - typedef InternalNode InternalNode6; -} diff --git a/kernels/rthwif/rtbuild/quadifier.h b/kernels/rthwif/rtbuild/quadifier.h deleted file mode 100644 index fa5071474f..0000000000 --- a/kernels/rthwif/rtbuild/quadifier.h +++ /dev/null @@ -1,151 +0,0 @@ -// Copyright 2009-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#if defined(ZE_RAYTRACING) -#include "sys/sysinfo.h" -#include "sys/vector.h" -#include "math/vec2.h" -#include "math/vec3.h" -#include "math/bbox.h" -#include "math/affinespace.h" -#else -#include "../../common/default.h" -#endif - -namespace embree -{ - enum QuadifierType : uint16_t - { - QUADIFIER_PAIRED = 0xFFFF, // indicates that triangle is paired with a previous triangle - QUADIFIER_TRIANGLE = 0, // indicates that this triangle cannot get paired - QUADIFIER_QUAD = 1, // all values > 0 and != 0xFFFF indicate offset to paired triangle - QUADIFIER_MAX_DISTANCE = 31, - }; - - template - struct static_deque - { - __forceinline Ty pop_front() { - assert(size()); - return operator[](begin++); - } - - __forceinline void push_back(const Ty& v) { - assert(size() < N); - operator[](end++) = v; - } - - __forceinline size_t size() const { - assert(end >= begin); - return end-begin; - } - - __forceinline bool full() const { - return size() == N; - } - - __forceinline void erase( size_t j ) - { - assert(j >= begin && j < end); - - /* fast path as we mostly just merge with the subsequent triangle */ - if (likely(j == begin)) - begin++; - - /* fastest when left side is small */ - else if (j-begin < end-j-1) { - for (size_t i=j; i>=begin+1; i--) operator[](i) = operator[](i-1); - begin++; - } - - /* fastest if right side is small */ - else { - for (size_t i=j+1; i a, Vec3 b, uint8_t& lb0, uint8_t& lb1, uint8_t& lb2) - { - const vuint<4> va(a.x,a.y,a.z,0); - const vboolf<4> mb0 = vboolf<4>(0x8) | vuint<4>(b.x) == va; - const vboolf<4> mb1 = vboolf<4>(0x8) | vuint<4>(b.y) == va; - const vboolf<4> mb2 = vboolf<4>(0x8) | vuint<4>(b.z) == va; - lb0 = bsf(movemask(mb0)); - lb1 = bsf(movemask(mb1)); - lb2 = bsf(movemask(mb2)); - return (lb0 == 3) + (lb1 == 3) + (lb2 == 3) <= 1; - } - - template - __forceinline void merge_triangle_window( uint32_t geomID, static_deque& triangleWindow, QuadifierType* quads_o, const GetTriangleFunc& getTriangle ) - { - uint32_t primID0 = triangleWindow.pop_front(); - - /* load first triangle */ - Vec3 tri0 = getTriangle(geomID, primID0); - - /* find a second triangle in triangle window to pair with */ - for ( size_t slot = triangleWindow.begin; slot != triangleWindow.end; ++slot ) - { - /* load second triangle */ - uint32_t primID1 = triangleWindow[slot]; - Vec3 tri1 = getTriangle(geomID, primID1); - - /* try to pair triangles */ - uint8_t lb0,lb1,lb2; - bool pair = pair_triangles(tri0,tri1,lb0,lb1,lb2); - - /* the offset between the triangles cannot be too large as hardware limits bits for offset encode */ - uint32_t prim_offset = primID1 - primID0; - pair &= prim_offset <= QUADIFIER_MAX_DISTANCE; - - /* store pairing if successful */ - if (pair) - { - assert(prim_offset > 0 && prim_offset < QUADIFIER_PAIRED); - quads_o[primID0] = (QuadifierType) prim_offset; - quads_o[primID1] = QUADIFIER_PAIRED; - triangleWindow.erase(slot); - return; - } - } - - /* make a triangle if we fail to find a candiate to pair with */ - quads_o[primID0] = QUADIFIER_TRIANGLE; - } - - template - inline size_t pair_triangles( uint32_t geomID, QuadifierType* quads_o, uint32_t primID0, uint32_t primID1, const GetTriangleFunc& getTriangle ) - { - static_deque triangleWindow; - - size_t numTrianglePairs = 0; - for (uint32_t primID=primID0; primIDtriangleCount); - return *(ze_rtas_triangle_indices_uint32_exp_t*)((char*)geom->pTriangleBuffer + uint64_t(primID)*geom->triangleStride); - } - - inline Vec3f getVertex(const ze_rtas_builder_triangles_geometry_info_exp_t* geom, uint32_t vertexID) { - assert(vertexID < geom->vertexCount); - return *(Vec3f*)((char*)geom->pVertexBuffer + uint64_t(vertexID)*geom->vertexStride); - } - - inline ze_rtas_quad_indices_uint32_exp_t getPrimitive(const ze_rtas_builder_quads_geometry_info_exp_t* geom, uint32_t primID) { - assert(primID < geom->quadCount); - return *(ze_rtas_quad_indices_uint32_exp_t*)((char*)geom->pQuadBuffer + uint64_t(primID)*geom->quadStride); - } - - inline Vec3f getVertex(const ze_rtas_builder_quads_geometry_info_exp_t* geom, uint32_t vertexID) { - assert(vertexID < geom->vertexCount); - return *(Vec3f*)((char*)geom->pVertexBuffer + uint64_t(vertexID)*geom->vertexStride); - } - - inline AffineSpace3fa getTransform(const ze_rtas_builder_instance_geometry_info_exp_t* geom) - { - switch (geom->transformFormat) - { - case ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3X4_COLUMN_MAJOR: { - const ze_rtas_transform_float3x4_column_major_exp_t* xfm = (const ze_rtas_transform_float3x4_column_major_exp_t*) geom->pTransform; - return { - { xfm->vx_x, xfm->vx_y, xfm->vx_z }, - { xfm->vy_x, xfm->vy_y, xfm->vy_z }, - { xfm->vz_x, xfm->vz_y, xfm->vz_z }, - { xfm-> p_x, xfm-> p_y, xfm-> p_z } - }; - } - case ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3X4_ALIGNED_COLUMN_MAJOR: { - const ze_rtas_transform_float3x4_aligned_column_major_exp_t* xfm = (const ze_rtas_transform_float3x4_aligned_column_major_exp_t*) geom->pTransform; - return { - { xfm->vx_x, xfm->vx_y, xfm->vx_z }, - { xfm->vy_x, xfm->vy_y, xfm->vy_z }, - { xfm->vz_x, xfm->vz_y, xfm->vz_z }, - { xfm-> p_x, xfm-> p_y, xfm-> p_z } - }; - } - case ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3X4_ROW_MAJOR: { - const ze_rtas_transform_float3x4_row_major_exp_t* xfm = (const ze_rtas_transform_float3x4_row_major_exp_t*) geom->pTransform; - return { - { xfm->vx_x, xfm->vx_y, xfm->vx_z }, - { xfm->vy_x, xfm->vy_y, xfm->vy_z }, - { xfm->vz_x, xfm->vz_y, xfm->vz_z }, - { xfm-> p_x, xfm-> p_y, xfm-> p_z } - }; - } - default: - throw std::runtime_error("invalid transform format"); - } - } - - inline void verifyGeometryDesc(const ze_rtas_builder_triangles_geometry_info_exp_t* geom) - { - if (geom->triangleFormat != ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_TRIANGLE_INDICES_UINT32) - throw std::runtime_error("triangle format must be ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_TRIANGLE_INDICES_UINT32"); - - if (geom->vertexFormat != ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3) - throw std::runtime_error("vertex format must be ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3"); - - if (geom->triangleCount && geom->pTriangleBuffer == nullptr) throw std::runtime_error("no triangle buffer specified"); - if (geom->vertexCount && geom->pVertexBuffer == nullptr) throw std::runtime_error("no vertex buffer specified"); - } - - inline void verifyGeometryDesc(const ze_rtas_builder_quads_geometry_info_exp_t* geom) - { - if (geom->quadFormat != ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_QUAD_INDICES_UINT32) - throw std::runtime_error("quad format must be ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_QUAD_INDICES_UINT32"); - - if (geom->vertexFormat != ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3) - throw std::runtime_error("vertex format must be ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3"); - - if (geom->quadCount && geom->pQuadBuffer == nullptr) throw std::runtime_error("no quad buffer specified"); - if (geom->vertexCount && geom->pVertexBuffer == nullptr) throw std::runtime_error("no vertex buffer specified"); - } - - inline void verifyGeometryDesc(const ze_rtas_builder_procedural_geometry_info_exp_t* geom) - { - if (geom->primCount && geom->pfnGetBoundsCb == nullptr) throw std::runtime_error("no bounds function specified"); - if (geom->reserved != 0) throw std::runtime_error("reserved value must be zero"); - } - - inline void verifyGeometryDesc(const ze_rtas_builder_instance_geometry_info_exp_t* geom) - { - if (geom->pTransform == nullptr) throw std::runtime_error("no instance transformation specified"); - if (geom->pBounds == nullptr) throw std::runtime_error("no acceleration structure bounds specified"); - if (geom->pAccelerationStructure == nullptr) throw std::runtime_error("no acceleration structure to instanciate specified"); - } - - inline bool buildBounds(const ze_rtas_builder_triangles_geometry_info_exp_t* geom, uint32_t primID, BBox3fa& bbox, void* buildUserPtr) - { - if (primID >= geom->triangleCount) return false; - const ze_rtas_triangle_indices_uint32_exp_t tri = getPrimitive(geom,primID); - if (unlikely(tri.v0 >= geom->vertexCount)) return false; - if (unlikely(tri.v1 >= geom->vertexCount)) return false; - if (unlikely(tri.v2 >= geom->vertexCount)) return false; - - const Vec3f p0 = getVertex(geom,tri.v0); - const Vec3f p1 = getVertex(geom,tri.v1); - const Vec3f p2 = getVertex(geom,tri.v2); - if (unlikely(!isvalid(p0))) return false; - if (unlikely(!isvalid(p1))) return false; - if (unlikely(!isvalid(p2))) return false; - - bbox = BBox3fa(min(p0,p1,p2),max(p0,p1,p2)); - return true; - } - - inline bool buildBounds(const ze_rtas_builder_quads_geometry_info_exp_t* geom, uint32_t primID, BBox3fa& bbox, void* buildUserPtr) - { - if (primID >= geom->quadCount) return false; - const ze_rtas_quad_indices_uint32_exp_t tri = getPrimitive(geom,primID); - if (unlikely(tri.v0 >= geom->vertexCount)) return false; - if (unlikely(tri.v1 >= geom->vertexCount)) return false; - if (unlikely(tri.v2 >= geom->vertexCount)) return false; - if (unlikely(tri.v3 >= geom->vertexCount)) return false; - - const Vec3f p0 = getVertex(geom,tri.v0); - const Vec3f p1 = getVertex(geom,tri.v1); - const Vec3f p2 = getVertex(geom,tri.v2); - const Vec3f p3 = getVertex(geom,tri.v3); - if (unlikely(!isvalid(p0))) return false; - if (unlikely(!isvalid(p1))) return false; - if (unlikely(!isvalid(p2))) return false; - if (unlikely(!isvalid(p3))) return false; - - bbox = BBox3fa(min(p0,p1,p2,p3),max(p0,p1,p2,p3)); - return true; - } - - inline bool buildBounds(const ze_rtas_builder_procedural_geometry_info_exp_t* geom, uint32_t primID, BBox3fa& bbox, void* buildUserPtr) - { - if (primID >= geom->primCount) return false; - if (geom->pfnGetBoundsCb == nullptr) return false; - - BBox3f bounds; - ze_rtas_geometry_aabbs_exp_cb_params_t params = { ZE_STRUCTURE_TYPE_RTAS_GEOMETRY_AABBS_EXP_CB_PARAMS }; - params.primID = primID; - params.primIDCount = 1; - params.pGeomUserPtr = geom->pGeomUserPtr; - params.pBuildUserPtr = buildUserPtr; - params.pBoundsOut = (ze_rtas_aabb_exp_t*) &bounds; - (geom->pfnGetBoundsCb)(¶ms); - - if (unlikely(!isvalid(bounds.lower))) return false; - if (unlikely(!isvalid(bounds.upper))) return false; - if (unlikely(bounds.empty())) return false; - - bbox = (BBox3f&) bounds; - return true; - } - - inline bool buildBounds(const ze_rtas_builder_instance_geometry_info_exp_t* geom, uint32_t primID, BBox3fa& bbox, void* buildUserPtr) - { - if (primID >= 1) return false; - if (geom->pAccelerationStructure == nullptr) return false; - if (geom->pTransform == nullptr) return false; - - const AffineSpace3fa local2world = getTransform(geom); - const Vec3fa lower(geom->pBounds->lower.x,geom->pBounds->lower.y,geom->pBounds->lower.z); - const Vec3fa upper(geom->pBounds->upper.x,geom->pBounds->upper.y,geom->pBounds->upper.z); - const BBox3fa bounds = xfmBounds(local2world,BBox3fa(lower,upper)); - - if (unlikely(!isvalid(bounds.lower))) return false; - if (unlikely(!isvalid(bounds.upper))) return false; - if (unlikely(bounds.empty())) return false; - - bbox = bounds; - return true; - } - - template - PrimInfo createGeometryPrimRefArray(const GeometryType* geom, void* buildUserPtr, evector& prims, const range& r, size_t k, unsigned int geomID) - { - PrimInfo pinfo(empty); - for (uint32_t primID=r.begin(); primIDpNext == nullptr) return true; - desc = (zet_base_desc_t_*) desc->pNext; - } - return false; - } - - struct ze_rtas_builder - { - ze_rtas_builder () { - } - - ~ze_rtas_builder() { - magick = 0x0; - } - - bool verify() const { - return magick == MAGICK; - } - - enum { MAGICK = 0x45FE67E1 }; - uint32_t magick = MAGICK; - }; - - ze_result_t validate(ze_rtas_builder_exp_handle_t hBuilder) - { - if (hBuilder == nullptr) - return ZE_RESULT_ERROR_INVALID_NULL_HANDLE; - - if (!((ze_rtas_builder*)hBuilder)->verify()) - return ZE_RESULT_ERROR_INVALID_ARGUMENT; - - return ZE_RESULT_SUCCESS; - } - - struct ze_rtas_parallel_operation_t - { - ze_rtas_parallel_operation_t() { - } - - ~ze_rtas_parallel_operation_t() { - magick = 0x0; - } - - ze_result_t verify() const - { - if (magick != MAGICK) - return ZE_RESULT_ERROR_INVALID_ARGUMENT; - - return ZE_RESULT_SUCCESS; - } - - enum { MAGICK = 0xE84567E1 }; - uint32_t magick = MAGICK; - std::atomic object_in_use = false; - ze_result_t errorCode = ZE_RESULT_SUCCESS; - tbb::task_group group; - }; - - ze_result_t validate(ze_rtas_parallel_operation_exp_handle_t hParallelOperation) - { - if (hParallelOperation == nullptr) - return ZE_RESULT_ERROR_INVALID_NULL_HANDLE; - - return ((ze_rtas_parallel_operation_t*)hParallelOperation)->verify(); - } - - ze_result_t validate(const ze_rtas_builder_exp_desc_t* pDescriptor) - { - if (pDescriptor == nullptr) - return ZE_RESULT_ERROR_INVALID_NULL_POINTER; - - if (pDescriptor->stype != ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXP_DESC) - return ZE_RESULT_ERROR_INVALID_ENUMERATION; - - if (!checkDescChain((zet_base_desc_t_*)pDescriptor)) - return ZE_RESULT_ERROR_INVALID_ENUMERATION; - - if (uint32_t(ZE_RTAS_BUILDER_EXP_VERSION_CURRENT) < uint32_t(pDescriptor->builderVersion)) - return ZE_RESULT_ERROR_INVALID_ENUMERATION; - - return ZE_RESULT_SUCCESS; - } - - ze_result_t validate(ze_rtas_device_exp_properties_t* pProperties) - { - if (pProperties == nullptr) - return ZE_RESULT_ERROR_INVALID_NULL_POINTER; - - if (pProperties->stype != ZE_STRUCTURE_TYPE_RTAS_DEVICE_EXP_PROPERTIES) - return ZE_RESULT_ERROR_INVALID_ENUMERATION; - - if (!checkDescChain((zet_base_desc_t_*)pProperties)) - return ZE_RESULT_ERROR_INVALID_ENUMERATION; - - return ZE_RESULT_SUCCESS; - } - - ze_result_t validate(ze_rtas_format_exp_t rtasFormat) - { - if (rtasFormat == ZE_RTAS_FORMAT_EXP_INVALID) - return ZE_RESULT_ERROR_INVALID_ENUMERATION; - - if (uint32_t(rtasFormat) > uint32_t(ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_MAX)) - return ZE_RESULT_ERROR_INVALID_ENUMERATION; - - return ZE_RESULT_SUCCESS; - } - - ze_result_t validate(const ze_rtas_builder_build_op_exp_desc_t* args) - { - /* check for valid pointers */ - if (args == nullptr) - return ZE_RESULT_ERROR_INVALID_NULL_POINTER; - - /* check if input descriptor has proper type */ - if (args->stype != ZE_STRUCTURE_TYPE_RTAS_BUILDER_BUILD_OP_EXP_DESC) - return ZE_RESULT_ERROR_INVALID_ENUMERATION; - - /* check valid pNext chain */ - if (!checkDescChain((zet_base_desc_t_*)args)) - return ZE_RESULT_ERROR_INVALID_ENUMERATION; - - /* check if acceleration structure format is supported */ - VALIDATE(args->rtasFormat); - - /* check for valid geometries array */ - if (args->ppGeometries == nullptr && args->numGeometries > 0) - return ZE_RESULT_ERROR_INVALID_NULL_POINTER; - - /* validate that number of geometries are in range */ - if (args->numGeometries > 0x00FFFFFF) - return ZE_RESULT_ERROR_INVALID_ENUMERATION; - - /* validate build quality */ - if (args->buildQuality < 0 || ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXP_HIGH < args->buildQuality) - return ZE_RESULT_ERROR_INVALID_ENUMERATION; - - /* validate build flags */ - if (args->buildFlags >= (ZE_RTAS_BUILDER_BUILD_OP_EXP_FLAG_NO_DUPLICATE_ANYHIT_INVOCATION<<1)) - return ZE_RESULT_ERROR_INVALID_ENUMERATION; - - return ZE_RESULT_SUCCESS; - } - - ze_result_t validate(ze_rtas_builder_exp_properties_t* pProp) - { - /* check for valid pointers */ - if (pProp == nullptr) - return ZE_RESULT_ERROR_INVALID_NULL_POINTER; - - /* check if return property has proper type */ - if (pProp->stype != ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXP_PROPERTIES) - return ZE_RESULT_ERROR_INVALID_ENUMERATION; - - /* check valid pNext chain */ - if (!checkDescChain((zet_base_desc_t_*)pProp)) - return ZE_RESULT_ERROR_INVALID_ENUMERATION; - - return ZE_RESULT_SUCCESS; - } - - ze_result_t validate(ze_rtas_parallel_operation_exp_properties_t* pProperties) - { - /* check for valid pointer */ - if (pProperties == nullptr) - return ZE_RESULT_ERROR_INVALID_NULL_POINTER; - - /* check for proper property */ - if (pProperties->stype != ZE_STRUCTURE_TYPE_RTAS_PARALLEL_OPERATION_EXP_PROPERTIES) - return ZE_RESULT_ERROR_INVALID_ENUMERATION; - - /* check valid pNext chain */ - if (!checkDescChain((zet_base_desc_t_*)pProperties)) - return ZE_RESULT_ERROR_INVALID_ENUMERATION; - - return ZE_RESULT_SUCCESS; - } - - RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderCreateExpImpl(ze_driver_handle_t hDriver, const ze_rtas_builder_exp_desc_t *pDescriptor, ze_rtas_builder_exp_handle_t *phBuilder) - { - /* input validation */ - VALIDATE(hDriver); - VALIDATE(pDescriptor); - VALIDATE_PTR(phBuilder); - - *phBuilder = (ze_rtas_builder_exp_handle_t) new ze_rtas_builder(); - return ZE_RESULT_SUCCESS; - } - - RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderDestroyExpImpl(ze_rtas_builder_exp_handle_t hBuilder) - { - VALIDATE(hBuilder); - delete (ze_rtas_builder*) hBuilder; - return ZE_RESULT_SUCCESS; - } - - RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeDriverRTASFormatCompatibilityCheckExpImpl( ze_driver_handle_t hDriver, - const ze_rtas_format_exp_t accelFormat, - const ze_rtas_format_exp_t otherAccelFormat ) - { - /* input validation */ - VALIDATE(hDriver); - VALIDATE(accelFormat); - VALIDATE(otherAccelFormat); - - /* check if rtas formats are compatible */ - if (accelFormat == otherAccelFormat) - return ZE_RESULT_SUCCESS; - - /* report incompatible format */ - return ZE_RESULT_EXP_ERROR_OPERANDS_INCOMPATIBLE; - } - - uint32_t getNumPrimitives(const ze_rtas_builder_geometry_info_exp_t* geom) - { - switch (geom->geometryType) { - case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES : return ((ze_rtas_builder_triangles_geometry_info_exp_t*) geom)->triangleCount; - case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL : return ((ze_rtas_builder_procedural_geometry_info_exp_t*) geom)->primCount; - case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_QUADS : return ((ze_rtas_builder_quads_geometry_info_exp_t*) geom)->quadCount; - case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE : return 1; - default : return 0; - }; - } - - RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderGetBuildPropertiesExpImpl(ze_rtas_builder_exp_handle_t hBuilder, - const ze_rtas_builder_build_op_exp_desc_t* args, - ze_rtas_builder_exp_properties_t* pProp) - { - /* input validation */ - VALIDATE(hBuilder); - VALIDATE(args); - VALIDATE(pProp); - - const ze_rtas_builder_geometry_info_exp_t** geometries = args->ppGeometries; - const size_t numGeometries = args->numGeometries; - - auto getSize = [&](uint32_t geomID) -> size_t { - const ze_rtas_builder_geometry_info_exp_t* geom = geometries[geomID]; - if (geom == nullptr) return 0; - return getNumPrimitives(geom); - }; - - auto getType = [&](unsigned int geomID) - { - const ze_rtas_builder_geometry_info_exp_t* geom = geometries[geomID]; - assert(geom); - switch (geom->geometryType) { - case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES : return QBVH6BuilderSAH::TRIANGLE; - case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_QUADS: return QBVH6BuilderSAH::QUAD; - case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL: return QBVH6BuilderSAH::PROCEDURAL; - case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE: return QBVH6BuilderSAH::INSTANCE; - default: throw std::runtime_error("invalid geometry type"); - }; - }; - - /* query memory requirements from builder */ - size_t expectedBytes = 0; - size_t worstCaseBytes = 0; - size_t scratchBytes = 0; - QBVH6BuilderSAH::estimateSize(numGeometries, getSize, getType, args->rtasFormat, args->buildQuality, args->buildFlags, expectedBytes, worstCaseBytes, scratchBytes); - - /* fill return struct */ - pProp->flags = 0; - pProp->rtasBufferSizeBytesExpected = expectedBytes; - pProp->rtasBufferSizeBytesMaxRequired = worstCaseBytes; - pProp->scratchBufferSizeBytes = scratchBytes; - return ZE_RESULT_SUCCESS; - } - - ze_result_t zeRTASBuilderBuildExpBody(const ze_rtas_builder_build_op_exp_desc_t* args, - void *pScratchBuffer, size_t scratchBufferSizeBytes, - void *pRtasBuffer, size_t rtasBufferSizeBytes, - void *pBuildUserPtr, ze_rtas_aabb_exp_t *pBounds, size_t *pRtasBufferSizeBytes) try - { - const ze_rtas_builder_geometry_info_exp_t** geometries = args->ppGeometries; - const uint32_t numGeometries = args->numGeometries; - - /* verify input descriptors */ - parallel_for(numGeometries,[&](uint32_t geomID) { - const ze_rtas_builder_geometry_info_exp_t* geom = geometries[geomID]; - if (geom == nullptr) return; - - switch (geom->geometryType) { - case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES : verifyGeometryDesc((ze_rtas_builder_triangles_geometry_info_exp_t*)geom); break; - case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_QUADS : verifyGeometryDesc((ze_rtas_builder_quads_geometry_info_exp_t* )geom); break; - case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL : verifyGeometryDesc((ze_rtas_builder_procedural_geometry_info_exp_t*)geom); break; - case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE : verifyGeometryDesc((ze_rtas_builder_instance_geometry_info_exp_t* )geom); break; - default: throw std::runtime_error("invalid geometry type"); - }; - }); - - auto getSize = [&](uint32_t geomID) -> size_t { - const ze_rtas_builder_geometry_info_exp_t* geom = geometries[geomID]; - if (geom == nullptr) return 0; - return getNumPrimitives(geom); - }; - - auto getType = [&](unsigned int geomID) - { - const ze_rtas_builder_geometry_info_exp_t* geom = geometries[geomID]; - assert(geom); - switch (geom->geometryType) { - case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES : return QBVH6BuilderSAH::TRIANGLE; - case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_QUADS: return QBVH6BuilderSAH::QUAD; - case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL: return QBVH6BuilderSAH::PROCEDURAL; - case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE: return QBVH6BuilderSAH::INSTANCE; - default: throw std::runtime_error("invalid geometry type"); - }; - }; - - auto createPrimRefArray = [&] (evector& prims, BBox1f time_range, const range& r, size_t k, unsigned int geomID) -> PrimInfo - { - const ze_rtas_builder_geometry_info_exp_t* geom = geometries[geomID]; - assert(geom); - - switch (geom->geometryType) { - case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES : return createGeometryPrimRefArray((ze_rtas_builder_triangles_geometry_info_exp_t*)geom,pBuildUserPtr,prims,r,k,geomID); - case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_QUADS : return createGeometryPrimRefArray((ze_rtas_builder_quads_geometry_info_exp_t* )geom,pBuildUserPtr,prims,r,k,geomID); - case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL: return createGeometryPrimRefArray((ze_rtas_builder_procedural_geometry_info_exp_t*)geom,pBuildUserPtr,prims,r,k,geomID); - case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE: return createGeometryPrimRefArray((ze_rtas_builder_instance_geometry_info_exp_t* )geom,pBuildUserPtr,prims,r,k,geomID); - default: throw std::runtime_error("invalid geometry type"); - }; - }; - - auto convertGeometryFlags = [&] (ze_rtas_builder_packed_geometry_exp_flags_t flags) -> GeometryFlags { - return (flags & ZE_RTAS_BUILDER_GEOMETRY_EXP_FLAG_NON_OPAQUE) ? GeometryFlags::NONE : GeometryFlags::OPAQUE; - }; - - auto getTriangle = [&](unsigned int geomID, unsigned int primID) - { - const ze_rtas_builder_triangles_geometry_info_exp_t* geom = (const ze_rtas_builder_triangles_geometry_info_exp_t*) geometries[geomID]; - assert(geom); - - const ze_rtas_triangle_indices_uint32_exp_t tri = getPrimitive(geom,primID); - if (unlikely(tri.v0 >= geom->vertexCount)) return QBVH6BuilderSAH::Triangle(); - if (unlikely(tri.v1 >= geom->vertexCount)) return QBVH6BuilderSAH::Triangle(); - if (unlikely(tri.v2 >= geom->vertexCount)) return QBVH6BuilderSAH::Triangle(); - - const Vec3f p0 = getVertex(geom,tri.v0); - const Vec3f p1 = getVertex(geom,tri.v1); - const Vec3f p2 = getVertex(geom,tri.v2); - if (unlikely(!isvalid(p0))) return QBVH6BuilderSAH::Triangle(); - if (unlikely(!isvalid(p1))) return QBVH6BuilderSAH::Triangle(); - if (unlikely(!isvalid(p2))) return QBVH6BuilderSAH::Triangle(); - - const GeometryFlags gflags = convertGeometryFlags(geom->geometryFlags); - return QBVH6BuilderSAH::Triangle(tri.v0,tri.v1,tri.v2,p0,p1,p2,gflags,geom->geometryMask); - }; - - auto getTriangleIndices = [&] (uint32_t geomID, uint32_t primID) { - const ze_rtas_builder_triangles_geometry_info_exp_t* geom = (const ze_rtas_builder_triangles_geometry_info_exp_t*) geometries[geomID]; - assert(geom); - const ze_rtas_triangle_indices_uint32_exp_t tri = getPrimitive(geom,primID); - return Vec3(tri.v0,tri.v1,tri.v2); - }; - - auto getQuad = [&](unsigned int geomID, unsigned int primID) - { - const ze_rtas_builder_quads_geometry_info_exp_t* geom = (const ze_rtas_builder_quads_geometry_info_exp_t*) geometries[geomID]; - assert(geom); - - const ze_rtas_quad_indices_uint32_exp_t quad = getPrimitive(geom,primID); - const Vec3f p0 = getVertex(geom,quad.v0); - const Vec3f p1 = getVertex(geom,quad.v1); - const Vec3f p2 = getVertex(geom,quad.v2); - const Vec3f p3 = getVertex(geom,quad.v3); - - const GeometryFlags gflags = convertGeometryFlags(geom->geometryFlags); - return QBVH6BuilderSAH::Quad(p0,p1,p2,p3,gflags,geom->geometryMask); - }; - - auto getProcedural = [&](unsigned int geomID, unsigned int primID) { - const ze_rtas_builder_procedural_geometry_info_exp_t* geom = (const ze_rtas_builder_procedural_geometry_info_exp_t*) geometries[geomID]; - assert(geom); - return QBVH6BuilderSAH::Procedural(geom->geometryMask); // FIXME: pass gflags - }; - - auto getInstance = [&](unsigned int geomID, unsigned int primID) - { - assert(geometries[geomID]); - assert(geometries[geomID]->geometryType == ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE); - const ze_rtas_builder_instance_geometry_info_exp_t* geom = (const ze_rtas_builder_instance_geometry_info_exp_t*) geometries[geomID]; - void* accel = geom->pAccelerationStructure; - const AffineSpace3fa local2world = getTransform(geom); - return QBVH6BuilderSAH::Instance(local2world,accel,geom->geometryMask,geom->instanceUserID); // FIXME: pass instance flags - }; - - /* dispatch globals ptr for debugging purposes */ - void* dispatchGlobalsPtr = nullptr; -#if defined(EMBREE_SYCL_ALLOC_DISPATCH_GLOBALS) - if (args->pNext) { - zet_base_desc_t_* next = (zet_base_desc_t_*) args->pNext; - if (next->stype == ZE_STRUCTURE_TYPE_RTAS_BUILDER_BUILD_OP_DEBUG_EXP_DESC) { - ze_rtas_builder_build_op_debug_exp_desc_t* debug_ext = (ze_rtas_builder_build_op_debug_exp_desc_t*) next; - dispatchGlobalsPtr = debug_ext->dispatchGlobalsPtr; - } - } -#endif - - bool verbose = false; - bool success = QBVH6BuilderSAH::build(numGeometries, nullptr, - getSize, getType, - createPrimRefArray, getTriangle, getTriangleIndices, getQuad, getProcedural, getInstance, - (char*)pRtasBuffer, rtasBufferSizeBytes, - pScratchBuffer, scratchBufferSizeBytes, - (BBox3f*) pBounds, pRtasBufferSizeBytes, - args->rtasFormat, args->buildQuality, args->buildFlags, verbose, dispatchGlobalsPtr); - if (!success) { - return ZE_RESULT_EXP_RTAS_BUILD_RETRY; - } - return ZE_RESULT_SUCCESS; - } - catch (std::exception& e) { - //std::cerr << "caught exception during BVH build: " << e.what() << std::endl; - return ZE_RESULT_ERROR_UNKNOWN; - } - - RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderBuildExpImpl(ze_rtas_builder_exp_handle_t hBuilder, - const ze_rtas_builder_build_op_exp_desc_t* args, - void *pScratchBuffer, size_t scratchBufferSizeBytes, - void *pRtasBuffer, size_t rtasBufferSizeBytes, - ze_rtas_parallel_operation_exp_handle_t hParallelOperation, - void *pBuildUserPtr, ze_rtas_aabb_exp_t *pBounds, size_t *pRtasBufferSizeBytes) - { - /* input validation */ - VALIDATE(hBuilder); - VALIDATE(args); - VALIDATE_PTR(pScratchBuffer); - VALIDATE_PTR(pRtasBuffer); - - /* if parallel operation is provided then execute using thread arena inside task group ... */ - if (hParallelOperation) - { - VALIDATE(hParallelOperation); - - ze_rtas_parallel_operation_t* op = (ze_rtas_parallel_operation_t*) hParallelOperation; - - if (op->object_in_use.load()) - return ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE; - - op->object_in_use.store(true); - - g_arena.execute([&](){ op->group.run([=](){ - op->errorCode = zeRTASBuilderBuildExpBody(args, - pScratchBuffer, scratchBufferSizeBytes, - pRtasBuffer, rtasBufferSizeBytes, - pBuildUserPtr, pBounds, pRtasBufferSizeBytes); - }); - }); - return ZE_RESULT_EXP_RTAS_BUILD_DEFERRED; - } - /* ... otherwise we just execute inside task arena to avoid spawning of TBB worker threads */ - else - { - ze_result_t errorCode = ZE_RESULT_SUCCESS; - g_arena.execute([&](){ errorCode = zeRTASBuilderBuildExpBody(args, - pScratchBuffer, scratchBufferSizeBytes, - pRtasBuffer, rtasBufferSizeBytes, - pBuildUserPtr, pBounds, pRtasBufferSizeBytes); - }); - return errorCode; - } - } - - RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationCreateExpImpl(ze_driver_handle_t hDriver, ze_rtas_parallel_operation_exp_handle_t* phParallelOperation) - { - /* input validation */ - VALIDATE(hDriver); - VALIDATE_PTR(phParallelOperation); - - /* create parallel operation object */ - *phParallelOperation = (ze_rtas_parallel_operation_exp_handle_t) new ze_rtas_parallel_operation_t(); - return ZE_RESULT_SUCCESS; - } - - RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationDestroyExpImpl( ze_rtas_parallel_operation_exp_handle_t hParallelOperation ) - { - /* input validation */ - VALIDATE(hParallelOperation); - - /* delete parallel operation */ - delete (ze_rtas_parallel_operation_t*) hParallelOperation; - return ZE_RESULT_SUCCESS; - } - - RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationGetPropertiesExpImpl( ze_rtas_parallel_operation_exp_handle_t hParallelOperation, ze_rtas_parallel_operation_exp_properties_t* pProperties ) - { - /* input validation */ - VALIDATE(hParallelOperation); - VALIDATE(pProperties); - - ze_rtas_parallel_operation_t* op = (ze_rtas_parallel_operation_t*) hParallelOperation; - if (!op->object_in_use.load()) - return ZE_RESULT_ERROR_INVALID_ARGUMENT; - - /* return properties */ - pProperties->flags = 0; - pProperties->maxConcurrency = tbb::this_task_arena::max_concurrency(); - return ZE_RESULT_SUCCESS; - } - - RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationJoinExpImpl( ze_rtas_parallel_operation_exp_handle_t hParallelOperation) - { - /* check for valid handle */ - VALIDATE(hParallelOperation); - - ze_rtas_parallel_operation_t* op = (ze_rtas_parallel_operation_t*) hParallelOperation; - g_arena.execute([&](){ op->group.wait(); }); - op->object_in_use.store(false); // this is slighty too early - return op->errorCode; - } -} diff --git a/kernels/rthwif/rtbuild/rtbuild.h b/kernels/rthwif/rtbuild/rtbuild.h deleted file mode 100644 index 832b15eeb0..0000000000 --- a/kernels/rthwif/rtbuild/rtbuild.h +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright 2009-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "../../level_zero/ze_api.h" - -#if !defined(ZE_RTAS_BUILDER_EXP_NAME) -#include "../../level_zero/ze_rtas.h" -#endif - -#include -#include - -#if defined(__cplusplus) -# define RTHWIF_API_EXTERN_C extern "C" -#else -# define RTHWIF_API_EXTERN_C -#endif - -#if defined(_WIN32) -#if defined(EMBREE_RTHWIF_STATIC_LIB) -# define RTHWIF_API_IMPORT RTHWIF_API_EXTERN_C -# define RTHWIF_API_EXPORT RTHWIF_API_EXTERN_C -#else -# define RTHWIF_API_IMPORT RTHWIF_API_EXTERN_C __declspec(dllimport) -# define RTHWIF_API_EXPORT RTHWIF_API_EXTERN_C __declspec(dllexport) -#endif -#else -# define RTHWIF_API_IMPORT RTHWIF_API_EXTERN_C -# define RTHWIF_API_EXPORT RTHWIF_API_EXTERN_C __attribute__ ((visibility ("default"))) -#endif - -typedef enum _ze_raytracing_accel_format_internal_t { - ZE_RTAS_DEVICE_FORMAT_EXP_INVALID = 0, // invalid acceleration structure format - ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_1 = 1, // acceleration structure format version 1 - ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_2 = 2, // acceleration structure format version 2 - ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_MAX = 2 -} ze_raytracing_accel_format_internal_t; - -RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderCreateExpImpl(ze_driver_handle_t hDriver, const ze_rtas_builder_exp_desc_t *pDescriptor, ze_rtas_builder_exp_handle_t *phBuilder); - -RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderDestroyExpImpl(ze_rtas_builder_exp_handle_t hBuilder); - -RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeDriverRTASFormatCompatibilityCheckExpImpl( ze_driver_handle_t hDriver, - const ze_rtas_format_exp_t accelFormat, - const ze_rtas_format_exp_t otherAccelFormat); -RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderGetBuildPropertiesExpImpl(ze_rtas_builder_exp_handle_t hBuilder, - const ze_rtas_builder_build_op_exp_desc_t* args, - ze_rtas_builder_exp_properties_t* pProp); - -RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderBuildExpImpl(ze_rtas_builder_exp_handle_t hBuilder, - const ze_rtas_builder_build_op_exp_desc_t* args, - void *pScratchBuffer, size_t scratchBufferSizeBytes, - void *pRtasBuffer, size_t rtasBufferSizeBytes, - ze_rtas_parallel_operation_exp_handle_t hParallelOperation, - void *pBuildUserPtr, ze_rtas_aabb_exp_t *pBounds, size_t *pRtasBufferSizeBytes); - -RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationCreateExpImpl(ze_driver_handle_t hDriver, ze_rtas_parallel_operation_exp_handle_t* phParallelOperation); - -RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationDestroyExpImpl( ze_rtas_parallel_operation_exp_handle_t hParallelOperation ); - -RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationGetPropertiesExpImpl( ze_rtas_parallel_operation_exp_handle_t hParallelOperation, ze_rtas_parallel_operation_exp_properties_t* pProperties ); - -RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationJoinExpImpl( ze_rtas_parallel_operation_exp_handle_t hParallelOperation); - diff --git a/kernels/rthwif/rtbuild/statistics.cpp b/kernels/rthwif/rtbuild/statistics.cpp deleted file mode 100644 index dd11dc46dc..0000000000 --- a/kernels/rthwif/rtbuild/statistics.cpp +++ /dev/null @@ -1,155 +0,0 @@ -// Copyright 2009-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "statistics.h" - -namespace embree -{ - class RestoreStreamState - { - public: - RestoreStreamState(std::ostream& iostream) - : iostream(iostream), flags(iostream.flags()), precision(iostream.precision()) { - } - - ~RestoreStreamState() { - iostream.flags(flags); - iostream.precision(precision); - } - - private: - std::ostream& iostream; - std::ios::fmtflags flags; - std::streamsize precision; - }; - - double ratio(double a, double b) - { - if (b == 0.0) return 0.0f; - else return a/b; - } - - double percent(double a, double b) { - return 100.0*ratio(a,b); - } - - double ratio(size_t a, size_t b) { - return ratio(double(a), double(b)); - } - double percent(size_t a, size_t b) { - return percent(double(a), double(b)); - } - - void BVHStatistics::NodeStat::print(std::ostream& cout, double totalSAH, size_t totalBytes, size_t numPrimitives) const - { - RestoreStreamState iostate(cout); - cout << std::setw(7) << numNodes << " "; - cout << std::setw(7) << std::setprecision(3) << sah(); - cout << std::setw(7) << std::setprecision(2) << percent(sah(),totalSAH) << "% "; - cout << std::setw(8) << std::setprecision(2) << bytes()/1E6 << " MB "; - cout << std::setw(7) << std::setprecision(2) << percent(numBytes,numBytes) << "% "; - cout << std::setw(7) << std::setprecision(2) << percent(bytes(),totalBytes) << "% "; - cout << std::setw(8) << std::setprecision(2) << ratio(bytes(),numNodes) << " "; - cout << std::setw(8) << std::setprecision(2) << ratio(bytes(),numChildrenUsed) << " "; - cout << std::setw(8) << std::setprecision(2) << ratio(bytes(),numPrimitives) << " "; - cout << std::setw(7) << std::setprecision(2) << ratio(numChildrenUsed,numNodes) << " "; - cout << std::setw(7) << std::setprecision(2) << 100.0*fillRate() << "% "; - cout << std::endl; - } - - void BVHStatistics::LeafStat::print(std::ostream& cout, double totalSAH, size_t totalBytes, size_t numPrimitives, bool blocks) const - { - RestoreStreamState iostate(cout); - size_t N = blocks ? numBlocks : numLeaves; - cout << std::setw(7) << N << " "; - cout << std::setw(7) << std::setprecision(3) << sah(); - cout << std::setw(7) << std::setprecision(2) << percent(sah(),totalSAH) << "% "; - cout << std::setw(8) << std::setprecision(2) << double(bytes())/1E6 << " MB "; - cout << std::setw(7) << std::setprecision(2) << percent(numBytesUsed,numBytesTotal) << "% "; - cout << std::setw(7) << std::setprecision(2) << percent(bytes(),totalBytes) << "% "; - cout << std::setw(8) << std::setprecision(2) << ratio(bytes(),N) << " "; - cout << std::setw(8) << std::setprecision(2) << ratio(bytes(),numPrimsUsed) << " "; - cout << std::setw(8) << std::setprecision(2) << ratio(bytes(),numPrimitives) << " "; - cout << std::setw(7) << std::setprecision(2) << ratio(numPrimsUsed,N) << " "; - cout << std::setw(7) << std::setprecision(2) << 100.0*fillRate() << "% "; - cout << std::endl; - } - - void BVHStatistics::print (std::ostream& cout) const - { - RestoreStreamState iostate(cout); - cout.setf(std::ios::fixed, std::ios::floatfield); - cout.fill(' '); - - double totalSAH = internalNode.nodeSAH + quadLeaf.leafSAH + proceduralLeaf.leafSAH + instanceLeaf.leafSAH; - size_t totalBytes = internalNode.bytes() + quadLeaf.bytes() + proceduralLeaf.bytes() + instanceLeaf.bytes(); - size_t totalNodes = internalNode.numNodes + quadLeaf.numLeaves + proceduralLeaf.numLeaves + instanceLeaf.numLeaves; - size_t totalPrimitives = quadLeaf.numPrimsUsed + proceduralLeaf.numPrimsUsed + instanceLeaf.numPrimsUsed; - - cout << std::endl; - cout << "BVH statistics:" << std::endl; - cout << "---------------" << std::endl; - cout << " numScenePrimitives = " << numScenePrimitives << std::endl; - cout << " numBuildPrimitives = " << numBuildPrimitives << std::endl; - cout << " numBuildPrimitivesPostSplit = " << numBuildPrimitivesPostSplit << std::endl; - cout << " primRefSplits = " << std::setprecision(2) << percent(numBuildPrimitivesPostSplit,numBuildPrimitives) << "%" << std::endl; - cout << " numBVHPrimitives = " << totalPrimitives << std::endl; - cout << " spatialSplits = " << std::setprecision(2) << percent(totalPrimitives,numScenePrimitives) << "%" << std::endl; - cout << std::endl; - - cout << " #nodes SAH total bytes used total b/node b/child b/prim #child fill" << std::endl; - cout << "----------------------------------------------------------------------------------------------------------------------" << std::endl; - cout << " total : "; - cout << std::setw(7) << totalNodes << " "; - cout << std::setw(7) << std::setprecision(3) << totalSAH; - cout << " 100.00% "; - cout << std::setw(8) << std::setprecision(2) << totalBytes/1E6 << " MB "; - cout << " 100.00% "; - cout << " 100.00% "; - cout << " "; - cout << " "; - cout << std::setw(8) << std::setprecision(2) << ratio(totalBytes,totalPrimitives) << std::endl; - - LeafStat leaf = quadLeaf + proceduralLeaf + instanceLeaf; - cout << " internalNode : "; internalNode .print(cout,totalSAH,totalBytes,totalPrimitives); - cout << " leaves : "; leaf .print(cout,totalSAH,totalBytes,totalPrimitives); - cout << " quadLeaf : "; quadLeaf .print(cout,totalSAH,totalBytes,totalPrimitives); - cout << " proceduralLeaf : "; proceduralLeaf.print(cout,totalSAH,totalBytes,totalPrimitives); - cout << " proceduralBlock: "; proceduralLeaf.print(cout,totalSAH,totalBytes,totalPrimitives,true); - cout << " instanceLeaf : "; instanceLeaf .print(cout,totalSAH,totalBytes,totalPrimitives); - } - - void BVHStatistics::print_raw(std::ostream& cout) const - { - RestoreStreamState iostate(cout); - size_t totalPrimitives = quadLeaf.numPrimsUsed + proceduralLeaf.numPrimsUsed + instanceLeaf.numPrimsUsed; - cout << "bvh_spatial_split_factor = " << percent(totalPrimitives,numBuildPrimitives) << std::endl; - - cout << "bvh_internal_sah = " << internalNode.nodeSAH << std::endl; - cout << "bvh_internal_num = " << internalNode.numNodes << std::endl; - cout << "bvh_internal_num_children_used = " << internalNode.numChildrenUsed << std::endl; - cout << "bvh_internal_num_children_total = " << internalNode.numChildrenTotal << std::endl; - cout << "bvh_internal_num_bytes = " << internalNode.bytes() << std::endl; - - cout << "bvh_quad_leaf_sah = " << quadLeaf.leafSAH << std::endl; - cout << "bvh_quad_leaf_num = " << quadLeaf.numLeaves << std::endl; - cout << "bvh_quad_leaf_num_prims_used = " << quadLeaf.numPrimsUsed << std::endl; - cout << "bvh_quad_leaf_num_prims_total = " << quadLeaf.numPrimsTotal << std::endl; - cout << "bvh_quad_leaf_num_bytes_used = " << quadLeaf.numBytesUsed << std::endl; - cout << "bvh_quad_leaf_num_bytes_total = " << quadLeaf.numBytesTotal << std::endl; - - cout << "bvh_procedural_leaf_sah = " << proceduralLeaf.leafSAH << std::endl; - cout << "bvh_procedural_leaf_num = " << proceduralLeaf.numLeaves << std::endl; - cout << "bvh_procedural_leaf_num_prims_used = " << proceduralLeaf.numPrimsUsed << std::endl; - cout << "bvh_procedural_leaf_num_prims_total = " << proceduralLeaf.numPrimsTotal << std::endl; - cout << "bvh_procedural_leaf_num_bytes_used = " << proceduralLeaf.numBytesUsed << std::endl; - cout << "bvh_procedural_leaf_num_bytes_total = " << proceduralLeaf.numBytesTotal << std::endl; - - cout << "bvh_instance_leaf_sah = " << instanceLeaf.leafSAH << std::endl; - cout << "bvh_instance_leaf_num = " << instanceLeaf.numLeaves << std::endl; - cout << "bvh_instance_leaf_num_prims_used = " << instanceLeaf.numPrimsUsed << std::endl; - cout << "bvh_instance_leaf_num_prims_total = " << instanceLeaf.numPrimsTotal << std::endl; - cout << "bvh_instance_leaf_num_bytes_used = " << instanceLeaf.numBytesUsed << std::endl; - cout << "bvh_instance_leaf_num_bytes_total = " << instanceLeaf.numBytesTotal << std::endl; - } -} diff --git a/kernels/rthwif/rtbuild/statistics.h b/kernels/rthwif/rtbuild/statistics.h deleted file mode 100644 index b66d1a9ca2..0000000000 --- a/kernels/rthwif/rtbuild/statistics.h +++ /dev/null @@ -1,118 +0,0 @@ -// Copyright 2009-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#if defined(ZE_RAYTRACING) -#include "sys/platform.h" -#else -#include "../../../common/sys/platform.h" -#endif - -namespace embree -{ - struct BVHStatistics - { - struct NodeStat - { - NodeStat ( double nodeSAH = 0, - size_t numNodes = 0, - size_t numChildrenUsed = 0, - size_t numChildrenTotal = 0, - size_t numBytes = 0) - : nodeSAH(nodeSAH), - numNodes(numNodes), - numChildrenUsed(numChildrenUsed), - numChildrenTotal(numChildrenTotal), - numBytes(numBytes) {} - - double sah() const { return nodeSAH; } - size_t bytes() const { return numBytes; } - size_t size() const { return numNodes; } - - double fillRateNom () const { return double(numChildrenUsed); } - double fillRateDen () const { return double(numChildrenTotal); } - double fillRate () const { return fillRateDen() ? fillRateNom()/fillRateDen() : 0.0; } - - friend NodeStat operator+ ( const NodeStat& a, const NodeStat& b) - { - return NodeStat(a.nodeSAH + b.nodeSAH, - a.numNodes+b.numNodes, - a.numChildrenUsed+b.numChildrenUsed, - a.numChildrenTotal+b.numChildrenTotal, - a.numBytes+b.numBytes); - } - - void print(std::ostream& cout, double totalSAH, size_t totalBytes, size_t numPrimitives) const; - - public: - double nodeSAH; - size_t numNodes; - size_t numChildrenUsed; - size_t numChildrenTotal; - size_t numBytes; - }; - - struct LeafStat - { - LeafStat(double leafSAH = 0.0f, - size_t numLeaves = 0, - size_t numBlocks = 0, - size_t numPrimsUsed = 0, - size_t numPrimsTotal = 0, - size_t numBytesUsed = 0, - size_t numBytesTotal = 0) - : leafSAH(leafSAH), - numLeaves(numLeaves), - numBlocks(numBlocks), - numPrimsUsed(numPrimsUsed), - numPrimsTotal(numPrimsTotal), - numBytesUsed(numBytesUsed), - numBytesTotal(numBytesTotal) {} - - double sah() const { return leafSAH; } - size_t bytes() const { return numBytesTotal; } - size_t size() const { return numLeaves; } - - double fillRateNom () const { return double(numPrimsUsed); } - double fillRateDen () const { return double(numPrimsTotal); } - double fillRate () const { return fillRateDen() ? fillRateNom()/fillRateDen() : 0.0; } - - friend LeafStat operator+ ( const LeafStat& a, const LeafStat& b) - { - return LeafStat(a.leafSAH + b.leafSAH, - a.numLeaves+b.numLeaves, - a.numBlocks+b.numBlocks, - a.numPrimsUsed+b.numPrimsUsed, - a.numPrimsTotal+b.numPrimsTotal, - a.numBytesUsed+b.numBytesUsed, - a.numBytesTotal+b.numBytesTotal); - } - - void print(std::ostream& cout, double totalSAH, size_t totalBytes, size_t numPrimitives, bool blocks = false) const; - - public: - double leafSAH; //!< SAH of the leaves only - size_t numLeaves; //!< Number of leaf nodes. - size_t numBlocks; //!< Number of blocks referenced - size_t numPrimsUsed; //!< Number of active primitives - size_t numPrimsTotal; //!< Number of active and inactive primitives - size_t numBytesUsed; //!< Number of used bytes - size_t numBytesTotal; //!< Number of total bytes of leaves. - }; - - BVHStatistics () - : numScenePrimitives(0), numBuildPrimitives(0), numBuildPrimitivesPostSplit(0) {} - - void print (std::ostream& cout) const; - void print_raw(std::ostream& cout) const; - - size_t numScenePrimitives; - size_t numBuildPrimitives; - size_t numBuildPrimitivesPostSplit; - NodeStat internalNode; - LeafStat quadLeaf; - LeafStat proceduralLeaf; - LeafStat instanceLeaf; - }; -} diff --git a/kernels/rthwif/rttrace/rttrace_internal.h b/kernels/rthwif/rttrace/rttrace_internal.h deleted file mode 100644 index 9e55b5956f..0000000000 --- a/kernels/rthwif/rttrace/rttrace_internal.h +++ /dev/null @@ -1,293 +0,0 @@ -// Copyright 2009-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -# define MemRay MemRayV1 -# define MemHit MemHitV1 -# define QuadLeaf QuadLeafV1 -# define InstanceLeaf InstanceLeafV1 - -#include - -enum TraceRayCtrl -{ - TRACE_RAY_INITIAL = 0, // Initializes hit and initializes traversal state - TRACE_RAY_INSTANCE = 1, // Loads committed hit and initializes traversal state - TRACE_RAY_COMMIT = 2, // Loads potential hit and loads traversal state - TRACE_RAY_CONTINUE = 3, // Loads committed hit and loads traversal state - TRACE_RAY_DONE = 256, // for internal use only -}; - -typedef __attribute__((opencl_global)) struct rtglobals_opaque_t* rtglobals_t; -typedef __attribute__((opencl_private)) struct rtfence_opaque_t* rtfence_t; - -#if defined(__SYCL_DEVICE_ONLY__) || defined(EMBREE_SYCL_RT_SIMULATION) - -SYCL_EXTERNAL extern "C" __attribute__((opencl_global)) void* intel_get_implicit_dispatch_globals(); -SYCL_EXTERNAL extern "C" void* intel_get_rt_stack(rtglobals_t rt_dispatch_globals); -SYCL_EXTERNAL extern "C" void* intel_get_thread_btd_stack(rtglobals_t rt_dispatch_globals); -SYCL_EXTERNAL extern "C" void* intel_get_global_btd_stack(rtglobals_t rt_dispatch_globals); -SYCL_EXTERNAL extern "C" rtfence_t intel_dispatch_trace_ray_query(rtglobals_t rt_dispatch_globals, unsigned int bvh_level, unsigned int traceRayCtrl); -SYCL_EXTERNAL extern "C" void intel_rt_sync(rtfence_t fence); - -#else - -inline void* intel_get_implicit_dispatch_globals() { return nullptr; } -inline void* intel_get_rt_stack(rtglobals_t rt_dispatch_globals) { return nullptr; } -inline void* intel_get_thread_btd_stack(rtglobals_t rt_dispatch_globals) { return nullptr; } -inline void* intel_get_global_btd_stack(rtglobals_t rt_dispatch_globals) { return nullptr; } -inline rtfence_t intel_dispatch_trace_ray_query(rtglobals_t rt_dispatch_globals, unsigned int bvh_level, unsigned int traceRayCtrl) { return nullptr; } -inline void intel_rt_sync(rtfence_t fence) {} - -#endif - -enum NodeType -{ - NODE_TYPE_MIXED = 0x0, // identifies a mixed internal node where each child can have a different type - NODE_TYPE_INTERNAL = 0x0, // internal BVH node with 6 children - NODE_TYPE_INSTANCE = 0x1, // instance leaf - NODE_TYPE_PROCEDURAL = 0x3, // procedural leaf - NODE_TYPE_QUAD = 0x4, // quad leaf - NODE_TYPE_INVALID = 0x7 // indicates invalid node -}; - -struct __attribute__ ((packed,aligned(32))) MemRayV1 -{ - void init(intel_ray_desc_t ray, uint64_t rootNodePtr_i) - { - org[0] = ray.origin.x; - org[1] = ray.origin.y; - org[2] = ray.origin.z; - dir[0] = ray.direction.x; - dir[1] = ray.direction.y; - dir[2] = ray.direction.z; - tnear = ray.tmin; - tfar = ray.tmax; - rootNodePtr = rootNodePtr_i; - rayFlags = ray.flags; - hitGroupSRBasePtr = 0; - hitGroupSRStride = 0; - missSRPtr = 0; - pad0 = 0; - shaderIndexMultiplier = 0; - instLeafPtr = 0; - rayMask = ray.mask; - pad1 = 0; - } - - // 32 B - float org[3]; - float dir[3]; - float tnear; - float tfar; - - // 32 B - struct { // FIXME: removing these anonymous structs triggers IGC bug - uint64_t rootNodePtr : 48; // root node to start traversal at - uint64_t rayFlags : 16; // ray flags (see RayFlag structure) - }; - - struct { - uint64_t hitGroupSRBasePtr : 48; // base of hit group shader record array (16-bytes alignment) - uint64_t hitGroupSRStride : 16; // stride of hit group shader record array (16-bytes alignment) - }; - - struct { - uint64_t missSRPtr : 48; // pointer to miss shader record to invoke on a miss (8-bytes alignment) - uint64_t pad0 : 8; // padding byte (has to be zero) - uint64_t shaderIndexMultiplier : 8; // shader index multiplier - }; - - struct { - uint64_t instLeafPtr : 48; // the pointer to instance leaf in case we traverse an instance (64-bytes alignment) - uint64_t rayMask : 8; // ray mask used for ray masking - uint64_t pad1 : 8; // padding byte (has to be zero) - }; -}; - -struct __attribute__ ((packed,aligned(32))) MemHitV1 -{ - inline float getT() const { - return ft; - } - - inline void setT(float t) { - ft = t; - } - - inline float getU() const { - return fu; - } - - inline void setU(float u) { - fu = u; - } - - inline float getV() const { - return fv; - } - - inline void setV(float v) { - fv = v; - } - - inline void* getPrimLeafPtr() { - return sycl::global_ptr((void*)(uint64_t(primLeafPtr)*64)).get(); - } - - inline void* getInstanceLeafPtr() { - return sycl::global_ptr((void*)(uint64_t(instLeafPtr)*64)).get(); - } - -public: - float ft; // hit distance of current hit (or initial traversal distance) - float fu,fv; // barycentric hit coordinates - - union { - struct { - uint32_t primIndexDelta : 16; // prim index delta for compressed meshlets and quads - uint32_t valid : 1; // set if there is a hit - uint32_t leafType : 3; // type of node primLeafPtr is pointing to - uint32_t primLeafIndex : 4; // index of the hit primitive inside the leaf - uint32_t bvhLevel : 3; // the instancing level at which the hit occured - uint32_t frontFace : 1; // whether we hit the front-facing side of a triangle (also used to pass opaque flag when calling intersection shaders) - uint32_t done : 1; // used in sync mode to indicate that traversal is done - uint32_t pad0 : 3; // unused bits - }; - uint32_t data; - }; - - struct { // FIXME: removing these anonymous structs triggers IGC bug - int64_t primLeafPtr : 42; // pointer to BVH leaf node (multiple of 64 bytes) - uint64_t hitGroupRecPtr0 : 22; // LSB of hit group record of the hit triangle (multiple of 16 bytes) - }; - - struct { - int64_t instLeafPtr : 42; // pointer to BVH instance leaf node (in multiple of 64 bytes) - uint64_t hitGroupRecPtr1 : 22; // MSB of hit group record of the hit triangle (multiple of 16 bytes) - }; - - void clear(bool _done, bool _valid) { - //*(sycl::int8*) this = sycl::int8(0x7F800000 /* INFINITY */, 0, 0, (_done ? 0x10000000 : 0) | (_valid ? 0x10000), 0, 0, 0, 0); - ft = fu = fv = 0.0f; - data = 0; - done = _done ? 1 : 0; - valid = _valid ? 1 : 0; - } -}; - -struct __attribute__ ((packed,aligned(64))) RTStack -{ - union { - struct { - struct MemHit committedHit; // stores committed hit - struct MemHit potentialHit; // stores potential hit that is passed to any hit shader - }; - struct MemHit hit[2]; // committedHit, potentialHit - }; - struct MemRay ray[2]; - char travStack[32*2]; -}; - -struct __attribute__ ((packed)) HWAccel -{ - uint64_t reserved; - float bounds[2][3]; // bounding box of the BVH - uint32_t reserved0[8]; - uint32_t numTimeSegments; - uint32_t reserved1[13]; - uint64_t dispatchGlobalsPtr; -}; - -struct __attribute__ ((packed,aligned(8))) PrimLeafDesc -{ - struct { - uint32_t shaderIndex : 24; // shader index used for shader record calculations - uint32_t geomMask : 8; // geometry mask used for ray masking - }; - - struct { - uint32_t geomIndex : 29; // the geometry index specifies the n'th geometry of the scene - uint32_t type : 1; // enable/disable culling for procedurals and instances - uint32_t geomFlags : 2; // geometry flags of this geometry - }; -}; - -struct __attribute__ ((packed,aligned(64))) QuadLeafV1 -{ - struct PrimLeafDesc leafDesc; - unsigned int primIndex0; - struct { - uint32_t primIndex1Delta : 16; // delta encoded primitive index of second triangle - uint32_t j0 : 2; // specifies first vertex of second triangle - uint32_t j1 : 2; // specified second vertex of second triangle - uint32_t j2 : 2; // specified third vertex of second triangle - uint32_t last : 1; // true if the second triangle is the last triangle in a leaf list - uint32_t pad : 9; // unused bits - }; - float v[4][3]; -}; - -struct __attribute__ ((packed,aligned(64))) ProceduralLeaf -{ - static const constexpr uint32_t N = 13; - - struct PrimLeafDesc leafDesc; // leaf header identifying the geometry - struct { - uint32_t numPrimitives : 4; // number of stored primitives - uint32_t pad : 32-4-N; - uint32_t last : N; // bit vector with a last bit per primitive - }; - uint32_t _primIndex[N]; // primitive indices of all primitives stored inside the leaf -}; - -struct __attribute__ ((packed,aligned(64))) InstanceLeafV1 -{ - /* first 64 bytes accessed during traversal by hardware */ - struct Part0 - { - public: - struct { - uint32_t shaderIndex : 24; // shader index used to calculate instancing shader in case of software instancing - uint32_t geomMask : 8; // geometry mask used for ray masking - }; - - struct { - uint32_t instanceContributionToHitGroupIndex : 24; - uint32_t pad0 : 5; - - /* the following two entries are only used for procedural instances */ - uint32_t type : 1; // enables/disables opaque culling - uint32_t geomFlags : 2; // unused for instances - }; - - struct { - uint64_t startNodePtr : 48; // start node where to continue traversal of the instanced object - uint64_t instFlags : 8; // flags for the instance (see InstanceFlags) - uint64_t pad1 : 8; // unused bits - }; - - float world2obj_vx[3]; // 1st column of Worl2Obj transform - float world2obj_vy[3]; // 2nd column of Worl2Obj transform - float world2obj_vz[3]; // 3rd column of Worl2Obj transform - float obj2world_p[3]; // translation of Obj2World transform (on purpose in first 64 bytes) - } part0; - - /* second 64 bytes accessed during shading */ - struct Part1 - { - struct { - uint64_t bvhPtr : 48; // pointer to BVH where start node belongs too - uint64_t pad : 16; // unused bits - }; - - uint32_t instanceID; // user defined value per DXR spec - uint32_t instanceIndex; // geometry index of the instance (n'th geometry in scene) - - float obj2world_vx[3]; // 1st column of Obj2World transform - float obj2world_vy[3]; // 2nd column of Obj2World transform - float obj2world_vz[3]; // 3rd column of Obj2World transform - float world2obj_p[3]; // translation of World2Obj transform - } part1; -}; diff --git a/kernels/rthwif/rttrace/rttrace_validation.cpp b/kernels/rthwif/rttrace/rttrace_validation.cpp deleted file mode 100644 index c4307ca035..0000000000 --- a/kernels/rthwif/rttrace/rttrace_validation.cpp +++ /dev/null @@ -1,288 +0,0 @@ -// Copyright 2009-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "rttrace_validation.h" -#include - -#define sizeof_QBVH6_InternalNode6 64 -#define QBVH6_rootNodeOffset 128 - - /*struct rayquery_impl_t { - rtfence_t fence; - rtglobals_t dispatchGlobalsPtr; - struct RTStack* rtStack; - TraceRayCtrl ctrl; - unsigned int bvh_level; - };*/ - -void use_rthwif_production() -{ -} - -SYCL_EXTERNAL intel_raytracing_ext_flag_t intel_get_raytracing_ext_flag() -{ - return intel_raytracing_ext_flag_ray_query; -} - -SYCL_EXTERNAL intel_ray_query_t intel_ray_query_init(intel_ray_desc_t ray, intel_raytracing_acceleration_structure_t accel_i ) -{ - unsigned int bvh_level = 0; - - //intel_raytracing_acceleration_structure_t* accel_i = sycl::global_ptr(_accel_i).get(); - HWAccel* accel = (HWAccel*)accel_i; -#if defined(EMBREE_SYCL_ALLOC_DISPATCH_GLOBALS) - rtglobals_t dispatchGlobalsPtr = (rtglobals_t) accel->dispatchGlobalsPtr; -#else - rtglobals_t dispatchGlobalsPtr = (rtglobals_t) intel_get_implicit_dispatch_globals(); -#endif - struct RTStack* __restrict rtStack = sycl::global_ptr((struct RTStack*)intel_get_rt_stack( (rtglobals_t)dispatchGlobalsPtr )).get(); - - /* init ray */ - rtStack->ray[bvh_level].init(ray,(uint64_t)accel + QBVH6_rootNodeOffset); - - rtStack->committedHit.setT(INFINITY); - rtStack->committedHit.setU(0.0f); - rtStack->committedHit.setV(0.0f); - rtStack->committedHit.data = 0; - - rtStack->potentialHit.setT(INFINITY); - rtStack->potentialHit.setU(0.0f); - rtStack->potentialHit.setV(0.0f); - rtStack->potentialHit.data = 0; - rtStack->potentialHit.done = 1; - rtStack->potentialHit.valid = 1; - - return { nullptr, (void*) dispatchGlobalsPtr, rtStack, TRACE_RAY_INITIAL, bvh_level }; -} - -SYCL_EXTERNAL void intel_ray_query_forward_ray( intel_ray_query_t& query, intel_ray_desc_t ray, intel_raytracing_acceleration_structure_t accel_i) -{ - HWAccel* accel = (HWAccel*)accel_i; - struct RTStack* __restrict rtStack = sycl::global_ptr((struct RTStack*)query.opaque2).get(); - - /* init ray */ - unsigned int bvh_level = query.bvh_level+1; - rtStack->ray[bvh_level].init(ray,(uint64_t)accel + QBVH6_rootNodeOffset); - query = { nullptr, query.opaque1, query.opaque2, TRACE_RAY_INSTANCE, bvh_level }; -} - -SYCL_EXTERNAL void intel_ray_query_commit_potential_hit( intel_ray_query_t& query ) -{ - struct RTStack* __restrict rtStack = sycl::global_ptr((struct RTStack*)query.opaque2).get(); - - unsigned int bvh_level = query.bvh_level; - unsigned int rflags = rtStack->ray[bvh_level].rayFlags; - if (rflags & intel_ray_flags_accept_first_hit_and_end_search) { - rtStack->committedHit = rtStack->potentialHit; - rtStack->committedHit.valid = 1; - query = { nullptr, query.opaque1, query.opaque2, TRACE_RAY_DONE, bvh_level }; - } else { - rtStack->potentialHit.valid = 1; // FIXME: is this required? - query = { nullptr, query.opaque1, query.opaque2, TRACE_RAY_COMMIT, bvh_level }; - } -} - -SYCL_EXTERNAL void intel_ray_query_commit_potential_hit_override( intel_ray_query_t& query, float override_hit_distance, intel_float2 override_uv ) -{ - //struct RTStack* rtStack = (struct RTStack*) query.opaque2; - struct RTStack* __restrict rtStack = sycl::global_ptr((struct RTStack*)query.opaque2).get(); - - rtStack->potentialHit.setT(override_hit_distance); - rtStack->potentialHit.setU(override_uv.x); - rtStack->potentialHit.setV(override_uv.y); - intel_ray_query_commit_potential_hit(query); -} - -SYCL_EXTERNAL void intel_ray_query_start_traversal( intel_ray_query_t& query ) -{ - rtglobals_t dispatchGlobalsPtr = (rtglobals_t) query.opaque1; - struct RTStack* __restrict rtStack = sycl::global_ptr((struct RTStack*)query.opaque2).get(); - - rtStack->potentialHit.done = 1; - rtStack->potentialHit.valid = 1; - - if (query.ctrl == TRACE_RAY_DONE) return; - rtfence_t fence = intel_dispatch_trace_ray_query(dispatchGlobalsPtr,query.bvh_level,query.ctrl); - query = { (void*) fence, query.opaque1, query.opaque2, TRACE_RAY_INITIAL, 0 }; -} - -SYCL_EXTERNAL void intel_ray_query_sync( intel_ray_query_t& query ) -{ - intel_rt_sync((rtfence_t)query.opaque0); - - /* continue is default behaviour */ - struct RTStack* __restrict rtStack = sycl::global_ptr((struct RTStack*)query.opaque2).get(); - - unsigned int bvh_level = rtStack->potentialHit.bvhLevel; - query = { query.opaque0, query.opaque1, query.opaque2, TRACE_RAY_CONTINUE, bvh_level }; -} - -SYCL_EXTERNAL void intel_sync_ray_query( intel_ray_query_t& query ) -{ - intel_rt_sync((rtfence_t)query.opaque0); - - /* continue is default behaviour */ - struct RTStack* __restrict rtStack = sycl::global_ptr((struct RTStack*)query.opaque2).get(); - - unsigned int bvh_level = rtStack->potentialHit.bvhLevel; - query = { query.opaque0, query.opaque1, query.opaque2, TRACE_RAY_CONTINUE, bvh_level }; -} - -SYCL_EXTERNAL void intel_ray_query_abandon( intel_ray_query_t& query ) -{ - intel_ray_query_sync(query); - query = { nullptr, nullptr, nullptr, TRACE_RAY_INITIAL, 0 }; -} - -SYCL_EXTERNAL unsigned int intel_get_hit_bvh_level( intel_ray_query_t& query, intel_hit_type_t hit_type ) { - return query.hit(hit_type).bvhLevel; -} - -SYCL_EXTERNAL float intel_get_hit_distance( intel_ray_query_t& query, intel_hit_type_t hit_type ) { - return query.hit(hit_type).getT(); -} - -SYCL_EXTERNAL intel_float2 intel_get_hit_barycentrics( intel_ray_query_t& query, intel_hit_type_t hit_type ) { - return { query.hit(hit_type).getU(), query.hit(hit_type).getV() }; -} - -SYCL_EXTERNAL bool intel_get_hit_front_face( intel_ray_query_t& query, intel_hit_type_t hit_type ) { - return query.hit(hit_type).frontFace; -} - -SYCL_EXTERNAL unsigned int intel_get_hit_geometry_id(intel_ray_query_t& query, intel_hit_type_t hit_type ) -{ - struct PrimLeafDesc* __restrict leaf = (struct PrimLeafDesc*)query.hit(hit_type).getPrimLeafPtr(); - return leaf->geomIndex; -} - -SYCL_EXTERNAL unsigned int intel_get_hit_primitive_id( intel_ray_query_t& query, intel_hit_type_t hit_type ) -{ - MemHit& hit = query.hit(hit_type); - void* __restrict leaf = hit.getPrimLeafPtr(); - - if (hit.leafType == NODE_TYPE_QUAD) - return ((QuadLeaf*)leaf)->primIndex0 + hit.primIndexDelta; - else - return ((ProceduralLeaf*)leaf)->_primIndex[hit.primLeafIndex]; -} - -SYCL_EXTERNAL unsigned int intel_get_hit_triangle_primitive_id( intel_ray_query_t& query, intel_hit_type_t hit_type ) -{ - MemHit& hit = query.hit(hit_type); - QuadLeaf* __restrict leaf = (QuadLeaf*) hit.getPrimLeafPtr(); - - return leaf->primIndex0 + hit.primIndexDelta; -} - -SYCL_EXTERNAL unsigned int intel_get_hit_procedural_primitive_id( intel_ray_query_t& query, intel_hit_type_t hit_type ) -{ - MemHit& hit = query.hit(hit_type); - ProceduralLeaf* __restrict leaf = (ProceduralLeaf*) hit.getPrimLeafPtr(); - return leaf->_primIndex[hit.primLeafIndex]; -} - -SYCL_EXTERNAL unsigned int intel_get_hit_instance_id( intel_ray_query_t& query, intel_hit_type_t hit_type ) -{ - MemHit& hit = query.hit(hit_type); - InstanceLeaf* __restrict leaf = (InstanceLeaf*) hit.getInstanceLeafPtr(); - if (leaf == nullptr) return -1; - return leaf->part1.instanceIndex; -} - -SYCL_EXTERNAL unsigned int intel_get_hit_instance_user_id( intel_ray_query_t& query, intel_hit_type_t hit_type ) -{ - MemHit& hit = query.hit(hit_type); - InstanceLeaf* __restrict leaf = (InstanceLeaf*) hit.getInstanceLeafPtr(); - if (leaf == nullptr) return -1; - return leaf->part1.instanceID; -} - -SYCL_EXTERNAL intel_float4x3 intel_get_hit_world_to_object( intel_ray_query_t& query, intel_hit_type_t hit_type ) -{ - MemHit& hit = query.hit(hit_type); - InstanceLeaf* __restrict leaf = (InstanceLeaf*) hit.getInstanceLeafPtr(); - if (leaf == nullptr) return { { 1,0,0 }, { 0,1,0 }, { 0,0,1 }, { 0,0,0 } }; - return { - { leaf->part0.world2obj_vx[0], leaf->part0.world2obj_vx[1], leaf->part0.world2obj_vx[2] }, - { leaf->part0.world2obj_vy[0], leaf->part0.world2obj_vy[1], leaf->part0.world2obj_vy[2] }, - { leaf->part0.world2obj_vz[0], leaf->part0.world2obj_vz[1], leaf->part0.world2obj_vz[2] }, - { leaf->part1.world2obj_p [0], leaf->part1.world2obj_p [1], leaf->part1.world2obj_p [2] } - }; -} - -SYCL_EXTERNAL intel_float4x3 intel_get_hit_object_to_world( intel_ray_query_t& query, intel_hit_type_t hit_type ) -{ - MemHit& hit = query.hit(hit_type); - InstanceLeaf* __restrict leaf = (InstanceLeaf*) hit.getInstanceLeafPtr(); - if (leaf == nullptr) return { { 1,0,0 }, { 0,1,0 }, { 0,0,1 }, { 0,0,0 } }; - return { - { leaf->part1.obj2world_vx[0], leaf->part1.obj2world_vx[1], leaf->part1.obj2world_vx[2] }, - { leaf->part1.obj2world_vy[0], leaf->part1.obj2world_vy[1], leaf->part1.obj2world_vy[2] }, - { leaf->part1.obj2world_vz[0], leaf->part1.obj2world_vz[1], leaf->part1.obj2world_vz[2] }, - { leaf->part0.obj2world_p [0], leaf->part0.obj2world_p [1], leaf->part0.obj2world_p [2] } - }; -} - -SYCL_EXTERNAL void intel_get_hit_triangle_vertices( intel_ray_query_t& query, intel_float3 verts_out[3], intel_hit_type_t hit_type ) -{ - const QuadLeaf* __restrict leaf = (const QuadLeaf*) query.hit(hit_type).getPrimLeafPtr(); - - unsigned int j0 = 0, j1 = 1, j2 = 2; - if (query.hit(hit_type).primLeafIndex != 0) - { - j0 = leaf->j0; - j1 = leaf->j1; - j2 = leaf->j2; - } - - verts_out[0] = { leaf->v[j0][0], leaf->v[j0][1], leaf->v[j0][2] }; - verts_out[1] = { leaf->v[j1][0], leaf->v[j1][1], leaf->v[j1][2] }; - verts_out[2] = { leaf->v[j2][0], leaf->v[j2][1], leaf->v[j2][2] }; -} - -SYCL_EXTERNAL intel_float3 intel_get_ray_origin( intel_ray_query_t& query, unsigned int bvh_level) -{ - struct RTStack* __restrict rtStack = sycl::global_ptr((struct RTStack*)query.opaque2).get(); - - MemRay& ray = rtStack->ray[bvh_level]; - return { ray.org[0], ray.org[1], ray.org[2] }; -} - -SYCL_EXTERNAL intel_float3 intel_get_ray_direction( intel_ray_query_t& query, unsigned int bvh_level) -{ - struct RTStack* __restrict rtStack = sycl::global_ptr((struct RTStack*)query.opaque2).get(); - MemRay& ray = rtStack->ray[bvh_level]; - return { ray.dir[0], ray.dir[1], ray.dir[2] }; -} - -SYCL_EXTERNAL float intel_get_ray_tmin( intel_ray_query_t& query, unsigned int bvh_level) -{ - struct RTStack* __restrict rtStack = sycl::global_ptr((struct RTStack*)query.opaque2).get(); - return rtStack->ray[bvh_level].tnear; -} - -SYCL_EXTERNAL intel_ray_flags_t intel_get_ray_flags( intel_ray_query_t& query, unsigned int bvh_level) -{ - struct RTStack* __restrict rtStack = sycl::global_ptr((struct RTStack*)query.opaque2).get(); - return (intel_ray_flags_t) rtStack->ray[bvh_level].rayFlags; -} - -SYCL_EXTERNAL unsigned int intel_get_ray_mask( intel_ray_query_t& query, unsigned int bvh_level) -{ - struct RTStack* __restrict rtStack = sycl::global_ptr((struct RTStack*)query.opaque2).get(); - return rtStack->ray[bvh_level].rayMask; -} - -SYCL_EXTERNAL bool intel_is_traversal_done( intel_ray_query_t& query ) { - return query.hit(intel_hit_type_potential_hit).done; -} - -SYCL_EXTERNAL intel_candidate_type_t intel_get_hit_candidate( intel_ray_query_t& query, intel_hit_type_t hit_type) { - return query.hit(hit_type).leafType == NODE_TYPE_QUAD ? intel_candidate_type_triangle : intel_candidate_type_procedural; -} - -SYCL_EXTERNAL bool intel_has_committed_hit( intel_ray_query_t& query ) { - return query.hit(intel_hit_type_committed_hit).valid; -} - diff --git a/kernels/rthwif/rttrace/rttrace_validation.h b/kernels/rthwif/rttrace/rttrace_validation.h deleted file mode 100644 index 429a273655..0000000000 --- a/kernels/rthwif/rttrace/rttrace_validation.h +++ /dev/null @@ -1,180 +0,0 @@ -// Copyright 2009-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include - -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wdeprecated-declarations" -#pragma clang diagnostic ignored "-W#pragma-messages" - -#include - -#pragma clang diagnostic pop - -enum intel_ray_flags_t -{ - intel_ray_flags_none = 0x00, - intel_ray_flags_force_opaque = 0x01, // forces geometry to be opaque (no anyhit shader invokation) - intel_ray_flags_force_non_opaque = 0x02, // forces geometry to be non-opqaue (invoke anyhit shader) - intel_ray_flags_accept_first_hit_and_end_search = 0x04, // terminates traversal on the first hit found (shadow rays) - intel_ray_flags_skip_closest_hit_shader = 0x08, // skip execution of the closest hit shader - intel_ray_flags_cull_back_facing_triangles = 0x10, // back facing triangles to not produce a hit - intel_ray_flags_cull_front_facing_triangles = 0x20, // front facing triangles do not produce a hit - intel_ray_flags_cull_opaque = 0x40, // opaque geometry does not produce a hit - intel_ray_flags_cull_non_opaque = 0x80, // non-opaque geometry does not produce a hit - intel_ray_flags_skip_triangles = 0x100, // treat all triangle intersections as misses. - intel_ray_flags_skip_procedural_primitives = 0x200, // skip execution of intersection shaders -}; - -enum intel_hit_type_t -{ - intel_hit_type_committed_hit = 0, - intel_hit_type_potential_hit = 1, -}; - -enum intel_raytracing_ext_flag_t -{ - intel_raytracing_ext_flag_ray_query = 1 << 0, // true if ray queries are supported -}; - -struct intel_float2 -{ - float x, y; - - intel_float2() {} - - intel_float2(float x, float y) - : x(x), y(y) {} - - intel_float2(sycl::float2 v) - : x(v.x()), y(v.y()) {} - - operator sycl::float2() { - return sycl::float2(x,y); - } -}; - -struct intel_float3 -{ - float x, y, z; - - intel_float3() {} - - intel_float3(float x, float y, float z) - : x(x), y(y), z(z) {} - - intel_float3(sycl::float3 v) - : x(v.x()), y(v.y()), z(v.z()) {} - - operator sycl::float3() { - return sycl::float3(x,y,z); - } -}; - -struct intel_float4x3 { - intel_float3 vx, vy, vz, p; -}; - -struct intel_ray_desc_t -{ - intel_float3 origin; - intel_float3 direction; - float tmin; - float tmax; - unsigned int mask; - intel_ray_flags_t flags; -}; - -#include "rttrace_internal.h" - -// opaque types -struct intel_ray_query_t { - void* opaque0; void* opaque1; void* opaque2; uint32_t ctrl; uint32_t bvh_level; - MemHit& hit(intel_hit_type_t ty) { - struct RTStack* rtStack = (struct RTStack*) opaque2; - return rtStack->hit[ty]; - } -}; -typedef __attribute__((opencl_global )) struct intel_raytracing_acceleration_structure_opaque_t* intel_raytracing_acceleration_structure_t; - -// check supported ray tracing features -SYCL_EXTERNAL intel_raytracing_ext_flag_t intel_get_raytracing_ext_flag(); - -// initializes a ray query -SYCL_EXTERNAL intel_ray_query_t intel_ray_query_init( - intel_ray_desc_t ray, - intel_raytracing_acceleration_structure_t accel -); - -// setup for instance traversal using a transformed ray and bottom-level AS -SYCL_EXTERNAL void intel_ray_query_forward_ray( - intel_ray_query_t& query, - intel_ray_desc_t ray, - intel_raytracing_acceleration_structure_t accel -); - -// commit the potential hit -SYCL_EXTERNAL void intel_ray_query_commit_potential_hit( - intel_ray_query_t& query -); - -// commit the potential hit and override hit distance and UVs -SYCL_EXTERNAL void intel_ray_query_commit_potential_hit_override( - intel_ray_query_t& query, - float override_hit_distance, - intel_float2 override_uv -); - -// start traversal of a ray query -SYCL_EXTERNAL void intel_ray_query_start_traversal( intel_ray_query_t& query ); - -// synchronize rayquery execution. If a ray was dispatched, -// This must be called prior to calling any of the accessors below. -SYCL_EXTERNAL void intel_ray_query_sync( intel_ray_query_t& query ); - -// signal that a ray query will not be used further. This is the moral equaivalent of a delete -// this function does an implicit sync -SYCL_EXTERNAL void intel_ray_query_abandon( intel_ray_query_t& query ); - -// read hit information during shader execution -SYCL_EXTERNAL unsigned int intel_get_hit_bvh_level( intel_ray_query_t& query, intel_hit_type_t hit_type ); -SYCL_EXTERNAL float intel_get_hit_distance( intel_ray_query_t& query, intel_hit_type_t hit_type ); -SYCL_EXTERNAL intel_float2 intel_get_hit_barycentrics( intel_ray_query_t& query, intel_hit_type_t hit_type ); -SYCL_EXTERNAL bool intel_get_hit_front_face( intel_ray_query_t& query, intel_hit_type_t hit_type ); -SYCL_EXTERNAL unsigned int intel_get_hit_geometry_id(intel_ray_query_t& query, intel_hit_type_t hit_type ); -SYCL_EXTERNAL unsigned int intel_get_hit_primitive_id( intel_ray_query_t& query, intel_hit_type_t hit_type ); -SYCL_EXTERNAL unsigned int intel_get_hit_triangle_primitive_id( intel_ray_query_t& query, intel_hit_type_t hit_type ); // fast path for quad leaves -SYCL_EXTERNAL unsigned int intel_get_hit_procedural_primitive_id( intel_ray_query_t& query, intel_hit_type_t hit_type ); // fast path for procedural leaves -SYCL_EXTERNAL unsigned int intel_get_hit_instance_id( intel_ray_query_t& query, intel_hit_type_t hit_type ); -SYCL_EXTERNAL unsigned int intel_get_hit_instance_user_id( intel_ray_query_t& query, intel_hit_type_t hit_type ); -SYCL_EXTERNAL intel_float4x3 intel_get_hit_world_to_object( intel_ray_query_t& query, intel_hit_type_t hit_type ); -SYCL_EXTERNAL intel_float4x3 intel_get_hit_object_to_world( intel_ray_query_t& query, intel_hit_type_t hit_type ); - -// fetch triangle vertices for a hit -SYCL_EXTERNAL void intel_get_hit_triangle_vertices( intel_ray_query_t& query, intel_float3 vertices_out[3], intel_hit_type_t hit_type ); - -// Read ray-data. This is used to read transformed rays produced by HW instancing pipeline -// during any-hit or intersection shader execution. -SYCL_EXTERNAL intel_float3 intel_get_ray_origin( intel_ray_query_t& query, unsigned int bvh_level ); -SYCL_EXTERNAL intel_float3 intel_get_ray_direction( intel_ray_query_t& query, unsigned int bvh_level ); -SYCL_EXTERNAL float intel_get_ray_tmin( intel_ray_query_t& query, unsigned int bvh_level ); -SYCL_EXTERNAL intel_ray_flags_t intel_get_ray_flags( intel_ray_query_t& query, unsigned int bvh_level ); -SYCL_EXTERNAL unsigned int intel_get_ray_mask( intel_ray_query_t& query, unsigned int bvh_level ); - -// if traversal returns one can test if a triangle or procedural is hit -enum intel_candidate_type_t -{ - intel_candidate_type_triangle, - intel_candidate_type_procedural -}; - -SYCL_EXTERNAL intel_candidate_type_t intel_get_hit_candidate( intel_ray_query_t& query, intel_hit_type_t hit_type ); - -// test whether traversal has terminated. If false, the ray has reached -// a procedural leaf or a non-opaque triangle leaf, and requires shader processing -SYCL_EXTERNAL bool intel_is_traversal_done( intel_ray_query_t& query ); - -// if traversal is done one can test for the presence of a committed hit to either invoke miss or closest hit shader -SYCL_EXTERNAL bool intel_has_committed_hit( intel_ray_query_t& query ); diff --git a/kernels/rthwif/testing/CMakeLists.txt b/kernels/rthwif/testing/CMakeLists.txt deleted file mode 100644 index efaad9f6d4..0000000000 --- a/kernels/rthwif/testing/CMakeLists.txt +++ /dev/null @@ -1,89 +0,0 @@ -## Copyright 2009-2022 Intel Corporation -## SPDX-License-Identifier: Apache-2.0 - -#PROJECT(rthwif_testing) -#CMAKE_MINIMUM_REQUIRED(VERSION 3.5.0) - -SET(CMAKE_CXX_STANDARD 17) - -IF (NOT WIN32) - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") # generate position independent code suitable for shared libraries -ENDIF() - -IF (NOT DEFINED EMBREE_SYCL_IMPLICIT_DISPATCH_GLOBALS) - OPTION(EMBREE_SYCL_IMPLICIT_DISPATCH_GLOBALS "Using L0 allocated Dispatch Globals" ON) -ENDIF() - -IF (NOT DEFINED EMBREE_SYCL_RT_VALIDATION_API) - OPTION(EMBREE_SYCL_RT_VALIDATION_API "Use rt_validation API instead of IGC provided rt_production API" OFF) -ENDIF() - -IF (EMBREE_SYCL_RT_VALIDATION_API) - ADD_DEFINITIONS("-DEMBREE_SYCL_RT_VALIDATION_API") -ENDIF() - -IF (EMBREE_SYCL_RT_VALIDATION_API AND NOT EMBREE_SYCL_IMPLICIT_DISPATCH_GLOBALS) - ADD_DEFINITIONS("-DEMBREE_SYCL_ALLOC_DISPATCH_GLOBALS") -ENDIF() - -IF (EMBREE_SYCL_RT_SIMULATION) - SET(RT_SIM_LIBRARY rtcore) -ENDIF() - -ADD_EXECUTABLE(embree_rthwif_cornell_box rthwif_cornell_box.cpp) -TARGET_LINK_LIBRARIES(embree_rthwif_cornell_box sys simd ${TBB_TARGET} ${RT_SIM_LIBRARY} ze_wrapper ${EMBREE_RTHWIF_SYCL}) -SET_PROPERTY(TARGET embree_rthwif_cornell_box APPEND PROPERTY COMPILE_FLAGS "-fsycl -fsycl-targets=spir64 -DEMBREE_SYCL_SUPPORT") -SET_PROPERTY(TARGET embree_rthwif_cornell_box APPEND PROPERTY LINK_FLAGS "-fsycl -fsycl-targets=spir64 -Xsycl-target-backend=spir64 \" -cl-intel-greater-than-4GB-buffer-required \"") -INSTALL(TARGETS embree_rthwif_cornell_box DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT examples) -SIGN_TARGET(embree_rthwif_cornell_box) - -ADD_EXECUTABLE(embree_rthwif_test rthwif_test.cpp) -TARGET_LINK_LIBRARIES(embree_rthwif_test sys simd ${TBB_TARGET} ${RT_SIM_LIBRARY} ze_wrapper ${EMBREE_RTHWIF_SYCL}) -SET_PROPERTY(TARGET embree_rthwif_test APPEND PROPERTY COMPILE_FLAGS "-fsycl -fsycl-targets=spir64 -DEMBREE_SYCL_SUPPORT") -SET_PROPERTY(TARGET embree_rthwif_test APPEND PROPERTY LINK_FLAGS "-fsycl -fsycl-targets=spir64 -Xsycl-target-backend=spir64 \" -cl-intel-greater-than-4GB-buffer-required \"") -INSTALL(TARGETS embree_rthwif_test DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT examples) -SIGN_TARGET(embree_rthwif_test) - -IF (NOT DEFINED EMBREE_SYCL_RT_VALIDATION_API OR EMBREE_SYCL_IMPLICIT_DISPATCH_GLOBALS) - - IF (DEFINED EMBREE_MODEL_DIR) - SET(CORNELL_BOX_REFERENCE "${EMBREE_MODEL_DIR}/reference/cornell_box_reference.tga") - ELSE() - SET(CORNELL_BOX_REFERENCE "${CMAKE_CURRENT_SOURCE_DIR}/cornell_box_reference.tga") - ENDIF() - - ADD_TEST(NAME rthwif_cornell_box - COMMAND embree_rthwif_cornell_box --compare "${CORNELL_BOX_REFERENCE}" - WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}") -ENDIF() - -ADD_EMBREE_TEST_ECS(rthwif_test_builder_triangles_expected embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --build_test_triangles --build_mode_expected) -ADD_EMBREE_TEST_ECS(rthwif_test_builder_procedurals_expected embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --build_test_procedurals --build_mode_expected) -ADD_EMBREE_TEST_ECS(rthwif_test_builder_instances_expected embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --build_test_instances --build_mode_expected) -ADD_EMBREE_TEST_ECS(rthwif_test_builder_mixed_expected embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --build_test_mixed --build_mode_expected) - -ADD_EMBREE_TEST_ECS(rthwif_test_benchmark_triangles embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --benchmark_triangles) -ADD_EMBREE_TEST_ECS(rthwif_test_benchmark_procedurals embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --benchmark_procedurals) - -ADD_EMBREE_TEST_ECS(rthwif_test_builder_triangles_worst_case embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --build_test_triangles --build_mode_worst_case) -ADD_EMBREE_TEST_ECS(rthwif_test_builder_procedurals_worst_case embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --build_test_procedurals --build_mode_worst_case) -ADD_EMBREE_TEST_ECS(rthwif_test_builder_instances_worst_case embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --build_test_instances --build_mode_worst_case) -ADD_EMBREE_TEST_ECS(rthwif_test_builder_mixed_worst_case embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --build_test_mixed --build_mode_worst_case) - -ADD_EMBREE_TEST_ECS(rthwif_test_triangles_committed_hit embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --no-instancing --triangles-committed-hit) -ADD_EMBREE_TEST_ECS(rthwif_test_triangles_potential_hit embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --no-instancing --triangles-potential-hit) -ADD_EMBREE_TEST_ECS(rthwif_test_triangles_anyhit_shader_commit embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --no-instancing --triangles-anyhit-shader-commit) -ADD_EMBREE_TEST_ECS(rthwif_test_triangles_anyhit_shader_reject embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --no-instancing --triangles-anyhit-shader-reject) -ADD_EMBREE_TEST_ECS(rthwif_test_procedurals_committed_hit embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --no-instancing --procedurals-committed-hit) - -ADD_EMBREE_TEST_ECS(rthwif_test_hwinstancing_triangles_committed_hit embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --hw-instancing --triangles-committed-hit) -ADD_EMBREE_TEST_ECS(rthwif_test_hwinstancing_triangles_potential_hit embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --hw-instancing --triangles-potential-hit) -ADD_EMBREE_TEST_ECS(rthwif_test_hwinstancing_triangles_anyhit_shader_commit embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --hw-instancing --triangles-anyhit-shader-commit) -ADD_EMBREE_TEST_ECS(rthwif_test_hwinstancing_triangles_anyhit_shader_reject embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --hw-instancing --triangles-anyhit-shader-reject) -ADD_EMBREE_TEST_ECS(rthwif_test_hwinstancing_procedurals_committed_hit embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --hw-instancing --procedurals-committed-hit) - -ADD_EMBREE_TEST_ECS(rthwif_test_swinstancing_triangles_committed_hit embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --sw-instancing --triangles-committed-hit) -ADD_EMBREE_TEST_ECS(rthwif_test_swinstancing_triangles_potential_hit embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --sw-instancing --triangles-potential-hit) -ADD_EMBREE_TEST_ECS(rthwif_test_swinstancing_triangles_anyhit_shader_commit embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --sw-instancing --triangles-anyhit-shader-commit) -ADD_EMBREE_TEST_ECS(rthwif_test_swinstancing_triangles_anyhit_shader_reject embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --sw-instancing --triangles-anyhit-shader-reject) -ADD_EMBREE_TEST_ECS(rthwif_test_swinstancing_procedurals_committed_hit embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --sw-instancing --procedurals-committed-hit) diff --git a/kernels/rthwif/testing/cornell_box_reference.tga b/kernels/rthwif/testing/cornell_box_reference.tga deleted file mode 100644 index b4b8fefbd1..0000000000 Binary files a/kernels/rthwif/testing/cornell_box_reference.tga and /dev/null differ diff --git a/kernels/rthwif/testing/rthwif_cornell_box.cpp b/kernels/rthwif/testing/rthwif_cornell_box.cpp deleted file mode 100644 index 1f088fa250..0000000000 --- a/kernels/rthwif/testing/rthwif_cornell_box.cpp +++ /dev/null @@ -1,630 +0,0 @@ -// Copyright 2009-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include -#include "tbb/tbb.h" - -#include "../rttrace/rttrace.h" - -#include - -#include -#include -#include - -void* dispatchGlobalsPtr = nullptr; - -static uint32_t global_width = 512; -static uint32_t global_height = 512; - -void exception_handler(sycl::exception_list exceptions) -{ - for (std::exception_ptr const& e : exceptions) { - try { - std::rethrow_exception(e); - } catch(sycl::exception const& e) { - std::cout << "Caught asynchronous SYCL exception: " << e.what() << std::endl; - } - } -}; - -inline void fwrite_uchar (unsigned char v, std::fstream& file) { file.write((const char*)&v,sizeof(v)); } -inline void fwrite_ushort(unsigned short v, std::fstream& file) { file.write((const char*)&v,sizeof(v)); } - -void storeTga(uint32_t* pixels, uint32_t width, uint32_t height, const std::string& fileName) try -{ - std::fstream file; - file.exceptions (std::fstream::failbit | std::fstream::badbit); - file.open (fileName.c_str(), std::fstream::out | std::fstream::binary); - - fwrite_uchar(0x00, file); - fwrite_uchar(0x00, file); - fwrite_uchar(0x02, file); - fwrite_ushort(0x0000, file); - fwrite_ushort(0x0000, file); - fwrite_uchar(0x00, file); - fwrite_ushort(0x0000, file); - fwrite_ushort(0x0000, file); - fwrite_ushort((unsigned short)width , file); - fwrite_ushort((unsigned short)height, file); - fwrite_uchar(0x18, file); - fwrite_uchar(0x20, file); - - for (size_t y=0; y>0)&0xFF), file); - fwrite_uchar((unsigned char)((c>>8)&0xFF), file); - fwrite_uchar((unsigned char)((c>>16)&0xFF), file); - } - } -} -catch (std::exception const& e) { - std::cout << "Error: Cannot write file " << fileName << std::endl; - throw; -} - -std::vector readFile(const std::string& fileName) try -{ - std::fstream file; - file.exceptions (std::fstream::failbit | std::fstream::badbit); - file.open (fileName.c_str(), std::fstream::in | std::fstream::binary); - - file.seekg (0, std::ios::end); - std::streampos size = file.tellg(); - std::vector data(size); - file.seekg (0, std::ios::beg); - file.read ((char*)data.data(), size); - file.close(); - - return data; -} -catch (std::exception const& e) { - std::cout << "Error: Cannot read file " << fileName << std::endl; - throw; -} - -size_t compareTga(const std::string& fileNameA, const std::string& fileNameB) -{ - const std::vector dataA = readFile(fileNameA); - const std::vector dataB = readFile(fileNameB); - if (dataA.size() != dataB.size()) - return false; - - size_t diff = 0; - for (int i=0; i= 3) diff+=100; - } - return diff; -} - -/* Properly allocates an acceleration structure buffer using ze_raytracing_mem_alloc_ext_desc_t property. */ -void* alloc_accel_buffer(size_t bytes, sycl::device device, sycl::context context) -{ - ze_context_handle_t hContext = sycl::get_native(context); - ze_device_handle_t hDevice = sycl::get_native(device); - - ze_rtas_device_exp_properties_t rtasProp = { ZE_STRUCTURE_TYPE_RTAS_DEVICE_EXP_PROPERTIES }; - ze_device_properties_t devProp = { ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES, &rtasProp }; - ze_result_t err = ZeWrapper::zeDeviceGetProperties(hDevice, &devProp ); - if (err != ZE_RESULT_SUCCESS) - throw std::runtime_error("zeDeviceGetProperties failed"); - - ze_raytracing_mem_alloc_ext_desc_t rt_desc; - rt_desc.stype = ZE_STRUCTURE_TYPE_RAYTRACING_MEM_ALLOC_EXT_DESC; - rt_desc.pNext = nullptr; - rt_desc.flags = 0; - - ze_device_mem_alloc_desc_t device_desc; - device_desc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; - device_desc.pNext = &rt_desc; - device_desc.flags = ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_CACHED; - device_desc.ordinal = 0; - - ze_host_mem_alloc_desc_t host_desc; - host_desc.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC; - host_desc.pNext = nullptr; - host_desc.flags = ZE_HOST_MEM_ALLOC_FLAG_BIAS_CACHED; - - void* ptr = nullptr; - ze_result_t result = ZeWrapper::zeMemAllocShared(hContext,&device_desc,&host_desc,bytes,rtasProp.rtasBufferAlignment,hDevice,&ptr); - if (result != ZE_RESULT_SUCCESS) - throw std::runtime_error("acceleration buffer allocation failed"); - - return ptr; -} - -void free_accel_buffer(void* ptr, sycl::context context) -{ - ze_context_handle_t hContext = sycl::get_native(context); - ze_result_t result = ZeWrapper::zeMemFree(hContext,ptr); - if (result != ZE_RESULT_SUCCESS) - throw std::runtime_error("acceleration buffer free failed"); -} - - -/* dispatch globals allocation is for debugging only */ - -enum Flags : uint32_t { - FLAGS_NONE, - DEPTH_TEST_LESS_EQUAL = 1 << 0 // when set we use <= for depth test, otherwise < -}; - -struct DispatchGlobals -{ - uint64_t rtMemBasePtr; // base address of the allocated stack memory - uint64_t callStackHandlerKSP; // this is the KSP of the continuation handler that is invoked by BTD when the read KSP is 0 - uint32_t asyncStackSize; // async-RT stack size in 64 byte blocks - uint32_t numDSSRTStacks : 16; // number of stacks per DSS - uint32_t syncRayQueryCount : 4; // number of ray queries in the sync-RT stack: 0-15 mapped to: 1-16 - unsigned _reserved_mbz : 12; - uint32_t maxBVHLevels; // the maximal number of supported instancing levels (0->8, 1->1, 2->2, ...) - Flags flags; // per context control flags -}; - -void* allocDispatchGlobals(sycl::device device, sycl::context context) -{ - size_t maxBVHLevels = 2; //RTC_MAX_INSTANCE_LEVEL_COUNT+1; - - size_t rtstack_bytes = (64+maxBVHLevels*(64+32)+63)&-64; - size_t num_rtstacks = 1<<17; // this is sufficiently large also for PVC - size_t dispatchGlobalSize = 128+num_rtstacks*rtstack_bytes; - - void* dispatchGlobalsPtr = alloc_accel_buffer(dispatchGlobalSize,device,context); - memset(dispatchGlobalsPtr, 0, dispatchGlobalSize); - - DispatchGlobals* dg = (DispatchGlobals*) dispatchGlobalsPtr; - dg->rtMemBasePtr = (uint64_t) dispatchGlobalsPtr + dispatchGlobalSize; - dg->callStackHandlerKSP = 0; - dg->asyncStackSize = 0; - dg->numDSSRTStacks = 0; - dg->syncRayQueryCount = 0; - dg->_reserved_mbz = 0; - dg->maxBVHLevels = maxBVHLevels; - dg->flags = DEPTH_TEST_LESS_EQUAL; - - return dispatchGlobalsPtr; -} - -/* vertex indices for cornell_box model */ -ze_rtas_triangle_indices_uint32_exp_t indices[] = { - { 0, 1, 2 }, - { 0, 2, 3 }, - { 4, 5, 6 }, - { 4, 6, 7 }, - { 8, 9, 10 }, - { 8, 10, 11 }, - { 12, 13, 14 }, - { 12, 14, 15 }, - { 16, 17, 18 }, - { 16, 18, 19 }, - { 20, 21, 22 }, - { 20, 22, 23 }, - { 24, 25, 26 }, - { 24, 26, 27 }, - { 28, 29, 30 }, - { 28, 30, 31 }, - { 32, 33, 34 }, - { 32, 34, 35 }, - { 36, 37, 38 }, - { 36, 38, 39 }, - { 40, 41, 42 }, - { 40, 42, 43 }, - { 44, 45, 46 }, - { 44, 46, 47 }, - { 48, 49, 50 }, - { 48, 50, 51 }, - { 52, 53, 54 }, - { 52, 54, 55 }, - { 56, 57, 58 }, - { 56, 58, 59 }, - { 60, 61, 62 }, - { 60, 62, 63 }, - { 64, 65, 66 }, - { 64, 66, 67 } -}; - -/* vertex positions for cornell_box model */ -ze_rtas_float3_exp_t vertices[] = { - { 552.8, 0, 0 }, - { 0, 0, 0 }, - { 0, 0, 559.2 }, - { 549.6, 0, 559.2 }, - { 290, 0, 114 }, - { 240, 0, 272 }, - { 82, 0, 225 }, - { 130, 0, 65 }, - { 472, 0, 406 }, - { 314, 0, 456 }, - { 265, 0, 296 }, - { 423, 0, 247 }, - { 556, 548.8, 0 }, - { 556, 548.8, 559.2 }, - { 0, 548.8, 559.2 }, - { 0, 548.8, 0 }, - { 549.6, 0, 559.2 }, - { 0, 0, 559.2 }, - { 0, 548.8, 559.2 }, - { 556, 548.8, 559.2 }, - { 0, 0, 559.2 }, - { 0, 0, 0 }, - { 0, 548.8, 0 }, - { 0, 548.8, 559.2 }, - { 552.8, 0, 0 }, - { 549.6, 0, 559.2 }, - { 556, 548.8, 559.2 }, - { 556, 548.8, 0 }, - { 130, 165, 65 }, - { 82, 165, 225 }, - { 240, 165, 272 }, - { 290, 165, 114 }, - { 290, 0, 114 }, - { 290, 165, 114 }, - { 240, 165, 272 }, - { 240, 0, 272 }, - { 130, 0, 65 }, - { 130, 165, 65 }, - { 290, 165, 114 }, - { 290, 0, 114 }, - { 82, 0, 225 }, - { 82, 165, 225 }, - { 130, 165, 65 }, - { 130, 0, 65 }, - { 240, 0, 272 }, - { 240, 165, 272 }, - { 82, 165, 225 }, - { 82, 0, 225 }, - { 423, 330, 247 }, - { 265, 330, 296 }, - { 314, 330, 456 }, - { 472, 330, 406 }, - { 423, 0, 247 }, - { 423, 330, 247 }, - { 472, 330, 406 }, - { 472, 0, 406 }, - { 472, 0, 406 }, - { 472, 330, 406 }, - { 314, 330, 456 }, - { 314, 0, 456 }, - { 314, 0, 456 }, - { 314, 330, 456 }, - { 265, 330, 296 }, - { 265, 0, 296 }, - { 265, 0, 296 }, - { 265, 330, 296 }, - { 423, 330, 247 }, - { 423, 0, 247 }, -}; - -/* builds acceleration structure */ -void* build_rtas(sycl::device device, sycl::context context) -{ - /* get L0 handles */ - ze_driver_handle_t hDriver = sycl::get_native(device.get_platform()); - ze_device_handle_t hDevice = sycl::get_native(device); - - /* create rtas builder object */ - ze_rtas_builder_exp_desc_t builderDesc = { ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXP_DESC }; - ze_rtas_builder_exp_handle_t hBuilder = nullptr; - ze_result_t err = ZeWrapper::zeRTASBuilderCreateExp(hDriver, &builderDesc, &hBuilder); - if (err != ZE_RESULT_SUCCESS) - throw std::runtime_error("ze_rtas_builder creation failed"); - - /* create geometry descriptor for single triangle mesh */ - ze_rtas_builder_triangles_geometry_info_exp_t mesh = {}; - mesh.geometryType = ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES; - mesh.geometryFlags = 0; - mesh.geometryMask = 0xFF; - - mesh.triangleFormat = ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_TRIANGLE_INDICES_UINT32; - mesh.triangleCount = sizeof(indices)/sizeof(ze_rtas_triangle_indices_uint32_exp_t); - mesh.triangleStride = sizeof(ze_rtas_triangle_indices_uint32_exp_t); - mesh.pTriangleBuffer = indices; - - mesh.vertexFormat = ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3; - mesh.vertexCount = sizeof(vertices)/sizeof(ze_rtas_float3_exp_t); - mesh.vertexStride = sizeof(ze_rtas_float3_exp_t); - mesh.pVertexBuffer = vertices; - - /* fill geometry descriptor array with pointer to single geometry descriptor */ - std::vector descs; - descs.push_back((ze_rtas_builder_geometry_info_exp_t*)&mesh); - - /* get acceleration structure format for this device */ - ze_rtas_device_exp_properties_t rtasProp = { ZE_STRUCTURE_TYPE_RTAS_DEVICE_EXP_PROPERTIES }; - ze_device_properties_t devProp = { ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES, &rtasProp }; - err = ZeWrapper::zeDeviceGetProperties(hDevice, &devProp ); - if (err != ZE_RESULT_SUCCESS) - throw std::runtime_error("zeDeviceGetProperties failed"); - - /* create parallel operation for parallel build */ - ze_rtas_parallel_operation_exp_handle_t hParallelOperation = nullptr; - err = ZeWrapper::zeRTASParallelOperationCreateExp(hDriver, &hParallelOperation); - if (err != ZE_RESULT_SUCCESS) - throw std::runtime_error("zeRTASParallelOperationCreateExp failed"); - - /* create descriptor of build operation */ - size_t accelBufferBytesOut = 0; - ze_rtas_aabb_exp_t bounds; - ze_rtas_builder_build_op_exp_desc_t buildOp = {}; - buildOp.stype = ZE_STRUCTURE_TYPE_RTAS_BUILDER_BUILD_OP_EXP_DESC; - buildOp.pNext = nullptr; - buildOp.rtasFormat = rtasProp.rtasFormat; - buildOp.buildQuality = ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXP_MEDIUM; - buildOp.buildFlags = 0; - buildOp.ppGeometries = (const ze_rtas_builder_geometry_info_exp_t **) descs.data(); - buildOp.numGeometries = descs.size(); - - /* just for debugging purposes */ -#if defined(EMBREE_SYCL_ALLOC_DISPATCH_GLOBALS) - ze_rtas_builder_build_op_debug_exp_desc_t buildOpDebug = { ZE_STRUCTURE_TYPE_RTAS_BUILDER_BUILD_OP_DEBUG_EXP_DESC }; - buildOpDebug.dispatchGlobalsPtr = dispatchGlobalsPtr; - buildOp.pNext = &buildOpDebug; -#endif - - /* query required buffer sizes */ - ze_rtas_builder_exp_properties_t buildProps = { ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXP_PROPERTIES }; - err = ZeWrapper::zeRTASBuilderGetBuildPropertiesExp(hBuilder,&buildOp,&buildProps); - if (err != ZE_RESULT_SUCCESS) - throw std::runtime_error("zeRTASBuilderGetBuildPropertiesExp failed"); - - /* allocate scratch buffer */ - std::vector scratchBuffer(buildProps.scratchBufferSizeBytes); - memset(scratchBuffer.data(),0,scratchBuffer.size()); - - /* allocate acceleration structure buffer */ - size_t accelBytes = buildProps.rtasBufferSizeBytesMaxRequired; - void* accel = alloc_accel_buffer(accelBytes,device,context); - memset(accel,0,accelBytes); // optional - - /* build acceleration strucuture multi threaded */ - err = ZeWrapper::zeRTASBuilderBuildExp(hBuilder,&buildOp, - scratchBuffer.data(),scratchBuffer.size(), - accel, accelBytes, - hParallelOperation, - nullptr, &bounds, &accelBufferBytesOut); - - if (err != ZE_RESULT_EXP_RTAS_BUILD_DEFERRED) - throw std::runtime_error("zeRTASBuilderBuildExp failed"); - - /* after the build is started one can query number of threads to use for the build */ - ze_rtas_parallel_operation_exp_properties_t prop = { ZE_STRUCTURE_TYPE_RTAS_PARALLEL_OPERATION_EXP_PROPERTIES }; - err = ZeWrapper::zeRTASParallelOperationGetPropertiesExp(hParallelOperation,&prop); - - if (err != ZE_RESULT_SUCCESS) - throw std::runtime_error("zeRTASParallelOperationGetPropertiesExp failed"); - - /* build in parallel using maximal number of build threads */ - tbb::parallel_for(0u, prop.maxConcurrency, 1u, [&](uint32_t) { - err = ZeWrapper::zeRTASParallelOperationJoinExp(hParallelOperation); - }); - - if (err != ZE_RESULT_SUCCESS) - throw std::runtime_error("zeRTASParallelOperationJoinExp failed"); - - /* destroy parallel operation again */ - err = ZeWrapper::zeRTASParallelOperationDestroyExp(hParallelOperation); - if (err != ZE_RESULT_SUCCESS) - throw std::runtime_error("zeRTASParallelOperationDestroyExp failed"); - - /* destroy rtas builder again */ - err = ZeWrapper::zeRTASBuilderDestroyExp(hBuilder); - if (err != ZE_RESULT_SUCCESS) - throw std::runtime_error("zeRTASBuilderDestroyExp failed"); - - return accel; -} - -/* render using simple UV shading */ -void render(unsigned int x, unsigned int y, void* bvh, unsigned int* pixels, unsigned int width, unsigned int height) -{ - /* write zero image if ray tracing extension is not supported */ - intel_raytracing_ext_flag_t flags = intel_get_raytracing_ext_flag(); - if (!(flags & intel_raytracing_ext_flag_ray_query)) { - pixels[y*width+x] = 0; - return; - } - - /* fixed camera */ - sycl::float3 vx(-1.f, -0.f, -0.f); - sycl::float3 vy(-0.f, -1.f, -0.f); - sycl::float3 vz(32.f, 32.f, 95.6379f); - sycl::float3 p(278.f, 273.f, -800.f); - - /* compute primary ray */ - intel_ray_desc_t ray; - ray.origin = p; - ray.direction = float(x)*vx*64.0f/float(width) + float(y)*vy*64/float(height) + vz; - ray.tmin = 0.0f; - ray.tmax = INFINITY; - ray.mask = 0xFF; - ray.flags = intel_ray_flags_none; - - /* trace ray */ - intel_ray_query_t query = intel_ray_query_init(ray,(intel_raytracing_acceleration_structure_t)bvh); - intel_ray_query_start_traversal(query); - intel_ray_query_sync(query); - - /* get UVs of hit point */ - float u = 0, v = 0; - if (intel_has_committed_hit(query)) - { - sycl::float2 uv = intel_get_hit_barycentrics( query, intel_hit_type_committed_hit ); - u = uv.x(); - v = uv.y(); - } - - /* write color to framebuffer */ - sycl::float3 color(u,v,1.0f-u-v); - unsigned int r = (unsigned int) (255.0f * color.x()); - unsigned int g = (unsigned int) (255.0f * color.y()); - unsigned int b = (unsigned int) (255.0f * color.z()); - pixels[y*width+x] = (b << 16) + (g << 8) + r; -} - -int main(int argc, char* argv[]) try -{ - /* use can specify reference image to compare against */ -#if defined(EMBREE_SYCL_L0_RTAS_BUILDER) - ZeWrapper::RTAS_BUILD_MODE rtas_build_mode = ZeWrapper::RTAS_BUILD_MODE::LEVEL_ZERO; -#else - ZeWrapper::RTAS_BUILD_MODE rtas_build_mode = ZeWrapper::RTAS_BUILD_MODE::INTERNAL; -#endif - - char* reference_img = NULL; - for (int i=1; i= argc) throw std::runtime_error("--compare: filename expected"); - reference_img = argv[i]; - } - else if (strcmp(argv[i], "--internal-rtas-builder") == 0) { - rtas_build_mode = ZeWrapper::RTAS_BUILD_MODE::INTERNAL; - } - else if (strcmp(argv[i], "--level-zero-rtas-builder") == 0) { - rtas_build_mode = ZeWrapper::RTAS_BUILD_MODE::LEVEL_ZERO; - } - else if (strcmp(argv[i], "--default-rtas-builder") == 0) { - rtas_build_mode = ZeWrapper::RTAS_BUILD_MODE::AUTO; - } - else if (strcmp(argv[i], "--size") == 0) { - if (++i >= argc) throw std::runtime_error("--size: width expected"); - global_width = atoi(argv[i]); - if (++i >= argc) throw std::runtime_error("--size: height expected"); - global_height = atoi(argv[i]); - if (global_width == 0) throw std::runtime_error("--size: width is zero"); - if (global_height == 0) throw std::runtime_error("--size: height is zero"); - if (global_width > 4096) throw std::runtime_error("--size: width too large"); - if (global_height > 4096) throw std::runtime_error("--size: height too large"); - } - else { - throw std::runtime_error("unknown command line argument"); - } - } - - /* create SYCL objects */ - sycl::device device = sycl::device(sycl::gpu_selector_v); - sycl::queue queue = sycl::queue(device,exception_handler); - sycl::context context = queue.get_context(); - - if (ZeWrapper::init() != ZE_RESULT_SUCCESS) { - std::cerr << "ZeWrapper not successfully initialized" << std::endl; - return 1; - } - - ze_result_t result = ZE_RESULT_SUCCESS; - sycl::platform platform = device.get_platform(); - ze_driver_handle_t hDriver = sycl::get_native(platform); - - /* enable RTAS extension only when enabled */ - if (rtas_build_mode == ZeWrapper::RTAS_BUILD_MODE::AUTO) - { - uint32_t count = 0; - std::vector extensions; - result = ZeWrapper::zeDriverGetExtensionProperties(hDriver,&count,extensions.data()); - if (result != ZE_RESULT_SUCCESS) - throw std::runtime_error("zeDriverGetExtensionProperties failed"); - - extensions.resize(count); - result = ZeWrapper::zeDriverGetExtensionProperties(hDriver,&count,extensions.data()); - if (result != ZE_RESULT_SUCCESS) - throw std::runtime_error("zeDriverGetExtensionProperties failed"); - - bool ze_rtas_builder = false; - for (uint32_t i=0; i(0,height,0,width), - [&](const tbb::blocked_range2d& r) { - for (int y=r.rows().begin(); y range(width,height); - cgh.parallel_for(range, [=](sycl::item<2> item) { - const uint32_t x = item.get_id(0); - const uint32_t y = item.get_id(1); - render(x,y,bvh,pixels,width,height); - }); - }); - queue.wait_and_throw(); -#endif - - /* free acceleration structure again */ - free_accel_buffer(bvh,context); - -#if defined(EMBREE_SYCL_ALLOC_DISPATCH_GLOBALS) - free_accel_buffer(dispatchGlobalsPtr, context); -#endif - -#if defined(ZE_RAYTRACING_RT_SIMULATION) - RTCore::Cleanup(); -#endif - - /* store image to disk */ - storeTga(pixels,width,height,"cornell_box.tga"); - if (!reference_img) return 0; - - /* compare to reference image */ - const size_t err = compareTga("cornell_box.tga", "cornell_box_reference.tga"); - std::cout << "difference to reference image is " << err << std::endl; - const bool ok = err < 32; - std::cout << "cornell_box "; - if (ok) std::cout << "[PASSED]" << std::endl; - else std::cout << "[FAILED]" << std::endl; - - return ok ? 0 : 1; -} -catch (std::runtime_error e) { - std::cerr << "std::runtime_error: " << e.what() << std::endl; - return 1; -} diff --git a/kernels/rthwif/testing/rthwif_test.cpp b/kernels/rthwif/testing/rthwif_test.cpp deleted file mode 100644 index 4e739e2133..0000000000 --- a/kernels/rthwif/testing/rthwif_test.cpp +++ /dev/null @@ -1,2267 +0,0 @@ -// Copyright 2009-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#define NOMINMAX - -// prevents "'__thiscall' calling convention is not supported for this target" warning from TBB -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wignored-attributes" - -#include -#include "tbb/tbb.h" - -#if defined(ZE_RAYTRACING) -#include "../rtbuild/sys/sysinfo.h" -#include "../rtbuild/sys/vector.h" -#include "../rtbuild/math/vec2.h" -#include "../rtbuild/math/vec3.h" -#include "../rtbuild/math/bbox.h" -#include "../rtbuild/math/affinespace.h" -#else -#include "../../../common/sys/sysinfo.h" -#include "../../../common/sys/vector.h" -#include "../../../common/math/vec2.h" -#include "../../../common/math/vec3.h" -#include "../../../common/math/bbox.h" -#include "../../../common/math/lbbox.h" -#include "../../../common/math/affinespace.h" -#endif - -#define _USE_MATH_DEFINES -#include - -#include "../rttrace/rttrace.h" - -#include - -#include -#include -#include -#include - -namespace embree { - double getSeconds(); -} - -sycl::device device; -sycl::context context; -void* dispatchGlobalsPtr = nullptr; - -struct RandomSampler { - unsigned int s; -}; - -unsigned int MurmurHash3_mix(unsigned int hash, unsigned int k) -{ - const unsigned int c1 = 0xcc9e2d51; - const unsigned int c2 = 0x1b873593; - const unsigned int r1 = 15; - const unsigned int r2 = 13; - const unsigned int m = 5; - const unsigned int n = 0xe6546b64; - - k *= c1; - k = (k << r1) | (k >> (32 - r1)); - k *= c2; - - hash ^= k; - hash = ((hash << r2) | (hash >> (32 - r2))) * m + n; - - return hash; -} - -unsigned int MurmurHash3_finalize(unsigned int hash) -{ - hash ^= hash >> 16; - hash *= 0x85ebca6b; - hash ^= hash >> 13; - hash *= 0xc2b2ae35; - hash ^= hash >> 16; - return hash; -} - -unsigned int LCG_next(unsigned int value) -{ - const unsigned int m = 1664525; - const unsigned int n = 1013904223; - return value * m + n; -} - -void RandomSampler_init(RandomSampler& self, int id) -{ - unsigned int hash = 0; - hash = MurmurHash3_mix(hash, id); - hash = MurmurHash3_finalize(hash); - self.s = hash; -} - -int RandomSampler_getInt(RandomSampler& self) { - self.s = LCG_next(self.s); return self.s >> 1; -} - -unsigned int RandomSampler_getUInt(RandomSampler& self) { - self.s = LCG_next(self.s); return self.s; -} - -float RandomSampler_getFloat(RandomSampler& self) { - return (float)RandomSampler_getInt(self) * 4.656612873077392578125e-10f; -} - -sycl::float3 RandomSampler_getFloat3(RandomSampler& self) -{ - const float x = RandomSampler_getFloat(self); - const float y = RandomSampler_getFloat(self); - const float z = RandomSampler_getFloat(self); - return sycl::float3(x,y,z); -} - -RandomSampler rng; - -ze_rtas_builder_exp_handle_t hBuilder = nullptr; -ze_rtas_parallel_operation_exp_handle_t parallelOperation = nullptr; - -enum class InstancingType -{ - NONE, - SW_INSTANCING, - HW_INSTANCING -}; - -enum class TestType -{ - TRIANGLES_COMMITTED_HIT, // triangles - TRIANGLES_POTENTIAL_HIT, // triangles + filter + check potential hit - TRIANGLES_ANYHIT_SHADER_COMMIT, // triangles + filter + commit - TRIANGLES_ANYHIT_SHADER_REJECT, // triangles + filter + reject - PROCEDURALS_COMMITTED_HIT, // procedural triangles - BUILD_TEST_TRIANGLES, // test BVH builder with triangles - BUILD_TEST_PROCEDURALS, // test BVH builder with procedurals - BUILD_TEST_INSTANCES, // test BVH builder with instances - BUILD_TEST_MIXED, // test BVH builder with mixed scene (triangles, procedurals, and instances) - BENCHMARK_TRIANGLES, // benchmark BVH builder with triangles - BENCHMARK_PROCEDURALS, // benchmark BVH builder with procedurals -}; - -enum class BuildMode -{ - BUILD_EXPECTED_SIZE, - BUILD_WORST_CASE_SIZE -}; - -struct TestInput -{ - sycl::float3 org; - sycl::float3 dir; - float tnear; - float tfar; - uint32_t mask; - uint32_t flags; -}; - -enum TestHitType { - TEST_COMMITTED_HIT, - TEST_POTENTIAL_HIT, - TEST_MISS -}; - -struct TestOutput -{ - // Ray data at level 0 - sycl::float3 ray0_org; - sycl::float3 ray0_dir; - float ray0_tnear; - uint32_t ray0_mask; - uint32_t ray0_flags; - - // Ray data at hit bvh_level - sycl::float3 rayN_org; - sycl::float3 rayN_dir; - float rayN_tnear; - uint32_t rayN_mask; - uint32_t rayN_flags; - - // Hit data - TestHitType hit_type; - uint32_t bvh_level; - uint32_t hit_candidate; - float t; - float u; - float v; - bool front_face; - uint32_t geomID; - uint32_t primID; - uint32_t instID; - uint32_t instUserID; - sycl::float3 v0; - sycl::float3 v1; - sycl::float3 v2; - - intel_float4x3 world_to_object; - intel_float4x3 object_to_world; -}; - -std::ostream& operator<<(std::ostream& out, const intel_float3& v) { - return out << "(" << v.x << "," << v.y << "," << v.z << ")"; -} - -void compareTestOutput(uint32_t tid, uint32_t& errors, const TestOutput& test, const TestOutput& expected) -{ -#define COMPARE(member) \ - if (test.member != expected.member) { \ - if (errors < 16) \ - std::cout << "test" << tid << " " #member " mismatch: output " << test.member << " != expected " << expected.member << std::endl; \ - errors++; \ - } -#define COMPARE1(member,eps) \ - if (fabs(test.member-expected.member) > eps) { \ - if (errors < 16) \ - std::cout << "test" << tid << " " #member " mismatch: output " << test.member << " != expected " << expected.member << std::endl; \ - errors++; \ - } -#define COMPARE3(member,eps) { \ - const bool x = fabs(test.member.x()-expected.member.x()) > eps; \ - const bool y = fabs(test.member.y()-expected.member.y()) > eps; \ - const bool z = fabs(test.member.z()-expected.member.z()) > eps; \ - if (x || y || z) { \ - if (errors < 16) \ - std::cout << "test" << tid << " " #member " mismatch: output " << test.member << " != expected " << expected.member << std::endl; \ - errors++; \ - } \ - } -#define COMPARE3I(member,eps) { \ - const bool x = test.member.x != expected.member.x; \ - const bool y = test.member.y != expected.member.y; \ - const bool z = test.member.z != expected.member.z; \ - if (x || y || z) { \ - if (errors < 16) \ - std::cout << "test" << tid << " " #member " mismatch: output " << test.member << " != expected " << expected.member << std::endl; \ - errors++; \ - } \ - } - - float eps = 2E-4; - - COMPARE3(ray0_org,0); - COMPARE3(ray0_dir,0); - COMPARE1(ray0_tnear,0); - COMPARE(ray0_mask); - COMPARE(ray0_flags); - COMPARE3(rayN_org,eps); - COMPARE3(rayN_dir,eps); - COMPARE1(rayN_tnear,eps); - COMPARE(rayN_mask); - COMPARE(rayN_flags); - COMPARE(hit_type); - COMPARE(bvh_level); - COMPARE(hit_candidate); - COMPARE1(t,eps); - COMPARE1(u,eps); - COMPARE1(v,eps); - COMPARE(front_face); - COMPARE(geomID); - COMPARE(primID); - COMPARE(instID); - COMPARE(instUserID); - COMPARE3(v0,eps); - COMPARE3(v1,eps); - COMPARE3(v2,eps); - COMPARE3I(world_to_object.vx,eps); - COMPARE3I(world_to_object.vy,eps); - COMPARE3I(world_to_object.vz,eps); - COMPARE3I(world_to_object.p ,eps); - COMPARE3I(object_to_world.vx,eps); - COMPARE3I(object_to_world.vy,eps); - COMPARE3I(object_to_world.vz,eps); - COMPARE3I(object_to_world.p ,eps); -} - -struct LinearSpace3f -{ - /*! matrix construction from column vectors */ - LinearSpace3f(const sycl::float3& vx, const sycl::float3& vy, const sycl::float3& vz) - : vx(vx), vy(vy), vz(vz) {} - - /*! matrix construction from row mayor data */ - LinearSpace3f(const float m00, const float m01, const float m02, - const float m10, const float m11, const float m12, - const float m20, const float m21, const float m22) - : vx(m00,m10,m20), vy(m01,m11,m21), vz(m02,m12,m22) {} - - /*! compute the determinant of the matrix */ - const float det() const { return sycl::dot(vx,sycl::cross(vy,vz)); } - - /*! compute adjoint matrix */ - const LinearSpace3f adjoint() const { return LinearSpace3f(sycl::cross(vy,vz),sycl::cross(vz,vx),sycl::cross(vx,vy)).transposed(); } - - /*! compute inverse matrix */ - const LinearSpace3f inverse() const - { - const float d = det(); - const LinearSpace3f a = adjoint(); - return { a.vx/d, a.vy/d, a.vz/d }; - } - - /*! compute transposed matrix */ - const LinearSpace3f transposed() const { return LinearSpace3f(vx.x(),vx.y(),vx.z(),vy.x(),vy.y(),vy.z(),vz.x(),vz.y(),vz.z()); } - - /*! return matrix for rotation around arbitrary axis */ - static LinearSpace3f rotate(const sycl::float3 _u, const float r) { - sycl::float3 u = normalize(_u); - float s = sinf(r), c = cosf(r); - return LinearSpace3f(u.x()*u.x()+(1-u.x()*u.x())*c, u.x()*u.y()*(1-c)-u.z()*s, u.x()*u.z()*(1-c)+u.y()*s, - u.x()*u.y()*(1-c)+u.z()*s, u.y()*u.y()+(1-u.y()*u.y())*c, u.y()*u.z()*(1-c)-u.x()*s, - u.x()*u.z()*(1-c)-u.y()*s, u.y()*u.z()*(1-c)+u.x()*s, u.z()*u.z()+(1-u.z()*u.z())*c); - } - -public: - sycl::float3 vx,vy,vz; -}; - -sycl::float3 xfmPoint (const LinearSpace3f& m, const sycl::float3& p) { - return p.x()*m.vx + (p.y()*m.vy + p.z()*m.vz); -} - -struct Transform -{ - Transform () - : vx(1,0,0), vy(0,1,0), vz(0,0,1), p(0,0,0) {} - - Transform ( sycl::float3 vx, sycl::float3 vy, sycl::float3 vz, sycl::float3 p ) - : vx(vx), vy(vy), vz(vz), p(p) {} - - Transform ( intel_float4x3 xfm ) - : vx(xfm.vx), vy(xfm.vy), vz(xfm.vz), p(xfm.p) {} - - operator intel_float4x3 () const { - return { vx, vy, vz, p }; - } - - sycl::float3 vx,vy,vz,p; -}; - -std::ostream& operator<<(std::ostream& out, const Transform& t) { - return out << " Transform {" << t.vx << ", " << t.vy << ", " << t.vz << ", " << t.p << "}"; -} - -sycl::float3 xfmPoint (const Transform& m, const sycl::float3& p) { - return p.x()*m.vx + (p.y()*m.vy + (p.z()*m.vz + m.p)); -} - -sycl::float3 xfmVector (const Transform& m, const sycl::float3& v) { - return v.x()*m.vx + (v.y()*m.vy + v.z()*m.vz); -} - -Transform operator* (const Transform& a, const Transform& b) { - return Transform(xfmVector(a,b.vx),xfmVector(a,b.vy),xfmVector(a,b.vz),xfmPoint(a,b.p)); -} - -Transform rcp( const Transform& a ) -{ -#if 1 // match builder math for rcp to have bit accurate data to compare against - embree::Vec3f vx(a.vx.x(), a.vx.y(), a.vx.z()); - embree::Vec3f vy(a.vy.x(), a.vy.y(), a.vy.z()); - embree::Vec3f vz(a.vz.x(), a.vz.y(), a.vz.z()); - embree::Vec3f p(a. p.x(), a. p.y(), a. p.z()); - embree::AffineSpace3f l(embree::LinearSpace3f(vx,vy,vz),p); - embree::AffineSpace3f il = rcp(l); - sycl::float3 ivx(il.l.vx.x, il.l.vx.y, il.l.vx.z); - sycl::float3 ivy(il.l.vy.x, il.l.vy.y, il.l.vy.z); - sycl::float3 ivz(il.l.vz.x, il.l.vz.y, il.l.vz.z); - sycl::float3 ip(il.p.x, il.p.y, il.p.z); - return Transform(ivx,ivy,ivz,ip); -#else - const LinearSpace3f l = { a.vx, a.vy, a.vz }; - const LinearSpace3f il = l.inverse(); - return Transform(il.vx, il.vy, il.vz, -xfmPoint(il,a.p)); -#endif -} - -Transform RandomSampler_getTransform(RandomSampler& self) -{ - const sycl::float3 u = RandomSampler_getFloat3(self) + sycl::float3(0.01f); - const float r = 2.0f*M_PI*RandomSampler_getFloat(self); - const sycl::float3 p = 10.0f*RandomSampler_getFloat3(self); - const LinearSpace3f xfm = LinearSpace3f::rotate(u,r); - return Transform(xfm.vx,xfm.vy,xfm.vz,p); -} - -struct Bounds3f -{ - void extend( sycl::float3 p ) { - lower = sycl::min(lower,p); - upper = sycl::max(upper,p); - } - - static Bounds3f empty() { - return { sycl::float3(INFINITY), sycl::float3(-INFINITY) }; - } - - operator ze_rtas_aabb_exp_t () const { - return { { lower.x(), lower.y(), lower.z() }, { upper.x(), upper.y(), upper.z() } }; - } - - sycl::float3 lower; - sycl::float3 upper; -}; - -std::ostream& operator<<(std::ostream& out, const Bounds3f& b) { - return out << "Bounds3f {" << b.lower << "," << b.upper << "}"; -} - -const Bounds3f xfmBounds(const Transform& m, const Bounds3f& b) -{ - Bounds3f dst = Bounds3f::empty(); - const sycl::float3 p0(b.lower.x(),b.lower.y(),b.lower.z()); dst.extend(xfmPoint(m,p0)); - const sycl::float3 p1(b.lower.x(),b.lower.y(),b.upper.z()); dst.extend(xfmPoint(m,p1)); - const sycl::float3 p2(b.lower.x(),b.upper.y(),b.lower.z()); dst.extend(xfmPoint(m,p2)); - const sycl::float3 p3(b.lower.x(),b.upper.y(),b.upper.z()); dst.extend(xfmPoint(m,p3)); - const sycl::float3 p4(b.upper.x(),b.lower.y(),b.lower.z()); dst.extend(xfmPoint(m,p4)); - const sycl::float3 p5(b.upper.x(),b.lower.y(),b.upper.z()); dst.extend(xfmPoint(m,p5)); - const sycl::float3 p6(b.upper.x(),b.upper.y(),b.lower.z()); dst.extend(xfmPoint(m,p6)); - const sycl::float3 p7(b.upper.x(),b.upper.y(),b.upper.z()); dst.extend(xfmPoint(m,p7)); - return dst; -} - -struct Triangle -{ - Triangle() - : v0(0.f,0.f,0.f), v1(0.f,0.f,0.f), v2(0.f,0.f,0.f), index(0) {} - - Triangle (sycl::float3 v0, sycl::float3 v1, sycl::float3 v2, uint32_t index) - : v0(v0), v1(v1), v2(v2), index(index) {} - - sycl::float3 sample(float u, float v) const { - return (1.0f-u-v)*v0 + u*v1 + v*v2; - } - - sycl::float3 center() const { - return (v0+v1+v2)/3.0f; - } - - Bounds3f bounds() const - { - const sycl::float3 lower = sycl::min(v0,sycl::min(v1,v2)); - const sycl::float3 upper = sycl::max(v0,sycl::max(v1,v2)); - return { lower, upper }; - } - - const Triangle transform( Transform xfm ) const { - return Triangle(xfmPoint(xfm,v0), xfmPoint(xfm,v1), xfmPoint(xfm,v2), index); - } - - sycl::float3 v0; - sycl::float3 v1; - sycl::float3 v2; - uint32_t index; -}; - -struct less_float3 { - bool operator() ( const sycl::float3& a, const sycl::float3& b ) const { - if (a.x() != b.x()) return a.x() < b.x(); - if (a.y() != b.y()) return a.y() < b.y(); - if (a.z() != b.z()) return a.z() < b.z(); - return false; - } -}; - -std::ostream& operator<<(std::ostream& out, const Triangle& tri) { - return out << "Triangle {" << tri.v0 << "," << tri.v1 << "," << tri.v2 << "}"; -} - -struct Hit -{ - Transform local_to_world; - Triangle triangle; - bool procedural_triangle = false; - bool procedural_instance = false; - uint32_t instUserID = -1; - uint32_t instID = -1; - uint32_t geomID = -1; - uint32_t primID = -1; -}; - - -struct GEOMETRY_INSTANCE_DESC : ze_rtas_builder_instance_geometry_info_exp_t -{ - ze_rtas_transform_float3x4_aligned_column_major_exp_t xfmdata; -}; - -typedef union GEOMETRY_DESC -{ - ze_rtas_builder_geometry_type_exp_t geometryType; - ze_rtas_builder_triangles_geometry_info_exp_t Triangles; - ze_rtas_builder_quads_geometry_info_exp_t Quads; - ze_rtas_builder_procedural_geometry_info_exp_t AABBs; - GEOMETRY_INSTANCE_DESC Instance; - -} GEOMETRY_DESC; - -struct Geometry -{ - enum Type { - TRIANGLE_MESH, - INSTANCE - }; - - Geometry (Type type) - : type(type) {} - - virtual void getDesc(GEOMETRY_DESC* desc) = 0; - - virtual void transform( const Transform xfm) { - throw std::runtime_error("Geometry::transform not implemented"); - } - - virtual void buildAccel(sycl::device& device, sycl::context& context, BuildMode buildMode, ze_rtas_builder_build_quality_hint_exp_t quality) { - }; - - virtual void buildTriMap(Transform local_to_world, std::vector id_stack, uint32_t instUserID, bool procedural_instance, std::vector& tri_map) = 0; - - virtual size_t getNumPrimitives() const = 0; - - Type type; -}; - -struct TriangleMesh : public Geometry -{ -public: - - TriangleMesh (ze_rtas_builder_geometry_exp_flags_t gflags = 0, bool procedural = false) - : Geometry(Type::TRIANGLE_MESH), - gflags(gflags), procedural(procedural), - triangles_alloc(context,device,sycl::ext::oneapi::property::usm::device_read_only()), triangles(0,triangles_alloc), - vertices_alloc (context,device,sycl::ext::oneapi::property::usm::device_read_only()), vertices(0,vertices_alloc) {} - - virtual ~TriangleMesh() {} - - void* operator new(size_t size) { - return sycl::aligned_alloc_shared(64,size,device,context,sycl::ext::oneapi::property::usm::device_read_only()); - } - void operator delete(void* ptr) { - sycl::free(ptr,context); - } - - size_t size() const { - return triangles.size(); - } - - virtual void transform( const Transform xfm) override - { - for (size_t i=0; istype == ZE_STRUCTURE_TYPE_RTAS_GEOMETRY_AABBS_EXP_CB_PARAMS); - const TriangleMesh* mesh = (TriangleMesh*) params->pGeomUserPtr; - - for (uint32_t i=0; iprimIDCount; i++) - { - const uint32_t primID = params->primID+i; - const Bounds3f bounds = mesh->getBounds(primID); - ze_rtas_aabb_exp_t* boundsOut = params->pBoundsOut; - boundsOut[i].lower.x = bounds.lower.x(); - boundsOut[i].lower.y = bounds.lower.y(); - boundsOut[i].lower.z = bounds.lower.z(); - boundsOut[i].upper.x = bounds.upper.x(); - boundsOut[i].upper.y = bounds.upper.y(); - boundsOut[i].upper.z = bounds.upper.z(); - } - } - - virtual void getDesc(GEOMETRY_DESC* desc) override - { - if (procedural) - { - ze_rtas_builder_procedural_geometry_info_exp_t& out = desc->AABBs; - memset(&out,0,sizeof(out)); - out.geometryType = ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; - out.geometryFlags = gflags; - out.geometryMask = 0xFF; - out.primCount = triangles.size(); - out.pfnGetBoundsCb = TriangleMesh::getBoundsCallback; - out.pGeomUserPtr = this; - } - else - { - ze_rtas_builder_triangles_geometry_info_exp_t& out = desc->Triangles; - memset(&out,0,sizeof(out)); - out.geometryType = ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES; - out.geometryFlags = gflags; - out.geometryMask = 0xFF; - out.triangleFormat = ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_TRIANGLE_INDICES_UINT32; - out.vertexFormat = ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3; - out.pTriangleBuffer = (ze_rtas_triangle_indices_uint32_exp_t*) triangles.data(); - out.triangleCount = triangles.size(); - out.triangleStride = sizeof(sycl::int4); - out.pVertexBuffer = (ze_rtas_float3_exp_t*) vertices.data(); - out.vertexCount = vertices.size(); - out.vertexStride = sizeof(sycl::float3); - } - } - - Triangle getTriangle( const uint32_t primID ) const - { - const sycl::float3 v0 = vertices[triangles[primID].x()]; - const sycl::float3 v1 = vertices[triangles[primID].y()]; - const sycl::float3 v2 = vertices[triangles[primID].z()]; - const uint32_t index = triangles[primID].w(); - return Triangle(v0,v1,v2,index); - } - - Bounds3f getBounds( const uint32_t primID ) const { - return getTriangle(primID).bounds(); - } - - uint32_t addVertex( const sycl::float3& v ) - { - auto e = vertex_map.find(v); - if (e != vertex_map.end()) return e->second; - vertices.push_back(v); - vertex_map[v] = vertices.size()-1; - return vertices.size()-1; - } - - void addTriangle( const Triangle& tri ) - { - const uint32_t v0 = addVertex(tri.v0); - const uint32_t v1 = addVertex(tri.v1); - const uint32_t v2 = addVertex(tri.v2); - triangles.push_back(sycl::int4(v0,v1,v2,tri.index)); - } - - void split(const sycl::float3 P, const sycl::float3 N, std::shared_ptr& mesh0, std::shared_ptr& mesh1) - { - mesh0 = std::shared_ptr(new TriangleMesh(gflags,procedural)); - mesh1 = std::shared_ptr(new TriangleMesh(gflags,procedural)); - - for (uint32_t primID=0; primID<(uint32_t) size(); primID++) - { - const Triangle tri = getTriangle(primID); - if (sycl::dot(tri.center()-P,N) < 0.0f) mesh0->addTriangle(tri); - else mesh1->addTriangle(tri); - } - } - - void split(std::shared_ptr& mesh0, std::shared_ptr& mesh1) - { - uint32_t N = (uint32_t) size(); - mesh0 = std::shared_ptr(new TriangleMesh(gflags,procedural)); - mesh1 = std::shared_ptr(new TriangleMesh(gflags,procedural)); - mesh0->triangles.reserve(triangles.size()/2+1); - mesh1->triangles.reserve(triangles.size()/2+1); - mesh0->vertices.reserve(vertices.size()/2+8); - mesh1->vertices.reserve(vertices.size()/2+8); - - for (uint32_t primID=0; primIDaddTriangle(tri); - else mesh1->addTriangle(tri); - } - } - - /* selects random sub-set of triangles */ - void selectRandom(const uint32_t numTriangles) - { - assert(numTriangles <= size()); - - /* first randomize triangles */ - for (size_t i=0; i id_stack, uint32_t instUserID, bool procedural_instance, std::vector& tri_map) override - { - uint32_t instID = -1; - uint32_t geomID = -1; - - if (id_stack.size()) { - geomID = id_stack.back(); - id_stack.pop_back(); - } - - if (id_stack.size()) { - instID = id_stack.back(); - id_stack.pop_back(); - } - - assert(id_stack.size() == 0); - - for (uint32_t primID=0; primID triangles_alloc_ty; - triangles_alloc_ty triangles_alloc; - std::vector triangles; - - typedef sycl::usm_allocator vertices_alloc_ty; - vertices_alloc_ty vertices_alloc; - std::vector vertices; - - std::map vertex_map; -}; - -template -struct InstanceGeometryT : public Geometry -{ - InstanceGeometryT(const Transform& local2world, std::shared_ptr scene, bool procedural, uint32_t instUserID) - : Geometry(Type::INSTANCE), procedural(procedural), instUserID(instUserID), local2world(local2world), scene(scene) {} - - virtual ~InstanceGeometryT() {} - - void* operator new(size_t size) { - return sycl::aligned_alloc_shared(64,size,device,context,sycl::ext::oneapi::property::usm::device_read_only()); - } - void operator delete(void* ptr) { - sycl::free(ptr,context); - } - - static void getBoundsCallback (ze_rtas_geometry_aabbs_exp_cb_params_t* params) - { - assert(params->stype == ZE_STRUCTURE_TYPE_RTAS_GEOMETRY_AABBS_EXP_CB_PARAMS); - assert(params->primID == 0); - assert(params->primIDCount == 1); - const InstanceGeometryT* inst = (InstanceGeometryT*) params->pGeomUserPtr; - const Bounds3f scene_bounds = inst->scene->getBounds(); - const Bounds3f bounds = xfmBounds(inst->local2world, scene_bounds); - ze_rtas_aabb_exp_t* boundsOut = params->pBoundsOut; - boundsOut->lower.x = bounds.lower.x(); - boundsOut->lower.y = bounds.lower.y(); - boundsOut->lower.z = bounds.lower.z(); - boundsOut->upper.x = bounds.upper.x(); - boundsOut->upper.y = bounds.upper.y(); - boundsOut->upper.z = bounds.upper.z(); - } - - virtual void getDesc(GEOMETRY_DESC* desc) override - { - if (procedural) - { - ze_rtas_builder_procedural_geometry_info_exp_t& out = desc->AABBs; - memset(&out,0,sizeof(out)); - out.geometryType = ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; - out.geometryFlags = 0; - out.geometryMask = 0xFF; - out.primCount = 1; - out.pfnGetBoundsCb = InstanceGeometryT::getBoundsCallback; - out.pGeomUserPtr = this; - } - else - { - GEOMETRY_INSTANCE_DESC& out = desc->Instance; - memset(&out,0,sizeof(GEOMETRY_INSTANCE_DESC)); - out.geometryType = ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE; - out.instanceFlags = 0; - out.geometryMask = 0xFF; - out.instanceUserID = instUserID; - out.transformFormat = ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3X4_ALIGNED_COLUMN_MAJOR; - out.pTransform = (float*)&out.xfmdata; - out.xfmdata.vx_x = local2world.vx.x(); - out.xfmdata.vx_y = local2world.vx.y(); - out.xfmdata.vx_z = local2world.vx.z(); - out.xfmdata.pad0 = 0.0f; - out.xfmdata.vy_x = local2world.vy.x(); - out.xfmdata.vy_y = local2world.vy.y(); - out.xfmdata.vy_z = local2world.vy.z(); - out.xfmdata.pad1 = 0.0f; - out.xfmdata.vz_x = local2world.vz.x(); - out.xfmdata.vz_y = local2world.vz.y(); - out.xfmdata.vz_z = local2world.vz.z(); - out.xfmdata.pad2 = 0.0f; - out.xfmdata.p_x = local2world.p.x(); - out.xfmdata.p_y = local2world.p.y(); - out.xfmdata.p_z = local2world.p.z(); - out.xfmdata.pad3 = 0.0f; - out.pBounds = &scene->bounds; - out.pAccelerationStructure = scene->getAccel(); - } - } - - virtual void buildAccel(sycl::device& device, sycl::context& context, BuildMode buildMode, ze_rtas_builder_build_quality_hint_exp_t quality) override { - scene->buildAccel(device,context,buildMode); - } - - virtual void buildTriMap(Transform local_to_world_in, std::vector id_stack, uint32_t instUserID, bool procedural_instance, std::vector& tri_map) override { - instUserID = this->instUserID; - scene->buildTriMap(local_to_world_in * local2world, id_stack, instUserID, procedural, tri_map); - } - - size_t getNumPrimitives() const override { - return 1; - } - - bool procedural; - uint32_t instUserID = -1; - Transform local2world; - std::shared_ptr scene; -}; - -std::shared_ptr createTrianglePlane (const sycl::float3& p0, const sycl::float3& dx, const sycl::float3& dy, size_t width, size_t height) -{ - std::shared_ptr mesh(new TriangleMesh); - mesh->triangles.resize(2*width*height); - mesh->vertices.resize((width+1)*(height+1)); - - for (size_t y=0; y<=height; y++) { - for (size_t x=0; x<=width; x++) { - sycl::float3 p = p0+float(x)/float(width)*dx+float(y)/float(height)*dy; - size_t i = y*(width+1)+x; - mesh->vertices[i] = p; - } - } - for (size_t y=0; ytriangles[i+0] = sycl::int4((int)p00,(int)p01,(int)p10,i+0); - mesh->triangles[i+1] = sycl::int4((int)p11,(int)p10,(int)p01,i+1); - } - } - return mesh; -} - -void* alloc_accel_buffer_internal(size_t bytes, sycl::device device, sycl::context context) -{ - ze_context_handle_t hContext = sycl::get_native(context); - ze_device_handle_t hDevice = sycl::get_native(device); - - ze_rtas_device_exp_properties_t rtasProp = { ZE_STRUCTURE_TYPE_RTAS_DEVICE_EXP_PROPERTIES }; - ze_device_properties_t devProp = { ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES, &rtasProp }; - ze_result_t err = ZeWrapper::zeDeviceGetProperties(hDevice, &devProp ); - if (err != ZE_RESULT_SUCCESS) - throw std::runtime_error("zeDeviceGetProperties failed"); - - ze_raytracing_mem_alloc_ext_desc_t rt_desc; - rt_desc.stype = ZE_STRUCTURE_TYPE_RAYTRACING_MEM_ALLOC_EXT_DESC; - rt_desc.pNext = nullptr; - rt_desc.flags = 0; - - ze_device_mem_alloc_desc_t device_desc; - device_desc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; - device_desc.pNext = &rt_desc; - device_desc.flags = ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_CACHED; - device_desc.ordinal = 0; - - ze_host_mem_alloc_desc_t host_desc; - host_desc.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC; - host_desc.pNext = nullptr; - host_desc.flags = ZE_HOST_MEM_ALLOC_FLAG_BIAS_CACHED; - - void* ptr = nullptr; - ze_result_t result = ZeWrapper::zeMemAllocShared(hContext,&device_desc,&host_desc,bytes,rtasProp.rtasBufferAlignment,hDevice,&ptr); - if (result != ZE_RESULT_SUCCESS) - throw std::runtime_error("accel allocation failed"); - return ptr; -} - -void free_accel_buffer_internal(void* ptr, sycl::context context) -{ - if (ptr == nullptr) return; - ze_context_handle_t hContext = sycl::get_native(context); - ze_result_t result = ZeWrapper::zeMemFree(hContext,ptr); - if (result != ZE_RESULT_SUCCESS) - throw std::runtime_error("accel free failed"); -} - -struct Block { - Block (size_t bytes, sycl::device device, sycl::context context) - : base((char*)alloc_accel_buffer_internal(bytes,device,context)), total(bytes), cur(0) {} - ~Block() { - free_accel_buffer_internal((void*)base,context); - } - void* alloc(size_t bytes) { - bytes &= -128; - if (cur+bytes > total) return nullptr; - void* ptr = &base[cur]; - cur += bytes; - return ptr; - } - char* base = nullptr; - size_t total = 0; - size_t cur = 0; - -}; - -bool g_use_accel_blocks = true; -std::vector> g_blocks; - -void* alloc_accel_buffer(size_t bytes, sycl::device device, sycl::context context) -{ - if (!g_use_accel_blocks) - return alloc_accel_buffer_internal(bytes,device,context); - - if (g_blocks.size() == 0) - g_blocks.push_back(std::shared_ptr(new Block(1024*1024,device,context))); - - if (bytes > 1024*1024) { - g_blocks.push_back(std::shared_ptr(new Block(bytes,device,context))); - void* ptr = g_blocks.back()->alloc(bytes); - assert(ptr); - return ptr; - } - - void* ptr = g_blocks.back()->alloc(bytes); - if (ptr) return ptr; - - g_blocks.push_back(std::shared_ptr(new Block(1024*1024,device,context))); - ptr = g_blocks.back()->alloc(bytes); - assert(ptr); - return ptr; -} - -void free_accel_buffer(void* ptr, sycl::context context) -{ - if (!g_use_accel_blocks) - return free_accel_buffer_internal(ptr,context); -} - -struct Scene -{ - typedef InstanceGeometryT InstanceGeometry; - - Scene() - : geometries_alloc(context,device,sycl::ext::oneapi::property::usm::device_read_only()), geometries(0,geometries_alloc), bounds(Bounds3f::empty()), accel(nullptr) {} - - Scene(uint32_t width, uint32_t height, bool opaque, bool procedural) - : geometries_alloc(context,device,sycl::ext::oneapi::property::usm::device_read_only()), geometries(0,geometries_alloc), bounds(Bounds3f::empty()), accel(nullptr) - { - std::shared_ptr plane = createTrianglePlane(sycl::float3(0,0,0), sycl::float3(width,0,0), sycl::float3(0,height,0), width, height); - plane->gflags = opaque ? (ze_rtas_builder_geometry_exp_flag_t) 0 : ZE_RTAS_BUILDER_GEOMETRY_EXP_FLAG_NON_OPAQUE; - plane->procedural = procedural; - geometries.push_back(plane); - } - - ~Scene() { - free_accel_buffer(accel,context); - } - - void* operator new(size_t size) { - return sycl::aligned_alloc_shared(64,size,device,context,sycl::ext::oneapi::property::usm::device_read_only()); - } - - void operator delete(void* ptr) { - sycl::free(ptr,context); - } - - void add(std::shared_ptr mesh) { - geometries.push_back(mesh); - } - - void splitIntoGeometries(uint32_t numGeometries) - { - bool progress = true; - while (progress) - { - size_t N = geometries.size(); - progress = false; - for (uint32_t i=0; i mesh = std::dynamic_pointer_cast(geometries[i])) - { - if (mesh->size() <= 1) continue; - progress = true; - - /*const Triangle tri = mesh->getTriangle(RandomSampler_getUInt(rng)%mesh->size()); - const float u = 2.0f*M_PI*RandomSampler_getFloat(rng); - const sycl::float3 P = tri.center(); - const sycl::float3 N(cosf(u),sinf(u),0.0f); - - std::shared_ptr mesh0, mesh1; - mesh->split(P,N,mesh0,mesh1);*/ - - std::shared_ptr mesh0, mesh1; - mesh->split(mesh0,mesh1); - geometries[i] = std::dynamic_pointer_cast(mesh0); - geometries.push_back(std::dynamic_pointer_cast(mesh1)); - - if (geometries.size() >= numGeometries) - return; - } - } - } - assert(geometries.size() == numGeometries); - } - - /* splits each primitive into a geometry */ - void splitIntoGeometries() - { - /* count number of triangles */ - uint32_t numTriangles = 0; - for (uint32_t i=0; i mesh = std::dynamic_pointer_cast(geometries[i])) { - numTriangles++; - } - } - - std::vector, geometries_alloc_ty> new_geometries(0,geometries_alloc); - new_geometries.reserve(numTriangles); - - for (uint32_t i=0; i mesh = std::dynamic_pointer_cast(geometries[i])) - { - if (mesh->size() <= 1) { - new_geometries.push_back(geometries[i]); - continue; - } - - for (uint32_t j=0; jsize(); j++) { - std::shared_ptr mesh0(new TriangleMesh(mesh->gflags,mesh->procedural)); - mesh0->triangles.reserve(1); - mesh->vertices.reserve(3); - mesh0->addTriangle(mesh->getTriangle(j)); - new_geometries.push_back(mesh0); - } - } - } - - geometries = new_geometries; - } - - void createInstances(uint32_t maxInstances, uint32_t blockSize = 1, bool procedural = false) - { - std::vector, geometries_alloc_ty> instances(0,geometries_alloc); - - for (uint32_t i=0; i= maxInstances) - { - for (uint32_t j=begin; j scene(new Scene); - for (size_t j=begin; jtransform(world2local); - scene->geometries.push_back(geometries[j]); - } - - //std::shared_ptr instance = std::make_shared(local2world,scene,procedural); - uint32_t instUserID = RandomSampler_getUInt(rng); - std::shared_ptr instance(new InstanceGeometry(local2world,scene,procedural,instUserID)); - instances.push_back(instance); - } - - geometries = instances; - } - - void mixTrianglesAndProcedurals() - { - for (uint32_t i=0; i mesh = std::dynamic_pointer_cast(geometries[i])) - mesh->procedural = i%2; - } - - void addNullGeometries(uint32_t D) - { - size_t N = geometries.size(); - geometries.resize(N+D); - if (N == 0) return; - - for (size_t g=N; g desc(size()); - std::vector geom(size()); - size_t numPrimitives = 0; - for (size_t geomID=0; geomID& g = geometries[geomID]; - - /* skip NULL geometries */ - if (g == nullptr) { - geom[geomID] = nullptr; - continue; - } - - numPrimitives += g->getNumPrimitives(); - g->buildAccel(device,context,buildMode,quality); - g->getDesc(&desc[geomID]); - geom[geomID] = (const ze_rtas_builder_geometry_info_exp_t*) &desc[geomID]; - } - - ze_device_handle_t hDevice = sycl::get_native(device); - - ze_rtas_device_exp_properties_t rtasProp = { ZE_STRUCTURE_TYPE_RTAS_DEVICE_EXP_PROPERTIES }; - ze_device_properties_t devProp = { ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES, &rtasProp }; - ze_result_t err = ZeWrapper::zeDeviceGetProperties(hDevice, &devProp ); - if (err != ZE_RESULT_SUCCESS) - throw std::runtime_error("zeDeviceGetProperties failed"); - - /* estimate accel size */ - size_t accelBufferBytesOut = 0; - ze_rtas_aabb_exp_t bounds; - ze_rtas_builder_build_op_exp_desc_t args; - memset(&args,0,sizeof(args)); - args.stype = ZE_STRUCTURE_TYPE_RTAS_BUILDER_BUILD_OP_EXP_DESC; - args.pNext = nullptr; - args.rtasFormat = rtasProp.rtasFormat; - args.buildQuality = quality; - args.buildFlags = 0; - args.ppGeometries = (const ze_rtas_builder_geometry_info_exp_t**) geom.data(); - args.numGeometries = geom.size(); - - /* just for debugging purposes */ -#if defined(EMBREE_SYCL_ALLOC_DISPATCH_GLOBALS) - ze_rtas_builder_build_op_debug_exp_desc_t buildOpDebug = { ZE_STRUCTURE_TYPE_RTAS_BUILDER_BUILD_OP_DEBUG_EXP_DESC }; - buildOpDebug.dispatchGlobalsPtr = dispatchGlobalsPtr; - args.pNext = &buildOpDebug; -#endif - - ze_rtas_builder_exp_properties_t size = { ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXP_PROPERTIES }; - err = ZeWrapper::zeRTASBuilderGetBuildPropertiesExp(hBuilder,&args,&size); - if (err != ZE_RESULT_SUCCESS) - throw std::runtime_error("BVH size estimate failed"); - - if (size.rtasBufferSizeBytesExpected > size.rtasBufferSizeBytesMaxRequired) - throw std::runtime_error("expected larger than worst case"); - - /* allocate scratch buffer */ - size_t sentinelBytes = 1024; // add that many zero bytes to catch buffer overruns - std::vector scratchBuffer(size.scratchBufferSizeBytes+sentinelBytes); - memset(scratchBuffer.data(),0,scratchBuffer.size()); - - accel = nullptr; - size_t accelBytes = 0; - - /* build with different modes */ - switch (buildMode) - { - case BuildMode::BUILD_WORST_CASE_SIZE: { - - accelBytes = size.rtasBufferSizeBytesMaxRequired; - accel = alloc_accel_buffer(accelBytes+sentinelBytes,device,context); - memset(accel,0,accelBytes+sentinelBytes); - - /* build accel */ - double t0 = embree::getSeconds(); - size_t numIterations = benchmark ? 16 : 1; - - for (size_t i=0; ibounds = bounds; - - if (!benchmark) - { - /* scratch buffer bounds check */ - for (size_t i=size.scratchBufferSizeBytes; i id_stack, uint32_t instUserID, bool procedural_instance, std::vector& tri_map) - { - for (uint32_t geomID=0; geomIDbuildTriMap(local_to_world,id_stack,instUserID,procedural_instance,tri_map); - id_stack.pop_back(); - } - } - - size_t size() const { - return geometries.size(); - } - - Bounds3f getBounds() { - return { - { bounds.lower.x, bounds.lower.y, bounds.lower.z }, - { bounds.upper.x, bounds.upper.y, bounds.upper.z } - }; - } - - void* getAccel() { - return accel; - } - - std::shared_ptr operator[] ( size_t i ) { return geometries[i]; } - - typedef sycl::usm_allocator, sycl::usm::alloc::shared> geometries_alloc_ty; - geometries_alloc_ty geometries_alloc; - std::vector, geometries_alloc_ty> geometries; - - ze_rtas_aabb_exp_t bounds; - void* accel; -}; - -void exception_handler(sycl::exception_list exceptions) -{ - for (std::exception_ptr const& e : exceptions) { - try { - std::rethrow_exception(e); - } catch(sycl::exception const& e) { - std::cout << "Caught asynchronous SYCL exception: " << e.what() << std::endl; - } - } -}; - -void render(uint32_t i, const TestInput& in, TestOutput& out, intel_raytracing_acceleration_structure_t accel) -{ - intel_raytracing_ext_flag_t flags = intel_get_raytracing_ext_flag(); - if (!(flags & intel_raytracing_ext_flag_ray_query)) - return; - - /* setup ray */ - intel_ray_desc_t ray; - ray.origin = in.org; - ray.direction = in.dir; - ray.tmin = in.tnear; - ray.tmax = in.tfar; - ray.mask = in.mask; - ray.flags = (intel_ray_flags_t) in.flags; - - /* trace ray */ - intel_ray_query_t query = intel_ray_query_init(ray,accel); - intel_ray_query_start_traversal(query); - intel_ray_query_sync(query); - - /* return ray data of level 0 */ - out.ray0_org = intel_get_ray_origin(query,0); - out.ray0_dir = intel_get_ray_direction(query,0); - out.ray0_tnear = intel_get_ray_tmin(query,0); - out.ray0_mask = intel_get_ray_mask(query,0); - out.ray0_flags = intel_get_ray_flags(query,0); - - /* clear ray data of level N */ - out.rayN_org = sycl::float3(0.f,0.f,0.f); - out.rayN_dir = sycl::float3(0.f,0.f,0.f); - out.rayN_tnear = 0.0f; - out.rayN_mask = 0; - out.rayN_flags = 0; - - /* potential hit */ - if (!intel_is_traversal_done(query)) - { - out.hit_type = TEST_POTENTIAL_HIT; - out.bvh_level = intel_get_hit_bvh_level( query, intel_hit_type_potential_hit ); - out.hit_candidate = intel_get_hit_candidate( query, intel_hit_type_potential_hit ); - out.t = intel_get_hit_distance(query, intel_hit_type_potential_hit); - out.u = intel_get_hit_barycentrics(query, intel_hit_type_potential_hit).x; - out.v = intel_get_hit_barycentrics(query, intel_hit_type_potential_hit).y; - out.front_face = intel_get_hit_front_face( query, intel_hit_type_potential_hit ); - out.instUserID = intel_get_hit_instance_user_id( query, intel_hit_type_potential_hit ); - out.instID = intel_get_hit_instance_id( query, intel_hit_type_potential_hit ); - out.geomID = intel_get_hit_geometry_id( query, intel_hit_type_potential_hit ); - if (i%2) out.primID = intel_get_hit_triangle_primitive_id( query, intel_hit_type_potential_hit ); - else out.primID = intel_get_hit_primitive_id ( query, intel_hit_type_potential_hit ); - intel_float3 vertex_out[3]; - intel_get_hit_triangle_vertices(query, vertex_out, intel_hit_type_potential_hit); - out.v0 = vertex_out[0]; - out.v1 = vertex_out[1]; - out.v2 = vertex_out[2]; - - /* return ray data at current level */ - uint32_t bvh_level = intel_get_hit_bvh_level( query, intel_hit_type_potential_hit ); - out.rayN_org = intel_get_ray_origin(query,bvh_level); - out.rayN_dir = intel_get_ray_direction(query,bvh_level); - out.rayN_tnear = intel_get_ray_tmin(query,bvh_level); - out.rayN_mask = intel_get_ray_mask(query,bvh_level); - out.rayN_flags = intel_get_ray_flags(query,bvh_level); - - /* return instance transformations */ - out.world_to_object = intel_get_hit_world_to_object(query,intel_hit_type_potential_hit); - out.object_to_world = intel_get_hit_object_to_world(query,intel_hit_type_potential_hit); - } - - /* committed hit */ - else if (intel_has_committed_hit(query)) - { - out.hit_type = TEST_COMMITTED_HIT; - out.bvh_level = intel_get_hit_bvh_level( query, intel_hit_type_committed_hit ); - out.hit_candidate = intel_get_hit_candidate( query, intel_hit_type_committed_hit ); - out.t = intel_get_hit_distance(query, intel_hit_type_committed_hit); - out.u = intel_get_hit_barycentrics(query, intel_hit_type_committed_hit).x; - out.v = intel_get_hit_barycentrics(query, intel_hit_type_committed_hit).y; - out.front_face = intel_get_hit_front_face( query, intel_hit_type_committed_hit ); - out.instUserID = intel_get_hit_instance_user_id( query, intel_hit_type_committed_hit ); - out.instID = intel_get_hit_instance_id( query, intel_hit_type_committed_hit ); - out.geomID = intel_get_hit_geometry_id( query, intel_hit_type_committed_hit ); - if (i%2) out.primID = intel_get_hit_triangle_primitive_id( query, intel_hit_type_committed_hit ); - else out.primID = intel_get_hit_primitive_id ( query, intel_hit_type_committed_hit ); - intel_float3 vertex_out[3]; - intel_get_hit_triangle_vertices(query, vertex_out, intel_hit_type_committed_hit); - out.v0 = vertex_out[0]; - out.v1 = vertex_out[1]; - out.v2 = vertex_out[2]; - - /* return instance transformations */ - out.world_to_object = intel_get_hit_world_to_object(query,intel_hit_type_committed_hit); - out.object_to_world = intel_get_hit_object_to_world(query,intel_hit_type_committed_hit); - } - - /* miss */ - else { - out.hit_type = TEST_MISS; - } - - /* abandon ray query */ - intel_ray_query_abandon(query); -} - -void render_loop(uint32_t i, const TestInput& in, TestOutput& out, size_t scene_in, intel_raytracing_acceleration_structure_t accel, TestType test) -{ - intel_raytracing_ext_flag_t flags = intel_get_raytracing_ext_flag(); - if (!(flags & intel_raytracing_ext_flag_ray_query)) - return; - - /* setup ray */ - intel_ray_desc_t ray; - ray.origin = in.org; - ray.direction = in.dir; - ray.tmin = in.tnear; - ray.tmax = in.tfar; - ray.mask = in.mask; - ray.flags = (intel_ray_flags_t) in.flags; - - /* trace ray */ - intel_ray_query_t query = intel_ray_query_init(ray,accel); - intel_ray_query_start_traversal(query); - intel_ray_query_sync(query); - - /* return ray data of level 0 */ - out.ray0_org = intel_get_ray_origin(query,0); - out.ray0_dir = intel_get_ray_direction(query,0); - out.ray0_tnear = intel_get_ray_tmin(query,0); - out.ray0_mask = intel_get_ray_mask(query,0); - out.ray0_flags = intel_get_ray_flags(query,0); - - /* clear ray data of level N */ - out.rayN_org = sycl::float3(0.f,0.f,0.f); - out.rayN_dir = sycl::float3(0.f,0.f,0.f); - out.rayN_tnear = 0.0f; - out.rayN_mask = 0; - out.rayN_flags = 0; - - Scene* scenes[2]; - scenes[0] = (Scene*) scene_in; - scenes[1] = nullptr; - - /* traversal loop */ - while (!intel_is_traversal_done(query)) - { - const intel_candidate_type_t candidate = intel_get_hit_candidate(query, intel_hit_type_potential_hit); - - if (candidate == intel_candidate_type_triangle) - { - if (test == TestType::TRIANGLES_POTENTIAL_HIT) - { - out.hit_type = TEST_POTENTIAL_HIT; - out.bvh_level = intel_get_hit_bvh_level( query, intel_hit_type_potential_hit ); - out.hit_candidate = intel_get_hit_candidate( query, intel_hit_type_potential_hit ); - out.t = intel_get_hit_distance(query, intel_hit_type_potential_hit); - out.u = intel_get_hit_barycentrics(query, intel_hit_type_potential_hit).x; - out.v = intel_get_hit_barycentrics(query, intel_hit_type_potential_hit).y; - out.front_face = intel_get_hit_front_face( query, intel_hit_type_potential_hit ); - out.instUserID = intel_get_hit_instance_user_id( query, intel_hit_type_potential_hit ); - out.instID = intel_get_hit_instance_id( query, intel_hit_type_potential_hit ); - out.geomID = intel_get_hit_geometry_id( query, intel_hit_type_potential_hit ); - if (i%2) out.primID = intel_get_hit_triangle_primitive_id( query, intel_hit_type_potential_hit ); - else out.primID = intel_get_hit_primitive_id ( query, intel_hit_type_potential_hit ); - intel_float3 vertex_out[3]; - intel_get_hit_triangle_vertices(query, vertex_out, intel_hit_type_potential_hit); - out.v0 = vertex_out[0]; - out.v1 = vertex_out[1]; - out.v2 = vertex_out[2]; - - /* return instance transformations */ - out.world_to_object = intel_get_hit_world_to_object(query,intel_hit_type_committed_hit); - out.object_to_world = intel_get_hit_object_to_world(query,intel_hit_type_committed_hit); - - /* return ray data at current level */ - uint32_t bvh_level = intel_get_hit_bvh_level( query, intel_hit_type_potential_hit ); - out.rayN_org = intel_get_ray_origin(query,bvh_level); - out.rayN_dir = intel_get_ray_direction(query,bvh_level); - out.rayN_tnear = intel_get_ray_tmin(query,bvh_level); - out.rayN_mask = intel_get_ray_mask(query,bvh_level); - out.rayN_flags = intel_get_ray_flags(query,bvh_level); - return; - } - - if (test == TestType::TRIANGLES_ANYHIT_SHADER_COMMIT) - intel_ray_query_commit_potential_hit(query); - } - - else if (candidate == intel_candidate_type_procedural) - { - const uint32_t bvh_level = intel_get_hit_bvh_level( query, intel_hit_type_potential_hit ); - - const uint32_t instID = intel_get_hit_instance_id( query, intel_hit_type_potential_hit ); - const uint32_t geomID = intel_get_hit_geometry_id( query, intel_hit_type_potential_hit ); - const uint32_t primID = intel_get_hit_primitive_id( query, intel_hit_type_potential_hit ); - - Geometry* geom = nullptr; - if (instID != -1) { - Scene::InstanceGeometry* instance = (Scene::InstanceGeometry*) (scenes[0]->geometries.data() + instID)->get(); - geom = (instance->scene->geometries.data() + geomID)->get(); - } else { - geom = (scenes[bvh_level]->geometries.data() + geomID)->get(); - } - - if (geom->type == Geometry::TRIANGLE_MESH) - { - const TriangleMesh* mesh = (TriangleMesh*) geom; - - const sycl::int4 tri = *(mesh->triangles.data() + primID); - const sycl::float3 tri_v0 = *(mesh->vertices.data() + tri.x()); - const sycl::float3 tri_v1 = *(mesh->vertices.data() + tri.y()); - const sycl::float3 tri_v2 = *(mesh->vertices.data() + tri.z()); - - /* calculate vertices relative to ray origin */ - const sycl::float3 O = intel_get_ray_origin(query,bvh_level); - const sycl::float3 D = intel_get_ray_direction(query,bvh_level); - const float tnear = intel_get_ray_tmin(query,bvh_level); - const float tfar = intel_get_hit_distance(query, intel_hit_type_committed_hit); - const sycl::float3 v0 = tri_v0-O; - const sycl::float3 v1 = tri_v1-O; - const sycl::float3 v2 = tri_v2-O; - - /* calculate triangle edges */ - const sycl::float3 e0 = v2-v0; - const sycl::float3 e1 = v0-v1; - const sycl::float3 e2 = v1-v2; - - /* perform edge tests */ - const float U = sycl::dot(cross(e0,v2+v0),D); - const float V = sycl::dot(cross(e1,v0+v1),D); - const float W = sycl::dot(cross(e2,v1+v2),D); - const float UVW = U+V+W; - bool valid = (std::min(U,std::min(V,W)) >= -0.0f) || (std::max(U,std::max(V,W)) <= 0.0f); - - /* calculate geometry normal and denominator */ - const sycl::float3 Ng = sycl::cross(e2,e1); - const float den = 2.0f*(dot(Ng,D)); - - /* perform depth test */ - const float T = 2.0f*dot(v0,Ng); - const float t = T/den; - const float u = U/UVW; - const float v = V/UVW; - valid &= tnear <= t & t <= tfar; - valid &= den != 0.0f; - - /* commit hit */ - if (valid) - intel_ray_query_commit_potential_hit_override(query,t,sycl::float2(u,v)); - } - else if (geom->type == Geometry::INSTANCE) - { - const Scene::InstanceGeometry* inst = (Scene::InstanceGeometry*) geom; - const Transform local2world = inst->local2world; - const Transform world2local = rcp(local2world); - - /* load ray */ - const uint32_t bvh_level = intel_get_hit_bvh_level( query, intel_hit_type_potential_hit ); - const sycl::float3 O = intel_get_ray_origin(query,bvh_level); - const sycl::float3 D = intel_get_ray_direction(query,bvh_level); - - /* transform ray */ - const sycl::float3 O1 = xfmPoint(world2local, O); - const sycl::float3 D1 = xfmVector(world2local, D); - - scenes[bvh_level+1] = inst->scene.get(); - intel_raytracing_acceleration_structure_t inst_accel = (intel_raytracing_acceleration_structure_t) inst->scene->getAccel(); - - /* continue traversal */ - intel_ray_desc_t ray; - ray.origin = O1; - ray.direction = D1; - ray.tmin = intel_get_ray_tmin(query,bvh_level); - ray.tmax = 0.0f; // unused - ray.mask = intel_get_ray_mask(query,bvh_level); - ray.flags = intel_get_ray_flags(query,bvh_level); - intel_ray_query_forward_ray(query, ray, inst_accel); - } - } - - intel_ray_query_start_traversal(query); - intel_ray_query_sync(query); - } - - /* committed hit */ - if (intel_has_committed_hit(query)) - { - out.hit_type = TEST_COMMITTED_HIT; - out.bvh_level = intel_get_hit_bvh_level( query, intel_hit_type_committed_hit ); - out.hit_candidate = intel_get_hit_candidate( query, intel_hit_type_committed_hit ); - out.t = intel_get_hit_distance(query, intel_hit_type_committed_hit); - out.u = intel_get_hit_barycentrics(query, intel_hit_type_committed_hit).x; - out.v = intel_get_hit_barycentrics(query, intel_hit_type_committed_hit).y; - out.front_face = intel_get_hit_front_face( query, intel_hit_type_committed_hit ); - out.instUserID = intel_get_hit_instance_user_id( query, intel_hit_type_committed_hit ); - out.instID = intel_get_hit_instance_id( query, intel_hit_type_committed_hit ); - out.geomID = intel_get_hit_geometry_id( query, intel_hit_type_committed_hit ); - out.primID = intel_get_hit_primitive_id( query, intel_hit_type_committed_hit ); - - out.v0 = sycl::float3(0.f,0.f,0.f); - out.v1 = sycl::float3(0.f,0.f,0.f); - out.v2 = sycl::float3(0.f,0.f,0.f); - if (intel_get_hit_candidate( query, intel_hit_type_committed_hit ) == intel_candidate_type_triangle) - { - intel_float3 vertex_out[3]; - intel_get_hit_triangle_vertices(query, vertex_out, intel_hit_type_committed_hit); - out.v0 = vertex_out[0]; - out.v1 = vertex_out[1]; - out.v2 = vertex_out[2]; - } - - /* return instance transformations */ - out.world_to_object = intel_get_hit_world_to_object(query,intel_hit_type_committed_hit); - out.object_to_world = intel_get_hit_object_to_world(query,intel_hit_type_committed_hit); - } - - /* miss */ - else { - out.hit_type = TEST_MISS; - } - - /* abandon ray query */ - intel_ray_query_abandon(query); -} - -void buildTestExpectedInputAndOutput(std::shared_ptr scene, size_t numTests, TestType test, TestInput* in, TestOutput* out_expected) -{ - std::vector tri_map; - tri_map.resize(numTests); - std::vector id_stack; - Transform local_to_world; - scene->buildTriMap(local_to_world,id_stack,-1,false,tri_map); - - TestHitType hit_type = TEST_MISS; - switch (test) { - case TestType::TRIANGLES_COMMITTED_HIT: hit_type = TEST_COMMITTED_HIT; break; - case TestType::TRIANGLES_POTENTIAL_HIT: hit_type = TEST_POTENTIAL_HIT; break; - case TestType::TRIANGLES_ANYHIT_SHADER_COMMIT: hit_type = TEST_COMMITTED_HIT; break; - case TestType::TRIANGLES_ANYHIT_SHADER_REJECT: hit_type = TEST_MISS; break; - case TestType::PROCEDURALS_COMMITTED_HIT: hit_type = TEST_COMMITTED_HIT; break; - default: assert(false); break; - }; - - //for (size_t y=0; y scene = std::make_shared(width,height,opaque,procedural); - std::shared_ptr scene(new Scene(width,height,opaque,procedural)); - scene->splitIntoGeometries(16); - if (inst != InstancingType::NONE) - scene->createInstances(scene->size(),3, inst == InstancingType::SW_INSTANCING); - - scene->addNullGeometries(16); - - scene->buildAccel(device,context,BuildMode::BUILD_EXPECTED_SIZE,false); - - /* calculate test input and expected output */ - TestInput* in = (TestInput*) sycl::aligned_alloc(64,numTests*sizeof(TestInput),device,context,sycl::usm::alloc::shared); - memset(in, 0, numTests*sizeof(TestInput)); - TestOutput* out_test = (TestOutput*) sycl::aligned_alloc(64,numTests*sizeof(TestOutput),device,context,sycl::usm::alloc::shared); - memset(out_test, 0, numTests*sizeof(TestOutput)); - TestOutput* out_expected = (TestOutput*) sycl::aligned_alloc(64,numTests*sizeof(TestOutput),device,context,sycl::usm::alloc::shared); - memset(out_expected, 0, numTests*sizeof(TestOutput)); - - buildTestExpectedInputAndOutput(scene,numTests,test,in,out_expected); - - /* execute test */ - intel_raytracing_acceleration_structure_t accel = (intel_raytracing_acceleration_structure_t) scene->getAccel(); - size_t scene_ptr = (size_t) scene.get(); - - if (inst != InstancingType::SW_INSTANCING && - (test == TestType::TRIANGLES_COMMITTED_HIT || test == TestType::TRIANGLES_POTENTIAL_HIT)) - { -#if defined(ZE_RAYTRACING_RT_SIMULATION) - tbb::parallel_for(size_t(0),numTests, [&](size_t i) { - render(i,in[i],out_test[i],accel); - }); -#else - queue.submit([&](sycl::handler& cgh) { - const sycl::range<1> range(numTests); - cgh.parallel_for(range, [=](sycl::item<1> item) { - const uint32_t i = item.get_id(0); - render(i,in[i],out_test[i],accel); - }); - }); - queue.wait_and_throw(); -#endif - } - else - { -#if defined(ZE_RAYTRACING_RT_SIMULATION) - tbb::parallel_for(size_t(0),numTests, [&](size_t i) { - render_loop(i,in[i],out_test[i],scene_ptr,accel,test); - }); -#else - queue.submit([&](sycl::handler& cgh) { - const sycl::range<1> range(numTests); - cgh.parallel_for(range, [=](sycl::item<1> item) { - const uint32_t i = item.get_id(0); - render_loop(i,in[i],out_test[i],scene_ptr,accel,test); - }); - }); - queue.wait_and_throw(); -#endif - } - - /* verify result */ - uint32_t numErrors = 0; - for (size_t tid=0; tid plane = createTrianglePlane(sycl::float3(0,0,0), sycl::float3(width,0,0), sycl::float3(0,width,0), width, width); - if (test == TestType::BUILD_TEST_PROCEDURALS) plane->procedural = true; - plane->selectRandom(numPrimitives); - if (testID%2) plane->unshareVertices(); - - std::shared_ptr scene(new Scene); - scene->add(plane); - - if (test == TestType::BUILD_TEST_PROCEDURALS) { - if (testID%3==0) - scene->splitIntoGeometries(); - } - else if (test == TestType::BUILD_TEST_MIXED) { - scene->splitIntoGeometries(std::max(1u,std::min(1024u,numPrimitives))); - scene->mixTrianglesAndProcedurals(); - scene->createInstances(scene->size()/2); - } - else if (test == TestType::BUILD_TEST_INSTANCES) { - scene->splitIntoGeometries(std::max(1u,std::min(1024u,numPrimitives))); - scene->createInstances(scene->size()); - } - - scene->addNullGeometries(16); - scene->buildAccel(device,context,buildMode,false); - - /* calculate test input and expected output */ - TestInput* in = (TestInput*) sycl::aligned_alloc(64,numPrimitives*sizeof(TestInput),device,context,sycl::usm::alloc::shared); - memset(in, 0, numPrimitives*sizeof(TestInput)); - TestOutput* out_test = (TestOutput*) sycl::aligned_alloc(64,numPrimitives*sizeof(TestOutput),device,context,sycl::usm::alloc::shared); - memset(out_test, 0, numPrimitives*sizeof(TestOutput)); - TestOutput* out_expected = (TestOutput*) sycl::aligned_alloc(64,numPrimitives*sizeof(TestOutput),device,context,sycl::usm::alloc::shared); - memset(out_expected, 0, numPrimitives*sizeof(TestOutput)); - - buildTestExpectedInputAndOutput(scene,numPrimitives,TestType::TRIANGLES_COMMITTED_HIT,in,out_expected); - - /* execute test */ - intel_raytracing_acceleration_structure_t accel = (intel_raytracing_acceleration_structure_t) scene->getAccel(); - size_t scene_ptr = (size_t) scene.get(); - - if (numPrimitives) - { -#if defined(ZE_RAYTRACING_RT_SIMULATION) - tbb::parallel_for(size_t(0),size_t(numPrimitives), [&](size_t i) { - render_loop(i,in[i],out_test[i],scene_ptr,accel,TestType::TRIANGLES_COMMITTED_HIT); - }); -#else - queue.submit([&](sycl::handler& cgh) { - const sycl::range<1> range(numPrimitives); - cgh.parallel_for(range, [=](sycl::item<1> item) { - const uint32_t i = item.get_id(0); - render_loop(i,in[i],out_test[i],scene_ptr,accel,TestType::TRIANGLES_COMMITTED_HIT); - }); - }); - queue.wait_and_throw(); -#endif - } - - /* verify result */ - uint32_t numErrors = 0; - for (size_t tid=0; tid10 ? i*i : i; - std::cout << "testing " << numPrimitives << " primitives" << std::endl; - numErrors += executeBuildTest(device,queue,context,test,buildMode,numPrimitives,i); - } - return numErrors; -} - -uint32_t executeBenchmark(sycl::device& device, sycl::queue& queue, sycl::context& context, TestType test) -{ - for (uint32_t i=0; i<=20; i++) - { - const uint32_t numPrimitives = 1< plane = createTrianglePlane(sycl::float3(0,0,0), sycl::float3(width,0,0), sycl::float3(0,width,0), width, width); - if (test == TestType::BENCHMARK_PROCEDURALS) plane->procedural = true; - plane->selectSequential(numPrimitives); - - std::shared_ptr scene(new Scene); - scene->add(plane); - - scene->buildAccel(device,context,BuildMode::BUILD_WORST_CASE_SIZE,true); - } - return 0; -} - -enum Flags : uint32_t { - FLAGS_NONE, - DEPTH_TEST_LESS_EQUAL = 1 << 0 // when set we use <= for depth test, otherwise < -}; - -struct DispatchGlobals -{ - uint64_t rtMemBasePtr; // base address of the allocated stack memory - uint64_t callStackHandlerKSP; // this is the KSP of the continuation handler that is invoked by BTD when the read KSP is 0 - uint32_t asyncStackSize; // async-RT stack size in 64 byte blocks - uint32_t numDSSRTStacks : 16; // number of stacks per DSS - uint32_t syncRayQueryCount : 4; // number of ray queries in the sync-RT stack: 0-15 mapped to: 1-16 - unsigned _reserved_mbz : 12; - uint32_t maxBVHLevels; // the maximal number of supported instancing levels (0->8, 1->1, 2->2, ...) - Flags flags; // per context control flags -}; - -void* allocDispatchGlobals(sycl::device device, sycl::context context) -{ - size_t maxBVHLevels = 2; //RTC_MAX_INSTANCE_LEVEL_COUNT+1; - - size_t rtstack_bytes = (64+maxBVHLevels*(64+32)+63)&-64; - size_t num_rtstacks = 1<<17; // this is sufficiently large also for PVC - size_t dispatchGlobalSize = 128+num_rtstacks*rtstack_bytes; - - void* dispatchGlobalsPtr = alloc_accel_buffer(dispatchGlobalSize,device,context); - memset(dispatchGlobalsPtr, 0, dispatchGlobalSize); - - DispatchGlobals* dg = (DispatchGlobals*) dispatchGlobalsPtr; - dg->rtMemBasePtr = (uint64_t) dispatchGlobalsPtr + dispatchGlobalSize; - dg->callStackHandlerKSP = 0; - dg->asyncStackSize = 0; - dg->numDSSRTStacks = 0; - dg->syncRayQueryCount = 0; - dg->_reserved_mbz = 0; - dg->maxBVHLevels = maxBVHLevels; - dg->flags = DEPTH_TEST_LESS_EQUAL; - - return dispatchGlobalsPtr; -} - -int main(int argc, char* argv[]) try -{ - TestType test = TestType::TRIANGLES_COMMITTED_HIT; - InstancingType inst = InstancingType::NONE; - BuildMode buildMode = BuildMode::BUILD_EXPECTED_SIZE; - -#if defined(EMBREE_SYCL_L0_RTAS_BUILDER) - ZeWrapper::RTAS_BUILD_MODE rtas_build_mode = ZeWrapper::RTAS_BUILD_MODE::LEVEL_ZERO; -#else - ZeWrapper::RTAS_BUILD_MODE rtas_build_mode = ZeWrapper::RTAS_BUILD_MODE::INTERNAL; -#endif - - bool jit_cache = false; - uint32_t numThreads = tbb::this_task_arena::max_concurrency(); - - /* command line parsing */ - if (argc == 1) { - std::cout << "ERROR: no test specified" << std::endl; - return 1; - } - - /* parse all command line options */ - for (size_t i=1; i= argc) throw std::runtime_error("Error: --jit-cache : syntax error"); - jit_cache = atoi(argv[i]); - } - else if (strcmp(argv[i], "--threads") == 0) { - if (++i >= argc) throw std::runtime_error("Error: --threads : syntax error"); - numThreads = atoi(argv[i]); - } - else { - std::cout << "ERROR: invalid command line option " << argv[i] << std::endl; - return 1; - } - } - - if (jit_cache) - std::cout << "WARNING: JIT caching is not supported!" << std::endl; - - if (ZeWrapper::init() != ZE_RESULT_SUCCESS) { - std::cerr << "ZeWrapper not successfully initialized" << std::endl; - return 1; - } - -#if defined(ZE_RAYTRACING_RT_SIMULATION) - RTCore::Init(); - RTCore::SetXeVersion((RTCore::XeVersion)ZE_RAYTRACING_DEVICE); -#endif - -#if TBB_INTERFACE_VERSION >= 11005 - tbb::global_control tbb_threads(tbb::global_control::max_allowed_parallelism,numThreads); -#else - tbb::task_scheduler_init tbb_threads(tbb::task_scheduler_init::deferred); - tbb_threads.initialize(int(numThreads)); -#endif - - /* initialize SYCL device */ - device = sycl::device(sycl::gpu_selector_v); - sycl::queue queue = sycl::queue(device,exception_handler); - context = queue.get_context(); - -#if defined(EMBREE_SYCL_ALLOC_DISPATCH_GLOBALS) - dispatchGlobalsPtr = allocDispatchGlobals(device,context); -#endif - - /* execute test */ - RandomSampler_init(rng,0x56FE238A); - - ze_result_t result = ZE_RESULT_SUCCESS; - sycl::platform platform = device.get_platform(); - ze_driver_handle_t hDriver = sycl::get_native(platform); - - /* enable RTAS extension only when enabled */ - if (rtas_build_mode == ZeWrapper::RTAS_BUILD_MODE::AUTO) - { - uint32_t count = 0; - std::vector extensions; - result = ZeWrapper::zeDriverGetExtensionProperties(hDriver,&count,extensions.data()); - if (result != ZE_RESULT_SUCCESS) - throw std::runtime_error("zeDriverGetExtensionProperties failed"); - - extensions.resize(count); - result = ZeWrapper::zeDriverGetExtensionProperties(hDriver,&count,extensions.data()); - if (result != ZE_RESULT_SUCCESS) - throw std::runtime_error("zeDriverGetExtensionProperties failed"); - - bool ze_rtas_builder = false; - for (uint32_t i=0; i= TestType::BENCHMARK_TRIANGLES) - numErrors = executeBenchmark(device,queue,context,test); - else if (test >= TestType::BUILD_TEST_TRIANGLES) - numErrors = executeBuildTest(device,queue,context,test,buildMode); - else - numErrors = executeTest(device,queue,context,inst,test); - - err = ZeWrapper::zeRTASParallelOperationDestroyExp(parallelOperation); - if (err != ZE_RESULT_SUCCESS) - throw std::runtime_error("parallel operation destruction failed"); - - /* destroy rtas builder again */ - err = ZeWrapper::zeRTASBuilderDestroyExp(hBuilder); - if (err != ZE_RESULT_SUCCESS) - throw std::runtime_error("ze_rtas_builder destruction failed"); - -#if defined(EMBREE_SYCL_ALLOC_DISPATCH_GLOBALS) - free_accel_buffer(dispatchGlobalsPtr, context); -#endif - -#if defined(ZE_RAYTRACING_RT_SIMULATION) - RTCore::Cleanup(); -#endif - - return numErrors ? 1 : 0; -} -catch (std::runtime_error e) { - std::cerr << "std::runtime_error: " << e.what() << std::endl; - return 1; -} - -#pragma clang diagnostic pop diff --git a/kernels/sycl/rthwif_embree.cpp b/kernels/sycl/rthwif_embree.cpp index ce65cfede7..00a581b60f 100644 --- a/kernels/sycl/rthwif_embree.cpp +++ b/kernels/sycl/rthwif_embree.cpp @@ -23,7 +23,7 @@ #include "../geometry/triangle_intersector_pluecker.h" #include "rthwif_embree.h" -#include "../rthwif/rttrace/rttrace.h" +#include "rthwif_rttrace.h" using namespace embree; diff --git a/kernels/sycl/rthwif_embree_builder.cpp b/kernels/sycl/rthwif_embree_builder.cpp index 4711172d87..4810ead8db 100644 --- a/kernels/sycl/rthwif_embree_builder.cpp +++ b/kernels/sycl/rthwif_embree_builder.cpp @@ -109,9 +109,7 @@ namespace embree if (result != ZE_RESULT_SUCCESS) return -1; bool ze_extension_ray_tracing = false; -#if defined(EMBREE_SYCL_L0_RTAS_BUILDER) bool ze_rtas_builder = false; -#endif for (uint32_t i=0; i(sycl_device); diff --git a/kernels/rthwif/rttrace/rttrace.h b/kernels/sycl/rthwif_rttrace.h similarity index 99% rename from kernels/rthwif/rttrace/rttrace.h rename to kernels/sycl/rthwif_rttrace.h index 11c7d6b007..cf699c787b 100644 --- a/kernels/rthwif/rttrace/rttrace.h +++ b/kernels/sycl/rthwif_rttrace.h @@ -7,10 +7,6 @@ #include "rtcore.h" #endif -#if defined(EMBREE_SYCL_RT_VALIDATION_API) -# include "rttrace_validation.h" -#else - #include #pragma clang diagnostic push @@ -262,5 +258,3 @@ inline bool intel_has_committed_hit( intel_ray_query_t query ) { return false; } #endif #pragma clang diagnostic pop - -#endif diff --git a/kernels/sycl/scene_sycl.cpp b/kernels/sycl/scene_sycl.cpp index bf37537159..d2a68bc572 100644 --- a/kernels/sycl/scene_sycl.cpp +++ b/kernels/sycl/scene_sycl.cpp @@ -9,11 +9,6 @@ #include "../common/scene.h" #include "../common/context.h" #include "../geometry/filter.h" -#include "rthwif_embree.h" - -#if defined(EMBREE_SYCL_SUPPORT) -# include "../sycl/rthwif_embree_builder.h" -#endif #include "../../common/algorithms/parallel_for.h" diff --git a/scripts/cmake-presets/continuous.json b/scripts/cmake-presets/continuous.json index 5d0e3d916e..59a8383135 100644 --- a/scripts/cmake-presets/continuous.json +++ b/scripts/cmake-presets/continuous.json @@ -18,7 +18,6 @@ "cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithDebInfo", "EMBREE_SYCL_SUPPORT": "ON", - "EMBREE_SYCL_L0_RTAS_BUILDER" : "ON", "EMBREE_SYCL_AOT_DEVICES": "none", "EMBREE_MAX_ISA": "SSE2" } @@ -33,7 +32,6 @@ "cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithDebInfo", "EMBREE_SYCL_SUPPORT": "ON", - "EMBREE_SYCL_L0_RTAS_BUILDER" : "OFF", "EMBREE_SYCL_AOT_DEVICES": "none", "EMBREE_MAX_ISA": "SSE2" } @@ -51,7 +49,6 @@ "cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithAssert", "EMBREE_SYCL_SUPPORT": "ON", - "EMBREE_SYCL_L0_RTAS_BUILDER" : "ON", "EMBREE_SYCL_AOT_DEVICES": "none", "EMBREE_MAX_ISA": "AVX512" } @@ -66,7 +63,6 @@ "cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithAssert", "EMBREE_SYCL_SUPPORT": "ON", - "EMBREE_SYCL_L0_RTAS_BUILDER" : "OFF", "EMBREE_SYCL_AOT_DEVICES": "none", "EMBREE_MAX_ISA": "SSE2" } @@ -77,7 +73,6 @@ "cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithAssert", "EMBREE_SYCL_SUPPORT": "ON", - "EMBREE_SYCL_L0_RTAS_BUILDER" : "OFF", "EMBREE_SYCL_AOT_DEVICES": "none", "EMBREE_MAX_ISA": "SSE2" } @@ -88,7 +83,6 @@ "cacheVariables": { "CMAKE_BUILD_TYPE": "Release", "EMBREE_SYCL_SUPPORT": "ON", - "EMBREE_SYCL_L0_RTAS_BUILDER" : "ON", "EMBREE_SYCL_AOT_DEVICES": "none", "EMBREE_MAX_ISA": "SSE2" } @@ -99,7 +93,6 @@ "cacheVariables": { "CMAKE_BUILD_TYPE": "Release", "EMBREE_SYCL_SUPPORT": "ON", - "EMBREE_SYCL_L0_RTAS_BUILDER" : "ON", "EMBREE_SYCL_AOT_DEVICES": "none", "EMBREE_MAX_ISA": "SSE2" } diff --git a/scripts/cmake-presets/integrate.json b/scripts/cmake-presets/integrate.json index 9a1fc84560..09904eb13a 100644 --- a/scripts/cmake-presets/integrate.json +++ b/scripts/cmake-presets/integrate.json @@ -115,7 +115,6 @@ "cacheVariables": { "CMAKE_BUILD_TYPE": "Release", "EMBREE_SYCL_SUPPORT": "ON", - "EMBREE_SYCL_L0_RTAS_BUILDER" : "OFF", "EMBREE_SYCL_AOT_DEVICES": "none", "EMBREE_MAX_ISA": "AVX512" } @@ -127,7 +126,6 @@ "cacheVariables": { "CMAKE_BUILD_TYPE": "Release", "EMBREE_SYCL_SUPPORT": "ON", - "EMBREE_SYCL_L0_RTAS_BUILDER" : "OFF", "EMBREE_SYCL_AOT_DEVICES": "none", "EMBREE_MAX_ISA": "SSE2" } diff --git a/scripts/cmake-presets/nightly.json b/scripts/cmake-presets/nightly.json index c493884795..e74ca3f896 100644 --- a/scripts/cmake-presets/nightly.json +++ b/scripts/cmake-presets/nightly.json @@ -18,30 +18,17 @@ "cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithDebInfo", "EMBREE_SYCL_SUPPORT": "ON", - "EMBREE_SYCL_L0_RTAS_BUILDER" : "OFF", "EMBREE_SYCL_AOT_DEVICES": "none", "EMBREE_MAX_ISA": "SSE2", "EMBREE_MAX_INSTANCE_LEVEL_COUNT": "2" } }, - { - "name": "nightly-linux-DG2-INTERNAL-L0RTAS", - "inherits": ["package-linux", "env", "dpcpp", "ispc_NEWEST", "tbb_NEWEST"], - "cacheVariables": { - "CMAKE_BUILD_TYPE": "RelWithDebInfo", - "EMBREE_SYCL_SUPPORT": "ON", - "EMBREE_SYCL_L0_RTAS_BUILDER" : "ON", - "EMBREE_SYCL_AOT_DEVICES": "none", - "EMBREE_MAX_ISA": "SSE2" - } - }, { "name": "nightly-linux-PVC-JIT", "inherits": ["package-linux", "env", "dpcpp", "ispc_NEWEST", "tbb_NEWEST"], "cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithDebInfo", "EMBREE_SYCL_SUPPORT": "ON", - "EMBREE_SYCL_L0_RTAS_BUILDER" : "OFF", "EMBREE_SYCL_AOT_DEVICES": "none", "EMBREE_MAX_ISA": "SSE2" } @@ -52,7 +39,6 @@ "cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithDebInfo", "EMBREE_SYCL_SUPPORT": "ON", - "EMBREE_SYCL_L0_RTAS_BUILDER" : "OFF", "EMBREE_SYCL_AOT_DEVICES": "none", "EMBREE_MAX_ISA": "SSE2", "EMBREE_MAX_INSTANCE_LEVEL_COUNT": "2" @@ -64,7 +50,6 @@ "cacheVariables": { "CMAKE_BUILD_TYPE": "Release", "EMBREE_SYCL_SUPPORT": "ON", - "EMBREE_SYCL_L0_RTAS_BUILDER" : "OFF", "EMBREE_SYCL_AOT_DEVICES": "none", "EMBREE_MAX_ISA": "SSE2" } @@ -75,7 +60,6 @@ "cacheVariables": { "CMAKE_BUILD_TYPE": "Release", "EMBREE_SYCL_SUPPORT": "ON", - "EMBREE_SYCL_L0_RTAS_BUILDER" : "ON", "EMBREE_SYCL_AOT_DEVICES": "none", "EMBREE_MAX_ISA": "AVX512" } @@ -86,7 +70,6 @@ "cacheVariables": { "CMAKE_BUILD_TYPE": "Release", "EMBREE_SYCL_SUPPORT": "ON", - "EMBREE_SYCL_L0_RTAS_BUILDER" : "ON", "EMBREE_SYCL_AOT_DEVICES": "none", "EMBREE_MAX_ISA": "AVX512" } diff --git a/scripts/cmake-presets/performance.json b/scripts/cmake-presets/performance.json index 410034c3d6..7fe6fbe398 100644 --- a/scripts/cmake-presets/performance.json +++ b/scripts/cmake-presets/performance.json @@ -29,7 +29,6 @@ "binaryDir": "${sourceDir}/build", "cacheVariables": { "EMBREE_SYCL_SUPPORT": "ON", - "EMBREE_SYCL_L0_RTAS_BUILDER" : "OFF", "EMBREE_SYCL_AOT_DEVICES": "none" } } diff --git a/scripts/cmake-presets/release.json b/scripts/cmake-presets/release.json index b878cda015..8322ea19c4 100644 --- a/scripts/cmake-presets/release.json +++ b/scripts/cmake-presets/release.json @@ -58,7 +58,6 @@ "cacheVariables": { "CMAKE_BUILD_TYPE": "Release", "EMBREE_SYCL_SUPPORT": "ON", - "EMBREE_SYCL_L0_RTAS_BUILDER" : "OFF", "EMBREE_SYCL_AOT_DEVICES": "none", "EMBREE_MAX_ISA": "AVX512" } @@ -70,7 +69,6 @@ "cacheVariables": { "CMAKE_BUILD_TYPE": "Release", "EMBREE_SYCL_SUPPORT": "ON", - "EMBREE_SYCL_L0_RTAS_BUILDER" : "OFF", "EMBREE_SYCL_AOT_DEVICES": "none", "EMBREE_MAX_ISA": "AVX512" } diff --git a/tutorials/common/sycl/util.h b/tutorials/common/sycl/util.h index d8c5e9006c..8f2d829524 100644 --- a/tutorials/common/sycl/util.h +++ b/tutorials/common/sycl/util.h @@ -30,7 +30,7 @@ namespace embree { has_raytracing = true; if (has_raytracing && strncmp("ZE_experimental_rtas_builder",extensions[i].name,sizeof(extensions[i].name)) == 0) { - ze_result_t result_rtas_builder = ZeWrapper::initRTASBuilder(hDriver,ZeWrapper::RTAS_BUILD_MODE::LEVEL_ZERO); + ze_result_t result_rtas_builder = ZeWrapper::initRTASBuilder(hDriver); if (result_rtas_builder == ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE) { return; } @@ -72,11 +72,9 @@ namespace embree { throw std::runtime_error("No raytracing capable SYCL platform found. Please install a recent driver. On Linux, make sure that the intel-level-zero-gpu package is installed."); } -#if defined(EMBREE_SYCL_L0_RTAS_BUILDER) if (!has_accel_builder_support) { throw std::runtime_error("No driver support for acceleration structure building found. Please install a recent driver. On Linux, make sure that the package intel-level-zero-gpu-raytracing is installed."); } -#endif } inline void printDeviceInfo(sycl::device const& device)